2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
// Lookup used by ff_h264_decode_mb_cavlc: maps the ue-golomb coded cbp index
// of a macroblock that is not intra4x4 to its coded block pattern value.
// All 16 entries are in the range 0..15.
// NOTE(review): presumably the variant used when no chroma residual is coded;
// the condition that selects the _gray tables is not visible here - confirm.
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
// Same kind of mapping, applied when the macroblock is intra4x4.
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
// Code lengths (_len) and code words (_bits) for the two chroma DC coeff_token
// VLCs. ff_h264_decode_init_vlc passes them to init_vlc as 4*5 one-byte
// entries (chroma_dc_coeff_token_vlc) and 4*9 one-byte entries
// (chroma422_dc_coeff_token_vlc). The initializer rows are not visible in
// this excerpt.
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
// Code lengths for the four coeff_token VLCs: row i (4*17 entries) is handed
// to init_vlc to build coeff_token_vlc[i] in ff_h264_decode_init_vlc.
// decode_residual splits the decoded symbol into total_coeff (symbol >> 2)
// and trailing_ones (symbol & 3).
// NOTE(review): presumably entry index == symbol, i.e. 4*total_coeff +
// trailing_ones, and a length of 0 marks a combination without a code -
// confirm against init_vlc.
// In the last row every visible entry is either 6 or 0.
89 static const uint8_t coeff_token_len[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
// Code words matching coeff_token_len entry for entry: row i is passed to
// init_vlc together with coeff_token_len[i] to build coeff_token_vlc[i].
// Each code word is stored in one byte (uint8_t).
120 static const uint8_t coeff_token_bits[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
// Code lengths for the total_zeros VLCs. ff_h264_decode_init_vlc builds
// total_zeros_vlc[i] from row i (16 entries per row); decode_residual picks
// the table with index total_coeff - 1. Rows with fewer than 16 initializers
// are zero-filled by the C initialization rules. Only the first rows are
// visible in this excerpt.
151 static const uint8_t total_zeros_len[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
// Code words matching total_zeros_len row for row and entry for entry.
169 static const uint8_t total_zeros_bits[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
// Code lengths and code words for the chroma DC total_zeros VLCs.
// ff_h264_decode_init_vlc builds chroma_dc_total_zeros_vlc[i] from row i with
// 4 entries per row, and chroma422_dc_total_zeros_vlc[i] from row i with
// 8 entries per row. decode_residual indexes both with total_coeff - 1.
// Several initializer rows are not visible in this excerpt.
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
// Code lengths and code words for the run_before VLCs. In
// ff_h264_decode_init_vlc row i builds run_vlc[i] and row 6 builds run7_vlc.
// decode_residual reads run_vlc[zeros_left - 1] or run7_vlc.
// Only the last row of each table is visible in this excerpt.
219 static const uint8_t run_len[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
229 static const uint8_t run_bits[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
// Static storage for every VLC used by the CAVLC decoder. For each VLC there
// is a table buffer of VLC_TYPE pairs and a *_size constant;
// ff_h264_decode_init_vlc points vlc.table at the buffer and sets
// vlc.table_allocated to the size constant before calling init_vlc.

// The four coeff_token VLCs share one packed buffer; the per-VLC sizes below
// sum to the buffer length (520+332+280+256), which init asserts.
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
// Buffer for run7_vlc (its VLC declaration is not visible in this excerpt).
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
// cavlc_level_tab[suffix_length][next LEVEL_TAB_BITS bits] holds two values:
// [0] is a decoded level, or an escape value >= 100 (prefix + 100),
// [1] is the number of bits to consume.
// It is filled by init_cavlc_level_tab and read by decode_residual.
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
// Lookup widths in bits: passed as the nb_bits argument to init_vlc and as
// the bits argument to get_vlc2. Most table buffers above have exactly
// 1 << bits entries; the coeff_token buffers (520, 332, 280 entries for
// 8 bits) and run7 (96 entries for 6 bits) are larger.
// NOTE(review): the larger buffers presumably hold second-level subtables,
// since those VLCs are read with a get_vlc2 depth of 2 - confirm.
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
284 * Get the predicted number of non-zero coefficients.
285 * @param n block index
// Predicts the coefficient count of block n from its cached neighbours.
// NOTE(review): the declaration of i and the return statement are not visible
// in this excerpt; i is presumably derived from left and top, and the trace
// line suggests the result is i&31 - confirm.
287 static inline int pred_non_zero_count(H264Context *h, int n){
// scan8 maps the block index to its slot in the 8-wide cache layout, so
// index8 - 1 is the left neighbour and index8 - 8 the one above.
288 const int index8= scan8[n];
289 const int left= h->non_zero_count_cache[index8 - 1];
290 const int top = h->non_zero_count_cache[index8 - 8];
// For values below 64, halve with rounding up.
293 if(i<64) i= (i+1)>>1;
295 tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Precomputes cavlc_level_tab for every suffix_length 0..6 and every
// LEVEL_TAB_BITS-bit pattern i, so decode_residual can resolve short level
// codes with one table lookup.
300 static av_cold void init_cavlc_level_tab(void){
304 for(suffix_length=0; suffix_length<7; suffix_length++){
305 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// NOTE(review): assuming av_log2 returns the index of the highest set bit,
// prefix is the number of leading zero bits of i within the window - confirm.
306 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Case 1: prefix zeros, the terminating 1 and the suffix all fit in the
// window, so the level can be fully decoded here.
308 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
// The shift keeps the leading 1 plus suffix_length following bits; the
// subtraction of (1 << suffix_length) removes that leading 1 again.
309 int level_code = (prefix << suffix_length) +
310 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
// Map the unsigned code to a signed level: 0 -> 1, 1 -> -1, 2 -> 2, 3 -> -2 ...
// mask is all ones for odd codes, which negates the halved value.
311 int mask = -(level_code&1);
312 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313 cavlc_level_tab[suffix_length][i][0]= level_code;
314 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only the prefix (zeros plus terminating 1) fits. Store the escape
// value prefix+100 and consume just the prefix bits.
315 }else if(prefix + 1 <= LEVEL_TAB_BITS){
316 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case (its else line is not visible in this excerpt): store the
// escape value LEVEL_TAB_BITS+100 and consume the whole window;
// decode_residual then adds get_level_prefix to the prefix.
319 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// Builds every static VLC used by the CAVLC decoder and then fills
// cavlc_level_tab. Each VLC is pointed at its static table buffer and
// table_allocated is set to that buffer size before init_vlc is called with
// INIT_VLC_USE_NEW_STATIC. All length and code tables are arrays of single
// bytes, hence the wrap and size arguments of 1, 1.
// NOTE(review): the loop headers and local declarations are not visible in
// this excerpt.
326 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token, 4*5 codes.
334 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337 &chroma_dc_coeff_token_len [0], 1, 1,
338 &chroma_dc_coeff_token_bits[0], 1, 1,
339 INIT_VLC_USE_NEW_STATIC);
// Chroma DC coeff_token for the chroma422 variant, 4*9 codes.
341 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344 &chroma422_dc_coeff_token_len [0], 1, 1,
345 &chroma422_dc_coeff_token_bits[0], 1, 1,
346 INIT_VLC_USE_NEW_STATIC);
// coeff_token VLC i takes its slice of the packed buffer starting at offset;
// offset advances by the size reserved for that VLC.
350 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353 &coeff_token_len [i][0], 1, 1,
354 &coeff_token_bits[i][0], 1, 1,
355 INIT_VLC_USE_NEW_STATIC);
356 offset += coeff_token_vlc_tables_size[i];
359 * This is a one time safety check to make sure that
360 * the packed static coeff_token_vlc table sizes
361 * were initialized correctly.
363 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros, 4 codes per table.
366 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368 init_vlc(&chroma_dc_total_zeros_vlc[i],
369 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370 &chroma_dc_total_zeros_len [i][0], 1, 1,
371 &chroma_dc_total_zeros_bits[i][0], 1, 1,
372 INIT_VLC_USE_NEW_STATIC);
// Chroma DC total_zeros for the chroma422 variant, 8 codes per table.
376 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378 init_vlc(&chroma422_dc_total_zeros_vlc[i],
379 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380 &chroma422_dc_total_zeros_len [i][0], 1, 1,
381 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382 INIT_VLC_USE_NEW_STATIC);
// total_zeros, 16 codes per table.
386 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388 init_vlc(&total_zeros_vlc[i],
389 TOTAL_ZEROS_VLC_BITS, 16,
390 &total_zeros_len [i][0], 1, 1,
391 &total_zeros_bits[i][0], 1, 1,
392 INIT_VLC_USE_NEW_STATIC);
// run_before tables built from rows run_len[i] / run_bits[i]
// (the line carrying the bit width and code count is not visible here).
396 run_vlc[i].table = run_vlc_tables[i];
397 run_vlc[i].table_allocated = run_vlc_tables_size;
398 init_vlc(&run_vlc[i],
400 &run_len [i][0], 1, 1,
401 &run_bits[i][0], 1, 1,
402 INIT_VLC_USE_NEW_STATIC);
// run7_vlc is built from row 6 of run_len / run_bits with 16 codes.
// NOTE(review): the next statement ends with a comma, not a semicolon. The
// comma operator joins it with the following assignment, so behavior is
// unaffected, but a semicolon was presumably intended.
404 run7_vlc.table = run7_vlc_table,
405 run7_vlc.table_allocated = run7_vlc_table_size;
406 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407 &run_len [6][0], 1, 1,
408 &run_bits[6][0], 1, 1,
409 INIT_VLC_USE_NEW_STATIC);
// Finally precompute the level lookup table.
411 init_cavlc_level_tab();
// Reads a level prefix directly from the bit reader cache; decode_residual
// calls it when the level table lookup returned the LEVEL_TAB_BITS escape.
// NOTE(review): the return statement is not visible in this excerpt; the
// debug line prints log-1, which is presumably the returned prefix - confirm.
418 static inline int get_level_prefix(GetBitContext *gb){
423 UPDATE_CACHE(re, gb);
424 buf=GET_CACHE(re, gb);
// NOTE(review): assuming av_log2 returns the index of the highest set bit of
// the 32-bit cache word, log is the count of leading zero bits plus the
// terminating 1 bit - confirm.
426 log= 32 - av_log2(buf);
// Debug output of the consumed bits (any enclosing conditional is not visible).
428 print_bin(buf>>(32-log), log);
429 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the log bits that were examined and write the reader state back.
432 LAST_SKIP_BITS(re, gb, log);
433 CLOSE_READER(re, gb);
439 * Decode a residual block.
440 * @param n block index
441 * @param scantable scan order table; each entry is the position in block where a coefficient is stored
442 * @param max_coeff number of coefficients in the block
443 * @return <0 if an error occurred
// Decodes one CAVLC coded residual block from gb into block.
// Steps visible below: read coeff_token (giving total_coeff and
// trailing_ones), decode the levels into level[], read total_zeros into
// zeros_left, then read the run_before values and store each level at the
// position given by scantable. When n is below LUMA_DC_BLOCK_INDEX the stored
// value is level * qmul[position], rounded and shifted right by 6; otherwise
// the raw level is stored.
// NOTE(review): several lines of this function (branch headers, returns, some
// declarations) are not visible in this excerpt.
445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
// Selects one of the four coeff_token VLCs from the predicted coefficient count.
446 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
450 //FIXME put trailing_ones into the context
// Chroma DC blocks use their own coeff_token VLCs (the conditions choosing
// between the two reads are not visible in this excerpt).
454 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
456 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457 total_coeff= coeff_token>>2;
// Luma DC blocks: predict from block (n - LUMA_DC_BLOCK_INDEX)*16, then read
// coeff_token with the VLC chosen by that prediction.
459 if(n >= LUMA_DC_BLOCK_INDEX){
460 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462 total_coeff= coeff_token>>2;
// Other blocks: predict from block n itself.
464 total_coeff= pred_non_zero_count(h, n);
465 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466 total_coeff= coeff_token>>2;
// Record the count so later blocks can use it for their prediction.
469 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
471 //FIXME set last_non_zero?
// The comparison is done in unsigned arithmetic, so a negative total_coeff
// is rejected as well as one larger than max_coeff.
475 if(total_coeff > (unsigned)max_coeff) {
476 av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
// The low two bits of coeff_token are the number of trailing ones.
480 trailing_ones= coeff_token&3;
481 tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482 assert(total_coeff<=16);
// Peek 3 bits but consume only trailing_ones of them. Bits 2, 1, 0 of i are
// the sign bits of level[0], level[1], level[2]: set gives -1, clear gives +1.
484 i = show_bits(gb, 3);
485 skip_bits(gb, trailing_ones);
486 level[0] = 1-((i&4)>>1);
487 level[1] = 1-((i&2) );
488 level[2] = 1-((i&1)<<1);
490 if(trailing_ones<total_coeff) {
// Initial suffix_length is 1 only when total_coeff > 10 and trailing_ones < 3.
// Both operands are 0/1 comparison results, so bitwise & acts as logical and.
492 int suffix_length = total_coeff > 10 & trailing_ones < 3;
// Table lookup on the next LEVEL_TAB_BITS bits: entry [0] is a level or an
// escape value >= 100, entry [1] is the number of bits to consume.
493 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
496 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Escape path: the table only resolved the prefix (stored as prefix + 100).
497 if(level_code >= 100){
498 prefix= level_code - 100;
// The whole window was zero bits; keep counting the prefix in the bitstream.
499 if(prefix == LEVEL_TAB_BITS)
500 prefix += get_level_prefix(gb);
502 //first coefficient has suffix_length equal to 0 or 1
503 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
505 level_code= (prefix<<1) + get_bits1(gb); //part
507 level_code= prefix; //part
508 }else if(prefix==14){
510 level_code= (prefix<<1) + get_bits1(gb); //part
512 level_code= prefix + get_bits(gb, 4); //part
514 level_code= 30 + get_bits(gb, prefix-3); //part
517 av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
520 level_code += (1<<(prefix-3))-4096;
524 if(trailing_ones < 3) level_code += 2;
// Convert the unsigned code to a signed level (odd codes become negative).
527 mask= -(level_code&1);
528 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table path: level_code is already a signed level. When trailing_ones < 3
// its magnitude is increased by one.
// NOTE(review): relies on level_code>>31 giving -1 for negative values
// (arithmetic shift of a 32-bit int) - confirm.
530 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// Unsigned range check: suffix_length becomes 2 when level_code lies
// outside -3..3, else 1.
532 suffix_length = 1 + (level_code + 3U > 6U);
533 level[trailing_ones]= level_code;
536 //remaining coefficients have suffix_length > 0
537 for(i=trailing_ones+1;i<total_coeff;i++) {
538 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
539 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
540 level_code= cavlc_level_tab[suffix_length][bitsi][0];
542 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Same escape handling as for the first coefficient.
543 if(level_code >= 100){
544 prefix= level_code - 100;
545 if(prefix == LEVEL_TAB_BITS){
546 prefix += get_level_prefix(gb);
549 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
553 level_code += (1<<(prefix-3))-4096;
555 mask= -(level_code&1);
556 level_code= (((2+level_code)>>1) ^ mask) - mask;
558 level[i]= level_code;
// Unsigned range check: suffix_length grows by one when level_code lies
// outside -limit..limit for the current suffix_length.
559 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
// A full block has no zeros to signal (the assignment is not visible here).
563 if(total_coeff == max_coeff)
// Otherwise read total_zeros; blocks with max_coeff <= 8 use one of the two
// chroma DC tables, all others the regular total_zeros tables.
566 if (max_coeff <= 8) {
568 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
569 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
571 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
572 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
574 zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
// STORE_BLOCK(type) writes the levels into block, viewed as an array of type.
// scantable is first moved to the entry at zeros_left + total_coeff - 1 and
// then walked backwards by 1 + run_before per coefficient. While zeros remain,
// run_before is read from run_vlc[zeros_left - 1] or from run7_vlc.
// For n >= LUMA_DC_BLOCK_INDEX the levels are stored unscaled; otherwise each
// is multiplied by qmul at the same position, 32 is added and the result is
// shifted right by 6.
578 #define STORE_BLOCK(type) \
579 scantable += zeros_left + total_coeff - 1; \
580 if(n >= LUMA_DC_BLOCK_INDEX){ \
581 ((type*)block)[*scantable] = level[0]; \
582 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
584 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
586 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
587 zeros_left -= run_before; \
588 scantable -= 1 + run_before; \
589 ((type*)block)[*scantable]= level[i]; \
591 for(;i<total_coeff;i++) { \
593 ((type*)block)[*scantable]= level[i]; \
596 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
597 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
599 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
601 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
602 zeros_left -= run_before; \
603 scantable -= 1 + run_before; \
604 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
606 for(;i<total_coeff;i++) { \
608 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
612 if (zeros_left < 0) {
// A negative zeros_left is reported as invalid data.
613 av_log(h->avctx, AV_LOG_ERROR,
614 "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
615 return AVERROR_INVALIDDATA;
// NOTE(review): presumably selects the element type passed to STORE_BLOCK
// (the branch bodies are not visible in this excerpt) - confirm.
618 if (h->pixel_shift) {
// Decodes the luma-style residual of plane p for the current macroblock.
// Intra16x16 macroblocks get a DC block plus, depending on cbp, AC blocks;
// other macroblocks are decoded per 8x8 block according to the cbp bits.
// NOTE(review): the return statements are mostly not visible in this excerpt.
// The caller treats a negative result as an error and ORs the result shifted
// left by 12 into h->cbp_table, so success presumably returns new_cbp - confirm.
627 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
// Plane 0 uses the luma quantizer, planes 1 and 2 the matching chroma quantizer.
629 int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
630 if(IS_INTRA16x16(mb_type)){
// Clear the DC coefficient buffer of this plane before decoding into it.
631 AV_ZERO128(h->mb_luma_dc[p]+0);
632 AV_ZERO128(h->mb_luma_dc[p]+8);
633 AV_ZERO128(h->mb_luma_dc[p]+16);
634 AV_ZERO128(h->mb_luma_dc[p]+24);
// DC block: read from intra_gb_ptr, up to 16 coefficients, no qmul table.
635 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
636 return -1; //FIXME continue if partitioned and other return -1 too
// For intra16x16 the low four cbp bits are expected to be all clear or all set.
639 assert((cbp&15) == 0 || (cbp&15) == 15);
// AC blocks: the scan starts at its second entry and holds at most 15
// coefficients, since the DC coefficient was decoded separately above.
642 for(i8x8=0; i8x8<4; i8x8++){
643 for(i4x4=0; i4x4<4; i4x4++){
644 const int index= i4x4 + 4*i8x8 + p*16;
645 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
646 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
// Mark all 16 blocks of this plane as having no coefficients
// (the enclosing branch is not visible in this excerpt).
653 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Quantization matrix index: 0..2 for intra, 3..5 otherwise, offset by plane.
657 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
658 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
660 for(i8x8=0; i8x8<4; i8x8++){
662 if(IS_8x8DCT(mb_type)){
// The shift applies to the whole sum because + binds tighter than <<.
663 int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
// An 8x8 block is read as four 16-coefficient passes into the same buffer,
// each with its own 16-entry section of scan8x8.
665 for(i4x4=0; i4x4<4; i4x4++){
666 const int index= i4x4 + 4*i8x8 + p*16;
667 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
668 h->dequant8_coeff[cqm][qscale], 16) < 0 )
// Sum the four counts into the first slot and set bit i8x8 of new_cbp
// when the 8x8 block has any coefficient.
671 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
672 nnz[0] += nnz[1] + nnz[8] + nnz[9];
673 new_cbp |= !!nnz[0] << i8x8;
// 4x4 transform: four independent 16-coefficient blocks.
675 for(i4x4=0; i4x4<4; i4x4++){
676 const int index= i4x4 + 4*i8x8 + p*16;
677 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
678 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
// NOTE(review): unlike the 8x8 branch this ORs the raw count (not a 0/1
// flag) shifted by i8x8, so bits above i8x8 can also get set in new_cbp -
// confirm this is intended.
681 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
// Clear the four counts of this 8x8 block
// (the enclosing else branch is not visible in this excerpt).
685 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
686 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
693 int ff_h264_decode_mb_cavlc(H264Context *h){
696 unsigned int mb_type, cbp;
697 int dct8x8_allowed= h->pps.transform_8x8_mode;
698 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
699 const int pixel_shift = h->pixel_shift;
701 mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
703 tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
704 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
706 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
707 if(h->mb_skip_run==-1)
708 h->mb_skip_run= get_ue_golomb(&h->gb);
710 if (h->mb_skip_run--) {
711 if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
712 if(h->mb_skip_run==0)
713 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
719 if (FRAME_MBAFF(h)) {
720 if( (h->mb_y&1) == 0 )
721 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
724 h->prev_mb_skipped= 0;
726 mb_type= get_ue_golomb(&h->gb);
727 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
729 partition_count= b_mb_type_info[mb_type].partition_count;
730 mb_type= b_mb_type_info[mb_type].type;
733 goto decode_intra_mb;
735 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
737 partition_count= p_mb_type_info[mb_type].partition_count;
738 mb_type= p_mb_type_info[mb_type].type;
741 goto decode_intra_mb;
744 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
745 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
749 av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
753 cbp= i_mb_type_info[mb_type].cbp;
754 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
755 mb_type= i_mb_type_info[mb_type].type;
759 mb_type |= MB_TYPE_INTERLACED;
761 h->slice_table[ mb_xy ]= h->slice_num;
763 if(IS_INTRA_PCM(mb_type)){
764 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
765 h->sps.bit_depth_luma;
767 // We assume these blocks are very rare so we do not optimize it.
768 h->intra_pcm_ptr = align_get_bits(&h->gb);
769 skip_bits_long(&h->gb, mb_size);
771 // In deblocking, the quantizer is 0
772 h->cur_pic.qscale_table[mb_xy] = 0;
773 // All coeffs are present
774 memset(h->non_zero_count[mb_xy], 16, 48);
776 h->cur_pic.mb_type[mb_xy] = mb_type;
780 fill_decode_neighbors(h, mb_type);
781 fill_decode_caches(h, mb_type);
784 if(IS_INTRA(mb_type)){
786 // init_top_left_availability(h);
787 if(IS_INTRA4x4(mb_type)){
790 if(dct8x8_allowed && get_bits1(&h->gb)){
791 mb_type |= MB_TYPE_8x8DCT;
795 // fill_intra4x4_pred_table(h);
796 for(i=0; i<16; i+=di){
797 int mode= pred_intra_mode(h, i);
799 if(!get_bits1(&h->gb)){
800 const int rem_mode= get_bits(&h->gb, 3);
801 mode = rem_mode + (rem_mode >= mode);
805 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
807 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
809 write_back_intra_pred_mode(h);
810 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
813 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
814 if(h->intra16x16_pred_mode < 0)
818 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
821 h->chroma_pred_mode= pred_mode;
823 h->chroma_pred_mode = DC_128_PRED8x8;
825 }else if(partition_count==4){
826 int i, j, sub_partition_count[4], list, ref[2][4];
828 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
830 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
831 if(h->sub_mb_type[i] >=13){
832 av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
835 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
836 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
838 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
839 ff_h264_pred_direct_motion(h, &mb_type);
840 h->ref_cache[0][scan8[4]] =
841 h->ref_cache[1][scan8[4]] =
842 h->ref_cache[0][scan8[12]] =
843 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
846 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
848 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
849 if(h->sub_mb_type[i] >=4){
850 av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
853 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
854 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
858 for(list=0; list<h->list_count; list++){
859 int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF(h);
861 if(IS_DIRECT(h->sub_mb_type[i])) continue;
862 if(IS_DIR(h->sub_mb_type[i], 0, list)){
866 }else if(ref_count == 2){
867 tmp= get_bits1(&h->gb)^1;
869 tmp= get_ue_golomb_31(&h->gb);
871 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
884 dct8x8_allowed = get_dct8x8_allowed(h);
886 for(list=0; list<h->list_count; list++){
888 if(IS_DIRECT(h->sub_mb_type[i])) {
889 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
892 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
893 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
895 if(IS_DIR(h->sub_mb_type[i], 0, list)){
896 const int sub_mb_type= h->sub_mb_type[i];
897 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
898 for(j=0; j<sub_partition_count[i]; j++){
900 const int index= 4*i + block_width*j;
901 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
902 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
903 mx += get_se_golomb(&h->gb);
904 my += get_se_golomb(&h->gb);
905 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
907 if(IS_SUB_8X8(sub_mb_type)){
909 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
911 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
912 }else if(IS_SUB_8X4(sub_mb_type)){
913 mv_cache[ 1 ][0]= mx;
914 mv_cache[ 1 ][1]= my;
915 }else if(IS_SUB_4X8(sub_mb_type)){
916 mv_cache[ 8 ][0]= mx;
917 mv_cache[ 8 ][1]= my;
919 mv_cache[ 0 ][0]= mx;
920 mv_cache[ 0 ][1]= my;
923 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
929 }else if(IS_DIRECT(mb_type)){
930 ff_h264_pred_direct_motion(h, &mb_type);
931 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
934 //FIXME we should set ref_idx_l? to 0 if we use that later ...
935 if(IS_16X16(mb_type)){
936 for(list=0; list<h->list_count; list++){
938 if(IS_DIR(mb_type, 0, list)){
939 int rc = h->ref_count[list] << MB_MBAFF(h);
942 } else if (rc == 2) {
943 val= get_bits1(&h->gb)^1;
945 val= get_ue_golomb_31(&h->gb);
947 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
951 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
954 for(list=0; list<h->list_count; list++){
955 if(IS_DIR(mb_type, 0, list)){
956 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
957 mx += get_se_golomb(&h->gb);
958 my += get_se_golomb(&h->gb);
959 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
961 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
965 else if(IS_16X8(mb_type)){
966 for(list=0; list<h->list_count; list++){
969 if(IS_DIR(mb_type, i, list)){
970 int rc = h->ref_count[list] << MB_MBAFF(h);
973 } else if (rc == 2) {
974 val= get_bits1(&h->gb)^1;
976 val= get_ue_golomb_31(&h->gb);
978 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
983 val= LIST_NOT_USED&0xFF;
984 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
987 for(list=0; list<h->list_count; list++){
990 if(IS_DIR(mb_type, i, list)){
991 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
992 mx += get_se_golomb(&h->gb);
993 my += get_se_golomb(&h->gb);
994 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
996 val= pack16to32(mx,my);
999 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1003 assert(IS_8X16(mb_type));
1004 for(list=0; list<h->list_count; list++){
1007 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1008 int rc = h->ref_count[list] << MB_MBAFF(h);
1011 } else if (rc == 2) {
1012 val= get_bits1(&h->gb)^1;
1014 val= get_ue_golomb_31(&h->gb);
1016 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1021 val= LIST_NOT_USED&0xFF;
1022 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1025 for(list=0; list<h->list_count; list++){
1028 if(IS_DIR(mb_type, i, list)){
1029 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1030 mx += get_se_golomb(&h->gb);
1031 my += get_se_golomb(&h->gb);
1032 tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1034 val= pack16to32(mx,my);
1037 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1043 if(IS_INTER(mb_type))
1044 write_back_motion(h, mb_type);
1046 if(!IS_INTRA16x16(mb_type)){
1047 cbp= get_ue_golomb(&h->gb);
1051 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1054 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1055 else cbp= golomb_to_inter_cbp [cbp];
1058 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1061 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1062 else cbp= golomb_to_inter_cbp_gray[cbp];
1066 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1067 mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1070 h->cbp_table[mb_xy]= cbp;
1071 h->cur_pic.mb_type[mb_xy] = mb_type;
1073 if(cbp || IS_INTRA16x16(mb_type)){
1074 int i4x4, i8x8, chroma_idx;
1077 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1078 const uint8_t *scan, *scan8x8;
1079 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1081 if(IS_INTERLACED(mb_type)){
1082 scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1083 scan= h->qscale ? h->field_scan : h->field_scan_q0;
1085 scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1086 scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1089 dquant= get_se_golomb(&h->gb);
1091 h->qscale += dquant;
1093 if(((unsigned)h->qscale) > max_qp){
1094 if(h->qscale<0) h->qscale+= max_qp+1;
1095 else h->qscale-= max_qp+1;
1096 if(((unsigned)h->qscale) > max_qp){
1097 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1102 h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1103 h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1105 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1108 h->cbp_table[mb_xy] |= ret << 12;
1110 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1113 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1116 } else if (CHROMA422) {
1118 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1119 if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1120 CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1127 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1128 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1129 int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1130 for (i8x8 = 0; i8x8 < 2; i8x8++) {
1131 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1132 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1133 if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1135 mb += 16 << pixel_shift;
1140 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1141 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1143 } else /* yuv420 */ {
1145 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1146 if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1152 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1153 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1154 for(i4x4=0; i4x4<4; i4x4++){
1155 const int index= 16 + 16*chroma_idx + i4x4;
1156 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1162 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1163 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1167 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1168 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1169 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1171 h->cur_pic.qscale_table[mb_xy] = h->qscale;
1172 write_back_non_zero_count(h);