2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
/* Coded block pattern lookup tables, 16 entries each, indexed by the value
 * read with get_ue_golomb() in ff_h264_decode_mb_cavlc(). They are used on the
 * second of the two cbp mapping branches there; the branch condition is not
 * visible in this excerpt (the _gray suffix suggests the luma-only case --
 * TODO confirm). Each table is a permutation of 0..15. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Code length and code bit tables for the chroma DC coeff_token VLCs.
 * ff_h264_decode_init_vlc() passes them to init_vlc() with 4*5 codes for the
 * plain chroma DC table and 4*9 codes for the chroma422 variant.
 * The initializer rows are not visible in this excerpt. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* Code lengths for the four coeff_token VLC tables (4*17 codes per table).
 * Row i is handed to init_vlc() for coeff_token_vlc[i] in
 * ff_h264_decode_init_vlc(). decode_residual() shifts the decoded symbol right
 * by 2 to get total_coeff and masks it with 3 to get trailing_ones, so the
 * entries are presumably ordered as total_coeff*4 + trailing_ones -- TODO
 * confirm. Some initializer rows and the braces separating the four tables are
 * not visible in this excerpt. */
89 static const uint8_t coeff_token_len[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Code bit patterns for the four coeff_token VLC tables, parallel to
 * coeff_token_len (same 4 x 4*17 layout). Row i is handed to init_vlc() for
 * coeff_token_vlc[i] in ff_h264_decode_init_vlc(). Some initializer rows and
 * the braces separating the four tables are not visible in this excerpt. */
120 static const uint8_t coeff_token_bits[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* Code lengths for the total_zeros VLCs. Row i is handed to init_vlc() with
 * 16 codes for total_zeros_vlc[i] in ff_h264_decode_init_vlc();
 * decode_residual() selects the VLC with total_coeff-1. Rows shorter than 16
 * entries are zero padded by the C initializer rules. Only the first rows are
 * visible in this excerpt. */
151 static const uint8_t total_zeros_len[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
/* Code bit patterns for the total_zeros VLCs, parallel to total_zeros_len.
 * Row i is handed to init_vlc() for total_zeros_vlc[i] in
 * ff_h264_decode_init_vlc(). Only the first rows are visible in this
 * excerpt. */
169 static const uint8_t total_zeros_bits[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
/* Code length and code bit tables for the chroma DC total_zeros VLCs.
 * ff_h264_decode_init_vlc() passes row i to init_vlc() with 4 codes for
 * chroma_dc_total_zeros_vlc[i] and with 8 codes for
 * chroma422_dc_total_zeros_vlc[i]; decode_residual() selects the VLC with
 * total_coeff-1. The rows of the two plain chroma DC tables and the later
 * rows of the chroma422 tables are not visible in this excerpt. */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
/* Code length and code bit tables for the run_before VLCs.
 * ff_h264_decode_init_vlc() uses rows 0..5 for run_vlc[0..5] and row 6 (with
 * 16 codes) for run7_vlc. Only one row of each table is visible in this
 * excerpt; judging by the embedded original line numbers it is presumably the
 * last row (index 6) -- TODO confirm. */
219 static const uint8_t run_len[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
229 static const uint8_t run_bits[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Statically allocated VLC decoders and their backing storage. Each VLC is
 * pointed at its table and given table_allocated in ff_h264_decode_init_vlc().
 * The four coeff_token tables share one packed array; the per-table entry
 * counts are in coeff_token_vlc_tables_size and sum to the array length
 * (520+332+280+256). For the remaining tables the size constant equals
 * 1 << the matching _VLC_BITS value, except run7 (96 entries for 6 bits).
 * The tables larger than 1 << bits presumably leave room for second-level
 * subtables -- TODO confirm.
 * NOTE(review): run7_vlc is used by the init code but its declaration is not
 * visible in this excerpt. */
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
/* Level lookup table, indexed by [suffix_length][next LEVEL_TAB_BITS bits].
 * Entry [0] holds either a fully decoded signed level or, when it is >= 100,
 * an escape value of 100 + prefix; entry [1] holds the number of bits to skip.
 * The table is filled by init_cavlc_level_tab() and read in
 * decode_residual(). */
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* First-level lookup widths, in bits, passed to init_vlc() and get_vlc2()
 * for each of the static VLC tables. */
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
284 * Get the predicted number of non-zero coefficients.
285 * @param n block index
/* Predict the number of non-zero coefficients of block n from its left and
 * top neighbours in h->non_zero_count_cache (addressed through scan8[n]).
 * The declaration of i and the return statement are not visible in this
 * excerpt. Judging by the trace line below, the result is presumably i&31,
 * with i derived from left and top -- TODO confirm against the full file. */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288 const int index8= scan8[n];
289 const int left= h->non_zero_count_cache[index8 - 1];
290 const int top = h->non_zero_count_cache[index8 - 8];
/* Values below 64 are halved with rounding up; larger values are kept. */
293 if(i<64) i= (i+1)>>1;
295 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fill cavlc_level_tab for every suffix_length (0..6) and every possible
 * LEVEL_TAB_BITS-wide bit pattern i.
 * Three cases are stored per entry:
 *  - the whole code (prefix, stop bit, suffix) fits in the window:
 *    [0] = decoded signed level, [1] = total code length;
 *  - only the prefix and stop bit fit: [0] = prefix + 100, [1] = prefix + 1,
 *    so the reader still has to fetch the suffix itself;
 *  - otherwise: [0] = LEVEL_TAB_BITS + 100, [1] = LEVEL_TAB_BITS, which
 *    decode_residual() extends by calling get_level_prefix().
 * The declarations of the loop variables and the line introducing the last
 * branch are not visible in this excerpt. */
300 static av_cold void init_cavlc_level_tab(void){
304 for(suffix_length=0; suffix_length<7; suffix_length++){
305 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* presumably the count of leading zero bits of i within the window;
 * relies on av_log2(), which is defined elsewhere -- TODO confirm */
306 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
308 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
/* Take the suffix_length bits following the leading one bit of i and
 * combine them with the prefix. */
309 int level_code = (prefix << suffix_length) +
310 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
/* Even codes map to positive levels, odd codes to negative levels
 * (0 -> 1, 1 -> -1, 2 -> 2, 3 -> -2, ...). */
311 int mask = -(level_code&1);
312 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313 cavlc_level_tab[suffix_length][i][0]= level_code;
314 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315 }else if(prefix + 1 <= LEVEL_TAB_BITS){
316 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
319 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* Initialize all static CAVLC tables of this file.
 * Every VLC object is pointed at its statically allocated table, told how
 * many entries that table has (table_allocated) and then built with
 * init_vlc() using INIT_VLC_USE_NEW_STATIC. Finally the level lookup table
 * is filled by init_cavlc_level_tab().
 * The loop headers iterating over i, the declaration of offset and any
 * run-once guard are not visible in this excerpt. */
326 av_cold void ff_h264_decode_init_vlc(void){
334 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337 &chroma_dc_coeff_token_len [0], 1, 1,
338 &chroma_dc_coeff_token_bits[0], 1, 1,
339 INIT_VLC_USE_NEW_STATIC);
341 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344 &chroma422_dc_coeff_token_len [0], 1, 1,
345 &chroma422_dc_coeff_token_bits[0], 1, 1,
346 INIT_VLC_USE_NEW_STATIC);
/* The coeff_token tables are carved out of one packed array; offset
 * advances by the size reserved for each table. */
350 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353 &coeff_token_len [i][0], 1, 1,
354 &coeff_token_bits[i][0], 1, 1,
355 INIT_VLC_USE_NEW_STATIC);
356 offset += coeff_token_vlc_tables_size[i];
359 * This is a one time safety check to make sure that
360 * the packed static coeff_token_vlc table sizes
361 * were initialized correctly.
363 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
366 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368 init_vlc(&chroma_dc_total_zeros_vlc[i],
369 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370 &chroma_dc_total_zeros_len [i][0], 1, 1,
371 &chroma_dc_total_zeros_bits[i][0], 1, 1,
372 INIT_VLC_USE_NEW_STATIC);
376 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378 init_vlc(&chroma422_dc_total_zeros_vlc[i],
379 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380 &chroma422_dc_total_zeros_len [i][0], 1, 1,
381 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382 INIT_VLC_USE_NEW_STATIC);
386 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388 init_vlc(&total_zeros_vlc[i],
389 TOTAL_ZEROS_VLC_BITS, 16,
390 &total_zeros_len [i][0], 1, 1,
391 &total_zeros_bits[i][0], 1, 1,
392 INIT_VLC_USE_NEW_STATIC);
396 run_vlc[i].table = run_vlc_tables[i];
397 run_vlc[i].table_allocated = run_vlc_tables_size;
398 init_vlc(&run_vlc[i],
400 &run_len [i][0], 1, 1,
401 &run_bits[i][0], 1, 1,
402 INIT_VLC_USE_NEW_STATIC);
/* NOTE(review): the next statement ends with a comma, not a semicolon, so
 * the two assignments form one comma expression. The effect is the same,
 * but a semicolon was probably intended. */
404 run7_vlc.table = run7_vlc_table,
405 run7_vlc.table_allocated = run7_vlc_table_size;
/* run7 uses the last row (index 6) of run_len and run_bits. */
406 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407 &run_len [6][0], 1, 1,
408 &run_bits[6][0], 1, 1,
409 INIT_VLC_USE_NEW_STATIC);
411 init_cavlc_level_tab();
/* Read a level prefix directly from the bit reader cache.
 * log is computed as 32 - av_log2(buf) and that many bits are skipped.
 * The OPEN_READER call, the declarations and the return statement are not
 * visible in this excerpt; the debug line prints log-1, so the function
 * presumably returns log-1 -- TODO confirm. */
418 static inline int get_level_prefix(GetBitContext *gb){
423 UPDATE_CACHE(re, gb);
424 buf=GET_CACHE(re, gb);
426 log= 32 - av_log2(buf);
/* Debug output; the preprocessor guard around it is not visible here. */
428 print_bin(buf>>(32-log), log);
429 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
432 LAST_SKIP_BITS(re, gb, log);
433 CLOSE_READER(re, gb);
439 * Decode a residual block.
440 * @param n block index
441 * @param scantable scantable
442 * @param max_coeff number of coefficients in the block
443 * @return <0 if an error occurred
/* Decode one CAVLC residual block into block.
 * Visible steps: read coeff_token (total_coeff = token >> 2, trailing_ones =
 * token & 3), derive the trailing one signs, decode the remaining levels via
 * cavlc_level_tab with escape handling, read total_zeros, then place the
 * levels into block through STORE_BLOCK.
 *   h          decoder context; non_zero_count_cache is read and updated
 *   gb         bit reader to parse from
 *   block      destination coefficients, accessed as the type given to
 *              STORE_BLOCK
 *   n          block index; values >= LUMA_DC_BLOCK_INDEX are stored without
 *              dequantization
 *   scantable  maps coefficient order to positions inside block
 *   qmul       dequantization factors, indexed by block position; callers
 *              pass NULL for DC blocks
 *   max_coeff  number of coefficients the block can hold
 * Several lines (declarations, branch conditions, return statements) are not
 * visible in this excerpt. */
445 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446 MpegEncContext * const s = &h->s;
447 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
449 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
451 //FIXME put trailing_ones into the context
// Chroma DC blocks use dedicated coeff_token tables; the conditions selecting
// between the two variants are not visible in this excerpt.
455 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
457 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458 total_coeff= coeff_token>>2;
// Other blocks pick one of the four coeff_token tables from the predicted
// non-zero count; total_coeff temporarily holds that prediction.
460 if(n >= LUMA_DC_BLOCK_INDEX){
461 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
462 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463 total_coeff= coeff_token>>2;
465 total_coeff= pred_non_zero_count(h, n);
466 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467 total_coeff= coeff_token>>2;
470 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
472 //FIXME set last_non_zero?
// Reject token counts larger than the block can hold.
476 if(total_coeff > (unsigned)max_coeff) {
477 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
481 trailing_ones= coeff_token&3;
482 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483 assert(total_coeff<=16);
// Peek three bits for the trailing one signs but consume only trailing_ones
// of them; a set bit yields -1, a clear bit +1.
485 i = show_bits(gb, 3);
486 skip_bits(gb, trailing_ones);
487 level[0] = 1-((i&4)>>1);
488 level[1] = 1-((i&2) );
489 level[2] = 1-((i&1)<<1);
491 if(trailing_ones<total_coeff) {
// Bitwise & on two comparison results: suffix_length starts at 0 or 1.
493 int suffix_length = total_coeff > 10 & trailing_ones < 3;
494 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
497 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Table values >= 100 are escapes carrying the prefix length, as stored by
// init_cavlc_level_tab(); a prefix equal to LEVEL_TAB_BITS continues in the
// bitstream and is extended with get_level_prefix().
498 if(level_code >= 100){
499 prefix= level_code - 100;
500 if(prefix == LEVEL_TAB_BITS)
501 prefix += get_level_prefix(gb);
503 //first coefficient has suffix_length equal to 0 or 1
504 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
506 level_code= (prefix<<1) + get_bits1(gb); //part
508 level_code= prefix; //part
509 }else if(prefix==14){
511 level_code= (prefix<<1) + get_bits1(gb); //part
513 level_code= prefix + get_bits(gb, 4); //part
515 level_code= 30 + get_bits(gb, prefix-3); //part
518 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
521 level_code += (1<<(prefix-3))-4096;
525 if(trailing_ones < 3) level_code += 2;
// Same even/odd to signed mapping as in init_cavlc_level_tab().
528 mask= -(level_code&1);
529 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Table hit: when trailing_ones < 3 the magnitude is increased by one,
// keeping the sign.
531 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// suffix_length becomes 2 when the level lies outside -3..3, else 1.
533 suffix_length = 1 + (level_code + 3U > 6U);
534 level[trailing_ones]= level_code;
537 //remaining coefficients have suffix_length > 0
538 for(i=trailing_ones+1;i<total_coeff;i++) {
539 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541 level_code= cavlc_level_tab[suffix_length][bitsi][0];
543 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544 if(level_code >= 100){
545 prefix= level_code - 100;
546 if(prefix == LEVEL_TAB_BITS){
547 prefix += get_level_prefix(gb);
550 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
552 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
554 level_code += (1<<(prefix-3))-4096;
556 mask= -(level_code&1);
557 level_code= (((2+level_code)>>1) ^ mask) - mask;
559 level[i]= level_code;
// Unsigned range test: suffix_length grows by one when the level lies
// outside -limit..limit for the current suffix_length.
560 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
// The statement guarded by the next condition is not visible in this
// excerpt; presumably no total_zeros element is read for a full block.
564 if(total_coeff == max_coeff)
// Blocks with at most 8 coefficients use the chroma DC total_zeros tables,
// larger blocks the generic ones; all are indexed with total_coeff-1.
567 if (max_coeff <= 8) {
569 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
570 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
572 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
573 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
575 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
// STORE_BLOCK starts at the scan position of the last coefficient and walks
// scantable backwards, reading a run_before value while zeros remain.
// For n >= LUMA_DC_BLOCK_INDEX the raw levels are stored; the other visible
// path stores (level * qmul + 32) >> 6.
579 #define STORE_BLOCK(type) \
580 scantable += zeros_left + total_coeff - 1; \
581 if(n >= LUMA_DC_BLOCK_INDEX){ \
582 ((type*)block)[*scantable] = level[0]; \
583 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
585 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
587 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
588 zeros_left -= run_before; \
589 scantable -= 1 + run_before; \
590 ((type*)block)[*scantable]= level[i]; \
592 for(;i<total_coeff;i++) { \
594 ((type*)block)[*scantable]= level[i]; \
597 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
598 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
600 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
602 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
603 zeros_left -= run_before; \
604 scantable -= 1 + run_before; \
605 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
607 for(;i<total_coeff;i++) { \
609 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
// The STORE_BLOCK invocations selected by pixel_shift are not visible here.
613 if (h->pixel_shift) {
620 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Decode the residual blocks of plane p that are coded like luma.
 * For p == 0 the quantizer is s->qscale, otherwise h->chroma_qp[p-1].
 * Intra16x16 macroblocks read one DC block plus up to sixteen 15-coefficient
 * AC blocks from h->intra_gb_ptr; all other types read from gb, either four
 * interleaved 4x4 passes per 8x8 block (8x8 transform) or plain 4x4 blocks.
 * A negative result signals an error. The caller shifts a non-negative
 * result left by 12 into cbp_table, so it is presumably the new_cbp mask
 * built below -- TODO confirm, the return statements and the tests on cbp
 * are not visible in this excerpt. */
627 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
629 MpegEncContext * const s = &h->s;
630 int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
631 if(IS_INTRA16x16(mb_type)){
/* Clear the DC coefficient buffer of this plane before decoding into it. */
632 AV_ZERO128(h->mb_luma_dc[p]+0);
633 AV_ZERO128(h->mb_luma_dc[p]+8);
634 AV_ZERO128(h->mb_luma_dc[p]+16);
635 AV_ZERO128(h->mb_luma_dc[p]+24);
636 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
637 return -1; //FIXME continue if partitioned and other return -1 too
640 assert((cbp&15) == 0 || (cbp&15) == 15);
/* AC blocks skip the first scan position (scan + 1) and hold 15
 * coefficients. */
643 for(i8x8=0; i8x8<4; i8x8++){
644 for(i4x4=0; i4x4<4; i4x4++){
645 const int index= i4x4 + 4*i8x8 + p*16;
646 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
647 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
/* Path without AC data: the 4x4 non-zero counts of this plane are reset. */
654 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
/* Quantization matrix index: 0..2 for intra, 3..5 for inter, offset by
 * plane. */
658 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
659 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
661 for(i8x8=0; i8x8<4; i8x8++){
663 if(IS_8x8DCT(mb_type)){
664 DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
/* Four 16-coefficient passes share one 8x8 buffer, each with its own
 * slice of the 8x8 scan table. */
666 for(i4x4=0; i4x4<4; i4x4++){
667 const int index= i4x4 + 4*i8x8 + p*16;
668 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
669 h->dequant8_coeff[cqm][qscale], 16) < 0 )
/* Sum the four 4x4 counts into the first entry of the 8x8 block. */
672 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
673 nnz[0] += nnz[1] + nnz[8] + nnz[9];
674 new_cbp |= !!nnz[0] << i8x8;
676 for(i4x4=0; i4x4<4; i4x4++){
677 const int index= i4x4 + 4*i8x8 + p*16;
678 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
679 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
682 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
/* Path for an 8x8 block without coded data: its four counts are cleared. */
686 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
687 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/**
 * Decode one macroblock from a CAVLC coded slice.
 * Visible stages: skip run handling for non-I slices, mb_type parsing,
 * the I_PCM raw sample path, intra prediction modes, sub-macroblock and
 * partition reference indices and motion vectors, coded block pattern,
 * quantizer delta and finally the luma and chroma residuals.
 * Many lines (declarations, loop headers, branch conditions and return
 * statements) are not visible in this excerpt, so the return convention
 * cannot be confirmed here.
 * @param h decoder context; the embedded MpegEncContext h->s supplies the
 *          bit reader and the macroblock position
 */
694 int ff_h264_decode_mb_cavlc(H264Context *h){
695 MpegEncContext * const s = &h->s;
698 unsigned int mb_type, cbp;
699 int dct8x8_allowed= h->pps.transform_8x8_mode;
// Separate chroma residual syntax is only decoded for chroma_format_idc 1 and 2.
700 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
701 const int pixel_shift = h->pixel_shift;
703 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
705 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// NOTE(review): the block comment opened on the next line is closed on a
// line that is not visible in this excerpt.
706 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non-I slices: a skip run is read when mb_skip_run is -1 and consumed one
// macroblock at a time.
708 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
709 if(s->mb_skip_run==-1)
710 s->mb_skip_run= get_ue_golomb(&s->gb);
712 if (s->mb_skip_run--) {
713 if(FRAME_MBAFF && (s->mb_y&1) == 0){
714 if(s->mb_skip_run==0)
715 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
722 if( (s->mb_y&1) == 0 )
723 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
726 h->prev_mb_skipped= 0;
// mb_type is translated through the B, P or I type tables depending on the
// slice type; the range checks between the visible lines are missing here.
728 mb_type= get_ue_golomb(&s->gb);
729 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
731 partition_count= b_mb_type_info[mb_type].partition_count;
732 mb_type= b_mb_type_info[mb_type].type;
735 goto decode_intra_mb;
737 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
739 partition_count= p_mb_type_info[mb_type].partition_count;
740 mb_type= p_mb_type_info[mb_type].type;
743 goto decode_intra_mb;
746 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
747 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
751 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
755 cbp= i_mb_type_info[mb_type].cbp;
756 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
757 mb_type= i_mb_type_info[mb_type].type;
761 mb_type |= MB_TYPE_INTERLACED;
763 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: raw samples follow byte aligned; mb_size bytes are copied into h->mb.
765 if(IS_INTRA_PCM(mb_type)){
767 const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
768 h->sps.bit_depth_luma >> 3;
770 // We assume these blocks are very rare so we do not optimize it.
771 align_get_bits(&s->gb);
773 // The pixels are stored in the same order as levels in h->mb array.
774 for(x=0; x < mb_size; x++){
775 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
778 // In deblocking, the quantizer is 0
779 s->current_picture.f.qscale_table[mb_xy] = 0;
780 // All coeffs are present
781 memset(h->non_zero_count[mb_xy], 16, 48);
783 s->current_picture.f.mb_type[mb_xy] = mb_type;
787 fill_decode_neighbors(h, mb_type);
788 fill_decode_caches(h, mb_type);
791 if(IS_INTRA(mb_type)){
793 // init_top_left_availability(h);
794 if(IS_INTRA4x4(mb_type)){
797 if(dct8x8_allowed && get_bits1(&s->gb)){
798 mb_type |= MB_TYPE_8x8DCT;
802 // fill_intra4x4_pred_table(h);
// Per block: a set flag bit keeps the predicted mode; otherwise a 3 bit
// remainder is read and values at or above the prediction are shifted up
// by one.
803 for(i=0; i<16; i+=di){
804 int mode= pred_intra_mode(h, i);
806 if(!get_bits1(&s->gb)){
807 const int rem_mode= get_bits(&s->gb, 3);
808 mode = rem_mode + (rem_mode >= mode);
812 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
814 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
816 write_back_intra_pred_mode(h);
817 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
820 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
821 if(h->intra16x16_pred_mode < 0)
825 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
828 h->chroma_pred_mode= pred_mode;
830 h->chroma_pred_mode = DC_128_PRED8x8;
// Four 8x8 partitions: each has its own sub_mb_type, reference index and
// motion vectors.
832 }else if(partition_count==4){
833 int i, j, sub_partition_count[4], list, ref[2][4];
835 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
837 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
838 if(h->sub_mb_type[i] >=13){
839 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
842 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
843 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
845 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
846 ff_h264_pred_direct_motion(h, &mb_type);
847 h->ref_cache[0][scan8[4]] =
848 h->ref_cache[1][scan8[4]] =
849 h->ref_cache[0][scan8[12]] =
850 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
853 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
855 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
856 if(h->sub_mb_type[i] >=4){
857 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
860 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
861 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices: with exactly two candidates a single inverted bit is
// read, otherwise an unsigned Golomb code. The branch taken for a single
// candidate is not visible in this excerpt.
865 for(list=0; list<h->list_count; list++){
866 int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF;
868 if(IS_DIRECT(h->sub_mb_type[i])) continue;
869 if(IS_DIR(h->sub_mb_type[i], 0, list)){
873 }else if(ref_count == 2){
874 tmp= get_bits1(&s->gb)^1;
876 tmp= get_ue_golomb_31(&s->gb);
878 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
891 dct8x8_allowed = get_dct8x8_allowed(h);
893 for(list=0; list<h->list_count; list++){
895 if(IS_DIRECT(h->sub_mb_type[i])) {
896 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
899 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
900 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
902 if(IS_DIR(h->sub_mb_type[i], 0, list)){
903 const int sub_mb_type= h->sub_mb_type[i];
904 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
// Each sub partition: predicted vector plus a signed Golomb delta per
// component, then replicated into the cache cells the partition covers.
905 for(j=0; j<sub_partition_count[i]; j++){
907 const int index= 4*i + block_width*j;
908 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
909 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
910 mx += get_se_golomb(&s->gb);
911 my += get_se_golomb(&s->gb);
912 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
914 if(IS_SUB_8X8(sub_mb_type)){
916 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
918 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
919 }else if(IS_SUB_8X4(sub_mb_type)){
920 mv_cache[ 1 ][0]= mx;
921 mv_cache[ 1 ][1]= my;
922 }else if(IS_SUB_4X8(sub_mb_type)){
923 mv_cache[ 8 ][0]= mx;
924 mv_cache[ 8 ][1]= my;
926 mv_cache[ 0 ][0]= mx;
927 mv_cache[ 0 ][1]= my;
930 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
936 }else if(IS_DIRECT(mb_type)){
937 ff_h264_pred_direct_motion(h, &mb_type);
938 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
941 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16, 16x8 and 8x16 partitions follow the same pattern: first all
// reference indices per list, then all motion vectors per list.
942 if(IS_16X16(mb_type)){
943 for(list=0; list<h->list_count; list++){
945 if(IS_DIR(mb_type, 0, list)){
946 int rc = h->ref_count[list] << MB_MBAFF;
949 } else if (rc == 2) {
950 val= get_bits1(&s->gb)^1;
952 val= get_ue_golomb_31(&s->gb);
954 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
958 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
961 for(list=0; list<h->list_count; list++){
962 if(IS_DIR(mb_type, 0, list)){
963 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
964 mx += get_se_golomb(&s->gb);
965 my += get_se_golomb(&s->gb);
966 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
968 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
972 else if(IS_16X8(mb_type)){
973 for(list=0; list<h->list_count; list++){
976 if(IS_DIR(mb_type, i, list)){
977 int rc = h->ref_count[list] << MB_MBAFF;
980 } else if (rc == 2) {
981 val= get_bits1(&s->gb)^1;
983 val= get_ue_golomb_31(&s->gb);
985 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
990 val= LIST_NOT_USED&0xFF;
991 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
994 for(list=0; list<h->list_count; list++){
997 if(IS_DIR(mb_type, i, list)){
998 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
999 mx += get_se_golomb(&s->gb);
1000 my += get_se_golomb(&s->gb);
1001 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1003 val= pack16to32(mx,my);
1006 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1010 assert(IS_8X16(mb_type));
1011 for(list=0; list<h->list_count; list++){
1014 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1015 int rc = h->ref_count[list] << MB_MBAFF;
1018 } else if (rc == 2) {
1019 val= get_bits1(&s->gb)^1;
1021 val= get_ue_golomb_31(&s->gb);
1023 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1028 val= LIST_NOT_USED&0xFF;
1029 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1032 for(list=0; list<h->list_count; list++){
1035 if(IS_DIR(mb_type, i, list)){
1036 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1037 mx += get_se_golomb(&s->gb);
1038 my += get_se_golomb(&s->gb);
1039 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1041 val= pack16to32(mx,my);
1044 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1050 if(IS_INTER(mb_type))
1051 write_back_motion(h, mb_type);
// Coded block pattern: Intra16x16 took it from i_mb_type_info above, every
// other type reads a Golomb code and maps it through one of two table
// pairs. The condition choosing between the pairs and the range checks are
// not visible in this excerpt.
1053 if(!IS_INTRA16x16(mb_type)){
1054 cbp= get_ue_golomb(&s->gb);
1058 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1061 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1062 else cbp= golomb_to_inter_cbp [cbp];
1065 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1068 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1069 else cbp= golomb_to_inter_cbp_gray[cbp];
// Inter macroblocks with coded luma may still switch to the 8x8 transform.
1073 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1074 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1077 h->cbp_table[mb_xy]= cbp;
1078 s->current_picture.f.mb_type[mb_xy] = mb_type;
1080 if(cbp || IS_INTRA16x16(mb_type)){
1081 int i4x4, i8x8, chroma_idx;
1084 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1085 const uint8_t *scan, *scan8x8;
1086 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
// Scan tables depend on field or frame coding and have a separate variant
// for qscale 0.
1088 if(IS_INTERLACED(mb_type)){
1089 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1090 scan= s->qscale ? h->field_scan : h->field_scan_q0;
1092 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1093 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1096 dquant= get_se_golomb(&s->gb);
1098 s->qscale += dquant;
// The quantizer wraps around once by max_qp+1; a value still outside
// 0..max_qp after that is reported as an error.
1100 if(((unsigned)s->qscale) > max_qp){
1101 if(s->qscale<0) s->qscale+= max_qp+1;
1102 else s->qscale-= max_qp+1;
1103 if(((unsigned)s->qscale) > max_qp){
1104 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1109 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1110 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Plane 0 residual; a non-negative result is merged into cbp_table above
// bit 12.
1112 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1115 h->cbp_table[mb_xy] |= ret << 12;
// Planes 1 and 2 are decoded with the luma routine on a path whose
// condition is not visible here (presumably 4:4:4 -- TODO confirm).
1117 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1120 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1123 } else if (CHROMA422) {
1125 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1126 if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1127 CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
// 4:2:2 chroma AC: eight 15-coefficient blocks per chroma plane.
1134 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1135 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1136 DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1137 for (i8x8 = 0; i8x8 < 2; i8x8++) {
1138 for (i4x4 = 0; i4x4 < 4; i4x4++) {
1139 const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1140 if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1142 mb += 16 << pixel_shift;
1147 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1148 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1150 } else /* yuv420 */ {
1152 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1153 if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
// 4:2:0 chroma AC: four 15-coefficient blocks per chroma plane.
1159 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1160 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1161 for(i4x4=0; i4x4<4; i4x4++){
1162 const int index= 16 + 16*chroma_idx + i4x4;
1163 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1169 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1170 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// Path without any residual data: all three non-zero count planes are reset.
1174 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1175 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1176 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1178 s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1179 write_back_non_zero_count(h);