2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
/* 16-entry lookup from the decoded cbp code number to a coded block pattern.
 * ff_h264_decode_mb_cavlc indexes it as golomb_to_inter_cbp_gray[cbp] for
 * macroblocks that are not intra 4x4.
 * NOTE(review): presumably the variant used when no chroma residual is decoded
 * ("gray"); the selecting condition is not visible in this listing. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* 16-entry lookup from the decoded cbp code number to a coded block pattern.
 * ff_h264_decode_mb_cavlc indexes it as golomb_to_intra4x4_cbp_gray[cbp] for
 * intra 4x4 macroblocks.
 * NOTE(review): presumably the variant used when no chroma residual is decoded
 * ("gray"); the selecting condition is not visible in this listing. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/* Code lengths (_len) and code words (_bits) for the two chroma DC coeff_token
 * VLCs. ff_h264_decode_init_vlc feeds them to init_vlc() with 4*5 codes for the
 * plain chroma DC table and 4*9 codes for the chroma422 one.
 * NOTE(review): the initializer values are not present in this listing. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
/* Code lengths of the four coeff_token VLCs (4*17 codes each); row i is handed
 * to init_vlc() together with coeff_token_bits[i] to build coeff_token_vlc[i].
 * decode_residual picks the row through coeff_token_table_index[], i.e. row 0
 * for a predicted count of 0-1, row 1 for 2-3, row 2 for 4-7 and row 3 for 8-16.
 * The decoded symbol is split there as total_coeff = symbol >> 2 and
 * trailing_ones = symbol & 3.
 * NOTE(review): only 64 of the 68 values of each row are visible in this
 * listing; a length of 0 presumably marks a combination that has no code. */
89 static const uint8_t coeff_token_len[4][4*17]={
92 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
99 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
106 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
113 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/* Code words of the four coeff_token VLCs (4*17 codes each), parallel to
 * coeff_token_len: entry [i][k] is the bit pattern whose length is
 * coeff_token_len[i][k]. Row i is passed to init_vlc() to build
 * coeff_token_vlc[i].
 * NOTE(review): only 64 of the 68 values of each row are visible in this
 * listing. */
120 static const uint8_t coeff_token_bits[4][4*17]={
123 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
130 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
137 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
144 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/* Code lengths for the total_zeros VLCs. Row i is passed to init_vlc() with 16
 * codes to build total_zeros_vlc[i]; decode_residual looks the table up as
 * (total_zeros_vlc-1)[total_coeff], so row i serves total_coeff == i + 1.
 * Rows are written with fewer than 16 initializers as they go on, leaving the
 * remaining entries zero-initialized.
 * NOTE(review): only the first seven rows are visible in this listing. */
151 static const uint8_t total_zeros_len[16][16]= {
152 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155 {5,3,4,4,3,3,3,4,3,4,5,5,5},
156 {4,4,4,3,3,3,3,3,4,5,4,5},
157 {6,5,3,3,3,3,3,3,4,3,6},
158 {6,5,3,3,3,2,3,4,3,6},
/* Code words for the total_zeros VLCs, parallel to total_zeros_len: entry
 * [i][k] is the bit pattern whose length is total_zeros_len[i][k]. Row i is
 * passed to init_vlc() to build total_zeros_vlc[i].
 * NOTE(review): only the first seven rows are visible in this listing. */
169 static const uint8_t total_zeros_bits[16][16]= {
170 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173 {3,7,5,4,6,5,4,3,3,2,2,1,0},
174 {5,4,3,7,6,5,4,3,2,1,1,0},
175 {1,1,7,6,5,4,3,2,1,1,0},
176 {1,1,5,4,3,3,2,1,1,0},
/* Code lengths (_len) and code words (_bits) for the chroma DC total_zeros
 * VLCs. Row i is passed to init_vlc() with 4 codes to build
 * chroma_dc_total_zeros_vlc[i], which decode_residual reads as
 * (chroma_dc_total_zeros_vlc-1)[total_coeff].
 * NOTE(review): the initializer values are not present in this listing. */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/* Code lengths for the chroma422 DC total_zeros VLCs. Row i is passed to
 * init_vlc() with 8 codes to build chroma422_dc_total_zeros_vlc[i], which
 * decode_residual reads as (chroma422_dc_total_zeros_vlc-1)[total_coeff].
 * NOTE(review): only the first three of the seven rows are visible in this
 * listing. */
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200 { 1, 3, 3, 4, 4, 4, 5, 5 },
201 { 3, 2, 3, 3, 3, 3, 3 },
202 { 3, 3, 2, 2, 3, 3 },
/* Code words for the chroma422 DC total_zeros VLCs, parallel to
 * chroma422_dc_total_zeros_len (same row/column layout).
 * NOTE(review): only the first three of the seven rows are visible in this
 * listing. */
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210 { 1, 2, 3, 2, 3, 1, 1, 0 },
211 { 0, 1, 1, 4, 5, 6, 7 },
212 { 0, 1, 1, 2, 6, 7 },
/* Code lengths for the run_before VLCs. ff_h264_decode_init_vlc uses row i to
 * build run_vlc[i] and row 6 (with 16 codes) to build run7_vlc.
 * NOTE(review): only one of the seven rows is visible in this listing. */
219 static const uint8_t run_len[7][16]={
226 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
/* Code words for the run_before VLCs, parallel to run_len (same row/column
 * layout).
 * NOTE(review): only one of the seven rows is visible in this listing. */
229 static const uint8_t run_bits[7][16]={
236 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/* Static VLC readers and their backing storage. Each VLC gets its .table
 * pointed at one of the arrays below and its .table_allocated set from the
 * matching *_size constant in ff_h264_decode_init_vlc. */

/* The four coeff_token tables share one packed array; the init code slices it
 * by a running offset using the per-table sizes. */
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
/* Storage for run7_vlc, which is built from row 6 of run_len/run_bits.
 * NOTE(review): the declaration of run7_vlc itself is not present in this
 * listing. */
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
/* cavlc_level_tab[suffix_length][bits] is indexed by the next LEVEL_TAB_BITS
 * bits of the stream: [0] holds a decoded level or an escape value >= 100,
 * [1] the number of bits to consume. Filled by init_cavlc_level_tab. */
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* Lookup widths passed as the "bits" argument to init_vlc() and get_vlc2()
 * for the corresponding tables above. */
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
284 * gets the predicted number of non-zero coefficients.
285 * @param n block index
/*
 * Predicts the non-zero coefficient count of block n from the cached counts of
 * the entries to its left (scan8[n] - 1) and above (scan8[n] - 8) in
 * h->non_zero_count_cache.
 * NOTE(review): the lines that declare `i` and return the result are absent
 * from this listing; presumably i starts as left + top -- confirm.
 */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288 const int index8= scan8[n];
289 const int left= h->non_zero_count_cache[index8 - 1];
290 const int top = h->non_zero_count_cache[index8 - 8];
// Below 64 the value is halved, rounding up; values of 64 and above are kept as they are.
293 if(i<64) i= (i+1)>>1;
// The trace shows i&31, i.e. only the low five bits of the prediction.
295 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/*
 * Fills cavlc_level_tab. For every suffix_length 0..6 and every
 * LEVEL_TAB_BITS-wide bit pattern i, entry [0] receives either a fully decoded
 * signed level or an escape value of 100 + prefix, and entry [1] receives the
 * number of bits the reader has to consume for that entry.
 * NOTE(review): the declaration of `i` is not present in this listing.
 */
300 static av_cold void init_cavlc_level_tab(void){
301 int suffix_length, mask;
304 for(suffix_length=0; suffix_length<7; suffix_length++){
305 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// NOTE(review): assuming av_log2() is floor(log2), prefix is the number of
// leading zero bits of i inside the LEVEL_TAB_BITS-wide window.
306 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// Combine the prefix with the suffix_length bits that follow the terminating 1 bit.
307 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
// mask is 0 for an even level_code and -1 for an odd one; (x ^ mask) - mask
// negates x when mask is -1, so odd codes become negative levels.
309 mask= -(level_code&1);
310 level_code= (((2+level_code)>>1) ^ mask) - mask;
// Prefix, terminator and suffix all fit in the window: store the final level.
311 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
312 cavlc_level_tab[suffix_length][i][0]= level_code;
313 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Only the prefix and its terminator fit: store an escape carrying the prefix.
314 }else if(prefix + 1 <= LEVEL_TAB_BITS){
315 cavlc_level_tab[suffix_length][i][0]= prefix+100;
316 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case: escape with prefix == LEVEL_TAB_BITS, consuming the whole window.
318 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
319 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/*
 * Builds all static CAVLC lookup tables of this file. Every VLC is set up the
 * same way: .table is pointed at its preallocated static storage,
 * .table_allocated is set to that storage's size, and init_vlc() is called
 * with the length/code arrays (one byte per entry) and
 * INIT_VLC_USE_NEW_STATIC. The level table is filled last.
 * NOTE(review): the loop headers and local declarations (i, offset) are not
 * present in this listing.
 */
325 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token: 4*5 codes.
333 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
334 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
335 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
336 &chroma_dc_coeff_token_len [0], 1, 1,
337 &chroma_dc_coeff_token_bits[0], 1, 1,
338 INIT_VLC_USE_NEW_STATIC);
// Chroma422 DC coeff_token: 4*9 codes.
340 chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
341 chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
342 init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
343 &chroma422_dc_coeff_token_len [0], 1, 1,
344 &chroma422_dc_coeff_token_bits[0], 1, 1,
345 INIT_VLC_USE_NEW_STATIC);
// coeff_token table i takes the next slice of the packed storage; offset
// advances by that table's size each time.
349 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
350 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
351 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
352 &coeff_token_len [i][0], 1, 1,
353 &coeff_token_bits[i][0], 1, 1,
354 INIT_VLC_USE_NEW_STATIC);
355 offset += coeff_token_vlc_tables_size[i];
358 * This is a one time safety check to make sure that
359 * the packed static coeff_token_vlc table sizes
360 * were initialized correctly.
362 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros table i: 4 codes.
365 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
366 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
367 init_vlc(&chroma_dc_total_zeros_vlc[i],
368 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
369 &chroma_dc_total_zeros_len [i][0], 1, 1,
370 &chroma_dc_total_zeros_bits[i][0], 1, 1,
371 INIT_VLC_USE_NEW_STATIC);
// Chroma422 DC total_zeros table i: 8 codes.
375 chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
376 chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
377 init_vlc(&chroma422_dc_total_zeros_vlc[i],
378 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
379 &chroma422_dc_total_zeros_len [i][0], 1, 1,
380 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
381 INIT_VLC_USE_NEW_STATIC);
// total_zeros table i: 16 codes.
385 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
386 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
387 init_vlc(&total_zeros_vlc[i],
388 TOTAL_ZEROS_VLC_BITS, 16,
389 &total_zeros_len [i][0], 1, 1,
390 &total_zeros_bits[i][0], 1, 1,
391 INIT_VLC_USE_NEW_STATIC);
// run_before table i.
// NOTE(review): the line carrying the bits/nb_codes arguments of this call is
// not present in this listing.
395 run_vlc[i].table = run_vlc_tables[i];
396 run_vlc[i].table_allocated = run_vlc_tables_size;
397 init_vlc(&run_vlc[i],
399 &run_len [i][0], 1, 1,
400 &run_bits[i][0], 1, 1,
401 INIT_VLC_USE_NEW_STATIC);
// run7_vlc is built from row 6 of run_len/run_bits with 16 codes.
// NOTE(review): the next statement ends with ',' rather than ';'. The comma
// operator still evaluates both assignments in order, so behaviour is the
// same, but it looks like a typo.
403 run7_vlc.table = run7_vlc_table,
404 run7_vlc.table_allocated = run7_vlc_table_size;
405 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
406 &run_len [6][0], 1, 1,
407 &run_bits[6][0], 1, 1,
408 INIT_VLC_USE_NEW_STATIC);
410 init_cavlc_level_tab();
/*
 * Reads a level prefix directly from the bit reader's cache: it measures how
 * far the highest set bit of the cached word is from the top and skips that
 * many bits.
 * NOTE(review): the reader-opening line, the local declarations and the return
 * statement are not present in this listing; the debug output prints log-1 as
 * the prefix, so that is presumably the value returned -- confirm.
 */
417 static inline int get_level_prefix(GetBitContext *gb){
422 UPDATE_CACHE(re, gb);
423 buf=GET_CACHE(re, gb);
// NOTE(review): assuming av_log2() is floor(log2) and buf is a 32-bit cache
// word, log is the number of leading zero bits plus one (the terminating 1).
425 log= 32 - av_log2(buf);
// Debug output of the consumed bits; presumably compiled only in trace builds
// (the guarding preprocessor lines are not visible here).
427 print_bin(buf>>(32-log), log);
428 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the zeros and the terminating 1 bit.
431 LAST_SKIP_BITS(re, gb, log);
432 CLOSE_READER(re, gb);
438 * decodes a residual block.
439 * @param n block index
440 * @param scantable scantable
441 * @param max_coeff number of coefficients in the block
442 * @return <0 if an error occurred
/*
 * Decodes one CAVLC residual block. In order it reads the coeff_token
 * (total_coeff = token >> 2, trailing_ones = token & 3), the signs of the
 * trailing ones, the remaining levels, total_zeros and the run_before values,
 * then stores the levels into `block` at the positions given by `scantable`.
 * Blocks with n >= LUMA_DC_BLOCK_INDEX are stored as raw levels; the other
 * store path scales each level as (level * qmul[pos] + 32) >> 6.
 * NOTE(review): this listing omits many lines of the function (declarations of
 * level[], prefix and mask, most if/else lines and every return statement), so
 * the conditions selecting between the visible alternatives are not shown.
 */
444 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
445 MpegEncContext * const s = &h->s;
// Maps a predicted coefficient count (0..16) to one of the four coeff_token tables.
446 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
450 //FIXME put trailing_ones into the context
// Chroma DC blocks use their own coeff_token tables (plain and chroma422).
454 coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
456 coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457 total_coeff= coeff_token>>2;
// Luma DC blocks take their prediction from block (n - LUMA_DC_BLOCK_INDEX)*16.
// total_coeff temporarily holds the predicted count used to pick the table.
459 if(n >= LUMA_DC_BLOCK_INDEX){
460 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462 total_coeff= coeff_token>>2;
464 total_coeff= pred_non_zero_count(h, n);
465 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466 total_coeff= coeff_token>>2;
// Remember the decoded count; pred_non_zero_count reads this cache for later blocks.
469 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
471 //FIXME set last_non_zero?
// The comparison is carried out in unsigned arithmetic, so a negative
// total_coeff is rejected as well.
475 if(total_coeff > (unsigned)max_coeff) {
476 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
480 trailing_ones= coeff_token&3;
481 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482 assert(total_coeff<=16);
// Peek three sign bits but consume only trailing_ones of them; a set bit
// yields -1 and a clear bit +1. Entries beyond trailing_ones are overwritten
// by the level decoding below when more coefficients follow.
484 i = show_bits(gb, 3);
485 skip_bits(gb, trailing_ones);
486 level[0] = 1-((i&4)>>1);
487 level[1] = 1-((i&2) );
488 level[2] = 1-((i&1)<<1);
490 if(trailing_ones<total_coeff) {
// Both comparisons yield 0 or 1 and bind tighter than '&', so this is 1 only
// when total_coeff > 10 and trailing_ones < 3.
492 int suffix_length = total_coeff > 10 & trailing_ones < 3;
493 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
496 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Table values >= 100 are escapes carrying only the prefix (value - 100).
497 if(level_code >= 100){
498 prefix= level_code - 100;
// The prefix did not terminate within the table window; read the rest bit by bit.
499 if(prefix == LEVEL_TAB_BITS)
500 prefix += get_level_prefix(gb);
502 //first coefficient has suffix_length equal to 0 or 1
503 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
505 level_code= (prefix<<1) + get_bits1(gb); //part
507 level_code= prefix; //part
508 }else if(prefix==14){
510 level_code= (prefix<<1) + get_bits1(gb); //part
512 level_code= prefix + get_bits(gb, 4); //part
514 level_code= 30 + get_bits(gb, prefix-3); //part
517 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
520 level_code += (1<<(prefix-3))-4096;
524 if(trailing_ones < 3) level_code += 2;
// Same even/odd sign mapping as in init_cavlc_level_tab.
527 mask= -(level_code&1);
528 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// When trailing_ones < 3, move the table-decoded level one step away from zero.
// NOTE(review): (level_code>>31)|1 yields -1 or +1 assuming 32-bit int and an
// arithmetic right shift.
530 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
// Unsigned compare: true when level_code lies outside [-3, 3], giving suffix_length 2, else 1.
532 suffix_length = 1 + (level_code + 3U > 6U);
533 level[trailing_ones]= level_code;
536 //remaining coefficients have suffix_length > 0
537 for(i=trailing_ones+1;i<total_coeff;i++) {
538 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
539 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
540 level_code= cavlc_level_tab[suffix_length][bitsi][0];
542 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
543 if(level_code >= 100){
544 prefix= level_code - 100;
545 if(prefix == LEVEL_TAB_BITS){
546 prefix += get_level_prefix(gb);
549 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
553 level_code += (1<<(prefix-3))-4096;
555 mask= -(level_code&1);
556 level_code= (((2+level_code)>>1) ^ mask) - mask;
558 level[i]= level_code;
// Unsigned compare: increments suffix_length when level_code lies outside
// [-suffix_limit[suffix_length], suffix_limit[suffix_length]].
559 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
// A completely filled block leaves no room for zeros.
// NOTE(review): the statement guarded by this condition is not present in this listing.
563 if(total_coeff == max_coeff)
// Otherwise total_zeros is read from the table for this total_coeff;
// (vlc-1)[total_coeff] is vlc[total_coeff-1]. With max_coeff <= 8 the chroma DC
// tables are used (plain or chroma422), else the general table.
566 if (max_coeff <= 8) {
568 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
569 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
571 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
572 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
574 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
// STORE_BLOCK(type) writes the levels back to front: scantable is first
// advanced to the position of the last coefficient (zeros_left + total_coeff - 1)
// and then stepped back by 1 + run_before for every further level while zeros
// remain. run_before comes from (run_vlc-1)[zeros_left] or from run7_vlc.
// NOTE(review): the lines choosing between those two tables, and the pointer
// steps of the trailing loops, are not present in this listing.
578 #define STORE_BLOCK(type) \
579 scantable += zeros_left + total_coeff - 1; \
580 if(n >= LUMA_DC_BLOCK_INDEX){ \
581 ((type*)block)[*scantable] = level[0]; \
582 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
584 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
586 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
587 zeros_left -= run_before; \
588 scantable -= 1 + run_before; \
589 ((type*)block)[*scantable]= level[i]; \
591 for(;i<total_coeff;i++) { \
593 ((type*)block)[*scantable]= level[i]; \
596 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
597 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
599 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
601 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
602 zeros_left -= run_before; \
603 scantable -= 1 + run_before; \
604 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
606 for(;i<total_coeff;i++) { \
608 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
// The element type used for storing depends on h->pixel_shift.
// NOTE(review): the STORE_BLOCK invocations themselves are not present in this listing.
612 if (h->pixel_shift) {
// Reported when the run_before values consumed more zeros than total_zeros announced.
619 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/*
 * Decodes the luma-style residual of plane p for one macroblock.
 * Intra 16x16 macroblocks read one 16-coefficient DC block followed by 4x4
 * blocks of 15 coefficients each. All other macroblocks are walked per 8x8
 * quadrant, either as four interleaved 16-coefficient blocks written into one
 * 8x8 buffer (8x8 DCT) or as four ordinary 4x4 blocks.
 * The caller shifts the return value left by 12 and ORs it into cbp_table.
 * NOTE(review): this listing omits the declarations of i4x4/i8x8/new_cbp, the
 * cbp tests selecting the branches and every return statement; presumably the
 * non-error return value is new_cbp -- confirm.
 */
626 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
628 MpegEncContext * const s = &h->s;
// Plane 0 uses the luma quantiser, planes 1 and 2 the matching chroma quantiser.
629 int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
630 if(IS_INTRA16x16(mb_type)){
// Clear the DC coefficient buffer of this plane before decoding into it.
631 AV_ZERO128(h->mb_luma_dc[p]+0);
632 AV_ZERO128(h->mb_luma_dc[p]+8);
633 AV_ZERO128(h->mb_luma_dc[p]+16);
634 AV_ZERO128(h->mb_luma_dc[p]+24);
// DC block: full scan, 16 coefficients, no dequantisation table (qmul is NULL).
635 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
636 return -1; //FIXME continue if partitioned and other return -1 too
// The low four cbp bits are expected to be all clear or all set here.
639 assert((cbp&15) == 0 || (cbp&15) == 15);
642 for(i8x8=0; i8x8<4; i8x8++){
643 for(i4x4=0; i4x4<4; i4x4++){
644 const int index= i4x4 + 4*i8x8 + p*16;
// AC blocks: start at scan + 1 and read 15 coefficients, skipping the DC position.
645 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
646 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
// Zero the 4x4 grid of cached counts for this plane.
// NOTE(review): presumably the path taken when no AC blocks are coded; the
// governing else line is not visible.
653 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Dequantisation matrix index: 0 + p for intra macroblocks, 3 + p otherwise.
657 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
658 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
660 for(i8x8=0; i8x8<4; i8x8++){
662 if(IS_8x8DCT(mb_type)){
663 DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
// Four 16-coefficient passes fill the same 8x8 buffer, each with its own
// 16-entry slice of the 8x8 scan table.
665 for(i4x4=0; i4x4<4; i4x4++){
666 const int index= i4x4 + 4*i8x8 + p*16;
667 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
668 h->dequant8_coeff[cqm][qscale], 16) < 0 )
// Fold the four 4x4 counts of this quadrant into its first cache entry.
671 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
672 nnz[0] += nnz[1] + nnz[8] + nnz[9];
// Set bit i8x8 of new_cbp when the quadrant holds any coefficient.
673 new_cbp |= !!nnz[0] << i8x8;
675 for(i4x4=0; i4x4<4; i4x4++){
676 const int index= i4x4 + 4*i8x8 + p*16;
677 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
678 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
// Here the raw count (not a 0/1 flag) is shifted by i8x8 and ORed in.
681 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
// Clear the four cached counts of this quadrant.
// NOTE(review): presumably the path for a quadrant whose cbp bit is clear; the
// governing else line is not visible.
685 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
686 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Decodes one macroblock from the CAVLC bitstream. The visible steps are, in
// order: skip-run handling, mb_type parsing per slice type, the intra PCM
// path, intra prediction modes, sub-macroblock types with reference indices
// and motion vectors, the 16x16 / 16x8 / 8x16 partition paths, the coded
// block pattern, the optional 8x8 transform flag, dquant, the luma and chroma
// residuals, and the write-back of qscale and non-zero counts.
// NOTE(review): this listing omits many lines of the function (local
// declarations, most braces and else lines, loop headers and every return
// statement), so the return values are not visible here.
693 int ff_h264_decode_mb_cavlc(H264Context *h){
694 MpegEncContext * const s = &h->s;
697 unsigned int mb_type, cbp;
698 int dct8x8_allowed= h->pps.transform_8x8_mode;
// Chroma residual is decoded separately only for chroma_format_idc 1 and 2.
699 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
700 const int pixel_shift = h->pixel_shift;
// Linear macroblock index within the picture.
702 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
704 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
705 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
707 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
// -1 marks a skip run that has not been read yet.
708 if(s->mb_skip_run==-1)
709 s->mb_skip_run= get_ue_golomb(&s->gb);
// Non-zero run: this macroblock is skipped and the run is decremented.
711 if (s->mb_skip_run--) {
712 if(FRAME_MBAFF && (s->mb_y&1) == 0){
713 if(s->mb_skip_run==0)
714 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
// On even macroblock rows the field decoding flag is read from the stream.
721 if( (s->mb_y&1) == 0 )
722 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
725 h->prev_mb_skipped= 0;
727 mb_type= get_ue_golomb(&s->gb);
// B and P slices translate the code number through their info tables or jump
// to the intra path.
// NOTE(review): the range checks deciding between the two are not visible here.
728 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
730 partition_count= b_mb_type_info[mb_type].partition_count;
731 mb_type= b_mb_type_info[mb_type].type;
734 goto decode_intra_mb;
736 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
738 partition_count= p_mb_type_info[mb_type].partition_count;
739 mb_type= p_mb_type_info[mb_type].type;
742 goto decode_intra_mb;
745 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
746 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
750 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra types carry their cbp and 16x16 prediction mode in the info table.
754 cbp= i_mb_type_info[mb_type].cbp;
755 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
756 mb_type= i_mb_type_info[mb_type].type;
760 mb_type |= MB_TYPE_INTERLACED;
762 h->slice_table[ mb_xy ]= h->slice_num;
764 if(IS_INTRA_PCM(mb_type)){
// Base sizes per chroma_format_idc, scaled by bit_depth_luma and divided by 8
// ('*' binds tighter than '>>'); the result is the number of bytes copied below.
766 static const uint16_t mb_sizes[4] = {256,384,512,768};
767 const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
769 // We assume these blocks are very rare so we do not optimize it.
770 align_get_bits(&s->gb);
772 // The pixels are stored in the same order as levels in h->mb array.
773 for(x=0; x < mb_size; x++){
774 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
777 // In deblocking, the quantizer is 0
778 s->current_picture.f.qscale_table[mb_xy] = 0;
779 // All coeffs are present
780 memset(h->non_zero_count[mb_xy], 16, 48);
782 s->current_picture.f.mb_type[mb_xy] = mb_type;
// Reference counts are doubled here and halved again at the end of the function.
// NOTE(review): presumably only for field macroblocks in MBAFF; the guarding
// condition is not visible.
787 h->ref_count[0] <<= 1;
788 h->ref_count[1] <<= 1;
791 fill_decode_neighbors(h, mb_type);
792 fill_decode_caches(h, mb_type);
795 if(IS_INTRA(mb_type)){
797 // init_top_left_availability(h);
798 if(IS_INTRA4x4(mb_type)){
// One extra bit selects the 8x8 transform when the PPS allows it.
801 if(dct8x8_allowed && get_bits1(&s->gb)){
802 mb_type |= MB_TYPE_8x8DCT;
806 // fill_intra4x4_pred_table(h);
807 for(i=0; i<16; i+=di){
808 int mode= pred_intra_mode(h, i);
// A zero flag means the predicted mode is not used: a 3-bit rem_mode follows,
// and values at or above the predicted mode are shifted up by one to skip it.
810 if(!get_bits1(&s->gb)){
811 const int rem_mode= get_bits(&s->gb, 3);
812 mode = rem_mode + (rem_mode >= mode);
816 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
818 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
820 write_back_intra_pred_mode(h);
821 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
824 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
825 if(h->intra16x16_pred_mode < 0)
829 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
832 h->chroma_pred_mode= pred_mode;
834 h->chroma_pred_mode = DC_128_PRED8x8;
// Four-partition macroblocks: each 8x8 has its own sub type, reference and vectors.
836 }else if(partition_count==4){
837 int i, j, sub_partition_count[4], list, ref[2][4];
839 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
841 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
842 if(h->sub_mb_type[i] >=13){
843 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
846 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
847 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// If any sub-block is direct, run direct prediction and mark four cache
// entries as PART_NOT_AVAILABLE afterwards.
849 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
850 ff_h264_pred_direct_motion(h, &mb_type);
851 h->ref_cache[0][scan8[4]] =
852 h->ref_cache[1][scan8[4]] =
853 h->ref_cache[0][scan8[12]] =
854 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
857 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
859 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
860 if(h->sub_mb_type[i] >=4){
861 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
864 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
865 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list; REF0 macroblocks behave as if only one reference exists.
869 for(list=0; list<h->list_count; list++){
870 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
872 if(IS_DIRECT(h->sub_mb_type[i])) continue;
873 if(IS_DIR(h->sub_mb_type[i], 0, list)){
// With exactly two references the index is a single inverted bit.
877 }else if(ref_count == 2){
878 tmp= get_bits1(&s->gb)^1;
880 tmp= get_ue_golomb_31(&s->gb);
882 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
895 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per list and per 8x8 block.
897 for(list=0; list<h->list_count; list++){
899 if(IS_DIRECT(h->sub_mb_type[i])) {
900 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
// Spread the reference index over the four cache entries of the 8x8 block.
903 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
904 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
906 if(IS_DIR(h->sub_mb_type[i], 0, list)){
907 const int sub_mb_type= h->sub_mb_type[i];
908 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
909 for(j=0; j<sub_partition_count[i]; j++){
911 const int index= 4*i + block_width*j;
912 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// Final vector = prediction + signed Exp-Golomb difference from the stream.
913 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
914 mx += get_se_golomb(&s->gb);
915 my += get_se_golomb(&s->gb);
916 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the vector into the neighbouring cache entries covered by this
// sub-partition shape.
918 if(IS_SUB_8X8(sub_mb_type)){
920 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
922 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
923 }else if(IS_SUB_8X4(sub_mb_type)){
924 mv_cache[ 1 ][0]= mx;
925 mv_cache[ 1 ][1]= my;
926 }else if(IS_SUB_4X8(sub_mb_type)){
927 mv_cache[ 8 ][0]= mx;
928 mv_cache[ 8 ][1]= my;
930 mv_cache[ 0 ][0]= mx;
931 mv_cache[ 0 ][1]= my;
934 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
940 }else if(IS_DIRECT(mb_type)){
941 ff_h264_pred_direct_motion(h, &mb_type);
// Direct macroblocks keep the 8x8 transform only if direct_8x8_inference_flag is set.
942 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
945 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16 partition: one reference index and one vector per list.
946 if(IS_16X16(mb_type)){
947 for(list=0; list<h->list_count; list++){
949 if(IS_DIR(mb_type, 0, list)){
950 if(h->ref_count[list]==1){
952 }else if(h->ref_count[list]==2){
953 val= get_bits1(&s->gb)^1;
955 val= get_ue_golomb_31(&s->gb);
956 if(val >= h->ref_count[list]){
957 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
961 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
964 for(list=0; list<h->list_count; list++){
965 if(IS_DIR(mb_type, 0, list)){
966 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
967 mx += get_se_golomb(&s->gb);
968 my += get_se_golomb(&s->gb);
969 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
971 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
// 16x8 partition: two halves, each 4 wide and 2 high in the caches (offset 16*i).
975 else if(IS_16X8(mb_type)){
976 for(list=0; list<h->list_count; list++){
979 if(IS_DIR(mb_type, i, list)){
980 if(h->ref_count[list] == 1){
982 }else if(h->ref_count[list] == 2){
983 val= get_bits1(&s->gb)^1;
985 val= get_ue_golomb_31(&s->gb);
986 if(val >= h->ref_count[list]){
987 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
// Halves not predicted from this list store LIST_NOT_USED, masked to one byte.
992 val= LIST_NOT_USED&0xFF;
993 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
996 for(list=0; list<h->list_count; list++){
999 if(IS_DIR(mb_type, i, list)){
1000 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1001 mx += get_se_golomb(&s->gb);
1002 my += get_se_golomb(&s->gb);
1003 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1005 val= pack16to32(mx,my);
1008 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16 partition: two halves, each 2 wide and 4 high in the caches (offset 2*i).
1012 assert(IS_8X16(mb_type));
1013 for(list=0; list<h->list_count; list++){
1016 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1017 if(h->ref_count[list]==1){
1019 }else if(h->ref_count[list]==2){
1020 val= get_bits1(&s->gb)^1;
1022 val= get_ue_golomb_31(&s->gb);
1023 if(val >= h->ref_count[list]){
1024 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1029 val= LIST_NOT_USED&0xFF;
1030 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1033 for(list=0; list<h->list_count; list++){
1036 if(IS_DIR(mb_type, i, list)){
1037 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1038 mx += get_se_golomb(&s->gb);
1039 my += get_se_golomb(&s->gb);
1040 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1042 val= pack16to32(mx,my);
1045 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1051 if(IS_INTER(mb_type))
1052 write_back_motion(h, mb_type);
// Intra 16x16 already took its cbp from the mb_type table; every other type
// reads a code number and maps it through one of the golomb_to_*_cbp tables.
// NOTE(review): the condition choosing between the normal and the "_gray"
// tables, and the range checks before the error messages, are not visible here.
1054 if(!IS_INTRA16x16(mb_type)){
1055 cbp= get_ue_golomb(&s->gb);
1059 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1062 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1063 else cbp= golomb_to_inter_cbp [cbp];
1066 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1069 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1070 else cbp= golomb_to_inter_cbp_gray[cbp];
// The 8x8 transform flag is read only for non-intra macroblocks that have
// some of the low four cbp bits set and are still allowed to use it.
1074 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1075 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1078 h->cbp_table[mb_xy]= cbp;
1079 s->current_picture.f.mb_type[mb_xy] = mb_type;
// Residual data follows only when something is coded or the type is intra 16x16.
1081 if(cbp || IS_INTRA16x16(mb_type)){
1082 int i4x4, i8x8, chroma_idx;
1085 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1086 const uint8_t *scan, *scan8x8;
// Largest valid quantiser: 51 at 8 bits, plus 6 for every extra bit of depth.
1087 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
// Interlaced macroblocks use the field scan tables, others the zigzag ones;
// the "_q0" variants are chosen when qscale is zero.
1089 if(IS_INTERLACED(mb_type)){
1090 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1091 scan= s->qscale ? h->field_scan : h->field_scan_q0;
1093 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1094 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1097 dquant= get_se_golomb(&s->gb);
1099 s->qscale += dquant;
// Wrap qscale once by max_qp+1 in either direction; a value that is still
// outside 0..max_qp afterwards is reported as an error.
1101 if(((unsigned)s->qscale) > max_qp){
1102 if(s->qscale<0) s->qscale+= max_qp+1;
1103 else s->qscale-= max_qp+1;
1104 if(((unsigned)s->qscale) > max_qp){
1105 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1110 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1111 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Plane 0 residual; its return value is stored in cbp_table from bit 12 upwards.
1113 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1116 h->cbp_table[mb_xy] |= ret << 12;
// Planes 1 and 2 decoded through the same luma path.
// NOTE(review): presumably the 4:4:4 case; the governing condition is not visible.
1118 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1121 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
// Separate chroma path: chroma_format_idc doubles as the number of 8x8 chroma
// blocks per plane, and the DC block holds 4*num_c8x8 coefficients.
1125 const int num_c8x8 = h->sps.chroma_format_idc;
1128 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1129 if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1130 CHROMA_DC_BLOCK_INDEX+chroma_idx,
1131 CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1132 NULL, 4*num_c8x8) < 0) {
// Chroma AC: 15 coefficients per 4x4 block, starting at scan + 1.
1138 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
// Matrix index is chroma_idx+1 for intra macroblocks and chroma_idx+4 otherwise.
1139 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1140 DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1141 for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1142 for (i4x4=0; i4x4<4; i4x4++) {
1143 const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1144 if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1146 mb += 16<<pixel_shift;
// Clear the cached counts of both chroma planes.
// NOTE(review): presumably when no chroma AC is coded; the condition is not visible.
1151 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1152 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
// Clear the cached counts of all three planes.
// NOTE(review): presumably the path without any residual; the else line is not visible.
1156 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1157 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1158 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1160 s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1161 write_back_non_zero_count(h);
// Undo the doubling of the reference counts applied earlier in this function.
1164 h->ref_count[0] >>= 1;
1165 h->ref_count[1] >>= 1;