2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
// NOTE(review): this chunk is a partial extract — the table initializers
// below are visibly missing rows and closing braces; do not edit blindly.

// Exp-Golomb index -> coded_block_pattern for monochrome ("gray") streams,
// inter and intra4x4 variants respectively.
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,

// coeff_token code lengths/bits for the chroma DC 2x2 block:
// 4 trailing-ones columns x 5 total-coefficient rows.
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={

// coeff_token code lengths, one 4x17 table per nC context
// (4 trailing-ones columns x 17 total-coefficient rows).
// Table 3 (all lengths 6) is the fixed-length FLC case for nC >= 8.
65 static const uint8_t coeff_token_len[4][4*17]={
68 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
69 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
70 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
71 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
75 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
76 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
77 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
78 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
82 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
83 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
84 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
85 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
89 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
90 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
92 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,

// Matching coeff_token code values for the length tables above.
// Table 3 is a plain 6-bit enumeration (0..63), consistent with the
// fixed 6-bit lengths in coeff_token_len[3].
96 static const uint8_t coeff_token_bits[4][4*17]={
99 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
100 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
101 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
102 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
106 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
107 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
108 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
109 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
113 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
114 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
115 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
116 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
120 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
121 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
122 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
123 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
// total_zeros VLC code lengths: one row per total_coeff value (1..15);
// row lengths shrink because fewer zero counts are possible as
// total_coeff grows.  NOTE(review): later rows are missing from this
// extract.
127 static const uint8_t total_zeros_len[16][16]= {
128 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
129 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
130 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
131 {5,3,4,4,3,3,3,4,3,4,5,5,5},
132 {4,4,4,3,3,3,3,3,4,5,4,5},
133 {6,5,3,3,3,3,3,3,4,3,6},
134 {6,5,3,3,3,2,3,4,3,6},

// Matching total_zeros code values for the length rows above.
145 static const uint8_t total_zeros_bits[16][16]= {
146 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
147 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
148 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
149 {3,7,5,4,6,5,4,3,3,2,2,1,0},
150 {5,4,3,7,6,5,4,3,2,1,1,0},
151 {1,1,7,6,5,4,3,2,1,1,0},
152 {1,1,5,4,3,3,2,1,1,0},

// total_zeros tables for the 2x2 chroma DC block (max 4 coefficients).
163 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
169 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {

// run_before VLC tables: rows 0..5 are for zeros_left==1..6, row 6 is
// the shared table used whenever zeros_left > 6 (the "run7" case).
175 static const uint8_t run_len[7][16]={
182 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
185 static const uint8_t run_bits[7][16]={
192 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
// Statically allocated VLC reader objects and their backing storage.
// The *_tables_size constants must match the array dimensions; they are
// cross-checked in ff_h264_decode_init_vlc() (see the assert there).

// coeff_token: 4 context tables packed into one storage array, with
// per-table sizes 520+332+280+256.
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;

// One total_zeros VLC per total_coeff value 1..15.
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;

207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;

// run_before VLCs for zeros_left 1..6; the zeros_left>6 case uses the
// separate run7 table below.
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;

// Precomputed level decode LUT: indexed by [suffix_length][next 8 bits],
// yields {decoded level (or prefix+100 escape marker), bits consumed}.
// Filled by init_cavlc_level_tab().
219 #define LEVEL_TAB_BITS 8
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
// Predict the non-zero coefficient count (nC context) for block n from
// the cached counts of the left and top neighbour blocks.
// NOTE(review): partial extract — the combination of left/top into i and
// the function's return are not visible here.
224 * gets the predicted number of non-zero coefficients.
225 * @param n block index
227 static inline int pred_non_zero_count(H264Context *h, int n){
228 const int index8= scan8[n];
229 const int left= h->non_zero_count_cache[index8 - 1];
230 const int top = h->non_zero_count_cache[index8 - 8];
// When both neighbours were available, the prediction is the rounded
// average (i+1)>>1; values >= 64 presumably encode "neighbour missing"
// sentinels — TODO confirm against the cache-filling code.
233 if(i<64) i= (i+1)>>1;
235 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Build cavlc_level_tab: for each suffix_length (0..6) and each possible
// LEVEL_TAB_BITS-bit window, precompute either the fully decoded signed
// level plus its bit length, or an escape entry (value >= 100) telling
// the caller how many prefix bits were seen so it can fall back to the
// slow path.  NOTE(review): partial extract — closing braces and the
// final else branch's header are missing from view.
240 static av_cold void init_cavlc_level_tab(void){
244 for(suffix_length=0; suffix_length<7; suffix_length++){
245 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// Number of leading zero bits (the unary level_prefix) in the window.
246 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
248 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
// Whole codeword fits in the window: decode prefix+suffix to a
// level_code, then map even/odd codes to +/- levels via the mask trick.
249 int level_code = (prefix << suffix_length) +
250 (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
251 int mask = -(level_code&1);
252 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
253 cavlc_level_tab[suffix_length][i][0]= level_code;
254 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
255 }else if(prefix + 1 <= LEVEL_TAB_BITS){
// Prefix visible but suffix truncated: escape entry prefix+100,
// consume prefix+1 bits; decoder reads the suffix itself.
256 cavlc_level_tab[suffix_length][i][0]= prefix+100;
257 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// All-zero window: even the prefix continues past the window.
259 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
260 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// One-time initialization of all CAVLC VLC readers from the static
// length/bits tables, using preallocated static storage
// (INIT_VLC_USE_NEW_STATIC), then build the level LUT.
// NOTE(review): partial extract — the for-loop headers and the
// "run it once" guard are missing from view.
266 av_cold void ff_h264_decode_init_vlc(void){
274 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
275 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
276 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
277 &chroma_dc_coeff_token_len [0], 1, 1,
278 &chroma_dc_coeff_token_bits[0], 1, 1,
279 INIT_VLC_USE_NEW_STATIC);

// The four coeff_token context tables share one packed storage array;
// offset walks through it.
283 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
284 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
285 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
286 &coeff_token_len [i][0], 1, 1,
287 &coeff_token_bits[i][0], 1, 1,
288 INIT_VLC_USE_NEW_STATIC);
289 offset += coeff_token_vlc_tables_size[i];
292 * This is a one time safety check to make sure that
293 * the packed static coeff_token_vlc table sizes
294 * were initialized correctly.
296 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));

299 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
300 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
301 init_vlc(&chroma_dc_total_zeros_vlc[i],
302 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
303 &chroma_dc_total_zeros_len [i][0], 1, 1,
304 &chroma_dc_total_zeros_bits[i][0], 1, 1,
305 INIT_VLC_USE_NEW_STATIC);

308 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
309 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
310 init_vlc(&total_zeros_vlc[i],
311 TOTAL_ZEROS_VLC_BITS, 16,
312 &total_zeros_len [i][0], 1, 1,
313 &total_zeros_bits[i][0], 1, 1,
314 INIT_VLC_USE_NEW_STATIC);

318 run_vlc[i].table = run_vlc_tables[i];
319 run_vlc[i].table_allocated = run_vlc_tables_size;
320 init_vlc(&run_vlc[i],
322 &run_len [i][0], 1, 1,
323 &run_bits[i][0], 1, 1,
324 INIT_VLC_USE_NEW_STATIC);

// NOTE(review): comma operator below (",") where a ";" would be usual;
// harmless since the two assignments form one expression statement.
326 run7_vlc.table = run7_vlc_table,
327 run7_vlc.table_allocated = run7_vlc_table_size;
328 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
329 &run_len [6][0], 1, 1,
330 &run_bits[6][0], 1, 1,
331 INIT_VLC_USE_NEW_STATIC);

333 init_cavlc_level_tab();
// Read the unary level_prefix: count leading zero bits before the
// terminating 1, using the raw bitstream-reader cache macros.
// NOTE(review): partial extract — OPEN_READER and the return of the
// prefix (log-related) are not visible here.
340 static inline int get_level_prefix(GetBitContext *gb){
345 UPDATE_CACHE(re, gb);
346 buf=GET_CACHE(re, gb);
// Position of the first set bit from the MSB; log bits (the zeros plus
// the terminating 1) are consumed below.
348 log= 32 - av_log2(buf);
350 print_bin(buf>>(32-log), log);
351 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
354 LAST_SKIP_BITS(re, gb, log);
355 CLOSE_READER(re, gb);
// Decode one CAVLC residual block: coeff_token (total coefficients +
// trailing ones), the signed levels, total_zeros, and the run_before
// values, then scatter the levels into `block` via STORE_BLOCK.
// NOTE(review): partial extract — several if/else headers, the error
// returns and closing braces are missing from view.
361 * decodes a residual block.
362 * @param n block index
363 * @param scantable scantable
364 * @param max_coeff number of coefficients in the block
365 * @return <0 if an error occurred
367 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
368 MpegEncContext * const s = &h->s;
// Maps predicted nnz (0..16) to one of the 4 coeff_token VLC contexts.
369 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
371 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;

373 //FIXME put trailing_onex into the context

// Chroma DC blocks use their own dedicated coeff_token VLC; other
// blocks pick a context table from the neighbour-predicted nnz.
376 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
377 total_coeff= coeff_token>>2;
379 if(n >= LUMA_DC_BLOCK_INDEX){
380 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
381 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
382 total_coeff= coeff_token>>2;
384 total_coeff= pred_non_zero_count(h, n);
385 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
386 total_coeff= coeff_token>>2;

389 h->non_zero_count_cache[ scan8[n] ]= total_coeff;

391 //FIXME set last_non_zero?

395 if(total_coeff > (unsigned)max_coeff) {
396 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);

// coeff_token packs trailing_ones in its low 2 bits.
400 trailing_ones= coeff_token&3;
401 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
402 assert(total_coeff<=16);

// Trailing ones are +/-1 with one sign bit each; peek 3 bits and map
// each bit b to 1-2*b (0 -> +1, 1 -> -1), consuming only
// trailing_ones bits.
404 i = show_bits(gb, 3);
405 skip_bits(gb, trailing_ones);
406 level[0] = 1-((i&4)>>1);
407 level[1] = 1-((i&2) );
408 level[2] = 1-((i&1)<<1);

410 if(trailing_ones<total_coeff) {
// First coded level: initial suffix_length is 1 only when
// total_coeff>10 and trailing_ones<3 (bitwise & is safe: both
// operands are 0/1).  Try the fast LUT first; values >= 100 are
// escape entries (see init_cavlc_level_tab).
412 int suffix_length = total_coeff > 10 & trailing_ones < 3;
413 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
414 int level_code= cavlc_level_tab[suffix_length][bitsi][0];

416 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
417 if(level_code >= 100){
418 prefix= level_code - 100;
419 if(prefix == LEVEL_TAB_BITS)
420 prefix += get_level_prefix(gb);

422 //first coefficient has suffix_length equal to 0 or 1
423 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
425 level_code= (prefix<<1) + get_bits1(gb); //part
427 level_code= prefix; //part
428 }else if(prefix==14){
430 level_code= (prefix<<1) + get_bits1(gb); //part
432 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape code with a (prefix-3)-bit suffix.
434 level_code= 30 + get_bits(gb, prefix-3); //part
437 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
440 level_code += (1<<(prefix-3))-4096;

// With fewer than 3 trailing ones the first level is known to have
// magnitude > 1, so bias the code by 2 before sign-mapping.
444 if(trailing_ones < 3) level_code += 2;

447 mask= -(level_code&1);
448 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// LUT hit: level is already signed; apply the same <3-trailing-ones
// bias in the direction of its sign.
450 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);

// Next suffix_length is 1 or 2 depending on |level| (>3 -> 2).
452 suffix_length = 1 + (level_code + 3U > 6U);
453 level[trailing_ones]= level_code;

456 //remaining coefficients have suffix_length > 0
457 for(i=trailing_ones+1;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented after each level.
458 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
459 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
460 level_code= cavlc_level_tab[suffix_length][bitsi][0];

462 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
463 if(level_code >= 100){
464 prefix= level_code - 100;
465 if(prefix == LEVEL_TAB_BITS){
466 prefix += get_level_prefix(gb);
469 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
471 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
473 level_code += (1<<(prefix-3))-4096;
// Map even/odd level_code to positive/negative signed level.
475 mask= -(level_code&1);
476 level_code= (((2+level_code)>>1) ^ mask) - mask;
478 level[i]= level_code;
479 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];

// A full block has no zeros to distribute; otherwise read total_zeros
// (chroma DC blocks use the small dedicated tables).  The (vlc-1)[x]
// indexing exists because the VLC arrays start at total_coeff==1.
483 if(total_coeff == max_coeff)
486 /* FIXME: we don't actually support 4:2:2 yet. */
488 zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
490 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);

// Scatter levels into the output block in reverse scan order, reading a
// run_before VLC while zeros remain.  The DC branch stores raw levels;
// the AC branch dequantizes with qmul (x*q + 32) >> 6.  Instantiated
// below for 16- and 32-bit DCTELEM depending on h->pixel_shift.
493 #define STORE_BLOCK(type) \
494 scantable += zeros_left + total_coeff - 1; \
495 if(n >= LUMA_DC_BLOCK_INDEX){ \
496 ((type*)block)[*scantable] = level[0]; \
497 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
499 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
501 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
502 zeros_left -= run_before; \
503 scantable -= 1 + run_before; \
504 ((type*)block)[*scantable]= level[i]; \
506 for(;i<total_coeff;i++) { \
508 ((type*)block)[*scantable]= level[i]; \
511 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
512 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
514 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
516 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
517 zeros_left -= run_before; \
518 scantable -= 1 + run_before; \
519 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
521 for(;i<total_coeff;i++) { \
523 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \

527 if (h->pixel_shift) {

// Overconsumed run_before values indicate a corrupt stream.
534 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Decode the luma residual of one macroblock for plane p (p>0 only in
// 4:4:4 where chroma planes are coded like luma).  Handles the three
// layouts: intra16x16 (separate DC block + 15-coeff AC blocks), 8x8
// transform, and plain 4x4 blocks.  Returns <0 on error; for 4:4:4 the
// caller also uses the per-8x8 CBP it accumulates (new_cbp).
// NOTE(review): partial extract — loop/else headers, error returns and
// closing braces are missing from view.
541 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
543 MpegEncContext * const s = &h->s;
// Plane 0 uses the luma qscale; planes 1/2 (4:4:4) use the chroma QPs.
544 int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
545 if(IS_INTRA16x16(mb_type)){
// Clear the 16-entry DC block, then decode it without dequantization
// (qmul=NULL); DC dequant happens later in the IDCT path.
546 AV_ZERO128(h->mb_luma_dc[p]+0);
547 AV_ZERO128(h->mb_luma_dc[p]+8);
548 AV_ZERO128(h->mb_luma_dc[p]+16);
549 AV_ZERO128(h->mb_luma_dc[p]+24);
550 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
551 return -1; //FIXME continue if partitioned and other return -1 too

// Intra16x16 CBP codes luma as all-or-nothing.
554 assert((cbp&15) == 0 || (cbp&15) == 15);

// AC blocks: 15 coefficients each, scan+1 skips the DC position.
557 for(i8x8=0; i8x8<4; i8x8++){
558 for(i4x4=0; i4x4<4; i4x4++){
559 const int index= i4x4 + 4*i8x8 + p*16;
560 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
561 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
// cbp says no luma AC: mark all 16 nnz cache entries as zero.
568 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
// Non-intra16x16 path: pick the dequant matrix set (intra 0-2 vs
// inter 3-5, offset by plane).
572 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
573 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
575 for(i8x8=0; i8x8<4; i8x8++){
577 if(IS_8x8DCT(mb_type)){
// 8x8 transform: the four 4x4 decodes interleave into one 64-coeff
// buffer via the scan8x8 sub-scans.
578 DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
580 for(i4x4=0; i4x4<4; i4x4++){
581 const int index= i4x4 + 4*i8x8 + p*16;
582 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
583 h->dequant8_coeff[cqm][qscale], 16) < 0 )
// Fold the four sub-block nnz counts into one for deblocking.
586 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
587 nnz[0] += nnz[1] + nnz[8] + nnz[9];
588 new_cbp |= !!nnz[0] << i8x8;
// Plain 4x4 transform blocks, full 16-coefficient scan.
590 for(i4x4=0; i4x4<4; i4x4++){
591 const int index= i4x4 + 4*i8x8 + p*16;
592 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
593 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
596 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
// cbp bit clear for this 8x8: zero its four nnz cache entries.
600 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
601 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
608 int ff_h264_decode_mb_cavlc(H264Context *h){
609 MpegEncContext * const s = &h->s;
612 unsigned int mb_type, cbp;
613 int dct8x8_allowed= h->pps.transform_8x8_mode;
614 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
615 const int pixel_shift = h->pixel_shift;
617 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
619 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
620 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
622 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
623 if(s->mb_skip_run==-1)
624 s->mb_skip_run= get_ue_golomb(&s->gb);
626 if (s->mb_skip_run--) {
627 if(FRAME_MBAFF && (s->mb_y&1) == 0){
628 if(s->mb_skip_run==0)
629 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
636 if( (s->mb_y&1) == 0 )
637 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
640 h->prev_mb_skipped= 0;
642 mb_type= get_ue_golomb(&s->gb);
643 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
645 partition_count= b_mb_type_info[mb_type].partition_count;
646 mb_type= b_mb_type_info[mb_type].type;
649 goto decode_intra_mb;
651 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
653 partition_count= p_mb_type_info[mb_type].partition_count;
654 mb_type= p_mb_type_info[mb_type].type;
657 goto decode_intra_mb;
660 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
661 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
665 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
669 cbp= i_mb_type_info[mb_type].cbp;
670 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
671 mb_type= i_mb_type_info[mb_type].type;
675 mb_type |= MB_TYPE_INTERLACED;
677 h->slice_table[ mb_xy ]= h->slice_num;
679 if(IS_INTRA_PCM(mb_type)){
681 static const uint16_t mb_sizes[4] = {256,384,512,768};
682 const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
684 // We assume these blocks are very rare so we do not optimize it.
685 align_get_bits(&s->gb);
687 // The pixels are stored in the same order as levels in h->mb array.
688 for(x=0; x < mb_size; x++){
689 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
692 // In deblocking, the quantizer is 0
693 s->current_picture.f.qscale_table[mb_xy] = 0;
694 // All coeffs are present
695 memset(h->non_zero_count[mb_xy], 16, 48);
697 s->current_picture.f.mb_type[mb_xy] = mb_type;
702 h->ref_count[0] <<= 1;
703 h->ref_count[1] <<= 1;
706 fill_decode_neighbors(h, mb_type);
707 fill_decode_caches(h, mb_type);
710 if(IS_INTRA(mb_type)){
712 // init_top_left_availability(h);
713 if(IS_INTRA4x4(mb_type)){
716 if(dct8x8_allowed && get_bits1(&s->gb)){
717 mb_type |= MB_TYPE_8x8DCT;
721 // fill_intra4x4_pred_table(h);
722 for(i=0; i<16; i+=di){
723 int mode= pred_intra_mode(h, i);
725 if(!get_bits1(&s->gb)){
726 const int rem_mode= get_bits(&s->gb, 3);
727 mode = rem_mode + (rem_mode >= mode);
731 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
733 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
735 write_back_intra_pred_mode(h);
736 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
739 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
740 if(h->intra16x16_pred_mode < 0)
744 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
747 h->chroma_pred_mode= pred_mode;
749 h->chroma_pred_mode = DC_128_PRED8x8;
751 }else if(partition_count==4){
752 int i, j, sub_partition_count[4], list, ref[2][4];
754 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
756 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
757 if(h->sub_mb_type[i] >=13){
758 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
761 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
762 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
764 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
765 ff_h264_pred_direct_motion(h, &mb_type);
766 h->ref_cache[0][scan8[4]] =
767 h->ref_cache[1][scan8[4]] =
768 h->ref_cache[0][scan8[12]] =
769 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
772 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
774 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
775 if(h->sub_mb_type[i] >=4){
776 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
779 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
780 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
784 for(list=0; list<h->list_count; list++){
785 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
787 if(IS_DIRECT(h->sub_mb_type[i])) continue;
788 if(IS_DIR(h->sub_mb_type[i], 0, list)){
792 }else if(ref_count == 2){
793 tmp= get_bits1(&s->gb)^1;
795 tmp= get_ue_golomb_31(&s->gb);
797 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
810 dct8x8_allowed = get_dct8x8_allowed(h);
812 for(list=0; list<h->list_count; list++){
814 if(IS_DIRECT(h->sub_mb_type[i])) {
815 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
818 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
819 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
821 if(IS_DIR(h->sub_mb_type[i], 0, list)){
822 const int sub_mb_type= h->sub_mb_type[i];
823 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
824 for(j=0; j<sub_partition_count[i]; j++){
826 const int index= 4*i + block_width*j;
827 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
828 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
829 mx += get_se_golomb(&s->gb);
830 my += get_se_golomb(&s->gb);
831 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
833 if(IS_SUB_8X8(sub_mb_type)){
835 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
837 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
838 }else if(IS_SUB_8X4(sub_mb_type)){
839 mv_cache[ 1 ][0]= mx;
840 mv_cache[ 1 ][1]= my;
841 }else if(IS_SUB_4X8(sub_mb_type)){
842 mv_cache[ 8 ][0]= mx;
843 mv_cache[ 8 ][1]= my;
845 mv_cache[ 0 ][0]= mx;
846 mv_cache[ 0 ][1]= my;
849 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
855 }else if(IS_DIRECT(mb_type)){
856 ff_h264_pred_direct_motion(h, &mb_type);
857 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
860 //FIXME we should set ref_idx_l? to 0 if we use that later ...
861 if(IS_16X16(mb_type)){
862 for(list=0; list<h->list_count; list++){
864 if(IS_DIR(mb_type, 0, list)){
865 if(h->ref_count[list]==1){
867 }else if(h->ref_count[list]==2){
868 val= get_bits1(&s->gb)^1;
870 val= get_ue_golomb_31(&s->gb);
871 if(val >= h->ref_count[list]){
872 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
876 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
879 for(list=0; list<h->list_count; list++){
880 if(IS_DIR(mb_type, 0, list)){
881 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
882 mx += get_se_golomb(&s->gb);
883 my += get_se_golomb(&s->gb);
884 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
886 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
890 else if(IS_16X8(mb_type)){
891 for(list=0; list<h->list_count; list++){
894 if(IS_DIR(mb_type, i, list)){
895 if(h->ref_count[list] == 1){
897 }else if(h->ref_count[list] == 2){
898 val= get_bits1(&s->gb)^1;
900 val= get_ue_golomb_31(&s->gb);
901 if(val >= h->ref_count[list]){
902 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
907 val= LIST_NOT_USED&0xFF;
908 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
911 for(list=0; list<h->list_count; list++){
914 if(IS_DIR(mb_type, i, list)){
915 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
916 mx += get_se_golomb(&s->gb);
917 my += get_se_golomb(&s->gb);
918 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
920 val= pack16to32(mx,my);
923 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
927 assert(IS_8X16(mb_type));
928 for(list=0; list<h->list_count; list++){
931 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
932 if(h->ref_count[list]==1){
934 }else if(h->ref_count[list]==2){
935 val= get_bits1(&s->gb)^1;
937 val= get_ue_golomb_31(&s->gb);
938 if(val >= h->ref_count[list]){
939 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
944 val= LIST_NOT_USED&0xFF;
945 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
948 for(list=0; list<h->list_count; list++){
951 if(IS_DIR(mb_type, i, list)){
952 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
953 mx += get_se_golomb(&s->gb);
954 my += get_se_golomb(&s->gb);
955 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
957 val= pack16to32(mx,my);
960 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
966 if(IS_INTER(mb_type))
967 write_back_motion(h, mb_type);
969 if(!IS_INTRA16x16(mb_type)){
970 cbp= get_ue_golomb(&s->gb);
974 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
977 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
978 else cbp= golomb_to_inter_cbp [cbp];
981 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
984 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
985 else cbp= golomb_to_inter_cbp_gray[cbp];
989 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
990 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
993 h->cbp_table[mb_xy]= cbp;
994 s->current_picture.f.mb_type[mb_xy] = mb_type;
996 if(cbp || IS_INTRA16x16(mb_type)){
997 int i4x4, chroma_idx;
1000 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1001 const uint8_t *scan, *scan8x8;
1002 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1004 if(IS_INTERLACED(mb_type)){
1005 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1006 scan= s->qscale ? h->field_scan : h->field_scan_q0;
1008 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1009 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1012 dquant= get_se_golomb(&s->gb);
1014 s->qscale += dquant;
1016 if(((unsigned)s->qscale) > max_qp){
1017 if(s->qscale<0) s->qscale+= max_qp+1;
1018 else s->qscale-= max_qp+1;
1019 if(((unsigned)s->qscale) > max_qp){
1020 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1025 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1026 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1028 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1031 h->cbp_table[mb_xy] |= ret << 12;
1033 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1036 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1041 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1042 if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1048 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1049 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1050 for(i4x4=0; i4x4<4; i4x4++){
1051 const int index= 16 + 16*chroma_idx + i4x4;
1052 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1058 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1059 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1063 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1064 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1065 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1067 s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1068 write_back_non_zero_count(h);
1071 h->ref_count[0] >>= 1;
1072 h->ref_count[1] >>= 1;