2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
/*
 * Translation from the coded_block_pattern codeword to the cbp value for
 * macroblocks that are not intra 4x4.  ff_h264_decode_mb_cavlc reads the
 * codeword with get_ue_golomb() and indexes this table with it.
 * NOTE(review): the condition that selects the "_gray" tables instead of
 * golomb_to_inter_cbp / golomb_to_intra4x4_cbp is not visible in this
 * excerpt; presumably it is the non-chroma (monochrome / 4:4:4) case - confirm.
 */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
/* Same translation, used when IS_INTRA4x4(mb_type) is true. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
/*
 * Code lengths / code words for the chroma DC coeff_token VLC.
 * ff_h264_decode_init_vlc passes both arrays to init_vlc() as 4*5 one-byte
 * entries.  (The initializer values are not visible in this excerpt.)
 */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
/*
 * Code lengths for the four coeff_token VLCs; row i is handed to init_vlc()
 * for coeff_token_vlc[i] as 4*17 one-byte entries.
 * decode_residual splits the decoded token as (token>>2, token&3) into
 * total_coeff and trailing_ones, so the entry index is presumably
 * 4*total_coeff + trailing_ones - confirm against init_vlc semantics.
 * NOTE(review): the first line of each row is missing from this excerpt.
 */
65 static const uint8_t coeff_token_len[4][4*17]={
68 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
69 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
70 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
71 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
75 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
76 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
77 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
78 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
82 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
83 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
84 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
85 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
89 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
90 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
92 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
/*
 * Code words matching coeff_token_len entry for entry (same [4][4*17]
 * layout, passed to init_vlc() alongside it).
 */
96 static const uint8_t coeff_token_bits[4][4*17]={
99 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
100 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
101 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
102 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
106 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
107 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
108 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
109 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
113 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
114 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
115 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
116 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
120 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
121 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
122 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
123 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
/*
 * Code lengths for the total_zeros VLCs.  Row i is passed to init_vlc() for
 * total_zeros_vlc[i] as 16 one-byte entries; decode_residual selects the
 * table with (total_zeros_vlc-1)[total_coeff], i.e. row total_coeff-1.
 * Rows are written with fewer than 16 initializers, so the remaining
 * entries are zero-initialized.
 * NOTE(review): only the first rows are visible in this excerpt.
 */
127 static const uint8_t total_zeros_len[16][16]= {
128 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
129 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
130 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
131 {5,3,4,4,3,3,3,4,3,4,5,5,5},
132 {4,4,4,3,3,3,3,3,4,5,4,5},
133 {6,5,3,3,3,3,3,3,4,3,6},
134 {6,5,3,3,3,2,3,4,3,6},
/* Code words matching total_zeros_len entry for entry. */
145 static const uint8_t total_zeros_bits[16][16]= {
146 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
147 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
148 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
149 {3,7,5,4,6,5,4,3,3,2,2,1,0},
150 {5,4,3,7,6,5,4,3,2,1,1,0},
151 {1,1,7,6,5,4,3,2,1,1,0},
152 {1,1,5,4,3,3,2,1,1,0},
/*
 * Lengths / code words for the chroma DC total_zeros VLCs; row i feeds
 * chroma_dc_total_zeros_vlc[i] with 4 entries.  (Values not visible here.)
 */
163 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
169 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
/*
 * Lengths / code words for the run_before VLCs.  Rows 0..5 feed run_vlc[i];
 * row 6 (the only row visible in this excerpt) feeds run7_vlc with 16 entries.
 */
175 static const uint8_t run_len[7][16]={
182 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
185 static const uint8_t run_bits[7][16]={
192 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
/*
 * Static storage for the CAVLC VLC decoders.  Each VLC gets a statically
 * sized table; ff_h264_decode_init_vlc stores the pointer in .table and the
 * matching *_size constant in .table_allocated before calling init_vlc()
 * with INIT_VLC_USE_NEW_STATIC.
 */
/*
 * The four coeff_token VLCs share one backing array: the init code hands
 * each VLC a slice starting at a running offset, advanced by
 * coeff_token_vlc_tables_size[i], and asserts that the sizes add up to the
 * array length (520+332+280+256).
 */
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* One total_zeros VLC per total_coeff value 1..15 (indexed total_coeff-1). */
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before VLCs, selected in decode_residual with (run_vlc-1)[zeros_left]. */
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
/*
 * Backing table for run7_vlc.
 * NOTE(review): the declaration of run7_vlc itself is not visible in this
 * excerpt.
 */
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
/*
 * Direct lookup for coefficient levels, indexed by
 * [suffix_length][next LEVEL_TAB_BITS bits of the stream]:
 *   [..][0] = decoded level, or 100+prefix when the code does not fit,
 *   [..][1] = number of bits to skip.
 * Filled by init_cavlc_level_tab().
 */
219 #define LEVEL_TAB_BITS 8
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
224 * gets the predicted number of non-zero coefficients.
225 * @param n block index
/*
 * Predict the non-zero coefficient count of block n from its left and top
 * neighbours in h->non_zero_count_cache.
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this excerpt; the trace line below prints i&31, which is
 * presumably also the returned value - confirm against the full file.
 */
227 static inline int pred_non_zero_count(H264Context *h, int n){
/* scan8[] maps the block index to its position in the cache; the left
 * neighbour is one entry before it and the top neighbour 8 entries before. */
228 const int index8= scan8[n];
229 const int left= h->non_zero_count_cache[index8 - 1];
230 const int top = h->non_zero_count_cache[index8 - 8];
/* Below 64 the value is halved with rounding up; values of 64 or more are
 * left untouched here. */
233 if(i<64) i= (i+1)>>1;
235 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/*
 * Fill cavlc_level_tab for every suffix_length (0..6) and every possible
 * value i of the next LEVEL_TAB_BITS stream bits.
 *
 * Each entry holds { value, bits to skip }:
 *  - prefix, its terminating bit and the suffix all fit in the window:
 *    value = decoded signed level, bits = prefix + 1 + suffix_length;
 *  - only prefix + terminating bit fit: value = prefix + 100 (escape
 *    marker that decode_residual recognizes via level_code >= 100),
 *    bits = prefix + 1;
 *  - otherwise: value = LEVEL_TAB_BITS + 100, bits = LEVEL_TAB_BITS, which
 *    tells the decoder to keep counting the prefix with get_level_prefix().
 */
240 static av_cold void init_cavlc_level_tab(void){
241 int suffix_length, mask;
244 for(suffix_length=0; suffix_length<7; suffix_length++){
245 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* prefix is derived from the position of the highest set bit of i
 * (presumably the count of leading zero bits in the window - confirm
 * av_log2 behaviour for i == 0). */
246 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* Map the unsigned code to a signed level: even codes become
 * +((code+2)>>1), odd codes become -((code+2)>>1). */
249 mask= -(level_code&1);
250 level_code= (((2+level_code)>>1) ^ mask) - mask;
251 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252 cavlc_level_tab[suffix_length][i][0]= level_code;
253 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
254 }else if(prefix + 1 <= LEVEL_TAB_BITS){
255 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* NOTE(review): the "else" header for the remaining case is not visible in
 * this excerpt. */
258 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/*
 * Build all static CAVLC VLC tables and the level lookup table.
 *
 * Every VLC is pointed at its statically allocated table, told the table
 * capacity through .table_allocated, and then built by init_vlc() with
 * INIT_VLC_USE_NEW_STATIC from the matching *_len / *_bits arrays.
 * NOTE(review): the loop headers, local declarations and any run-once guard
 * are not visible in this excerpt.
 */
265 av_cold void ff_h264_decode_init_vlc(void){
/* Chroma DC coeff_token: 4*5 codes. */
273 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276 &chroma_dc_coeff_token_len [0], 1, 1,
277 &chroma_dc_coeff_token_bits[0], 1, 1,
278 INIT_VLC_USE_NEW_STATIC);
/* coeff_token: each of the VLCs gets a slice of the shared
 * coeff_token_vlc_tables array, starting at the running offset. */
282 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285 &coeff_token_len [i][0], 1, 1,
286 &coeff_token_bits[i][0], 1, 1,
287 INIT_VLC_USE_NEW_STATIC);
288 offset += coeff_token_vlc_tables_size[i];
291 * This is a one time safety check to make sure that
292 * the packed static coeff_token_vlc table sizes
293 * were initialized correctly.
295 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* Chroma DC total_zeros: 4 codes per table. */
298 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300 init_vlc(&chroma_dc_total_zeros_vlc[i],
301 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302 &chroma_dc_total_zeros_len [i][0], 1, 1,
303 &chroma_dc_total_zeros_bits[i][0], 1, 1,
304 INIT_VLC_USE_NEW_STATIC);
/* total_zeros: 16 codes per table. */
307 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309 init_vlc(&total_zeros_vlc[i],
310 TOTAL_ZEROS_VLC_BITS, 16,
311 &total_zeros_len [i][0], 1, 1,
312 &total_zeros_bits[i][0], 1, 1,
313 INIT_VLC_USE_NEW_STATIC);
/* run_before tables built from rows of run_len / run_bits.
 * NOTE(review): the init_vlc argument line carrying the bit width and code
 * count is not visible in this excerpt. */
317 run_vlc[i].table = run_vlc_tables[i];
318 run_vlc[i].table_allocated = run_vlc_tables_size;
319 init_vlc(&run_vlc[i],
321 &run_len [i][0], 1, 1,
322 &run_bits[i][0], 1, 1,
323 INIT_VLC_USE_NEW_STATIC);
/* run7 uses row 6 of run_len / run_bits with 16 codes.
 * NOTE(review): the next statement ends with ',' rather than ';'.  The comma
 * operator chains it with the following assignment, so the result is the
 * same, but it looks like a typo. */
325 run7_vlc.table = run7_vlc_table,
326 run7_vlc.table_allocated = run7_vlc_table_size;
327 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328 &run_len [6][0], 1, 1,
329 &run_bits[6][0], 1, 1,
330 INIT_VLC_USE_NEW_STATIC);
/* Finally build the direct level lookup table. */
332 init_cavlc_level_tab();
/*
 * Read a level prefix from the bitstream using the low-level bit reader
 * macros and consume it.
 * NOTE(review): the local declarations, the reader-opening macro and the
 * return statement are not visible in this excerpt.  decode_residual adds
 * the result to LEVEL_TAB_BITS; presumably the value returned is log-1
 * (the number of zero bits before the first set bit) - confirm.
 */
339 static inline int get_level_prefix(GetBitContext *gb){
344 UPDATE_CACHE(re, gb);
345 buf=GET_CACHE(re, gb);
/* log = 32 minus the index of the highest set bit of the cache word, i.e.
 * the bit count up to and including the first 1 bit. */
347 log= 32 - av_log2(buf);
/* Debug output; presumably compiled only in trace builds (the surrounding
 * preprocessor guard is not visible here). */
349 print_bin(buf>>(32-log), log);
350 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* Consume the prefix and write the reader state back to gb. */
353 LAST_SKIP_BITS(re, gb, log);
354 CLOSE_READER(re, gb);
360 * decodes a residual block.
361 * @param n block index
362 * @param scantable scantable
363 * @param max_coeff number of coefficients in the block
364 * @return <0 if an error occurred
/*
 * Implementation outline (several lines of this function, including some
 * branch headers and return statements, are not visible in this excerpt):
 *  1. read coeff_token -> total_coeff (token>>2) and trailing_ones (token&3);
 *  2. read the signs of the trailing +/-1 coefficients;
 *  3. read the remaining levels via cavlc_level_tab, falling back to
 *     explicit prefix/suffix parsing for codes that do not fit the table;
 *  4. read total_zeros, then run_before for each coefficient, and store the
 *     levels into block walking the scan table backwards (STORE_BLOCK).
 */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367 MpegEncContext * const s = &h->s;
/* Maps a predicted non-zero count (0..16) to one of the 4 coeff_token VLCs. */
368 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
370 int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
372 //FIXME put trailing_ones into the context
/* Chroma DC path (the guarding condition is not visible in this excerpt). */
375 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376 total_coeff= coeff_token>>2;
/* Luma DC blocks predict from block (n - LUMA_DC_BLOCK_INDEX)*16; all other
 * blocks predict from n itself.  total_coeff is used twice on purpose:
 * first as the prediction that picks the VLC, then as the decoded count. */
378 if(n >= LUMA_DC_BLOCK_INDEX){
379 total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
380 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381 total_coeff= coeff_token>>2;
383 total_coeff= pred_non_zero_count(h, n);
384 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385 total_coeff= coeff_token>>2;
/* Remember the count so that neighbouring blocks can predict from it. */
388 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
390 //FIXME set last_non_zero?
/* The comparison is done in unsigned arithmetic, so a negative total_coeff
 * is also rejected here. */
394 if(total_coeff > (unsigned)max_coeff) {
395 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
399 trailing_ones= coeff_token&3;
400 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401 assert(total_coeff<=16);
/* Peek 3 bits but consume only trailing_ones of them.  All three level
 * slots are filled unconditionally: a set sign bit gives -1, a clear one
 * gives +1.  Slots beyond trailing_ones are overwritten below. */
403 i = show_bits(gb, 3);
404 skip_bits(gb, trailing_ones);
405 level[0] = 1-((i&4)>>1);
406 level[1] = 1-((i&2) );
407 level[2] = 1-((i&1)<<1);
409 if(trailing_ones<total_coeff) {
/* Both operands are 0/1 comparison results, so bitwise & acts as logical
 * AND here: suffix_length starts at 1 only when total_coeff > 10 and
 * trailing_ones < 3. */
411 int suffix_length = total_coeff > 10 & trailing_ones < 3;
412 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
415 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* Table values >= 100 are escapes carrying only the prefix (value - 100);
 * a prefix equal to LEVEL_TAB_BITS means the prefix did not end inside the
 * lookup window and counting continues with get_level_prefix(). */
416 if(level_code >= 100){
417 prefix= level_code - 100;
418 if(prefix == LEVEL_TAB_BITS)
419 prefix += get_level_prefix(gb);
421 //first coefficient has suffix_length equal to 0 or 1
422 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
424 level_code= (prefix<<1) + get_bits1(gb); //part
426 level_code= prefix; //part
427 }else if(prefix==14){
429 level_code= (prefix<<1) + get_bits1(gb); //part
431 level_code= prefix + get_bits(gb, 4); //part
433 level_code= 30 + get_bits(gb, prefix-3); //part
436 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
439 level_code += (1<<(prefix-3))-4096;
443 if(trailing_ones < 3) level_code += 2;
/* Unsigned code -> signed level: even codes positive, odd codes negative. */
446 mask= -(level_code&1);
447 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
/* Table hit: level_code is already signed.  When trailing_ones < 3 its
 * magnitude is increased by one, keeping the sign ((level_code>>31)|1 is
 * -1 for negative values and +1 otherwise, given an arithmetic shift on a
 * 32-bit int). */
449 level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
/* The unsigned comparison is true exactly when level_code is outside
 * [-3, 3], giving suffix_length 2; otherwise 1. */
451 suffix_length = 1 + (level_code + 3U > 6U);
452 level[trailing_ones]= level_code;
455 //remaining coefficients have suffix_length > 0
456 for(i=trailing_ones+1;i<total_coeff;i++) {
457 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
458 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
459 level_code= cavlc_level_tab[suffix_length][bitsi][0];
461 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
462 if(level_code >= 100){
463 prefix= level_code - 100;
464 if(prefix == LEVEL_TAB_BITS){
465 prefix += get_level_prefix(gb);
468 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
470 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
472 level_code += (1<<(prefix-3))-4096;
474 mask= -(level_code&1);
475 level_code= (((2+level_code)>>1) ^ mask) - mask;
477 level[i]= level_code;
/* Branch-free form of "if |level_code| > suffix_limit[suffix_length] then
 * suffix_length++": the unsigned comparison is true when level_code is
 * above the limit or below its negative. */
478 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
/* A full block has no room for zeros (the assignment that follows this
 * test is not visible in this excerpt). */
482 if(total_coeff == max_coeff)
485 /* FIXME: we don't actually support 4:2:2 yet. */
/* (vlc_array-1)[total_coeff] selects entry total_coeff-1. */
487 zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
489 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
/*
 * STORE_BLOCK(type): write the decoded levels into block, viewed as an
 * array of "type".  scantable is first advanced to the position of the
 * first decoded coefficient (zeros_left + total_coeff - 1) and then walks
 * backwards by 1 + run_before per coefficient.  Blocks with
 * n >= LUMA_DC_BLOCK_INDEX store the raw level; all others store
 * (level * qmul[pos] + 32) >> 6.  Once zeros_left reaches 0 the remaining
 * levels are stored without reading further run codes.
 */
492 #define STORE_BLOCK(type) \
493 scantable += zeros_left + total_coeff - 1; \
494 if(n >= LUMA_DC_BLOCK_INDEX){ \
495 ((type*)block)[*scantable] = level[0]; \
496 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
498 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
500 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
501 zeros_left -= run_before; \
502 scantable -= 1 + run_before; \
503 ((type*)block)[*scantable]= level[i]; \
505 for(;i<total_coeff;i++) { \
507 ((type*)block)[*scantable]= level[i]; \
510 ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
511 for(i=1;i<total_coeff && zeros_left > 0;i++) { \
513 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
515 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
516 zeros_left -= run_before; \
517 scantable -= 1 + run_before; \
518 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
520 for(;i<total_coeff;i++) { \
522 ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
526 if (h->pixel_shift) { /* picks the element type given to STORE_BLOCK (the invocations are not visible in this excerpt) */
533 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/*
 * Decode the residual of one 16x16 plane p of the current macroblock
 * (p == 0 uses s->qscale, other planes use h->chroma_qp[p-1]).
 *
 * Intra 16x16 macroblocks decode a DC block first and then, depending on
 * cbp, fifteen-coefficient AC blocks; all other macroblocks decode each 8x8
 * quadrant either as four interleaved parts of an 8x8 transform block or as
 * four 4x4 blocks.
 * NOTE(review): several branch headers and all return statements except the
 * first are not visible in this excerpt.  The caller treats a negative
 * result as an error and ORs (result << 12) into h->cbp_table, so the
 * function presumably returns new_cbp on success - confirm.
 */
540 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
542 MpegEncContext * const s = &h->s;
543 int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
544 if(IS_INTRA16x16(mb_type)){
/* Clear the DC coefficient buffer of this plane before decoding into it. */
545 AV_ZERO128(h->mb_luma_dc[p]+0);
546 AV_ZERO128(h->mb_luma_dc[p]+8);
547 AV_ZERO128(h->mb_luma_dc[p]+16);
548 AV_ZERO128(h->mb_luma_dc[p]+24);
/* DC block: no dequantization table (qmul == NULL), up to 16 coefficients. */
549 if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
550 return -1; //FIXME continue if partitioned and other return -1 too
/* For intra 16x16 the luma cbp bits are all set or all clear. */
553 assert((cbp&15) == 0 || (cbp&15) == 15);
/* AC blocks: the scan starts at position 1 and carries at most 15
 * coefficients because the DC coefficient was decoded separately above. */
556 for(i8x8=0; i8x8<4; i8x8++){
557 for(i4x4=0; i4x4<4; i4x4++){
558 const int index= i4x4 + 4*i8x8 + p*16;
559 if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
560 index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
/* No AC coefficients coded: clear the 4x4 non-zero counts of this plane
 * (the branch header is not visible in this excerpt). */
567 fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
/* Dequantization matrix index: 0..2 for intra, 3..5 for inter, by plane. */
571 int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
572 /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
574 for(i8x8=0; i8x8<4; i8x8++){
576 if(IS_8x8DCT(mb_type)){
/* The four decode_residual calls below all write into the same 64
 * coefficient buffer, each with its own 16-entry slice of scan8x8. */
577 DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
579 for(i4x4=0; i4x4<4; i4x4++){
580 const int index= i4x4 + 4*i8x8 + p*16;
581 if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
582 h->dequant8_coeff[cqm][qscale], 16) < 0 )
/* Sum the four partial counts into the first cache entry of the quadrant
 * and record whether the 8x8 block has any coefficient. */
585 nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
586 nnz[0] += nnz[1] + nnz[8] + nnz[9];
587 new_cbp |= !!nnz[0] << i8x8;
589 for(i4x4=0; i4x4<4; i4x4++){
590 const int index= i4x4 + 4*i8x8 + p*16;
591 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
592 scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
/* NOTE(review): the raw count (not a 0/1 flag) is shifted into new_cbp, so
 * a count above 1 also sets bits higher than i8x8. */
595 new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
/* Quadrant without coded coefficients: clear its four non-zero counts
 * (the branch header is not visible in this excerpt). */
599 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
600 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
607 int ff_h264_decode_mb_cavlc(H264Context *h){
608 MpegEncContext * const s = &h->s;
611 unsigned int mb_type, cbp;
612 int dct8x8_allowed= h->pps.transform_8x8_mode;
613 int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
614 const int pixel_shift = h->pixel_shift;
616 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
618 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
619 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
621 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
622 if(s->mb_skip_run==-1)
623 s->mb_skip_run= get_ue_golomb(&s->gb);
625 if (s->mb_skip_run--) {
626 if(FRAME_MBAFF && (s->mb_y&1) == 0){
627 if(s->mb_skip_run==0)
628 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
635 if( (s->mb_y&1) == 0 )
636 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
639 h->prev_mb_skipped= 0;
641 mb_type= get_ue_golomb(&s->gb);
642 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
644 partition_count= b_mb_type_info[mb_type].partition_count;
645 mb_type= b_mb_type_info[mb_type].type;
648 goto decode_intra_mb;
650 }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
652 partition_count= p_mb_type_info[mb_type].partition_count;
653 mb_type= p_mb_type_info[mb_type].type;
656 goto decode_intra_mb;
659 assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
660 if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
664 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
668 cbp= i_mb_type_info[mb_type].cbp;
669 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
670 mb_type= i_mb_type_info[mb_type].type;
674 mb_type |= MB_TYPE_INTERLACED;
676 h->slice_table[ mb_xy ]= h->slice_num;
678 if(IS_INTRA_PCM(mb_type)){
680 static const uint16_t mb_sizes[4] = {256,384,512,768};
681 const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
683 // We assume these blocks are very rare so we do not optimize it.
684 align_get_bits(&s->gb);
686 // The pixels are stored in the same order as levels in h->mb array.
687 for(x=0; x < mb_size; x++){
688 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
691 // In deblocking, the quantizer is 0
692 s->current_picture.f.qscale_table[mb_xy] = 0;
693 // All coeffs are present
694 memset(h->non_zero_count[mb_xy], 16, 48);
696 s->current_picture.f.mb_type[mb_xy] = mb_type;
701 h->ref_count[0] <<= 1;
702 h->ref_count[1] <<= 1;
705 fill_decode_neighbors(h, mb_type);
706 fill_decode_caches(h, mb_type);
709 if(IS_INTRA(mb_type)){
711 // init_top_left_availability(h);
712 if(IS_INTRA4x4(mb_type)){
715 if(dct8x8_allowed && get_bits1(&s->gb)){
716 mb_type |= MB_TYPE_8x8DCT;
720 // fill_intra4x4_pred_table(h);
721 for(i=0; i<16; i+=di){
722 int mode= pred_intra_mode(h, i);
724 if(!get_bits1(&s->gb)){
725 const int rem_mode= get_bits(&s->gb, 3);
726 mode = rem_mode + (rem_mode >= mode);
730 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
732 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
734 write_back_intra_pred_mode(h);
735 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
738 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
739 if(h->intra16x16_pred_mode < 0)
743 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
746 h->chroma_pred_mode= pred_mode;
748 h->chroma_pred_mode = DC_128_PRED8x8;
750 }else if(partition_count==4){
751 int i, j, sub_partition_count[4], list, ref[2][4];
753 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
755 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
756 if(h->sub_mb_type[i] >=13){
757 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
760 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
761 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
763 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
764 ff_h264_pred_direct_motion(h, &mb_type);
765 h->ref_cache[0][scan8[4]] =
766 h->ref_cache[1][scan8[4]] =
767 h->ref_cache[0][scan8[12]] =
768 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
771 assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
773 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
774 if(h->sub_mb_type[i] >=4){
775 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
778 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
779 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
783 for(list=0; list<h->list_count; list++){
784 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
786 if(IS_DIRECT(h->sub_mb_type[i])) continue;
787 if(IS_DIR(h->sub_mb_type[i], 0, list)){
791 }else if(ref_count == 2){
792 tmp= get_bits1(&s->gb)^1;
794 tmp= get_ue_golomb_31(&s->gb);
796 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
809 dct8x8_allowed = get_dct8x8_allowed(h);
811 for(list=0; list<h->list_count; list++){
813 if(IS_DIRECT(h->sub_mb_type[i])) {
814 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
817 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
818 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
820 if(IS_DIR(h->sub_mb_type[i], 0, list)){
821 const int sub_mb_type= h->sub_mb_type[i];
822 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
823 for(j=0; j<sub_partition_count[i]; j++){
825 const int index= 4*i + block_width*j;
826 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
827 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
828 mx += get_se_golomb(&s->gb);
829 my += get_se_golomb(&s->gb);
830 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
832 if(IS_SUB_8X8(sub_mb_type)){
834 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
836 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
837 }else if(IS_SUB_8X4(sub_mb_type)){
838 mv_cache[ 1 ][0]= mx;
839 mv_cache[ 1 ][1]= my;
840 }else if(IS_SUB_4X8(sub_mb_type)){
841 mv_cache[ 8 ][0]= mx;
842 mv_cache[ 8 ][1]= my;
844 mv_cache[ 0 ][0]= mx;
845 mv_cache[ 0 ][1]= my;
848 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
854 }else if(IS_DIRECT(mb_type)){
855 ff_h264_pred_direct_motion(h, &mb_type);
856 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
859 //FIXME we should set ref_idx_l? to 0 if we use that later ...
860 if(IS_16X16(mb_type)){
861 for(list=0; list<h->list_count; list++){
863 if(IS_DIR(mb_type, 0, list)){
864 if(h->ref_count[list]==1){
866 }else if(h->ref_count[list]==2){
867 val= get_bits1(&s->gb)^1;
869 val= get_ue_golomb_31(&s->gb);
870 if(val >= h->ref_count[list]){
871 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
875 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
878 for(list=0; list<h->list_count; list++){
879 if(IS_DIR(mb_type, 0, list)){
880 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
881 mx += get_se_golomb(&s->gb);
882 my += get_se_golomb(&s->gb);
883 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
885 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
889 else if(IS_16X8(mb_type)){
890 for(list=0; list<h->list_count; list++){
893 if(IS_DIR(mb_type, i, list)){
894 if(h->ref_count[list] == 1){
896 }else if(h->ref_count[list] == 2){
897 val= get_bits1(&s->gb)^1;
899 val= get_ue_golomb_31(&s->gb);
900 if(val >= h->ref_count[list]){
901 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
906 val= LIST_NOT_USED&0xFF;
907 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
910 for(list=0; list<h->list_count; list++){
913 if(IS_DIR(mb_type, i, list)){
914 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
915 mx += get_se_golomb(&s->gb);
916 my += get_se_golomb(&s->gb);
917 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
919 val= pack16to32(mx,my);
922 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
926 assert(IS_8X16(mb_type));
927 for(list=0; list<h->list_count; list++){
930 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
931 if(h->ref_count[list]==1){
933 }else if(h->ref_count[list]==2){
934 val= get_bits1(&s->gb)^1;
936 val= get_ue_golomb_31(&s->gb);
937 if(val >= h->ref_count[list]){
938 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
943 val= LIST_NOT_USED&0xFF;
944 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
947 for(list=0; list<h->list_count; list++){
950 if(IS_DIR(mb_type, i, list)){
951 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
952 mx += get_se_golomb(&s->gb);
953 my += get_se_golomb(&s->gb);
954 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
956 val= pack16to32(mx,my);
959 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
965 if(IS_INTER(mb_type))
966 write_back_motion(h, mb_type);
968 if(!IS_INTRA16x16(mb_type)){
969 cbp= get_ue_golomb(&s->gb);
973 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
976 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
977 else cbp= golomb_to_inter_cbp [cbp];
980 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
983 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
984 else cbp= golomb_to_inter_cbp_gray[cbp];
988 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
989 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
992 h->cbp_table[mb_xy]= cbp;
993 s->current_picture.f.mb_type[mb_xy] = mb_type;
995 if(cbp || IS_INTRA16x16(mb_type)){
996 int i4x4, chroma_idx;
999 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1000 const uint8_t *scan, *scan8x8;
1001 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1003 if(IS_INTERLACED(mb_type)){
1004 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1005 scan= s->qscale ? h->field_scan : h->field_scan_q0;
1007 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1008 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1011 dquant= get_se_golomb(&s->gb);
1013 s->qscale += dquant;
1015 if(((unsigned)s->qscale) > max_qp){
1016 if(s->qscale<0) s->qscale+= max_qp+1;
1017 else s->qscale-= max_qp+1;
1018 if(((unsigned)s->qscale) > max_qp){
1019 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1024 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1025 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1027 if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1030 h->cbp_table[mb_xy] |= ret << 12;
1032 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1035 if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1040 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1041 if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1047 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1048 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1049 for(i4x4=0; i4x4<4; i4x4++){
1050 const int index= 16 + 16*chroma_idx + i4x4;
1051 if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1057 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1058 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1062 fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1063 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1064 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1066 s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1067 write_back_non_zero_count(h);
1070 h->ref_count[0] >>= 1;
1071 h->ref_count[1] >>= 1;