2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264_cavlc.c
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
32 #include "h264data.h" // FIXME FIXME FIXME
33 #include "h264_mvpred.h"
37 #include "x86/h264_i386.h"
// Maps the ue-golomb code read for the coded block pattern to the cbp value
// of an intra 4x4 macroblock; ff_h264_decode_mb_cavlc indexes it directly
// with the decoded code.
43 static const uint8_t golomb_to_intra4x4_cbp[48]={
44 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46,
45 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4,
46 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41
// Same mapping as the intra 4x4 table but used for every macroblock type
// that is not intra 4x4 (the else branch of the cbp lookup in
// ff_h264_decode_mb_cavlc).
49 static const uint8_t golomb_to_inter_cbp[48]={
50 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13,
51 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
52 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
// 16-entry variant of the inter cbp mapping; every value fits in four bits.
// NOTE(review): the condition that selects the "gray" tables is not visible
// in this excerpt -- presumably streams without chroma; confirm.
55 static const uint8_t golomb_to_inter_cbp_gray[16]={
56 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
// 16-entry variant of the intra 4x4 cbp mapping; every value fits in four bits.
// NOTE(review): the condition that selects the "gray" tables is not visible
// in this excerpt -- presumably streams without chroma; confirm.
59 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
60 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
// Source data for the chroma DC coeff_token VLC: 4*5 entries each, passed
// together to init_vlc in ff_h264_decode_init_vlc. By name, "len" holds the
// code lengths and "bits" the code words. The initializer rows are not
// visible in this excerpt.
63 static const uint8_t chroma_dc_coeff_token_len[4*5]={
71 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
// Code lengths (by name) for the four luma/chroma-AC coeff_token VLCs, 4*17
// entries per table. decode_residual splits the decoded symbol into
// total_coeff = symbol>>2 and trailing_ones = symbol&3, so entries come in
// groups of four per total_coeff value. Only 16 of the 17 groups of each
// table are visible here; the separators between tables are missing too.
79 static const uint8_t coeff_token_len[4][4*17]={
82 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
83 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
84 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
85 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
89 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
90 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
91 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
92 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
96 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
97 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
98 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
99 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
103 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
104 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
105 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
106 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
// Code words (by name) matching coeff_token_len entry for entry; both arrays
// are passed to init_vlc for each of the four coeff_token tables.
// The fourth table visibly uses a constant length of 6 in coeff_token_len,
// and its code words here are mostly consecutive integers.
110 static const uint8_t coeff_token_bits[4][4*17]={
113 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
114 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
115 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
116 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
120 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
121 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
122 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
123 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
127 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
128 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
129 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
130 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
134 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
135 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
136 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
137 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
// Code lengths (by name) for the total_zeros VLCs. decode_residual selects
// the row with total_coeff-1, and ff_h264_decode_init_vlc passes 16 entries
// of row i to init_vlc. Rows get shorter as the index grows; the remaining
// initializer entries of each row are implicitly zero.
// Only the first seven rows are visible in this excerpt.
141 static const uint8_t total_zeros_len[16][16]= {
142 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
143 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
144 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
145 {5,3,4,4,3,3,3,4,3,4,5,5,5},
146 {4,4,4,3,3,3,3,3,4,5,4,5},
147 {6,5,3,3,3,3,3,3,4,3,6},
148 {6,5,3,3,3,2,3,4,3,6},
// Code words (by name) matching total_zeros_len row for row and entry for
// entry; both arrays are passed to init_vlc together.
// Only the first seven rows are visible in this excerpt.
159 static const uint8_t total_zeros_bits[16][16]= {
160 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
161 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
162 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
163 {3,7,5,4,6,5,4,3,3,2,2,1,0},
164 {5,4,3,7,6,5,4,3,2,1,1,0},
165 {1,1,7,6,5,4,3,2,1,1,0},
166 {1,1,5,4,3,3,2,1,1,0},
// Source data for the three chroma DC total_zeros VLCs (4 entries each),
// selected in decode_residual with total_coeff-1 when the block is the
// chroma DC block. The initializer rows are not visible in this excerpt.
177 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
183 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
// Source data for the run_before VLCs. ff_h264_decode_init_vlc feeds rows
// indexed by its loop variable into run_vlc[] and row 6 into run7_vlc;
// decode_residual uses run_vlc[zeros_left-1] while zeros_left < 7 and
// run7_vlc otherwise. Only row 6 of each array is visible in this excerpt.
189 static const uint8_t run_len[7][16]={
196 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
199 static const uint8_t run_bits[7][16]={
206 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
// Statically allocated VLC descriptors and their backing tables; they are
// wired together and filled by ff_h264_decode_init_vlc.
// The four coeff_token tables share one packed array; the per-table sizes
// below are used there as running offsets, and an assert checks that their
// sum matches the array length.
209 static VLC coeff_token_vlc[4];
210 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
211 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
213 static VLC chroma_dc_coeff_token_vlc;
214 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
215 static const int chroma_dc_coeff_token_vlc_table_size = 256;
// One table per total_coeff value 1..15 (selected with total_coeff-1).
217 static VLC total_zeros_vlc[15];
218 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
219 static const int total_zeros_vlc_tables_size = 512;
221 static VLC chroma_dc_total_zeros_vlc[3];
222 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
223 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
// run_before tables for zeros_left 1..6 (selected with zeros_left-1).
225 static VLC run_vlc[6];
226 static VLC_TYPE run_vlc_tables[6][8][2];
227 static const int run_vlc_tables_size = 8;
// Backing store of run7_vlc, used when zeros_left >= 7 (the declaration of
// run7_vlc itself is not visible in this excerpt).
230 static VLC_TYPE run7_vlc_table[96][2];
231 static const int run7_vlc_table_size = 96;
// Level lookup: [suffix_length][next LEVEL_TAB_BITS bits][0] is the decoded
// level or an escape value >= 100, [..][1] is the number of bits to consume.
233 #define LEVEL_TAB_BITS 8
234 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
238 * gets the predicted number of non-zero coefficients.
239 * @param n block index
// Reads the cached non-zero-coefficient counts of the left (index8 - 1) and
// top (index8 - 8) neighbours of block n and derives a prediction from them.
// NOTE(review): the declaration of `i` and the return statement are not
// visible in this excerpt; `i` presumably starts as left + top -- confirm.
241 static inline int pred_non_zero_count(H264Context *h, int n){
242 const int index8= scan8[n];
243 const int left= h->non_zero_count_cache[index8 - 1];
244 const int top = h->non_zero_count_cache[index8 - 8];
// Below 64, i is replaced by its rounded-up half; larger values stay as is.
247 if(i<64) i= (i+1)>>1;
// Trace output only; the prediction is printed masked to its low five bits.
249 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Precomputes cavlc_level_tab. For every suffix_length (0..6) and every
// LEVEL_TAB_BITS-bit lookahead pattern i it stores
//   [0] the decoded signed level, or an escape value >= 100
//   [1] the number of bits the decoder has to consume
// Escape prefix+100 means only the prefix (plus its terminating bit) fitted
// into the lookahead; LEVEL_TAB_BITS+100 means the prefix did not terminate
// within the lookahead at all. decode_residual tests for ">= 100".
254 static av_cold void init_cavlc_level_tab(void){
255 int suffix_length, mask;
258 for(suffix_length=0; suffix_length<7; suffix_length++){
259 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// Number of leading zero bits of the pattern (assumes av_log2 is the
// integer floor(log2) -- TODO confirm against its definition).
260 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
// The shift count LEVEL_TAB_BITS-prefix-1-suffix_length is negative whenever
// the complete code does not fit into the lookahead (e.g. i == 0), which is
// undefined behaviour in C. The value is only stored in the "fits" branch
// below, so evaluate the expression only in that case and use 0 otherwise.
261 int level_code= (prefix + 1 + suffix_length <= LEVEL_TAB_BITS) ? (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length) : 0;
// Map the unsigned code to a signed level: 0,1,2,3,... -> +1,-1,+2,-2,...
263 mask= -(level_code&1);
264 level_code= (((2+level_code)>>1) ^ mask) - mask;
// Case 1: prefix, terminator and suffix all fit -> store the final level.
265 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
266 cavlc_level_tab[suffix_length][i][0]= level_code;
267 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// Case 2: only prefix and terminator fit -> escape carrying the prefix.
268 }else if(prefix + 1 <= LEVEL_TAB_BITS){
269 cavlc_level_tab[suffix_length][i][0]= prefix+100;
270 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// Remaining case (its branch keyword is not visible in this excerpt):
// consume the whole lookahead and let the decoder continue counting.
272 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
273 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// Builds every static CAVLC lookup table used by the decoder. Each VLC
// descriptor is pointed at its statically allocated table, told the table's
// capacity, and then filled by init_vlc with INIT_VLC_USE_NEW_STATIC.
// Finally the level lookup table is generated by init_cavlc_level_tab.
// The loop headers and any run-once guard are not visible in this excerpt.
279 av_cold void ff_h264_decode_init_vlc(void){
// Chroma DC coeff_token: a single table with 4*5 codes.
287 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
288 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
289 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
290 &chroma_dc_coeff_token_len [0], 1, 1,
291 &chroma_dc_coeff_token_bits[0], 1, 1,
292 INIT_VLC_USE_NEW_STATIC);
// coeff_token: the tables are carved out of one packed array; `offset`
// advances by each table's size.
296 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
297 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
298 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
299 &coeff_token_len [i][0], 1, 1,
300 &coeff_token_bits[i][0], 1, 1,
301 INIT_VLC_USE_NEW_STATIC);
302 offset += coeff_token_vlc_tables_size[i];
305 * This is a one time safety check to make sure that
306 * the packed static coeff_token_vlc table sizes
307 * were initialized correctly.
309 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// Chroma DC total_zeros: 4 codes per table.
312 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
313 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
314 init_vlc(&chroma_dc_total_zeros_vlc[i],
315 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
316 &chroma_dc_total_zeros_len [i][0], 1, 1,
317 &chroma_dc_total_zeros_bits[i][0], 1, 1,
318 INIT_VLC_USE_NEW_STATIC);
// total_zeros: 16 codes per table.
321 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
322 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
323 init_vlc(&total_zeros_vlc[i],
324 TOTAL_ZEROS_VLC_BITS, 16,
325 &total_zeros_len [i][0], 1, 1,
326 &total_zeros_bits[i][0], 1, 1,
327 INIT_VLC_USE_NEW_STATIC);
// run_before tables (the bit-width/code-count argument line of this call is
// not visible in this excerpt).
331 run_vlc[i].table = run_vlc_tables[i];
332 run_vlc[i].table_allocated = run_vlc_tables_size;
333 init_vlc(&run_vlc[i],
335 &run_len [i][0], 1, 1,
336 &run_bits[i][0], 1, 1,
337 INIT_VLC_USE_NEW_STATIC);
// run_before table built from row 6 of run_len/run_bits. The first
// assignment used to end in a comma, silently merging it with the next
// statement into one comma expression; it is now a statement of its own.
339 run7_vlc.table = run7_vlc_table;
340 run7_vlc.table_allocated = run7_vlc_table_size;
341 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
342 &run_len [6][0], 1, 1,
343 &run_bits[6][0], 1, 1,
344 INIT_VLC_USE_NEW_STATIC);
346 init_cavlc_level_tab();
// Measures a level prefix directly from the bit reader cache and consumes it.
// `log` is 32 minus the position of the highest set bit in the cached word.
// NOTE(review): the declarations and the return statement are not visible in
// this excerpt; the debug line labels log-1 as "lpr", so the function
// presumably returns log-1 -- confirm.
353 static inline int get_level_prefix(GetBitContext *gb){
358 UPDATE_CACHE(re, gb);
359 buf=GET_CACHE(re, gb);
361 log= 32 - av_log2(buf);
// Debug-only dump of the consumed bits and the derived values (the guard
// that enables it is not visible in this excerpt).
363 print_bin(buf>>(32-log), log);
364 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume exactly `log` bits and write the reader state back.
367 LAST_SKIP_BITS(re, gb, log);
368 CLOSE_READER(re, gb);
374 * decodes a residual block.
375 * @param n block index
376 * @param scantable scantable
377 * @param max_coeff number of coefficients in the block
378 * @return <0 if an error occurred
// Parses one CAVLC residual block from gb and writes its coefficients into
// block[] at the positions given by scantable.
// Steps visible below: read coeff_token (total_coeff and trailing_ones),
// read the signs of the trailing +-1 coefficients, decode the remaining
// levels through cavlc_level_tab with escape handling, read total_zeros,
// then place the coefficients from the highest scan position downwards,
// reading a run_before between consecutive coefficients.
// n selects the coeff_token table: CHROMA_DC_BLOCK_INDEX and
// LUMA_DC_BLOCK_INDEX are special values, anything else is a block index.
// Many short lines (returns, braces, else branches, the declaration of
// level[]) are not visible in this excerpt.
380 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
381 MpegEncContext * const s = &h->s;
// Maps the predicted non-zero count (0..16) to one of the four coeff_token tables.
382 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
384 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
386 //FIXME put trailing_ones into the context
// Chroma DC uses its own fixed coeff_token table.
388 if(n == CHROMA_DC_BLOCK_INDEX){
389 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
390 total_coeff= coeff_token>>2;
// Luma DC: the table is chosen from the prediction for block 0; total_coeff
// temporarily holds that prediction before being overwritten.
392 if(n == LUMA_DC_BLOCK_INDEX){
393 total_coeff= pred_non_zero_count(h, 0);
394 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
395 total_coeff= coeff_token>>2;
// Regular block: predict from block n and record the decoded count in the cache.
397 total_coeff= pred_non_zero_count(h, n);
398 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
399 total_coeff= coeff_token>>2;
400 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
404 //FIXME set last_non_zero?
// Reject counts larger than the block can hold (the comparison is unsigned).
408 if(total_coeff > (unsigned)max_coeff) {
409 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
413 trailing_ones= coeff_token&3;
414 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
415 assert(total_coeff<=16);
// Peek three sign bits but consume only trailing_ones of them; a set bit
// yields -1, a clear bit +1. Entries beyond trailing_ones are overwritten
// by the level decoding below when more coefficients follow.
417 i = show_bits(gb, 3);
418 skip_bits(gb, trailing_ones);
419 level[0] = 1-((i&4)>>1);
420 level[1] = 1-((i&2) );
421 level[2] = 1-((i&1)<<1);
423 if(trailing_ones<total_coeff) {
// Initial suffix_length is 1 only for blocks with more than 10 coefficients
// and fewer than 3 trailing ones, otherwise 0.
425 int suffix_length = total_coeff > 10 && trailing_ones < 3;
426 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
427 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
429 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Table values >= 100 are escapes carrying the prefix; a prefix equal to
// LEVEL_TAB_BITS means the prefix continues past the lookahead.
430 if(level_code >= 100){
431 prefix= level_code - 100;
432 if(prefix == LEVEL_TAB_BITS)
433 prefix += get_level_prefix(gb);
435 //first coefficient has suffix_length equal to 0 or 1
436 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
438 level_code= (prefix<<1) + get_bits1(gb); //part
440 level_code= prefix; //part
441 }else if(prefix==14){
443 level_code= (prefix<<1) + get_bits1(gb); //part
445 level_code= prefix + get_bits(gb, 4); //part
// NOTE(review): prefix comes from the bitstream and no upper bound on it is
// visible here before it is used as a bit count and as a shift amount --
// confirm whether a range check exists on the lines not shown.
447 level_code= 30 + get_bits(gb, prefix-3); //part
449 level_code += (1<<(prefix-3))-4096;
452 if(trailing_ones < 3) level_code += 2;
// Unsigned code to signed level: even codes positive, odd codes negative.
455 mask= -(level_code&1);
456 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// Non-escape path: the table already holds a signed level; with fewer than
// three trailing ones its magnitude is increased by one.
458 if(trailing_ones < 3) level_code += (level_code>>31)|1;
// True when level_code lies outside -3..3 (unsigned range trick); the
// dependent statement is not visible in this excerpt.
461 if(level_code + 3U > 6U)
463 level[trailing_ones]= level_code;
466 //remaining coefficients have suffix_length > 0
467 for(i=trailing_ones+1;i<total_coeff;i++) {
// Magnitude thresholds per suffix_length, used in the test at the loop end.
468 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
469 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
470 level_code= cavlc_level_tab[suffix_length][bitsi][0];
472 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
473 if(level_code >= 100){
474 prefix= level_code - 100;
475 if(prefix == LEVEL_TAB_BITS){
476 prefix += get_level_prefix(gb);
479 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
// NOTE(review): same unchecked use of prefix as a bit count / shift amount.
481 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
483 level_code += (1<<(prefix-3))-4096;
485 mask= -(level_code&1);
486 level_code= (((2+level_code)>>1) ^ mask) - mask;
488 level[i]= level_code;
// True when |level_code| exceeds suffix_limit[suffix_length] (unsigned range
// trick); the dependent statement is not visible in this excerpt.
490 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// A full block leaves no room for zeros (the dependent statement is not
// visible); otherwise total_zeros is read from the table for this count.
495 if(total_coeff == max_coeff)
498 if(n == CHROMA_DC_BLOCK_INDEX)
499 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
501 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Scan position of the first decoded (highest-frequency) coefficient.
504 coeff_num = zeros_left + total_coeff - 1;
505 j = scantable[coeff_num];
// Two variants of the placement loop follow; the condition choosing between
// them and the stores of the first variant are not visible in this excerpt.
// Only the second variant visibly scales by qmul.
508 for(i=1;i<total_coeff;i++) {
511 else if(zeros_left < 7){
512 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
514 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
516 zeros_left -= run_before;
517 coeff_num -= 1 + run_before;
518 j= scantable[ coeff_num ];
// Second variant: dequantise while storing, with rounding (+32, >>6).
523 block[j] = (level[0] * qmul[j] + 32)>>6;
524 for(i=1;i<total_coeff;i++) {
527 else if(zeros_left < 7){
528 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
530 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
532 zeros_left -= run_before;
533 coeff_num -= 1 + run_before;
534 j= scantable[ coeff_num ];
536 block[j]= (level[i] * qmul[j] + 32)>>6;
// Error report for an inconsistent zero count (its condition and the return
// are not visible in this excerpt).
541 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Decodes one CAVLC-coded macroblock from s->gb.
// Phases visible below: skip-run handling, mb_type parsing per slice type,
// raw PCM macroblocks, intra prediction modes, inter reference indices and
// motion vectors (8x8 sub-partitions, 16x16, 16x8, 8x16), the coded block
// pattern, the quantiser delta, and all residual blocks via decode_residual.
// Many short lines (returns, braces, else branches, small declarations) are
// not visible in this excerpt, so the comments only describe visible lines.
548 int ff_h264_decode_mb_cavlc(H264Context *h){
549 MpegEncContext * const s = &h->s;
552 unsigned int mb_type, cbp;
553 int dct8x8_allowed= h->pps.transform_8x8_mode;
// Linear macroblock address, also cached in the context.
555 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
557 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
558 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non-I slices: a skip run is read when none is pending (-1 marks "not read").
560 if(h->slice_type_nos != FF_I_TYPE){
561 if(s->mb_skip_run==-1)
562 s->mb_skip_run= get_ue_golomb(&s->gb);
// Skipped macroblock path; in MBAFF frames the top macroblock of a pair
// (even mb_y) gets its field flag either read or predicted.
564 if (s->mb_skip_run--) {
565 if(FRAME_MBAFF && (s->mb_y&1) == 0){
566 if(s->mb_skip_run==0)
567 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
569 predict_field_decoding_flag(h);
// Field decoding flag read for the top macroblock of a pair (the enclosing
// condition is not visible in this excerpt).
576 if( (s->mb_y&1) == 0 )
577 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
580 h->prev_mb_skipped= 0;
// mb_type is a ue-golomb code; each slice type translates it through its own
// info table, or jumps to the intra handling (the range checks that pick
// between the two are not visible).
582 mb_type= get_ue_golomb(&s->gb);
583 if(h->slice_type_nos == FF_B_TYPE){
585 partition_count= b_mb_type_info[mb_type].partition_count;
586 mb_type= b_mb_type_info[mb_type].type;
589 goto decode_intra_mb;
591 }else if(h->slice_type_nos == FF_P_TYPE){
593 partition_count= p_mb_type_info[mb_type].partition_count;
594 mb_type= p_mb_type_info[mb_type].type;
597 goto decode_intra_mb;
600 assert(h->slice_type_nos == FF_I_TYPE);
601 if(h->slice_type == FF_SI_TYPE && mb_type)
605 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra macroblock: the info table supplies cbp, the 16x16 prediction mode
// and the internal type flags.
609 cbp= i_mb_type_info[mb_type].cbp;
610 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
611 mb_type= i_mb_type_info[mb_type].type;
615 mb_type |= MB_TYPE_INTERLACED;
617 h->slice_table[ mb_xy ]= h->slice_num;
// Raw PCM macroblock: samples are byte-aligned in the stream and copied
// verbatim into h->mb.
619 if(IS_INTRA_PCM(mb_type)){
622 // We assume these blocks are very rare so we do not optimize it.
623 align_get_bits(&s->gb);
625 // The pixels are stored in the same order as levels in h->mb array.
626 for(x=0; x < (CHROMA ? 384 : 256); x++){
627 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
630 // In deblocking, the quantizer is 0
631 s->current_picture.qscale_table[mb_xy]= 0;
632 // All coeffs are present
633 memset(h->non_zero_count[mb_xy], 16, 16);
635 s->current_picture.mb_type[mb_xy]= mb_type;
// Reference counts are doubled here and halved again at the end of the
// function (the guarding condition is not visible in this excerpt).
640 h->ref_count[0] <<= 1;
641 h->ref_count[1] <<= 1;
644 fill_caches(h, mb_type, 0);
647 if(IS_INTRA(mb_type)){
649 // init_top_left_availability(h);
650 if(IS_INTRA4x4(mb_type)){
// One bit selects the 8x8 transform when the PPS allows it.
653 if(dct8x8_allowed && get_bits1(&s->gb)){
654 mb_type |= MB_TYPE_8x8DCT;
658 // fill_intra4x4_pred_table(h);
659 for(i=0; i<16; i+=di){
660 int mode= pred_intra_mode(h, i);
// A clear bit means the predicted mode is not used: three more bits give the
// mode among the remaining ones, skipping over the predicted value.
662 if(!get_bits1(&s->gb)){
663 const int rem_mode= get_bits(&s->gb, 3);
664 mode = rem_mode + (rem_mode >= mode);
668 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
670 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
672 ff_h264_write_back_intra_pred_mode(h);
673 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
676 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
677 if(h->intra16x16_pred_mode < 0)
// Chroma prediction mode, validated by the same checker.
681 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
684 h->chroma_pred_mode= pred_mode;
// Four 8x8 partitions, each with its own sub-macroblock type.
686 }else if(partition_count==4){
687 int i, j, sub_partition_count[4], list, ref[2][4];
689 if(h->slice_type_nos == FF_B_TYPE){
691 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
692 if(h->sub_mb_type[i] >=13){
693 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
696 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
697 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// If any sub-block is direct, direct prediction runs and four ref_cache
// entries are marked PART_NOT_AVAILABLE.
699 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
700 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
701 ff_h264_pred_direct_motion(h, &mb_type);
702 h->ref_cache[0][scan8[4]] =
703 h->ref_cache[1][scan8[4]] =
704 h->ref_cache[0][scan8[12]] =
705 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
708 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
710 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
711 if(h->sub_mb_type[i] >=4){
712 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
715 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
716 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and sub-block: with two references a single
// inverted bit is read, otherwise a ue-golomb code.
720 for(list=0; list<h->list_count; list++){
721 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
723 if(IS_DIRECT(h->sub_mb_type[i])) continue;
724 if(IS_DIR(h->sub_mb_type[i], 0, list)){
728 }else if(ref_count == 2){
729 tmp= get_bits1(&s->gb)^1;
731 tmp= get_ue_golomb_31(&s->gb);
733 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
746 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per list and sub-block: prediction plus a signed golomb delta.
748 for(list=0; list<h->list_count; list++){
750 if(IS_DIRECT(h->sub_mb_type[i])) {
751 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
// Replicate the reference index over the 2x2 cache cells of this 8x8 block.
754 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
755 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
757 if(IS_DIR(h->sub_mb_type[i], 0, list)){
758 const int sub_mb_type= h->sub_mb_type[i];
759 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
760 for(j=0; j<sub_partition_count[i]; j++){
762 const int index= 4*i + block_width*j;
763 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
764 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
765 mx += get_se_golomb(&s->gb);
766 my += get_se_golomb(&s->gb);
767 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Store the vector into every cache cell the sub-partition covers
// (offsets +1 = right neighbour, +8 = cell below, +9 = diagonal).
769 if(IS_SUB_8X8(sub_mb_type)){
771 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
773 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
774 }else if(IS_SUB_8X4(sub_mb_type)){
775 mv_cache[ 1 ][0]= mx;
776 mv_cache[ 1 ][1]= my;
777 }else if(IS_SUB_4X8(sub_mb_type)){
778 mv_cache[ 8 ][0]= mx;
779 mv_cache[ 8 ][1]= my;
781 mv_cache[ 0 ][0]= mx;
782 mv_cache[ 0 ][1]= my;
785 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// Direct macroblock: motion is derived, and the 8x8 transform additionally
// requires direct_8x8_inference_flag.
791 }else if(IS_DIRECT(mb_type)){
792 ff_h264_pred_direct_motion(h, &mb_type);
793 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
796 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16 partition: one reference index and one vector per list.
797 if(IS_16X16(mb_type)){
798 for(list=0; list<h->list_count; list++){
800 if(IS_DIR(mb_type, 0, list)){
801 if(h->ref_count[list]==1){
803 }else if(h->ref_count[list]==2){
804 val= get_bits1(&s->gb)^1;
806 val= get_ue_golomb_31(&s->gb);
807 if(val >= h->ref_count[list]){
808 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
813 val= LIST_NOT_USED&0xFF;
814 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
816 for(list=0; list<h->list_count; list++){
818 if(IS_DIR(mb_type, 0, list)){
819 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
820 mx += get_se_golomb(&s->gb);
821 my += get_se_golomb(&s->gb);
822 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
824 val= pack16to32(mx,my);
827 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8 partitions: partition i covers cache rows starting at scan8[0] + 16*i.
830 else if(IS_16X8(mb_type)){
831 for(list=0; list<h->list_count; list++){
834 if(IS_DIR(mb_type, i, list)){
835 if(h->ref_count[list] == 1){
837 }else if(h->ref_count[list] == 2){
838 val= get_bits1(&s->gb)^1;
840 val= get_ue_golomb_31(&s->gb);
841 if(val >= h->ref_count[list]){
842 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
847 val= LIST_NOT_USED&0xFF;
848 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
851 for(list=0; list<h->list_count; list++){
854 if(IS_DIR(mb_type, i, list)){
855 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
856 mx += get_se_golomb(&s->gb);
857 my += get_se_golomb(&s->gb);
858 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
860 val= pack16to32(mx,my);
863 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16 partitions: partition i covers cache columns starting at scan8[0] + 2*i.
867 assert(IS_8X16(mb_type));
868 for(list=0; list<h->list_count; list++){
871 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
872 if(h->ref_count[list]==1){
874 }else if(h->ref_count[list]==2){
875 val= get_bits1(&s->gb)^1;
877 val= get_ue_golomb_31(&s->gb);
878 if(val >= h->ref_count[list]){
879 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
884 val= LIST_NOT_USED&0xFF;
885 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
888 for(list=0; list<h->list_count; list++){
891 if(IS_DIR(mb_type, i, list)){
892 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
893 mx += get_se_golomb(&s->gb);
894 my += get_se_golomb(&s->gb);
895 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
897 val= pack16to32(mx,my);
900 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
906 if(IS_INTER(mb_type))
907 write_back_motion(h, mb_type);
// Coded block pattern: intra 16x16 already got it from the mb_type table;
// everything else reads a ue-golomb code and maps it through a lookup table
// (48-entry or 16-entry "gray" variant; the selecting condition and the
// range check before the lookup are not fully visible in this excerpt).
909 if(!IS_INTRA16x16(mb_type)){
910 cbp= get_ue_golomb(&s->gb);
912 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
917 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
918 else cbp= golomb_to_inter_cbp [cbp];
920 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
921 else cbp= golomb_to_inter_cbp_gray[cbp];
// Non-intra macroblocks with coded luma may switch to the 8x8 transform.
926 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
927 if(get_bits1(&s->gb)){
928 mb_type |= MB_TYPE_8x8DCT;
929 h->cbp_table[mb_xy]= cbp;
932 s->current_picture.mb_type[mb_xy]= mb_type;
// Residual data is present when any cbp bit is set or for intra 16x16.
934 if(cbp || IS_INTRA16x16(mb_type)){
935 int i8x8, i4x4, chroma_idx;
// Intra and inter residuals are read through separate bit reader pointers.
937 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
938 const uint8_t *scan, *scan8x8, *dc_scan;
940 // fill_non_zero_count_cache(h);
// Scan tables: field or zigzag order, with separate variants for qscale == 0.
942 if(IS_INTERLACED(mb_type)){
943 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
944 scan= s->qscale ? h->field_scan : h->field_scan_q0;
945 dc_scan= luma_dc_field_scan;
947 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
948 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
949 dc_scan= luma_dc_zigzag_scan;
// Quantiser delta, limited to -26..25; the updated qscale is wrapped back
// into 0..51 (the line applying dquant and the else branch are not visible).
952 dquant= get_se_golomb(&s->gb);
954 if( dquant > 25 || dquant < -26 ){
955 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
960 if(((unsigned)s->qscale) > 51){
961 if(s->qscale<0) s->qscale+= 52;
965 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
966 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra 16x16: one luma DC block of 16 coefficients, then AC blocks of 15
// coefficients each (scan + 1 skips the DC position).
967 if(IS_INTRA16x16(mb_type)){
968 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
969 return -1; //FIXME continue if partitioned and other return -1 too
972 assert((cbp&15) == 0 || (cbp&15) == 15);
975 for(i8x8=0; i8x8<4; i8x8++){
976 for(i4x4=0; i4x4<4; i4x4++){
977 const int index= i4x4 + 4*i8x8;
978 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
// Clears the luma non-zero counts (the branch this belongs to is not visible).
984 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other macroblock types: luma is decoded per 8x8 block.
987 for(i8x8=0; i8x8<4; i8x8++){
// 8x8 transform: four interleaved 16-coefficient reads fill one 64-entry
// block, then the four counts are summed into the first cache cell.
989 if(IS_8x8DCT(mb_type)){
990 DCTELEM *buf = &h->mb[64*i8x8];
992 for(i4x4=0; i4x4<4; i4x4++){
993 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
994 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
997 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
998 nnz[0] += nnz[1] + nnz[8] + nnz[9];
// 4x4 transform: four independent 16-coefficient blocks.
1000 for(i4x4=0; i4x4<4; i4x4++){
1001 const int index= i4x4 + 4*i8x8;
1003 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// Clears the four counts of this 8x8 block (presumably when it is not coded;
// the branch is not visible).
1009 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
1010 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC: one 4-coefficient block per plane, no dequantisation table.
1016 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1017 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// Chroma AC: four 15-coefficient blocks per plane, block indices 16..23.
1023 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1024 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1025 for(i4x4=0; i4x4<4; i4x4++){
1026 const int index= 16 + 4*chroma_idx + i4x4;
1027 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// Clears the chroma non-zero counts (the branch is not visible).
1033 uint8_t * const nnz= &h->non_zero_count_cache[0];
1034 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1035 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// Clears all luma and chroma counts (presumably the no-residual case; the
// branch is not visible).
1038 uint8_t * const nnz= &h->non_zero_count_cache[0];
1039 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1040 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1041 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// Publish the final quantiser and non-zero counts for this macroblock.
1043 s->current_picture.qscale_table[mb_xy]= s->qscale;
1044 write_back_non_zero_count(h);
// Undo the earlier doubling of the reference counts.
1047 h->ref_count[0] >>= 1;
1048 h->ref_count[1] >>= 1;