git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #define CABAC(h) 0
  29
  30 #include "internal.h"
  31 #include "avcodec.h"
  32 #include "h264dec.h"
  33 #include "h264_mvpred.h"
  34 #include "h264data.h"
  35 #include "golomb_legacy.h"
  36 #include "mpegutils.h"
  37
  38 #include <assert.h>
  39
/*
 * Two 16-entry remapping tables; each one is a permutation of 0..15.
 * NOTE(review): neither table is referenced in the part of the file visible
 * here. The names suggest they translate a Golomb-coded index into a coded
 * block pattern for inter / intra4x4 macroblocks when there is no chroma
 * ("gray") -- confirm against the users further down in the file.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
  47
/*
 * Code lengths (in bits) of the coeff_token VLC used by decode_residual()
 * for blocks with max_coeff == 4. ff_h264_decode_init_vlc() feeds all 4*5
 * entries to init_vlc(). The decoded symbol is split by decode_residual()
 * as total_coeff = symbol >> 2 and trailing_ones = symbol & 3, so each row
 * is one total_coeff value (0..4) and each column one trailing_ones value
 * (0..3). The zero entries are exactly the combinations where
 * trailing_ones > total_coeff.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/* Code words matching chroma_dc_coeff_token_len, same layout. */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};
  63
/*
 * Code lengths (in bits) of the coeff_token VLC used by decode_residual()
 * for blocks with max_coeff <= 8 other than 4. All 4*9 entries are passed
 * to init_vlc(). Layout is the same as the 4-coefficient table: row =
 * total_coeff (0..8), column = trailing_ones (0..3); zero entries are the
 * combinations with trailing_ones > total_coeff.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

/* Code words matching chroma422_dc_coeff_token_len, same layout. */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,   0,  0, 0,
 15,   1,  0, 0,
 14,  13,  1, 0,
  7,  12, 11, 1,
  6,   5, 10, 1,
  7,   6,  4, 9,
  7,   6,  5, 8,
  7,   6,  5, 4,
  7,   5,  4, 4,
};
  87
/*
 * Code lengths (in bits) of the four coeff_token VLCs used for blocks with
 * max_coeff > 8. decode_residual() picks the table through its
 * coeff_token_table_index[] lookup on the predicted non-zero count
 * (0-1 -> table 0, 2-3 -> table 1, 4-7 -> table 2, 8-16 -> table 3).
 * Within a table the entry index is 4*total_coeff + trailing_ones
 * (total_coeff 0..16, trailing_ones 0..3); zero entries are combinations
 * with trailing_ones > total_coeff. Table 3 uses a fixed length of 6 bits.
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/* Code words matching coeff_token_len, same [table][4*total_coeff + trailing_ones] layout. */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
 149
/*
 * Code lengths (in bits) of the total_zeros VLCs for blocks with
 * max_coeff > 8. Row r is used when total_coeff == r + 1 (decode_residual()
 * indexes total_zeros_vlc[total_coeff - 1]); the column is the decoded
 * zeros_left value. Only 15 rows are initialized and built into VLCs; the
 * array is declared with 16 rows, and short rows are zero-padded by the
 * initializer.
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/* Code words matching total_zeros_len, same [total_coeff - 1][zeros_left] layout. */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};
 185
/*
 * total_zeros VLC code lengths for blocks with max_coeff == 4.
 * Row = total_coeff - 1 (1..3 coefficients), column = zeros_left value.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/* Code words matching chroma_dc_total_zeros_len. */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};

/*
 * total_zeros VLC code lengths for blocks with max_coeff <= 8 other than 4.
 * Row = total_coeff - 1 (1..7 coefficients), column = zeros_left value;
 * short rows are zero-padded by the initializer.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/* Code words matching chroma422_dc_total_zeros_len. */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
 217
/*
 * Code lengths (in bits) of the run_before VLCs. Rows 0..5 are built into
 * run_vlc[0..5] (7 entries each) and used when zeros_left is 1..6
 * (run_vlc[zeros_left - 1]); row 6 is built into run7_vlc (16 entries) and
 * used when zeros_left >= 7. The column is the decoded run_before value.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/* Code words matching run_len, same layout. */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
 237
/*
 * Statically allocated VLC decoders and their backing tables. Each
 * *_size constant is the number of entries handed to the matching VLC as
 * table_allocated in ff_h264_decode_init_vlc().
 */

/* The four coeff_token tables share one packed array; the per-table sizes
 * must add up to its length (asserted once during initialization). */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Direct lookup table for coefficient levels, indexed by
 * [suffix_length][next LEVEL_TAB_BITS bits of the stream].
 * Entry [0] is either the decoded level or, when >= 100, an escape carrying
 * 100 + level prefix; entry [1] is the number of bits to consume.
 * Filled by init_cavlc_level_tab().
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* Bit widths passed to init_vlc() when building, and to get_vlc2() when
 * reading, the corresponding VLC. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6
 281
 282 /**
 283  * Get the predicted number of non-zero coefficients.
 284  * @param n block index
 285  */
 286 static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
 287 {
 288     const int index8= scan8[n];
 289     const int left = sl->non_zero_count_cache[index8 - 1];
 290     const int top  = sl->non_zero_count_cache[index8 - 8];
 291     int i= left + top;
 292
 293     if(i<64) i= (i+1)>>1;
 294
 295     ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
 296
 297     return i&31;
 298 }
 299
 300 static av_cold void init_cavlc_level_tab(void){
 301     int suffix_length;
 302     unsigned int i;
 303
 304     for(suffix_length=0; suffix_length<7; suffix_length++){
 305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
 306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
 307
 308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
 309                 int level_code = (prefix << suffix_length) +
 310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
 311                 int mask = -(level_code&1);
 312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
 313                 cavlc_level_tab[suffix_length][i][0]= level_code;
 314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
 315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
 316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
 317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
 318             }else{
 319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
 320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
 321             }
 322         }
 323     }
 324 }
 325
 326 av_cold void ff_h264_decode_init_vlc(void){
 327     static int done = 0;
 328
 329     if (!done) {
 330         int i;
 331         int offset;
 332         done = 1;
 333
 334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
 335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
 336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
 337                  &chroma_dc_coeff_token_len [0], 1, 1,
 338                  &chroma_dc_coeff_token_bits[0], 1, 1,
 339                  INIT_VLC_USE_NEW_STATIC);
 340
 341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
 342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
 343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
 344                  &chroma422_dc_coeff_token_len [0], 1, 1,
 345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
 346                  INIT_VLC_USE_NEW_STATIC);
 347
 348         offset = 0;
 349         for(i=0; i<4; i++){
 350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
 351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
 352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
 353                      &coeff_token_len [i][0], 1, 1,
 354                      &coeff_token_bits[i][0], 1, 1,
 355                      INIT_VLC_USE_NEW_STATIC);
 356             offset += coeff_token_vlc_tables_size[i];
 357         }
 358         /*
 359          * This is a one time safety check to make sure that
 360          * the packed static coeff_token_vlc table sizes
 361          * were initialized correctly.
 362          */
 363         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
 364
 365         for(i=0; i<3; i++){
 366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
 367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
 368             init_vlc(&chroma_dc_total_zeros_vlc[i],
 369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
 370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
 371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
 372                      INIT_VLC_USE_NEW_STATIC);
 373         }
 374
 375         for(i=0; i<7; i++){
 376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
 377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
 378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
 379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
 380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
 381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
 382                      INIT_VLC_USE_NEW_STATIC);
 383         }
 384
 385         for(i=0; i<15; i++){
 386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
 387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
 388             init_vlc(&total_zeros_vlc[i],
 389                      TOTAL_ZEROS_VLC_BITS, 16,
 390                      &total_zeros_len [i][0], 1, 1,
 391                      &total_zeros_bits[i][0], 1, 1,
 392                      INIT_VLC_USE_NEW_STATIC);
 393         }
 394
 395         for(i=0; i<6; i++){
 396             run_vlc[i].table = run_vlc_tables[i];
 397             run_vlc[i].table_allocated = run_vlc_tables_size;
 398             init_vlc(&run_vlc[i],
 399                      RUN_VLC_BITS, 7,
 400                      &run_len [i][0], 1, 1,
 401                      &run_bits[i][0], 1, 1,
 402                      INIT_VLC_USE_NEW_STATIC);
 403         }
 404         run7_vlc.table = run7_vlc_table,
 405         run7_vlc.table_allocated = run7_vlc_table_size;
 406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
 407                  &run_len [6][0], 1, 1,
 408                  &run_bits[6][0], 1, 1,
 409                  INIT_VLC_USE_NEW_STATIC);
 410
 411         init_cavlc_level_tab();
 412     }
 413 }
 414
 415 static inline int get_level_prefix(GetBitContext *gb){
 416     unsigned int buf;
 417     int log;
 418
 419     OPEN_READER(re, gb);
 420     UPDATE_CACHE(re, gb);
 421     buf=GET_CACHE(re, gb);
 422
 423     log= 32 - av_log2(buf);
 424
 425     LAST_SKIP_BITS(re, gb, log);
 426     CLOSE_READER(re, gb);
 427
 428     return log-1;
 429 }
 430
 431 /**
 432  * Decode a residual block.
 433  * @param n block index
 434  * @param scantable scantable
 435  * @param max_coeff number of coefficients in the block
 436  * @return <0 if an error occurred
 437  */
 438 static int decode_residual(const H264Context *h, H264SliceContext *sl,
 439                            GetBitContext *gb, int16_t *block, int n,
 440                            const uint8_t *scantable, const uint32_t *qmul,
 441                            int max_coeff)
 442 {
 443     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
 444     int level[16];
 445     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
 446
 447     //FIXME put trailing_onex into the context
 448
 449     if(max_coeff <= 8){
 450         if (max_coeff == 4)
 451             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
 452         else
 453             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
 454         total_coeff= coeff_token>>2;
 455     }else{
 456         if(n >= LUMA_DC_BLOCK_INDEX){
 457             total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
 458             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
 459             total_coeff= coeff_token>>2;
 460         }else{
 461             total_coeff= pred_non_zero_count(h, sl, n);
 462             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
 463             total_coeff= coeff_token>>2;
 464         }
 465     }
 466     sl->non_zero_count_cache[scan8[n]] = total_coeff;
 467
 468     //FIXME set last_non_zero?
 469
 470     if(total_coeff==0)
 471         return 0;
 472     if(total_coeff > (unsigned)max_coeff) {
 473         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
 474         return -1;
 475     }
 476
 477     trailing_ones= coeff_token&3;
 478     ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
 479     assert(total_coeff<=16);
 480
 481     i = show_bits(gb, 3);
 482     skip_bits(gb, trailing_ones);
 483     level[0] = 1-((i&4)>>1);
 484     level[1] = 1-((i&2)   );
 485     level[2] = 1-((i&1)<<1);
 486
 487     if(trailing_ones<total_coeff) {
 488         int mask, prefix;
 489         int suffix_length = total_coeff > 10 & trailing_ones < 3;
 490         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
 491         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
 492
 493         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
 494         if(level_code >= 100){
 495             prefix= level_code - 100;
 496             if(prefix == LEVEL_TAB_BITS)
 497                 prefix += get_level_prefix(gb);
 498
 499             //first coefficient has suffix_length equal to 0 or 1
 500             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
 501                 if(suffix_length)
 502                     level_code= (prefix<<1) + get_bits1(gb); //part
 503                 else
 504                     level_code= prefix; //part
 505             }else if(prefix==14){
 506                 if(suffix_length)
 507                     level_code= (prefix<<1) + get_bits1(gb); //part
 508                 else
 509                     level_code= prefix + get_bits(gb, 4); //part
 510             }else{
 511                 level_code= 30 + get_bits(gb, prefix-3); //part
 512                 if(prefix>=16){
 513                     if(prefix > 25+3){
 514                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
 515                         return -1;
 516                     }
 517                     level_code += (1<<(prefix-3))-4096;
 518                 }
 519             }
 520
 521             if(trailing_ones < 3) level_code += 2;
 522
 523             suffix_length = 2;
 524             mask= -(level_code&1);
 525             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
 526         }else{
 527             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
 528
 529             suffix_length = 1 + (level_code + 3U > 6U);
 530             level[trailing_ones]= level_code;
 531         }
 532
 533         //remaining coefficients have suffix_length > 0
 534         for(i=trailing_ones+1;i<total_coeff;i++) {
 535             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
 536             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
 537             level_code= cavlc_level_tab[suffix_length][bitsi][0];
 538
 539             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
 540             if(level_code >= 100){
 541                 prefix= level_code - 100;
 542                 if(prefix == LEVEL_TAB_BITS){
 543                     prefix += get_level_prefix(gb);
 544                 }
 545                 if(prefix<15){
 546                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
 547                 }else{
 548                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
 549                     if(prefix>=16)
 550                         level_code += (1<<(prefix-3))-4096;
 551                 }
 552                 mask= -(level_code&1);
 553                 level_code= (((2+level_code)>>1) ^ mask) - mask;
 554             }
 555             level[i]= level_code;
 556             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
 557         }
 558     }
 559
 560     if(total_coeff == max_coeff)
 561         zeros_left=0;
 562     else{
 563         if (max_coeff <= 8) {
 564             if (max_coeff == 4)
 565                 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
 566                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
 567             else
 568                 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
 569                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
 570         } else {
 571             zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
 572         }
 573     }
 574
 575 #define STORE_BLOCK(type) \
 576     scantable += zeros_left + total_coeff - 1; \
 577     if(n >= LUMA_DC_BLOCK_INDEX){ \
 578         ((type*)block)[*scantable] = level[0]; \
 579         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
 580             if(zeros_left < 7) \
 581                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
 582             else {\
 583                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
 584                 run_before = FFMIN(zeros_left, run_before);\
 585             }\
 586             zeros_left -= run_before; \
 587             scantable -= 1 + run_before; \
 588             ((type*)block)[*scantable]= level[i]; \
 589         } \
 590         for(;i<total_coeff;i++) { \
 591             scantable--; \
 592             ((type*)block)[*scantable]= level[i]; \
 593         } \
 594     }else{ \
 595         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
 596         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
 597             if(zeros_left < 7) \
 598                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
 599             else {\
 600                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
 601                 run_before = FFMIN(zeros_left, run_before);\
 602             }\
 603             zeros_left -= run_before; \
 604             scantable -= 1 + run_before; \
 605             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
 606         } \
 607         for(;i<total_coeff;i++) { \
 608             scantable--; \
 609             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
 610         } \
 611     }
 612
 613     if (zeros_left < 0) {
 614         av_log(h->avctx, AV_LOG_ERROR,
 615                "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
 616         return AVERROR_INVALIDDATA;
 617     }
 618
 619     if (h->pixel_shift) {
 620         STORE_BLOCK(int32_t)
 621     } else {
 622         STORE_BLOCK(int16_t)
 623     }
 624
 625     return 0;
 626 }
 627
 628 static av_always_inline
 629 int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
 630                          GetBitContext *gb, const uint8_t *scan,
 631                          const uint8_t *scan8x8, int pixel_shift,
 632                          int mb_type, int cbp, int p)
 633 {
 634     int i4x4, i8x8;
 635     int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
 636     if(IS_INTRA16x16(mb_type)){
 637         AV_ZERO128(sl->mb_luma_dc[p]+0);
 638         AV_ZERO128(sl->mb_luma_dc[p]+8);
 639         AV_ZERO128(sl->mb_luma_dc[p]+16);
 640         AV_ZERO128(sl->mb_luma_dc[p]+24);
 641         if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
 642             return -1; //FIXME continue if partitioned and other return -1 too
 643         }
 644
 645         assert((cbp&15) == 0 || (cbp&15) == 15);
 646
 647         if(cbp&15){
 648             for(i8x8=0; i8x8<4; i8x8++){
 649                 for(i4x4=0; i4x4<4; i4x4++){
 650                     const int index= i4x4 + 4*i8x8 + p*16;
 651                     if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
 652                         index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
 653                         return -1;
 654                     }
 655                 }
 656             }
 657             return 0xf;
 658         }else{
 659             fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
 660             return 0;
 661         }
 662     }else{
 663         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
 664         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
 665         int new_cbp = 0;
 666         for(i8x8=0; i8x8<4; i8x8++){
 667             if(cbp & (1<<i8x8)){
 668                 if(IS_8x8DCT(mb_type)){
 669                     int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
 670                     uint8_t *nnz;
 671                     for(i4x4=0; i4x4<4; i4x4++){
 672                         const int index= i4x4 + 4*i8x8 + p*16;
 673                         if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
 674                                             h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 )
 675                             return -1;
 676                     }
 677                     nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
 678                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
 679                     new_cbp |= !!nnz[0] << i8x8;
 680                 }else{
 681                     for(i4x4=0; i4x4<4; i4x4++){
 682                         const int index= i4x4 + 4*i8x8 + p*16;
 683                         if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
 684                                             scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){
 685                             return -1;
 686                         }
 687                         new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
 688                     }
 689                 }
 690             }else{
 691                 uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
 692                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
 693             }
 694         }
 695         return new_cbp;
 696     }
 697 }
 698
 699 int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
 700 {
 701     int mb_xy;
 702     int partition_count;
 703     unsigned int mb_type, cbp;
 704     int dct8x8_allowed= h->ps.pps->transform_8x8_mode;
 705     int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2;
 706     const int pixel_shift = h->pixel_shift;
 707
 708     mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
 709
 710     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
 711                 down the code */
 712     if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
 713         if (sl->mb_skip_run == -1)
 714             sl->mb_skip_run = get_ue_golomb(&sl->gb);
 715
 716         if (sl->mb_skip_run--) {
 717             if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
 718                 if (sl->mb_skip_run == 0)
 719                     sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
 720             }
 721             decode_mb_skip(h, sl);
 722             return 0;
 723         }
 724     }
 725     if (FRAME_MBAFF(h)) {
 726         if ((sl->mb_y & 1) == 0)
 727             sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
 728     }
 729
 730     sl->prev_mb_skipped = 0;
 731
 732     mb_type= get_ue_golomb(&sl->gb);
 733     if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
 734         if(mb_type < 23){
 735             partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
 736             mb_type         = ff_h264_b_mb_type_info[mb_type].type;
 737         }else{
 738             mb_type -= 23;
 739             goto decode_intra_mb;
 740         }
 741     } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
 742         if(mb_type < 5){
 743             partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
 744             mb_type         = ff_h264_p_mb_type_info[mb_type].type;
 745         }else{
 746             mb_type -= 5;
 747             goto decode_intra_mb;
 748         }
 749     }else{
 750        assert(sl->slice_type_nos == AV_PICTURE_TYPE_I);
 751         if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
 752             mb_type--;
 753 decode_intra_mb:
 754         if(mb_type > 25){
 755             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
 756             return -1;
 757         }
 758         partition_count=0;
 759         cbp                      = ff_h264_i_mb_type_info[mb_type].cbp;
 760         sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
 761         mb_type                  = ff_h264_i_mb_type_info[mb_type].type;
 762     }
 763
 764     if (MB_FIELD(sl))
 765         mb_type |= MB_TYPE_INTERLACED;
 766
 767     h->slice_table[mb_xy] = sl->slice_num;
 768
 769     if(IS_INTRA_PCM(mb_type)){
 770         const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] *
 771                             h->ps.sps->bit_depth_luma;
 772
 773         // We assume these blocks are very rare so we do not optimize it.
 774         sl->intra_pcm_ptr = align_get_bits(&sl->gb);
 775         if (get_bits_left(&sl->gb) < mb_size) {
 776             av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
 777             return AVERROR_INVALIDDATA;
 778         }
 779         skip_bits_long(&sl->gb, mb_size);
 780
 781         // In deblocking, the quantizer is 0
 782         h->cur_pic.qscale_table[mb_xy] = 0;
 783         // All coeffs are present
 784         memset(h->non_zero_count[mb_xy], 16, 48);
 785
 786         h->cur_pic.mb_type[mb_xy] = mb_type;
 787         return 0;
 788     }
 789
 790     fill_decode_neighbors(h, sl, mb_type);
 791     fill_decode_caches(h, sl, mb_type);
 792
 793     //mb_pred
 794     if(IS_INTRA(mb_type)){
 795         int pred_mode;
 796 //            init_top_left_availability(h);
 797         if(IS_INTRA4x4(mb_type)){
 798             int i;
 799             int di = 1;
 800             if(dct8x8_allowed && get_bits1(&sl->gb)){
 801                 mb_type |= MB_TYPE_8x8DCT;
 802                 di = 4;
 803             }
 804
 805 //                fill_intra4x4_pred_table(h);
 806             for(i=0; i<16; i+=di){
 807                 int mode = pred_intra_mode(h, sl, i);
 808
 809                 if(!get_bits1(&sl->gb)){
 810                     const int rem_mode= get_bits(&sl->gb, 3);
 811                     mode = rem_mode + (rem_mode >= mode);
 812                 }
 813
 814                 if(di==4)
 815                     fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
 816                 else
 817                     sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
 818             }
 819             write_back_intra_pred_mode(h, sl);
 820             if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
 821                                                  sl->top_samples_available, sl->left_samples_available) < 0)
 822                 return -1;
 823         }else{
 824             sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
 825                                                                      sl->left_samples_available, sl->intra16x16_pred_mode, 0);
 826             if (sl->intra16x16_pred_mode < 0)
 827                 return -1;
 828         }
 829         if(decode_chroma){
 830             pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
 831                                                      sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
 832             if(pred_mode < 0)
 833                 return -1;
 834             sl->chroma_pred_mode = pred_mode;
 835         } else {
 836             sl->chroma_pred_mode = DC_128_PRED8x8;
 837         }
 838     }else if(partition_count==4){
 839         int i, j, sub_partition_count[4], list, ref[2][4];
 840
 841         if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
 842             for(i=0; i<4; i++){
 843                 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
 844                 if(sl->sub_mb_type[i] >=13){
 845                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
 846                     return -1;
 847                 }
 848                 sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
 849                 sl->sub_mb_type[i]     = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
 850             }
 851             if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
 852                 ff_h264_pred_direct_motion(h, sl, &mb_type);
 853                 sl->ref_cache[0][scan8[4]] =
 854                 sl->ref_cache[1][scan8[4]] =
 855                 sl->ref_cache[0][scan8[12]] =
 856                 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
 857             }
 858         }else{
 859             assert(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
 860             for(i=0; i<4; i++){
 861                 sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
 862                 if(sl->sub_mb_type[i] >=4){
 863                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
 864                     return -1;
 865                 }
 866                 sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
 867                 sl->sub_mb_type[i]     = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
 868             }
 869         }
 870
 871         for (list = 0; list < sl->list_count; list++) {
 872             int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
 873             for(i=0; i<4; i++){
 874                 if(IS_DIRECT(sl->sub_mb_type[i])) continue;
 875                 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
 876                     unsigned int tmp;
 877                     if(ref_count == 1){
 878                         tmp= 0;
 879                     }else if(ref_count == 2){
 880                         tmp= get_bits1(&sl->gb)^1;
 881                     }else{
 882                         tmp= get_ue_golomb_31(&sl->gb);
 883                         if(tmp>=ref_count){
 884                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
 885                             return -1;
 886                         }
 887                     }
 888                     ref[list][i]= tmp;
 889                 }else{
 890                  //FIXME
 891                     ref[list][i] = -1;
 892                 }
 893             }
 894         }
 895
 896         if(dct8x8_allowed)
 897             dct8x8_allowed = get_dct8x8_allowed(h, sl);
 898
 899         for (list = 0; list < sl->list_count; list++) {
 900             for(i=0; i<4; i++){
 901                 if(IS_DIRECT(sl->sub_mb_type[i])) {
 902                     sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
 903                     continue;
 904                 }
 905                 sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
 906                 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
 907
 908                 if(IS_DIR(sl->sub_mb_type[i], 0, list)){
 909                     const int sub_mb_type= sl->sub_mb_type[i];
 910                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
 911                     for(j=0; j<sub_partition_count[i]; j++){
 912                         int mx, my;
 913                         const int index= 4*i + block_width*j;
 914                         int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
 915                         pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
 916                         mx += get_se_golomb(&sl->gb);
 917                         my += get_se_golomb(&sl->gb);
 918                         ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
 919
 920                         if(IS_SUB_8X8(sub_mb_type)){
 921                             mv_cache[ 1 ][0]=
 922                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
 923                             mv_cache[ 1 ][1]=
 924                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
 925                         }else if(IS_SUB_8X4(sub_mb_type)){
 926                             mv_cache[ 1 ][0]= mx;
 927                             mv_cache[ 1 ][1]= my;
 928                         }else if(IS_SUB_4X8(sub_mb_type)){
 929                             mv_cache[ 8 ][0]= mx;
 930                             mv_cache[ 8 ][1]= my;
 931                         }
 932                         mv_cache[ 0 ][0]= mx;
 933                         mv_cache[ 0 ][1]= my;
 934                     }
 935                 }else{
 936                     uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
 937                     p[0] = p[1]=
 938                     p[8] = p[9]= 0;
 939                 }
 940             }
 941         }
 942     }else if(IS_DIRECT(mb_type)){
 943         ff_h264_pred_direct_motion(h, sl, &mb_type);
 944         dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag;
 945     }else{
 946         int list, mx, my, i;
 947          //FIXME we should set ref_idx_l? to 0 if we use that later ...
 948         if(IS_16X16(mb_type)){
 949             for (list = 0; list < sl->list_count; list++) {
 950                     unsigned int val;
 951                     if(IS_DIR(mb_type, 0, list)){
 952                         int rc = sl->ref_count[list] << MB_MBAFF(sl);
 953                         if (rc == 1) {
 954                             val= 0;
 955                         } else if (rc == 2) {
 956                             val= get_bits1(&sl->gb)^1;
 957                         }else{
 958                             val= get_ue_golomb_31(&sl->gb);
 959                             if (val >= rc) {
 960                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
 961                                 return -1;
 962                             }
 963                         }
 964                     fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
 965                     }
 966             }
 967             for (list = 0; list < sl->list_count; list++) {
 968                 if(IS_DIR(mb_type, 0, list)){
 969                     pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
 970                     mx += get_se_golomb(&sl->gb);
 971                     my += get_se_golomb(&sl->gb);
 972                     ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
 973
 974                     fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
 975                 }
 976             }
 977         }
 978         else if(IS_16X8(mb_type)){
 979             for (list = 0; list < sl->list_count; list++) {
 980                     for(i=0; i<2; i++){
 981                         unsigned int val;
 982                         if(IS_DIR(mb_type, i, list)){
 983                             int rc = sl->ref_count[list] << MB_MBAFF(sl);
 984                             if (rc == 1) {
 985                                 val= 0;
 986                             } else if (rc == 2) {
 987                                 val= get_bits1(&sl->gb)^1;
 988                             }else{
 989                                 val= get_ue_golomb_31(&sl->gb);
 990                                 if (val >= rc) {
 991                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
 992                                     return -1;
 993                                 }
 994                             }
 995                         }else
 996                             val= LIST_NOT_USED&0xFF;
 997                         fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
 998                     }
 999             }
1000             for (list = 0; list < sl->list_count; list++) {
1001                 for(i=0; i<2; i++){
1002                     unsigned int val;
1003                     if(IS_DIR(mb_type, i, list)){
1004                         pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1005                         mx += get_se_golomb(&sl->gb);
1006                         my += get_se_golomb(&sl->gb);
1007                         ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1008
1009                         val= pack16to32(mx,my);
1010                     }else
1011                         val=0;
1012                     fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1013                 }
1014             }
1015         }else{
1016             assert(IS_8X16(mb_type));
1017             for (list = 0; list < sl->list_count; list++) {
1018                     for(i=0; i<2; i++){
1019                         unsigned int val;
1020                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1021                             int rc = sl->ref_count[list] << MB_MBAFF(sl);
1022                             if (rc == 1) {
1023                                 val= 0;
1024                             } else if (rc == 2) {
1025                                 val= get_bits1(&sl->gb)^1;
1026                             }else{
1027                                 val= get_ue_golomb_31(&sl->gb);
1028                                 if (val >= rc) {
1029                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1030                                     return -1;
1031                                 }
1032                             }
1033                         }else
1034                             val= LIST_NOT_USED&0xFF;
1035                         fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1036                     }
1037             }
1038             for (list = 0; list < sl->list_count; list++) {
1039                 for(i=0; i<2; i++){
1040                     unsigned int val;
1041                     if(IS_DIR(mb_type, i, list)){
1042                         pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1043                         mx += get_se_golomb(&sl->gb);
1044                         my += get_se_golomb(&sl->gb);
1045                         ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1046
1047                         val= pack16to32(mx,my);
1048                     }else
1049                         val=0;
1050                     fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1051                 }
1052             }
1053         }
1054     }
1055
1056     if(IS_INTER(mb_type))
1057         write_back_motion(h, sl, mb_type);
1058
1059     if(!IS_INTRA16x16(mb_type)){
1060         cbp= get_ue_golomb(&sl->gb);
1061
1062         if(decode_chroma){
1063             if(cbp > 47){
1064                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1065                 return -1;
1066             }
1067             if (IS_INTRA4x4(mb_type))
1068                 cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
1069             else
1070                 cbp = ff_h264_golomb_to_inter_cbp[cbp];
1071         }else{
1072             if(cbp > 15){
1073                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1074                 return -1;
1075             }
1076             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1077             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1078         }
1079     }
1080
1081     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1082         mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1083     }
1084     sl->cbp=
1085     h->cbp_table[mb_xy]= cbp;
1086     h->cur_pic.mb_type[mb_xy] = mb_type;
1087
1088     if(cbp || IS_INTRA16x16(mb_type)){
1089         int i4x4, i8x8, chroma_idx;
1090         int dquant;
1091         int ret;
1092         GetBitContext *gb = &sl->gb;
1093         const uint8_t *scan, *scan8x8;
1094         const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
1095
1096         dquant= get_se_golomb(&sl->gb);
1097
1098         sl->qscale += dquant;
1099
1100         if (((unsigned)sl->qscale) > max_qp){
1101             if (sl->qscale < 0) sl->qscale += max_qp + 1;
1102             else                sl->qscale -= max_qp+1;
1103             if (((unsigned)sl->qscale) > max_qp){
1104                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1105                 return -1;
1106             }
1107         }
1108
1109         sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
1110         sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
1111
1112         if(IS_INTERLACED(mb_type)){
1113             scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1114             scan    = sl->qscale ? h->field_scan : h->field_scan_q0;
1115         }else{
1116             scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1117             scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1118         }
1119
1120         if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1121             return -1;
1122         }
1123         h->cbp_table[mb_xy] |= ret << 12;
1124         if (CHROMA444(h)) {
1125             if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1126                 return -1;
1127             }
1128             if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1129                 return -1;
1130             }
1131         } else if (CHROMA422(h)) {
1132             if(cbp&0x30){
1133                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1134                     if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1135                                         CHROMA_DC_BLOCK_INDEX + chroma_idx, ff_h264_chroma422_dc_scan,
1136                                         NULL, 8) < 0) {
1137                         return -1;
1138                     }
1139             }
1140
1141             if(cbp&0x20){
1142                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1143                     const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1144                     int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1145                     for (i8x8 = 0; i8x8 < 2; i8x8++) {
1146                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1147                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1148                             if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1149                                 return -1;
1150                             mb += 16 << pixel_shift;
1151                         }
1152                     }
1153                 }
1154             }else{
1155                 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1156                 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1157             }
1158         } else /* yuv420 */ {
1159             if(cbp&0x30){
1160                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1161                     if (decode_residual(h, sl, gb, sl->mb + ((256 + 16 * 16 * chroma_idx) << pixel_shift),
1162                                         CHROMA_DC_BLOCK_INDEX + chroma_idx, ff_h264_chroma_dc_scan, NULL, 4) < 0) {
1163                         return -1;
1164                     }
1165             }
1166
1167             if(cbp&0x20){
1168                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1169                     const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1170                     for(i4x4=0; i4x4<4; i4x4++){
1171                         const int index= 16 + 16*chroma_idx + i4x4;
1172                         if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1173                             return -1;
1174                         }
1175                     }
1176                 }
1177             }else{
1178                 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1179                 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1180             }
1181         }
1182     }else{
1183         fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1184         fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1185         fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1186     }
1187     h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1188     write_back_non_zero_count(h, sl);
1189
1190     return 0;
1191 }