git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
h264: Fix handling of changing reference counts between slices
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29 #define UNCHECKED_BITSTREAM_READER 1
30
31 #include "internal.h"
32 #include "avcodec.h"
33 #include "mpegvideo.h"
34 #include "h264.h"
35 #include "h264data.h" // FIXME FIXME FIXME
36 #include "h264_mvpred.h"
37 #include "golomb.h"
38 #include "libavutil/avassert.h"
39
40
/* Two 16-entry remapping tables; each one is a permutation of 0..15.
 * NOTE(review): judging by the names, these presumably translate an
 * Exp-Golomb code number into a luma-only ("gray") coded block pattern for
 * inter and intra4x4 macroblocks respectively -- the users of these tables
 * are not visible in this part of the file, confirm there. */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
48
/* Code lengths (in bits) of the chroma DC coeff_token VLC used for blocks
 * with 4 coefficients. Laid out as rows of 4: the row is total_coeff and the
 * column is trailing_ones, matching how decode_residual() splits the decoded
 * token (token>>2 and token&3). A length of 0 appears where
 * trailing_ones > total_coeff, i.e. where no code exists
 * (presumably such entries are skipped by init_vlc -- confirm). */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/* Code words matching chroma_dc_coeff_token_len, same layout. */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};

/* Code lengths of the chroma DC coeff_token VLC used by decode_residual()
 * for the other "max_coeff <= 8" case (8 coefficients, 4:2:2 per the name).
 * Same row/column layout as above, with total_coeff running from 0 to 8. */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

/* Code words matching chroma422_dc_coeff_token_len, same layout. */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,   0,  0, 0,
 15,   1,  0, 0,
 14,  13,  1, 0,
  7,  12, 11, 1,
  6,   5, 10, 1,
  7,   6,  4, 9,
  7,   6,  5, 8,
  7,   6,  5, 4,
  7,   5,  4, 4,
};
88
/* Code lengths (in bits) of the four coeff_token VLCs used for blocks with
 * more than 8 coefficients. The first index selects the table;
 * decode_residual() picks it from the predicted non-zero count via
 * coeff_token_table_index[]. Within a table the entries form groups of 4:
 * group = total_coeff (0..16), position in group = trailing_ones (0..3).
 * A length of 0 appears where trailing_ones > total_coeff (no such code). */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/* Code words matching coeff_token_len, same layout. The last table is a
 * fixed-length code: every valid entry is 6 bits long. */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
150
/* Code lengths of the total_zeros VLCs for blocks with more than 8
 * coefficients. Row r is turned into total_zeros_vlc[r] by
 * ff_h264_decode_init_vlc() and is read by decode_residual() when
 * total_coeff == r + 1; the column is the total_zeros value being coded.
 * Only 15 of the 16 declared rows are filled in and used; trailing entries
 * of shorter rows are implicitly 0. */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/* Code words matching total_zeros_len, same layout. */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};

/* total_zeros code lengths for 4-coefficient chroma DC blocks;
 * row = total_coeff - 1, column = total_zeros. */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/* Code words matching chroma_dc_total_zeros_len. */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};

/* total_zeros code lengths for 8-coefficient (4:2:2) chroma DC blocks;
 * row = total_coeff - 1, column = total_zeros. */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/* Code words matching chroma422_dc_total_zeros_len. */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
218
/* Code lengths of the run_before VLCs. Rows 0..5 become run_vlc[0..5] and
 * are used by decode_residual() while zeros_left is 1..6 (row = zeros_left - 1);
 * row 6 becomes run7_vlc and is used once zeros_left is 7 or more.
 * The column is the run_before value being coded. */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/* Code words matching run_len, same layout. */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
238
/* VLC decoder objects and the static storage backing their lookup tables.
 * ff_h264_decode_init_vlc() points each VLC at its storage, records the
 * matching *_size value as table_allocated and then builds the table. */

/* The four coeff_token tables are packed back to back in one array;
 * coeff_token_vlc_tables_size[] gives the share of each of them. */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* Number of bitstream bits used to index cavlc_level_tab. Each entry holds
 * { value, number of bits to skip }; see init_cavlc_level_tab(). */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* Bit widths passed to init_vlc()/get_vlc2() for each VLC above. Most of the
 * table sizes above equal 1 << bits; the coeff_token (first three) and run7
 * tables are larger than that. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6
282
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325
326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410
411         init_cavlc_level_tab();
412     }
413 }
414
415 /**
416  *
417  */
418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434
435     return log-1;
436 }
437
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447     int level[16];
448     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449
450     //FIXME put trailing_onex into the context
451
452     if(max_coeff <= 8){
453         if (max_coeff == 4)
454             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455         else
456             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457         total_coeff= coeff_token>>2;
458     }else{
459         if(n >= LUMA_DC_BLOCK_INDEX){
460             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462             total_coeff= coeff_token>>2;
463         }else{
464             total_coeff= pred_non_zero_count(h, n);
465             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466             total_coeff= coeff_token>>2;
467         }
468     }
469     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470
471     //FIXME set last_non_zero?
472
473     if(total_coeff==0)
474         return 0;
475     if(total_coeff > (unsigned)max_coeff) {
476         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
477         return -1;
478     }
479
480     trailing_ones= coeff_token&3;
481     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482     av_assert2(total_coeff<=16);
483
484     i = show_bits(gb, 3);
485     skip_bits(gb, trailing_ones);
486     level[0] = 1-((i&4)>>1);
487     level[1] = 1-((i&2)   );
488     level[2] = 1-((i&1)<<1);
489
490     if(trailing_ones<total_coeff) {
491         int mask, prefix;
492         int suffix_length = total_coeff > 10 & trailing_ones < 3;
493         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495
496         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
497         if(level_code >= 100){
498             prefix= level_code - 100;
499             if(prefix == LEVEL_TAB_BITS)
500                 prefix += get_level_prefix(gb);
501
502             //first coefficient has suffix_length equal to 0 or 1
503             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504                 if(suffix_length)
505                     level_code= (prefix<<1) + get_bits1(gb); //part
506                 else
507                     level_code= prefix; //part
508             }else if(prefix==14){
509                 if(suffix_length)
510                     level_code= (prefix<<1) + get_bits1(gb); //part
511                 else
512                     level_code= prefix + get_bits(gb, 4); //part
513             }else{
514                 level_code= 30;
515                 if(prefix>=16){
516                     if(prefix > 25+3){
517                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
518                         return -1;
519                     }
520                     level_code += (1<<(prefix-3))-4096;
521                 }
522                 level_code += get_bits(gb, prefix-3); //part
523             }
524
525             if(trailing_ones < 3) level_code += 2;
526
527             suffix_length = 2;
528             mask= -(level_code&1);
529             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530         }else{
531             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532
533             suffix_length = 1 + (level_code + 3U > 6U);
534             level[trailing_ones]= level_code;
535         }
536
537         //remaining coefficients have suffix_length > 0
538         for(i=trailing_ones+1;i<total_coeff;i++) {
539             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541             level_code= cavlc_level_tab[suffix_length][bitsi][0];
542
543             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544             if(level_code >= 100){
545                 prefix= level_code - 100;
546                 if(prefix == LEVEL_TAB_BITS){
547                     prefix += get_level_prefix(gb);
548                 }
549                 if(prefix<15){
550                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551                 }else{
552                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
553                     if(prefix>=16)
554                         level_code += (1<<(prefix-3))-4096;
555                 }
556                 mask= -(level_code&1);
557                 level_code= (((2+level_code)>>1) ^ mask) - mask;
558             }
559             level[i]= level_code;
560             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
561         }
562     }
563
564     if(total_coeff == max_coeff)
565         zeros_left=0;
566     else{
567         if (max_coeff <= 8) {
568             if (max_coeff == 4)
569                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
570                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
571             else
572                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
573                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
574         } else {
575             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
576         }
577     }
578
579 #define STORE_BLOCK(type) \
580     scantable += zeros_left + total_coeff - 1; \
581     if(n >= LUMA_DC_BLOCK_INDEX){ \
582         ((type*)block)[*scantable] = level[0]; \
583         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
584             if(zeros_left < 7) \
585                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
586             else \
587                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
588             zeros_left -= run_before; \
589             scantable -= 1 + run_before; \
590             ((type*)block)[*scantable]= level[i]; \
591         } \
592         for(;i<total_coeff;i++) { \
593             scantable--; \
594             ((type*)block)[*scantable]= level[i]; \
595         } \
596     }else{ \
597         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
598         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
599             if(zeros_left < 7) \
600                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
601             else \
602                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
603             zeros_left -= run_before; \
604             scantable -= 1 + run_before; \
605             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
606         } \
607         for(;i<total_coeff;i++) { \
608             scantable--; \
609             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610         } \
611     }
612
613     if (h->pixel_shift) {
614         STORE_BLOCK(int32_t)
615     } else {
616         STORE_BLOCK(int16_t)
617     }
618
619     if(zeros_left<0){
620         av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
621         return -1;
622     }
623
624     return 0;
625 }
626
627 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
628     int i4x4, i8x8;
629     int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
630     if(IS_INTRA16x16(mb_type)){
631         AV_ZERO128(h->mb_luma_dc[p]+0);
632         AV_ZERO128(h->mb_luma_dc[p]+8);
633         AV_ZERO128(h->mb_luma_dc[p]+16);
634         AV_ZERO128(h->mb_luma_dc[p]+24);
635         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
636             return -1; //FIXME continue if partitioned and other return -1 too
637         }
638
639         av_assert2((cbp&15) == 0 || (cbp&15) == 15);
640
641         if(cbp&15){
642             for(i8x8=0; i8x8<4; i8x8++){
643                 for(i4x4=0; i4x4<4; i4x4++){
644                     const int index= i4x4 + 4*i8x8 + p*16;
645                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
646                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
647                         return -1;
648                     }
649                 }
650             }
651             return 0xf;
652         }else{
653             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
654             return 0;
655         }
656     }else{
657         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
658         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
659         int new_cbp = 0;
660         for(i8x8=0; i8x8<4; i8x8++){
661             if(cbp & (1<<i8x8)){
662                 if(IS_8x8DCT(mb_type)){
663                     int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
664                     uint8_t *nnz;
665                     for(i4x4=0; i4x4<4; i4x4++){
666                         const int index= i4x4 + 4*i8x8 + p*16;
667                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
668                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
669                             return -1;
670                     }
671                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
672                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
673                     new_cbp |= !!nnz[0] << i8x8;
674                 }else{
675                     for(i4x4=0; i4x4<4; i4x4++){
676                         const int index= i4x4 + 4*i8x8 + p*16;
677                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
678                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
679                             return -1;
680                         }
681                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
682                     }
683                 }
684             }else{
685                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
686                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
687             }
688         }
689         return new_cbp;
690     }
691 }
692
693 int ff_h264_decode_mb_cavlc(H264Context *h){
694     int mb_xy;
695     int partition_count;
696     unsigned int mb_type, cbp;
697     int dct8x8_allowed= h->pps.transform_8x8_mode;
698     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
699     const int pixel_shift = h->pixel_shift;
700     unsigned local_ref_count[2];
701
702     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
703
704     tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
705     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
706                 down the code */
707     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
708         if(h->mb_skip_run==-1)
709             h->mb_skip_run= get_ue_golomb(&h->gb);
710
711         if (h->mb_skip_run--) {
712             if(FRAME_MBAFF && (h->mb_y&1) == 0){
713                 if(h->mb_skip_run==0)
714                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
715             }
716             decode_mb_skip(h);
717             return 0;
718         }
719     }
720     if(FRAME_MBAFF){
721         if( (h->mb_y&1) == 0 )
722             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
723     }
724
725     h->prev_mb_skipped= 0;
726
727     mb_type= get_ue_golomb(&h->gb);
728     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
729         if(mb_type < 23){
730             partition_count= b_mb_type_info[mb_type].partition_count;
731             mb_type=         b_mb_type_info[mb_type].type;
732         }else{
733             mb_type -= 23;
734             goto decode_intra_mb;
735         }
736     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
737         if(mb_type < 5){
738             partition_count= p_mb_type_info[mb_type].partition_count;
739             mb_type=         p_mb_type_info[mb_type].type;
740         }else{
741             mb_type -= 5;
742             goto decode_intra_mb;
743         }
744     }else{
745        av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
746         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
747             mb_type--;
748 decode_intra_mb:
749         if(mb_type > 25){
750             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
751             return -1;
752         }
753         partition_count=0;
754         cbp= i_mb_type_info[mb_type].cbp;
755         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
756         mb_type= i_mb_type_info[mb_type].type;
757     }
758
759     if(MB_FIELD)
760         mb_type |= MB_TYPE_INTERLACED;
761
762     h->slice_table[ mb_xy ]= h->slice_num;
763
764     if(IS_INTRA_PCM(mb_type)){
765         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
766                             h->sps.bit_depth_luma;
767
768         // We assume these blocks are very rare so we do not optimize it.
769         h->intra_pcm_ptr = align_get_bits(&h->gb);
770         skip_bits_long(&h->gb, mb_size);
771
772         // In deblocking, the quantizer is 0
773         h->cur_pic.f.qscale_table[mb_xy] = 0;
774         // All coeffs are present
775         memset(h->non_zero_count[mb_xy], 16, 48);
776
777         h->cur_pic.f.mb_type[mb_xy] = mb_type;
778         return 0;
779     }
780
781     local_ref_count[0] = h->ref_count[0] << MB_MBAFF;
782     local_ref_count[1] = h->ref_count[1] << MB_MBAFF;
783
784     fill_decode_neighbors(h, mb_type);
785     fill_decode_caches(h, mb_type);
786
787     //mb_pred
788     if(IS_INTRA(mb_type)){
789         int pred_mode;
790 //            init_top_left_availability(h);
791         if(IS_INTRA4x4(mb_type)){
792             int i;
793             int di = 1;
794             if(dct8x8_allowed && get_bits1(&h->gb)){
795                 mb_type |= MB_TYPE_8x8DCT;
796                 di = 4;
797             }
798
799 //                fill_intra4x4_pred_table(h);
800             for(i=0; i<16; i+=di){
801                 int mode= pred_intra_mode(h, i);
802
803                 if(!get_bits1(&h->gb)){
804                     const int rem_mode= get_bits(&h->gb, 3);
805                     mode = rem_mode + (rem_mode >= mode);
806                 }
807
808                 if(di==4)
809                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
810                 else
811                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
812             }
813             write_back_intra_pred_mode(h);
814             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
815                 return -1;
816         }else{
817             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
818             if(h->intra16x16_pred_mode < 0)
819                 return -1;
820         }
821         if(decode_chroma){
822             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
823             if(pred_mode < 0)
824                 return -1;
825             h->chroma_pred_mode= pred_mode;
826         } else {
827             h->chroma_pred_mode = DC_128_PRED8x8;
828         }
829     }else if(partition_count==4){
830         int i, j, sub_partition_count[4], list, ref[2][4];
831
832         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
833             for(i=0; i<4; i++){
834                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
835                 if(h->sub_mb_type[i] >=13){
836                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
837                     return -1;
838                 }
839                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
840                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
841             }
842             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
843                 ff_h264_pred_direct_motion(h, &mb_type);
844                 h->ref_cache[0][scan8[4]] =
845                 h->ref_cache[1][scan8[4]] =
846                 h->ref_cache[0][scan8[12]] =
847                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
848             }
849         }else{
850             av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
851             for(i=0; i<4; i++){
852                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
853                 if(h->sub_mb_type[i] >=4){
854                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
855                     return -1;
856                 }
857                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
858                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
859             }
860         }
861
862         for(list=0; list<h->list_count; list++){
863             int ref_count= IS_REF0(mb_type) ? 1 : local_ref_count[list];
864             for(i=0; i<4; i++){
865                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
866                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
867                     unsigned int tmp;
868                     if(ref_count == 1){
869                         tmp= 0;
870                     }else if(ref_count == 2){
871                         tmp= get_bits1(&h->gb)^1;
872                     }else{
873                         tmp= get_ue_golomb_31(&h->gb);
874                         if(tmp>=ref_count){
875                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
876                             return -1;
877                         }
878                     }
879                     ref[list][i]= tmp;
880                 }else{
881                  //FIXME
882                     ref[list][i] = -1;
883                 }
884             }
885         }
886
887         if(dct8x8_allowed)
888             dct8x8_allowed = get_dct8x8_allowed(h);
889
890         for(list=0; list<h->list_count; list++){
891             for(i=0; i<4; i++){
892                 if(IS_DIRECT(h->sub_mb_type[i])) {
893                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
894                     continue;
895                 }
896                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
897                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
898
899                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
900                     const int sub_mb_type= h->sub_mb_type[i];
901                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
902                     for(j=0; j<sub_partition_count[i]; j++){
903                         int mx, my;
904                         const int index= 4*i + block_width*j;
905                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
906                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
907                         mx += get_se_golomb(&h->gb);
908                         my += get_se_golomb(&h->gb);
909                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
910
911                         if(IS_SUB_8X8(sub_mb_type)){
912                             mv_cache[ 1 ][0]=
913                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
914                             mv_cache[ 1 ][1]=
915                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
916                         }else if(IS_SUB_8X4(sub_mb_type)){
917                             mv_cache[ 1 ][0]= mx;
918                             mv_cache[ 1 ][1]= my;
919                         }else if(IS_SUB_4X8(sub_mb_type)){
920                             mv_cache[ 8 ][0]= mx;
921                             mv_cache[ 8 ][1]= my;
922                         }
923                         mv_cache[ 0 ][0]= mx;
924                         mv_cache[ 0 ][1]= my;
925                     }
926                 }else{
927                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
928                     p[0] = p[1]=
929                     p[8] = p[9]= 0;
930                 }
931             }
932         }
933     }else if(IS_DIRECT(mb_type)){
934         ff_h264_pred_direct_motion(h, &mb_type);
935         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
936     }else{
937         int list, mx, my, i;
938          //FIXME we should set ref_idx_l? to 0 if we use that later ...
939         if(IS_16X16(mb_type)){
940             for(list=0; list<h->list_count; list++){
941                     unsigned int val;
942                     if(IS_DIR(mb_type, 0, list)){
943                         if(local_ref_count[list]==1){
944                             val= 0;
945                         }else if(local_ref_count[list]==2){
946                             val= get_bits1(&h->gb)^1;
947                         }else{
948                             val= get_ue_golomb_31(&h->gb);
949                             if(val >= local_ref_count[list]){
950                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
951                                 return -1;
952                             }
953                         }
954                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
955                     }
956             }
957             for(list=0; list<h->list_count; list++){
958                 if(IS_DIR(mb_type, 0, list)){
959                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
960                     mx += get_se_golomb(&h->gb);
961                     my += get_se_golomb(&h->gb);
962                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
963
964                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
965                 }
966             }
967         }
968         else if(IS_16X8(mb_type)){
969             for(list=0; list<h->list_count; list++){
970                     for(i=0; i<2; i++){
971                         unsigned int val;
972                         if(IS_DIR(mb_type, i, list)){
973                             if(local_ref_count[list] == 1){
974                                 val= 0;
975                             }else if(local_ref_count[list] == 2){
976                                 val= get_bits1(&h->gb)^1;
977                             }else{
978                                 val= get_ue_golomb_31(&h->gb);
979                                 if(val >= local_ref_count[list]){
980                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
981                                     return -1;
982                                 }
983                             }
984                         }else
985                             val= LIST_NOT_USED&0xFF;
986                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
987                     }
988             }
989             for(list=0; list<h->list_count; list++){
990                 for(i=0; i<2; i++){
991                     unsigned int val;
992                     if(IS_DIR(mb_type, i, list)){
993                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
994                         mx += get_se_golomb(&h->gb);
995                         my += get_se_golomb(&h->gb);
996                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
997
998                         val= pack16to32(mx,my);
999                     }else
1000                         val=0;
1001                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1002                 }
1003             }
1004         }else{
1005             av_assert2(IS_8X16(mb_type));
1006             for(list=0; list<h->list_count; list++){
1007                     for(i=0; i<2; i++){
1008                         unsigned int val;
1009                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1010                             if(local_ref_count[list]==1){
1011                                 val= 0;
1012                             }else if(local_ref_count[list]==2){
1013                                 val= get_bits1(&h->gb)^1;
1014                             }else{
1015                                 val= get_ue_golomb_31(&h->gb);
1016                                 if(val >= local_ref_count[list]){
1017                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1018                                     return -1;
1019                                 }
1020                             }
1021                         }else
1022                             val= LIST_NOT_USED&0xFF;
1023                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1024                     }
1025             }
1026             for(list=0; list<h->list_count; list++){
1027                 for(i=0; i<2; i++){
1028                     unsigned int val;
1029                     if(IS_DIR(mb_type, i, list)){
1030                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1031                         mx += get_se_golomb(&h->gb);
1032                         my += get_se_golomb(&h->gb);
1033                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1034
1035                         val= pack16to32(mx,my);
1036                     }else
1037                         val=0;
1038                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1039                 }
1040             }
1041         }
1042     }
1043
1044     if(IS_INTER(mb_type))
1045         write_back_motion(h, mb_type);
1046
1047     if(!IS_INTRA16x16(mb_type)){
1048         cbp= get_ue_golomb(&h->gb);
1049
1050         if(decode_chroma){
1051             if(cbp > 47){
1052                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1053                 return -1;
1054             }
1055             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1056             else                     cbp= golomb_to_inter_cbp   [cbp];
1057         }else{
1058             if(cbp > 15){
1059                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1060                 return -1;
1061             }
1062             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1063             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1064         }
1065     } else {
1066         if (!decode_chroma && cbp>15) {
1067             av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1068             return AVERROR_INVALIDDATA;
1069         }
1070     }
1071
1072     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1073         mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1074     }
1075     h->cbp=
1076     h->cbp_table[mb_xy]= cbp;
1077     h->cur_pic.f.mb_type[mb_xy] = mb_type;
1078
1079     if(cbp || IS_INTRA16x16(mb_type)){
1080         int i4x4, i8x8, chroma_idx;
1081         int dquant;
1082         int ret;
1083         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1084         const uint8_t *scan, *scan8x8;
1085         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1086
1087         if(IS_INTERLACED(mb_type)){
1088             scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1089             scan= h->qscale ? h->field_scan : h->field_scan_q0;
1090         }else{
1091             scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1092             scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1093         }
1094
1095         dquant= get_se_golomb(&h->gb);
1096
1097         h->qscale += dquant;
1098
1099         if(((unsigned)h->qscale) > max_qp){
1100             if(h->qscale<0) h->qscale+= max_qp+1;
1101             else            h->qscale-= max_qp+1;
1102             if(((unsigned)h->qscale) > max_qp){
1103                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1104                 return -1;
1105             }
1106         }
1107
1108         h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1109         h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1110
1111         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1112             return -1;
1113         }
1114         h->cbp_table[mb_xy] |= ret << 12;
1115         if(CHROMA444){
1116             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1117                 return -1;
1118             }
1119             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1120                 return -1;
1121             }
1122         } else {
1123             const int num_c8x8 = h->sps.chroma_format_idc;
1124
1125             if(cbp&0x30){
1126                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1127                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1128                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1129                                         CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1130                                         NULL, 4*num_c8x8) < 0) {
1131                         return -1;
1132                     }
1133             }
1134
1135             if(cbp&0x20){
1136                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1137                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1138                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1139                     for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1140                         for (i4x4=0; i4x4<4; i4x4++) {
1141                             const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1142                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1143                                 return -1;
1144                             mb += 16<<pixel_shift;
1145                         }
1146                     }
1147                 }
1148             }else{
1149                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1150                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1151             }
1152         }
1153     }else{
1154         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1155         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1156         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1157     }
1158     h->cur_pic.f.qscale_table[mb_xy] = h->qscale;
1159     write_back_non_zero_count(h);
1160
1161     return 0;
1162 }