]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
indeo: Reject impossible FRAMETYPE_NULL
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC(h) 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/* 16-entry lookup tables; going by their names they translate a Golomb code
 * number into a coded block pattern for inter and intra4x4 macroblocks in
 * streams without chroma.
 * NOTE(review): neither table is referenced in the visible part of this file,
 * so the exact meaning of index and value should be confirmed at the use site. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
43 };
44
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
47 };
48
/* Code lengths (*_len) and code words (*_bits) for the coeff_token VLCs that
 * decode_residual() uses for blocks with max_coeff == 4 (chroma_dc_*) and for
 * the other blocks with max_coeff <= 8 (chroma422_dc_*).
 * Each row holds four entries; the decoder splits the decoded symbol into
 * total_coeff = symbol >> 2 and trailing_ones = symbol & 3, so a row
 * corresponds to one total_coeff value and a column to one trailing_ones
 * value (presumably init_vlc() uses the array index as the symbol - confirm).
 * Entries where trailing_ones would exceed total_coeff have length 0. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
50  2, 0, 0, 0,
51  6, 1, 0, 0,
52  6, 6, 3, 0,
53  6, 7, 7, 6,
54  6, 8, 8, 7,
55 };
56
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
58  1, 0, 0, 0,
59  7, 1, 0, 0,
60  4, 6, 1, 0,
61  3, 3, 2, 5,
62  2, 3, 2, 0,
63 };
64
/* Same layout as above with 9 rows (total_coeff 0..8). */
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
66   1,  0,  0,  0,
67   7,  2,  0,  0,
68   7,  7,  3,  0,
69   9,  7,  7,  5,
70   9,  9,  7,  6,
71  10, 10,  9,  7,
72  11, 11, 10,  7,
73  12, 12, 11, 10,
74  13, 12, 12, 11,
75 };
76
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
78   1,   0,  0, 0,
79  15,   1,  0, 0,
80  14,  13,  1, 0,
81   7,  12, 11, 1,
82   6,   5, 10, 1,
83   7,   6,  4, 9,
84   7,   6,  5, 8,
85   7,   6,  5, 4,
86   7,   5,  4, 4,
87 };
88
/* Code lengths for the four general coeff_token VLCs.
 * The first index selects the table; decode_residual() picks it through
 * coeff_token_table_index[] from the predicted non-zero count of the block.
 * The second index is the symbol, laid out as groups of four entries:
 * total_coeff = symbol >> 2 (0..16), trailing_ones = symbol & 3.
 * Entries where trailing_ones would exceed total_coeff have length 0. */
89 static const uint8_t coeff_token_len[4][4*17]={
90 {
91      1, 0, 0, 0,
92      6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
93     11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
94     14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
95     16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
96 },
97 {
98      2, 0, 0, 0,
99      6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
100      8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
101     12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
102     13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
103 },
104 {
105      4, 0, 0, 0,
106      6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
107      7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
108      8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
109     10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
110 },
111 {
112      6, 0, 0, 0,
113      6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
114      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
115      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
116      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
117 }
118 };
119
/* Code words matching coeff_token_len[][] entry for entry. */
120 static const uint8_t coeff_token_bits[4][4*17]={
121 {
122      1, 0, 0, 0,
123      5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
124      7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
125     15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
126     15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
127 },
128 {
129      3, 0, 0, 0,
130     11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
131      4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
132     15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
133     11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
134 },
135 {
136     15, 0, 0, 0,
137     15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
138     11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
139     11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
140     13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
141 },
142 {
143      3, 0, 0, 0,
144      0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
145     16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
146     32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
147     48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
148 }
149 };
150
/* Code lengths for the total_zeros VLCs of blocks with more than 8
 * coefficients. Row r is the table decode_residual() uses when
 * total_coeff == r + 1; the column is the total_zeros value.
 * The array is declared with 16 rows but only 15 are initialised, and
 * ff_h264_decode_init_vlc() builds VLCs from rows 0..14 only. */
151 static const uint8_t total_zeros_len[16][16]= {
152     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155     {5,3,4,4,3,3,3,4,3,4,5,5,5},
156     {4,4,4,3,3,3,3,3,4,5,4,5},
157     {6,5,3,3,3,3,3,3,4,3,6},
158     {6,5,3,3,3,2,3,4,3,6},
159     {6,4,5,3,2,2,3,3,6},
160     {6,6,4,2,2,3,2,5},
161     {5,5,3,2,2,2,4},
162     {4,4,3,3,1,3},
163     {4,4,2,1,3},
164     {3,3,1,2},
165     {2,2,1},
166     {1,1},
167 };
168
/* Code words matching total_zeros_len[][] entry for entry. */
169 static const uint8_t total_zeros_bits[16][16]= {
170     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173     {3,7,5,4,6,5,4,3,3,2,2,1,0},
174     {5,4,3,7,6,5,4,3,2,1,1,0},
175     {1,1,7,6,5,4,3,2,1,1,0},
176     {1,1,5,4,3,3,2,1,1,0},
177     {1,1,1,3,3,2,2,1,0},
178     {1,0,1,3,2,1,1,1},
179     {1,0,1,3,2,1,1},
180     {0,1,1,2,1,3},
181     {0,1,1,1,1},
182     {0,1,1,1},
183     {0,1,1},
184     {0,1},
185 };
186
/* total_zeros code lengths / code words for blocks with max_coeff == 4
 * (chroma_dc_*) and for the other blocks with max_coeff <= 8
 * (chroma422_dc_*). Row r is used when total_coeff == r + 1; the column is
 * the total_zeros value. */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
188     { 1, 2, 3, 3,},
189     { 1, 2, 2, 0,},
190     { 1, 1, 0, 0,},
191 };
192
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
194     { 1, 1, 1, 0,},
195     { 1, 1, 0, 0,},
196     { 1, 0, 0, 0,},
197 };
198
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200     { 1, 3, 3, 4, 4, 4, 5, 5 },
201     { 3, 2, 3, 3, 3, 3, 3 },
202     { 3, 3, 2, 2, 3, 3 },
203     { 3, 2, 2, 2, 3 },
204     { 2, 2, 2, 2 },
205     { 2, 2, 1 },
206     { 1, 1 },
207 };
208
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210     { 1, 2, 3, 2, 3, 1, 1, 0 },
211     { 0, 1, 1, 4, 5, 6, 7 },
212     { 0, 1, 1, 2, 6, 7 },
213     { 6, 0, 1, 2, 7 },
214     { 0, 1, 2, 3 },
215     { 0, 1, 1 },
216     { 0, 1 },
217 };
218
/* Code lengths / code words for the run_before VLCs; the column is the
 * run_before value.
 * Rows 0..5 feed run_vlc[0..5] (7 symbols each), selected in
 * decode_residual() as run_vlc[zeros_left - 1] while zeros_left < 7.
 * Row 6 feeds run7_vlc (16 symbols), used for every zeros_left >= 7. */
219 static const uint8_t run_len[7][16]={
220     {1,1},
221     {1,2,2},
222     {2,2,2,2},
223     {2,2,2,3,3},
224     {2,2,3,3,3,3},
225     {2,3,3,3,3,3,3},
226     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
227 };
228
229 static const uint8_t run_bits[7][16]={
230     {1,0},
231     {1,1,0},
232     {3,2,1,0},
233     {3,2,1,1,0},
234     {3,2,3,2,1,0},
235     {3,0,1,3,2,5,4},
236     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
237 };
238
/* Statically allocated VLC objects and their backing storage.
 * ff_h264_decode_init_vlc() points each VLC at its table, records the
 * matching *_size value as table_allocated and then calls init_vlc() with
 * INIT_VLC_USE_NEW_STATIC. */

/* The four coeff_token tables share one array; the init code hands out
 * consecutive slices of the sizes listed below and asserts that their sum
 * equals the array length. */
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250
/* One VLC per total_coeff value 1..15 (indexed by total_coeff - 1). */
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262
/* run_before VLCs for zeros_left 1..6 (indexed by zeros_left - 1). */
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266
/* run_before VLC used for every zeros_left >= 7. */
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270
/* Direct lookup for coefficient levels, indexed by
 * [suffix_length][next LEVEL_TAB_BITS bits of the stream].
 * Element [0] is either the decoded level or a value >= 100 meaning
 * "prefix + 100" (the code did not fit into the lookup window);
 * element [1] is the number of bits to skip. */
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273
/* First-stage lookup widths, passed both to init_vlc() and to get_vlc2(). */
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS           8
277 #define TOTAL_ZEROS_VLC_BITS           9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS                   3
281 #define RUN7_VLC_BITS                  6
282
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325
326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410
411         init_cavlc_level_tab();
412     }
413 }
414
415 /**
416  *
417  */
418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434
435     return log-1;
436 }
437
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447     int level[16];
448     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449
450     //FIXME put trailing_onex into the context
451
452     if(max_coeff <= 8){
453         if (max_coeff == 4)
454             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455         else
456             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457         total_coeff= coeff_token>>2;
458     }else{
459         if(n >= LUMA_DC_BLOCK_INDEX){
460             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462             total_coeff= coeff_token>>2;
463         }else{
464             total_coeff= pred_non_zero_count(h, n);
465             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466             total_coeff= coeff_token>>2;
467         }
468     }
469     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470
471     //FIXME set last_non_zero?
472
473     if(total_coeff==0)
474         return 0;
475     if(total_coeff > (unsigned)max_coeff) {
476         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
477         return -1;
478     }
479
480     trailing_ones= coeff_token&3;
481     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482     assert(total_coeff<=16);
483
484     i = show_bits(gb, 3);
485     skip_bits(gb, trailing_ones);
486     level[0] = 1-((i&4)>>1);
487     level[1] = 1-((i&2)   );
488     level[2] = 1-((i&1)<<1);
489
490     if(trailing_ones<total_coeff) {
491         int mask, prefix;
492         int suffix_length = total_coeff > 10 & trailing_ones < 3;
493         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495
496         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
497         if(level_code >= 100){
498             prefix= level_code - 100;
499             if(prefix == LEVEL_TAB_BITS)
500                 prefix += get_level_prefix(gb);
501
502             //first coefficient has suffix_length equal to 0 or 1
503             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504                 if(suffix_length)
505                     level_code= (prefix<<1) + get_bits1(gb); //part
506                 else
507                     level_code= prefix; //part
508             }else if(prefix==14){
509                 if(suffix_length)
510                     level_code= (prefix<<1) + get_bits1(gb); //part
511                 else
512                     level_code= prefix + get_bits(gb, 4); //part
513             }else{
514                 level_code= 30 + get_bits(gb, prefix-3); //part
515                 if(prefix>=16){
516                     if(prefix > 25+3){
517                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
518                         return -1;
519                     }
520                     level_code += (1<<(prefix-3))-4096;
521                 }
522             }
523
524             if(trailing_ones < 3) level_code += 2;
525
526             suffix_length = 2;
527             mask= -(level_code&1);
528             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
529         }else{
530             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
531
532             suffix_length = 1 + (level_code + 3U > 6U);
533             level[trailing_ones]= level_code;
534         }
535
536         //remaining coefficients have suffix_length > 0
537         for(i=trailing_ones+1;i<total_coeff;i++) {
538             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
539             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
540             level_code= cavlc_level_tab[suffix_length][bitsi][0];
541
542             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
543             if(level_code >= 100){
544                 prefix= level_code - 100;
545                 if(prefix == LEVEL_TAB_BITS){
546                     prefix += get_level_prefix(gb);
547                 }
548                 if(prefix<15){
549                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
550                 }else{
551                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
552                     if(prefix>=16)
553                         level_code += (1<<(prefix-3))-4096;
554                 }
555                 mask= -(level_code&1);
556                 level_code= (((2+level_code)>>1) ^ mask) - mask;
557             }
558             level[i]= level_code;
559             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
560         }
561     }
562
563     if(total_coeff == max_coeff)
564         zeros_left=0;
565     else{
566         if (max_coeff <= 8) {
567             if (max_coeff == 4)
568                 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
569                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
570             else
571                 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
572                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
573         } else {
574             zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
575         }
576     }
577
578 #define STORE_BLOCK(type) \
579     scantable += zeros_left + total_coeff - 1; \
580     if(n >= LUMA_DC_BLOCK_INDEX){ \
581         ((type*)block)[*scantable] = level[0]; \
582         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
583             if(zeros_left < 7) \
584                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
585             else \
586                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
587             zeros_left -= run_before; \
588             scantable -= 1 + run_before; \
589             ((type*)block)[*scantable]= level[i]; \
590         } \
591         for(;i<total_coeff;i++) { \
592             scantable--; \
593             ((type*)block)[*scantable]= level[i]; \
594         } \
595     }else{ \
596         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
597         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
598             if(zeros_left < 7) \
599                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
600             else \
601                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
602             zeros_left -= run_before; \
603             scantable -= 1 + run_before; \
604             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
605         } \
606         for(;i<total_coeff;i++) { \
607             scantable--; \
608             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
609         } \
610     }
611
612     if (zeros_left < 0) {
613         av_log(h->avctx, AV_LOG_ERROR,
614                "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
615         return AVERROR_INVALIDDATA;
616     }
617
618     if (h->pixel_shift) {
619         STORE_BLOCK(int32_t)
620     } else {
621         STORE_BLOCK(int16_t)
622     }
623
624     return 0;
625 }
626
627 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
628     int i4x4, i8x8;
629     int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
630     if(IS_INTRA16x16(mb_type)){
631         AV_ZERO128(h->mb_luma_dc[p]+0);
632         AV_ZERO128(h->mb_luma_dc[p]+8);
633         AV_ZERO128(h->mb_luma_dc[p]+16);
634         AV_ZERO128(h->mb_luma_dc[p]+24);
635         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
636             return -1; //FIXME continue if partitioned and other return -1 too
637         }
638
639         assert((cbp&15) == 0 || (cbp&15) == 15);
640
641         if(cbp&15){
642             for(i8x8=0; i8x8<4; i8x8++){
643                 for(i4x4=0; i4x4<4; i4x4++){
644                     const int index= i4x4 + 4*i8x8 + p*16;
645                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
646                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
647                         return -1;
648                     }
649                 }
650             }
651             return 0xf;
652         }else{
653             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
654             return 0;
655         }
656     }else{
657         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
658         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
659         int new_cbp = 0;
660         for(i8x8=0; i8x8<4; i8x8++){
661             if(cbp & (1<<i8x8)){
662                 if(IS_8x8DCT(mb_type)){
663                     int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
664                     uint8_t *nnz;
665                     for(i4x4=0; i4x4<4; i4x4++){
666                         const int index= i4x4 + 4*i8x8 + p*16;
667                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
668                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
669                             return -1;
670                     }
671                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
672                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
673                     new_cbp |= !!nnz[0] << i8x8;
674                 }else{
675                     for(i4x4=0; i4x4<4; i4x4++){
676                         const int index= i4x4 + 4*i8x8 + p*16;
677                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
678                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
679                             return -1;
680                         }
681                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
682                     }
683                 }
684             }else{
685                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
686                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
687             }
688         }
689         return new_cbp;
690     }
691 }
692
693 int ff_h264_decode_mb_cavlc(H264Context *h){
694     int mb_xy;
695     int partition_count;
696     unsigned int mb_type, cbp;
697     int dct8x8_allowed= h->pps.transform_8x8_mode;
698     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
699     const int pixel_shift = h->pixel_shift;
700
701     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
702
703     tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
704     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
705                 down the code */
706     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
707         if(h->mb_skip_run==-1)
708             h->mb_skip_run= get_ue_golomb(&h->gb);
709
710         if (h->mb_skip_run--) {
711             if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
712                 if(h->mb_skip_run==0)
713                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
714             }
715             decode_mb_skip(h);
716             return 0;
717         }
718     }
719     if (FRAME_MBAFF(h)) {
720         if( (h->mb_y&1) == 0 )
721             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
722     }
723
724     h->prev_mb_skipped= 0;
725
726     mb_type= get_ue_golomb(&h->gb);
727     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
728         if(mb_type < 23){
729             partition_count= b_mb_type_info[mb_type].partition_count;
730             mb_type=         b_mb_type_info[mb_type].type;
731         }else{
732             mb_type -= 23;
733             goto decode_intra_mb;
734         }
735     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
736         if(mb_type < 5){
737             partition_count= p_mb_type_info[mb_type].partition_count;
738             mb_type=         p_mb_type_info[mb_type].type;
739         }else{
740             mb_type -= 5;
741             goto decode_intra_mb;
742         }
743     }else{
744        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
745         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
746             mb_type--;
747 decode_intra_mb:
748         if(mb_type > 25){
749             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
750             return -1;
751         }
752         partition_count=0;
753         cbp= i_mb_type_info[mb_type].cbp;
754         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
755         mb_type= i_mb_type_info[mb_type].type;
756     }
757
758     if(MB_FIELD(h))
759         mb_type |= MB_TYPE_INTERLACED;
760
761     h->slice_table[ mb_xy ]= h->slice_num;
762
763     if(IS_INTRA_PCM(mb_type)){
764         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
765                             h->sps.bit_depth_luma;
766
767         // We assume these blocks are very rare so we do not optimize it.
768         h->intra_pcm_ptr = align_get_bits(&h->gb);
769         skip_bits_long(&h->gb, mb_size);
770
771         // In deblocking, the quantizer is 0
772         h->cur_pic.qscale_table[mb_xy] = 0;
773         // All coeffs are present
774         memset(h->non_zero_count[mb_xy], 16, 48);
775
776         h->cur_pic.mb_type[mb_xy] = mb_type;
777         return 0;
778     }
779
780     fill_decode_neighbors(h, mb_type);
781     fill_decode_caches(h, mb_type);
782
783     //mb_pred
784     if(IS_INTRA(mb_type)){
785         int pred_mode;
786 //            init_top_left_availability(h);
787         if(IS_INTRA4x4(mb_type)){
788             int i;
789             int di = 1;
790             if(dct8x8_allowed && get_bits1(&h->gb)){
791                 mb_type |= MB_TYPE_8x8DCT;
792                 di = 4;
793             }
794
795 //                fill_intra4x4_pred_table(h);
796             for(i=0; i<16; i+=di){
797                 int mode= pred_intra_mode(h, i);
798
799                 if(!get_bits1(&h->gb)){
800                     const int rem_mode= get_bits(&h->gb, 3);
801                     mode = rem_mode + (rem_mode >= mode);
802                 }
803
804                 if(di==4)
805                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
806                 else
807                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
808             }
809             write_back_intra_pred_mode(h);
810             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
811                 return -1;
812         }else{
813             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
814             if(h->intra16x16_pred_mode < 0)
815                 return -1;
816         }
817         if(decode_chroma){
818             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
819             if(pred_mode < 0)
820                 return -1;
821             h->chroma_pred_mode= pred_mode;
822         } else {
823             h->chroma_pred_mode = DC_128_PRED8x8;
824         }
825     }else if(partition_count==4){
826         int i, j, sub_partition_count[4], list, ref[2][4];
827
828         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
829             for(i=0; i<4; i++){
830                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
831                 if(h->sub_mb_type[i] >=13){
832                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
833                     return -1;
834                 }
835                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
836                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
837             }
838             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
839                 ff_h264_pred_direct_motion(h, &mb_type);
840                 h->ref_cache[0][scan8[4]] =
841                 h->ref_cache[1][scan8[4]] =
842                 h->ref_cache[0][scan8[12]] =
843                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
844             }
845         }else{
846             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
847             for(i=0; i<4; i++){
848                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
849                 if(h->sub_mb_type[i] >=4){
850                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
851                     return -1;
852                 }
853                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
854                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
855             }
856         }
857
858         for(list=0; list<h->list_count; list++){
859             int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF(h);
860             for(i=0; i<4; i++){
861                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
862                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
863                     unsigned int tmp;
864                     if(ref_count == 1){
865                         tmp= 0;
866                     }else if(ref_count == 2){
867                         tmp= get_bits1(&h->gb)^1;
868                     }else{
869                         tmp= get_ue_golomb_31(&h->gb);
870                         if(tmp>=ref_count){
871                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
872                             return -1;
873                         }
874                     }
875                     ref[list][i]= tmp;
876                 }else{
877                  //FIXME
878                     ref[list][i] = -1;
879                 }
880             }
881         }
882
883         if(dct8x8_allowed)
884             dct8x8_allowed = get_dct8x8_allowed(h);
885
886         for(list=0; list<h->list_count; list++){
887             for(i=0; i<4; i++){
888                 if(IS_DIRECT(h->sub_mb_type[i])) {
889                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
890                     continue;
891                 }
892                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
893                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
894
895                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
896                     const int sub_mb_type= h->sub_mb_type[i];
897                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
898                     for(j=0; j<sub_partition_count[i]; j++){
899                         int mx, my;
900                         const int index= 4*i + block_width*j;
901                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
902                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
903                         mx += get_se_golomb(&h->gb);
904                         my += get_se_golomb(&h->gb);
905                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
906
907                         if(IS_SUB_8X8(sub_mb_type)){
908                             mv_cache[ 1 ][0]=
909                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
910                             mv_cache[ 1 ][1]=
911                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
912                         }else if(IS_SUB_8X4(sub_mb_type)){
913                             mv_cache[ 1 ][0]= mx;
914                             mv_cache[ 1 ][1]= my;
915                         }else if(IS_SUB_4X8(sub_mb_type)){
916                             mv_cache[ 8 ][0]= mx;
917                             mv_cache[ 8 ][1]= my;
918                         }
919                         mv_cache[ 0 ][0]= mx;
920                         mv_cache[ 0 ][1]= my;
921                     }
922                 }else{
923                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
924                     p[0] = p[1]=
925                     p[8] = p[9]= 0;
926                 }
927             }
928         }
929     }else if(IS_DIRECT(mb_type)){
930         ff_h264_pred_direct_motion(h, &mb_type);
931         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
932     }else{
933         int list, mx, my, i;
934          //FIXME we should set ref_idx_l? to 0 if we use that later ...
935         if(IS_16X16(mb_type)){
936             for(list=0; list<h->list_count; list++){
937                     unsigned int val;
938                     if(IS_DIR(mb_type, 0, list)){
939                         int rc = h->ref_count[list] << MB_MBAFF(h);
940                         if (rc == 1) {
941                             val= 0;
942                         } else if (rc == 2) {
943                             val= get_bits1(&h->gb)^1;
944                         }else{
945                             val= get_ue_golomb_31(&h->gb);
946                             if (val >= rc) {
947                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
948                                 return -1;
949                             }
950                         }
951                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
952                     }
953             }
954             for(list=0; list<h->list_count; list++){
955                 if(IS_DIR(mb_type, 0, list)){
956                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
957                     mx += get_se_golomb(&h->gb);
958                     my += get_se_golomb(&h->gb);
959                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
960
961                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
962                 }
963             }
964         }
965         else if(IS_16X8(mb_type)){
966             for(list=0; list<h->list_count; list++){
967                     for(i=0; i<2; i++){
968                         unsigned int val;
969                         if(IS_DIR(mb_type, i, list)){
970                             int rc = h->ref_count[list] << MB_MBAFF(h);
971                             if (rc == 1) {
972                                 val= 0;
973                             } else if (rc == 2) {
974                                 val= get_bits1(&h->gb)^1;
975                             }else{
976                                 val= get_ue_golomb_31(&h->gb);
977                                 if (val >= rc) {
978                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
979                                     return -1;
980                                 }
981                             }
982                         }else
983                             val= LIST_NOT_USED&0xFF;
984                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
985                     }
986             }
987             for(list=0; list<h->list_count; list++){
988                 for(i=0; i<2; i++){
989                     unsigned int val;
990                     if(IS_DIR(mb_type, i, list)){
991                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
992                         mx += get_se_golomb(&h->gb);
993                         my += get_se_golomb(&h->gb);
994                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
995
996                         val= pack16to32(mx,my);
997                     }else
998                         val=0;
999                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1000                 }
1001             }
1002         }else{
1003             assert(IS_8X16(mb_type));
1004             for(list=0; list<h->list_count; list++){
1005                     for(i=0; i<2; i++){
1006                         unsigned int val;
1007                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1008                             int rc = h->ref_count[list] << MB_MBAFF(h);
1009                             if (rc == 1) {
1010                                 val= 0;
1011                             } else if (rc == 2) {
1012                                 val= get_bits1(&h->gb)^1;
1013                             }else{
1014                                 val= get_ue_golomb_31(&h->gb);
1015                                 if (val >= rc) {
1016                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1017                                     return -1;
1018                                 }
1019                             }
1020                         }else
1021                             val= LIST_NOT_USED&0xFF;
1022                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1023                     }
1024             }
1025             for(list=0; list<h->list_count; list++){
1026                 for(i=0; i<2; i++){
1027                     unsigned int val;
1028                     if(IS_DIR(mb_type, i, list)){
1029                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1030                         mx += get_se_golomb(&h->gb);
1031                         my += get_se_golomb(&h->gb);
1032                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1033
1034                         val= pack16to32(mx,my);
1035                     }else
1036                         val=0;
1037                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1038                 }
1039             }
1040         }
1041     }
1042
1043     if(IS_INTER(mb_type))
1044         write_back_motion(h, mb_type);
1045
1046     if(!IS_INTRA16x16(mb_type)){
1047         cbp= get_ue_golomb(&h->gb);
1048
1049         if(decode_chroma){
1050             if(cbp > 47){
1051                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1052                 return -1;
1053             }
1054             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1055             else                     cbp= golomb_to_inter_cbp   [cbp];
1056         }else{
1057             if(cbp > 15){
1058                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1059                 return -1;
1060             }
1061             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1062             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1063         }
1064     }
1065
1066     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1067         mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1068     }
1069     h->cbp=
1070     h->cbp_table[mb_xy]= cbp;
1071     h->cur_pic.mb_type[mb_xy] = mb_type;
1072
1073     if(cbp || IS_INTRA16x16(mb_type)){
1074         int i4x4, i8x8, chroma_idx;
1075         int dquant;
1076         int ret;
1077         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1078         const uint8_t *scan, *scan8x8;
1079         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1080
1081         if(IS_INTERLACED(mb_type)){
1082             scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1083             scan= h->qscale ? h->field_scan : h->field_scan_q0;
1084         }else{
1085             scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1086             scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1087         }
1088
1089         dquant= get_se_golomb(&h->gb);
1090
1091         h->qscale += dquant;
1092
1093         if(((unsigned)h->qscale) > max_qp){
1094             if(h->qscale<0) h->qscale+= max_qp+1;
1095             else            h->qscale-= max_qp+1;
1096             if(((unsigned)h->qscale) > max_qp){
1097                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1098                 return -1;
1099             }
1100         }
1101
1102         h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1103         h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1104
1105         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1106             return -1;
1107         }
1108         h->cbp_table[mb_xy] |= ret << 12;
1109         if (CHROMA444(h)) {
1110             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1111                 return -1;
1112             }
1113             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1114                 return -1;
1115             }
1116         } else if (CHROMA422(h)) {
1117             if(cbp&0x30){
1118                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1119                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1120                                         CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1121                                         NULL, 8) < 0) {
1122                         return -1;
1123                     }
1124             }
1125
1126             if(cbp&0x20){
1127                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1128                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1129                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1130                     for (i8x8 = 0; i8x8 < 2; i8x8++) {
1131                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1132                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1133                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1134                                 return -1;
1135                             mb += 16 << pixel_shift;
1136                         }
1137                     }
1138                 }
1139             }else{
1140                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1141                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1142             }
1143         } else /* yuv420 */ {
1144             if(cbp&0x30){
1145                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1146                     if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1147                         return -1;
1148                     }
1149             }
1150
1151             if(cbp&0x20){
1152                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1153                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1154                     for(i4x4=0; i4x4<4; i4x4++){
1155                         const int index= 16 + 16*chroma_idx + i4x4;
1156                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1157                             return -1;
1158                         }
1159                     }
1160                 }
1161             }else{
1162                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1163                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1164             }
1165         }
1166     }else{
1167         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1168         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1169         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1170     }
1171     h->cur_pic.qscale_table[mb_xy] = h->qscale;
1172     write_back_non_zero_count(h);
1173
1174     return 0;
1175 }