]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
sgidec: stop using deprecated avcodec_set_dimensions
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC(h) 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 #include <assert.h>
39
/*
 * 16-entry lookup table for inter macroblocks; the values are a permutation
 * of 0..15. It is not referenced in the visible part of this file.
 * NOTE(review): presumably maps the ue-golomb coded_block_pattern code to a
 * luma-only cbp for streams without chroma -- confirm against the user.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

/*
 * Same kind of 16-entry permutation table as above, for intra 4x4
 * macroblocks. Not referenced in the visible part of this file.
 * NOTE(review): presumably the intra counterpart of the mapping above --
 * confirm against the user.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
47
/*
 * Code lengths for the chroma DC coeff_token VLC that decode_residual()
 * uses when max_coeff == 4. Handed to init_vlc() together with
 * chroma_dc_coeff_token_bits in ff_h264_decode_init_vlc().
 *
 * decode_residual() splits the decoded symbol as total_coeff = sym >> 2 and
 * trailing_ones = sym & 3, so each row of four below appears to correspond
 * to one total_coeff value (0..4) and each column to trailing_ones (0..3).
 * NOTE(review): length 0 presumably marks combinations that have no code
 * (trailing_ones > total_coeff) -- confirm against init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/*
 * Code words matching chroma_dc_coeff_token_len, entry for entry.
 */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};
63
/*
 * Code lengths for the chroma DC coeff_token VLC that decode_residual()
 * uses when max_coeff <= 8 but is not 4 (the chroma422 table). Handed to
 * init_vlc() together with chroma422_dc_coeff_token_bits.
 *
 * Same apparent layout as the 4-coefficient chroma DC table: one row of
 * four per total_coeff value (here 0..8), one column per trailing_ones
 * value (0..3), since the decoded symbol is split with >>2 and &3.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

/*
 * Code words matching chroma422_dc_coeff_token_len, entry for entry.
 */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,   0,  0, 0,
 15,   1,  0, 0,
 14,  13,  1, 0,
  7,  12, 11, 1,
  6,   5, 10, 1,
  7,   6,  4, 9,
  7,   6,  5, 8,
  7,   6,  5, 4,
  7,   5,  4, 4,
};
87
/*
 * Code lengths for the four regular coeff_token VLCs (one sub-array per
 * VLC). ff_h264_decode_init_vlc() turns sub-array i into coeff_token_vlc[i];
 * decode_residual() picks the VLC through coeff_token_table_index[], indexed
 * by the predicted non-zero count of the block.
 *
 * Each sub-array holds 4*17 entries: groups of four that appear to be one
 * total_coeff value (0..16) each, with the position inside the group being
 * trailing_ones (0..3), because the decoded symbol is split with >>2 and &3.
 * The first group stands on its own line; the remaining 16 groups are laid
 * out four per line.
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};
118
/*
 * Code words for the four regular coeff_token VLCs, matching
 * coeff_token_len entry for entry (same [table][4*17] layout). Both arrays
 * are handed to init_vlc() in ff_h264_decode_init_vlc().
 * The last sub-array pairs with an all-6 length table, i.e. it is a
 * fixed-length 6-bit code.
 */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
149
/*
 * Code lengths for the total_zeros VLCs of regular (non chroma-DC) blocks.
 * Row r becomes total_zeros_vlc[r] in ff_h264_decode_init_vlc(), and
 * decode_residual() selects total_zeros_vlc[total_coeff - 1], so row r is
 * the table for total_coeff == r + 1.
 *
 * Only rows 0..14 are written out (and only 15 VLCs are built); row r lists
 * 16 - r lengths and the rest of the 16x16 array is zero-initialised.
 * NOTE(review): the column index is presumably the decoded total_zeros
 * value -- confirm against init_vlc() symbol assignment.
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/*
 * Code words matching total_zeros_len, entry for entry (same row/column
 * layout, same 15 populated rows).
 */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};
185
/*
 * Code lengths for the total_zeros VLCs of 4-coefficient chroma DC blocks
 * (decode_residual() with max_coeff == 4). Row r becomes
 * chroma_dc_total_zeros_vlc[r], which is selected with total_coeff - 1, so
 * the rows cover total_coeff 1..3.
 * NOTE(review): trailing 0 lengths are presumably "no such code" markers --
 * confirm against init_vlc().
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/*
 * Code words matching chroma_dc_total_zeros_len, entry for entry.
 */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};
197
/*
 * Code lengths for the total_zeros VLCs of the 8-coefficient chroma DC
 * blocks (decode_residual() with max_coeff <= 8 and != 4). Row r becomes
 * chroma422_dc_total_zeros_vlc[r], which is selected with total_coeff - 1,
 * so the rows cover total_coeff 1..7. Row r lists 8 - r lengths; the
 * remaining entries are zero-initialised.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/*
 * Code words matching chroma422_dc_total_zeros_len, entry for entry.
 */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
217
/*
 * Code lengths for the run_before VLCs.
 * Rows 0..5 become run_vlc[0..5] (7 entries are passed to init_vlc() for
 * each); STORE_BLOCK in decode_residual() selects run_vlc[zeros_left - 1]
 * while zeros_left < 7. Row 6 becomes run7_vlc (all 16 entries are passed)
 * and is used for zeros_left >= 7.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/*
 * Code words matching run_len, entry for entry.
 */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
237
/*
 * Static storage for the VLC decoders built once by
 * ff_h264_decode_init_vlc(). Every VLC has a fixed-size backing array and a
 * matching size constant; the init code stores the array in VLC.table and
 * the size in VLC.table_allocated before calling init_vlc() with
 * INIT_VLC_USE_NEW_STATIC.
 */

/* The four coeff_token tables are packed back to back into one array; the
 * per-table entry counts are listed in coeff_token_vlc_tables_size[] and
 * their sum must equal the array length (asserted by the init code). */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

/* coeff_token for 4-coefficient chroma DC blocks. */
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* coeff_token for 8-coefficient chroma DC blocks. */
static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

/* total_zeros for regular blocks, indexed by total_coeff - 1. */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* total_zeros for 4-coefficient chroma DC blocks, indexed by total_coeff - 1. */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* total_zeros for 8-coefficient chroma DC blocks, indexed by total_coeff - 1. */
static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

/* run_before while fewer than 7 zeros are left, indexed by zeros_left - 1. */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run_before once 7 or more zeros are left. */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Direct lookup for coefficient levels: indexed by [suffix_length][next
 * LEVEL_TAB_BITS bits of the stream]. Entry [0] is either the decoded signed
 * level or, when >= 100, an escape carrying the level prefix + 100; entry
 * [1] is the number of bits to skip. Filled by init_cavlc_level_tab().
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/*
 * First-level lookup widths, passed both to init_vlc() when the tables are
 * built and to get_vlc2() when they are read. For the VLCs read with a
 * maximum depth of 1 the backing array above has exactly 1 << bits entries;
 * coeff_token and run7 are read with depth 2 and have larger arrays.
 */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6
281
282 /**
283  * Get the predicted number of non-zero coefficients.
284  * @param n block index
285  */
286 static inline int pred_non_zero_count(H264Context *h, int n){
287     const int index8= scan8[n];
288     const int left= h->non_zero_count_cache[index8 - 1];
289     const int top = h->non_zero_count_cache[index8 - 8];
290     int i= left + top;
291
292     if(i<64) i= (i+1)>>1;
293
294     tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
295
296     return i&31;
297 }
298
299 static av_cold void init_cavlc_level_tab(void){
300     int suffix_length;
301     unsigned int i;
302
303     for(suffix_length=0; suffix_length<7; suffix_length++){
304         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
305             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
306
307             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
308                 int level_code = (prefix << suffix_length) +
309                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
310                 int mask = -(level_code&1);
311                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
312                 cavlc_level_tab[suffix_length][i][0]= level_code;
313                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
314             }else if(prefix + 1 <= LEVEL_TAB_BITS){
315                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
316                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
317             }else{
318                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
319                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
320             }
321         }
322     }
323 }
324
325 av_cold void ff_h264_decode_init_vlc(void){
326     static int done = 0;
327
328     if (!done) {
329         int i;
330         int offset;
331         done = 1;
332
333         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
334         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
335         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
336                  &chroma_dc_coeff_token_len [0], 1, 1,
337                  &chroma_dc_coeff_token_bits[0], 1, 1,
338                  INIT_VLC_USE_NEW_STATIC);
339
340         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
341         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
342         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
343                  &chroma422_dc_coeff_token_len [0], 1, 1,
344                  &chroma422_dc_coeff_token_bits[0], 1, 1,
345                  INIT_VLC_USE_NEW_STATIC);
346
347         offset = 0;
348         for(i=0; i<4; i++){
349             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
350             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
351             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
352                      &coeff_token_len [i][0], 1, 1,
353                      &coeff_token_bits[i][0], 1, 1,
354                      INIT_VLC_USE_NEW_STATIC);
355             offset += coeff_token_vlc_tables_size[i];
356         }
357         /*
358          * This is a one time safety check to make sure that
359          * the packed static coeff_token_vlc table sizes
360          * were initialized correctly.
361          */
362         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
363
364         for(i=0; i<3; i++){
365             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
366             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
367             init_vlc(&chroma_dc_total_zeros_vlc[i],
368                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
369                      &chroma_dc_total_zeros_len [i][0], 1, 1,
370                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
371                      INIT_VLC_USE_NEW_STATIC);
372         }
373
374         for(i=0; i<7; i++){
375             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
376             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
377             init_vlc(&chroma422_dc_total_zeros_vlc[i],
378                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
379                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
380                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
381                      INIT_VLC_USE_NEW_STATIC);
382         }
383
384         for(i=0; i<15; i++){
385             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
386             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
387             init_vlc(&total_zeros_vlc[i],
388                      TOTAL_ZEROS_VLC_BITS, 16,
389                      &total_zeros_len [i][0], 1, 1,
390                      &total_zeros_bits[i][0], 1, 1,
391                      INIT_VLC_USE_NEW_STATIC);
392         }
393
394         for(i=0; i<6; i++){
395             run_vlc[i].table = run_vlc_tables[i];
396             run_vlc[i].table_allocated = run_vlc_tables_size;
397             init_vlc(&run_vlc[i],
398                      RUN_VLC_BITS, 7,
399                      &run_len [i][0], 1, 1,
400                      &run_bits[i][0], 1, 1,
401                      INIT_VLC_USE_NEW_STATIC);
402         }
403         run7_vlc.table = run7_vlc_table,
404         run7_vlc.table_allocated = run7_vlc_table_size;
405         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
406                  &run_len [6][0], 1, 1,
407                  &run_bits[6][0], 1, 1,
408                  INIT_VLC_USE_NEW_STATIC);
409
410         init_cavlc_level_tab();
411     }
412 }
413
414 /**
415  *
416  */
417 static inline int get_level_prefix(GetBitContext *gb){
418     unsigned int buf;
419     int log;
420
421     OPEN_READER(re, gb);
422     UPDATE_CACHE(re, gb);
423     buf=GET_CACHE(re, gb);
424
425     log= 32 - av_log2(buf);
426 #ifdef TRACE
427     print_bin(buf>>(32-log), log);
428     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
429 #endif
430
431     LAST_SKIP_BITS(re, gb, log);
432     CLOSE_READER(re, gb);
433
434     return log-1;
435 }
436
437 /**
438  * Decode a residual block.
439  * @param n block index
440  * @param scantable scantable
441  * @param max_coeff number of coefficients in the block
442  * @return <0 if an error occurred
443  */
444 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
445     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
446     int level[16];
447     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
448
449     //FIXME put trailing_onex into the context
450
451     if(max_coeff <= 8){
452         if (max_coeff == 4)
453             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
454         else
455             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
456         total_coeff= coeff_token>>2;
457     }else{
458         if(n >= LUMA_DC_BLOCK_INDEX){
459             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
460             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
461             total_coeff= coeff_token>>2;
462         }else{
463             total_coeff= pred_non_zero_count(h, n);
464             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
465             total_coeff= coeff_token>>2;
466         }
467     }
468     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
469
470     //FIXME set last_non_zero?
471
472     if(total_coeff==0)
473         return 0;
474     if(total_coeff > (unsigned)max_coeff) {
475         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
476         return -1;
477     }
478
479     trailing_ones= coeff_token&3;
480     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
481     assert(total_coeff<=16);
482
483     i = show_bits(gb, 3);
484     skip_bits(gb, trailing_ones);
485     level[0] = 1-((i&4)>>1);
486     level[1] = 1-((i&2)   );
487     level[2] = 1-((i&1)<<1);
488
489     if(trailing_ones<total_coeff) {
490         int mask, prefix;
491         int suffix_length = total_coeff > 10 & trailing_ones < 3;
492         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
493         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
494
495         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
496         if(level_code >= 100){
497             prefix= level_code - 100;
498             if(prefix == LEVEL_TAB_BITS)
499                 prefix += get_level_prefix(gb);
500
501             //first coefficient has suffix_length equal to 0 or 1
502             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
503                 if(suffix_length)
504                     level_code= (prefix<<1) + get_bits1(gb); //part
505                 else
506                     level_code= prefix; //part
507             }else if(prefix==14){
508                 if(suffix_length)
509                     level_code= (prefix<<1) + get_bits1(gb); //part
510                 else
511                     level_code= prefix + get_bits(gb, 4); //part
512             }else{
513                 level_code= 30 + get_bits(gb, prefix-3); //part
514                 if(prefix>=16){
515                     if(prefix > 25+3){
516                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
517                         return -1;
518                     }
519                     level_code += (1<<(prefix-3))-4096;
520                 }
521             }
522
523             if(trailing_ones < 3) level_code += 2;
524
525             suffix_length = 2;
526             mask= -(level_code&1);
527             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
528         }else{
529             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
530
531             suffix_length = 1 + (level_code + 3U > 6U);
532             level[trailing_ones]= level_code;
533         }
534
535         //remaining coefficients have suffix_length > 0
536         for(i=trailing_ones+1;i<total_coeff;i++) {
537             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
538             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
539             level_code= cavlc_level_tab[suffix_length][bitsi][0];
540
541             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
542             if(level_code >= 100){
543                 prefix= level_code - 100;
544                 if(prefix == LEVEL_TAB_BITS){
545                     prefix += get_level_prefix(gb);
546                 }
547                 if(prefix<15){
548                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
549                 }else{
550                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
551                     if(prefix>=16)
552                         level_code += (1<<(prefix-3))-4096;
553                 }
554                 mask= -(level_code&1);
555                 level_code= (((2+level_code)>>1) ^ mask) - mask;
556             }
557             level[i]= level_code;
558             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
559         }
560     }
561
562     if(total_coeff == max_coeff)
563         zeros_left=0;
564     else{
565         if (max_coeff <= 8) {
566             if (max_coeff == 4)
567                 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
568                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
569             else
570                 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
571                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
572         } else {
573             zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
574         }
575     }
576
577 #define STORE_BLOCK(type) \
578     scantable += zeros_left + total_coeff - 1; \
579     if(n >= LUMA_DC_BLOCK_INDEX){ \
580         ((type*)block)[*scantable] = level[0]; \
581         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
582             if(zeros_left < 7) \
583                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
584             else \
585                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
586             zeros_left -= run_before; \
587             scantable -= 1 + run_before; \
588             ((type*)block)[*scantable]= level[i]; \
589         } \
590         for(;i<total_coeff;i++) { \
591             scantable--; \
592             ((type*)block)[*scantable]= level[i]; \
593         } \
594     }else{ \
595         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
596         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
597             if(zeros_left < 7) \
598                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
599             else \
600                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
601             zeros_left -= run_before; \
602             scantable -= 1 + run_before; \
603             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
604         } \
605         for(;i<total_coeff;i++) { \
606             scantable--; \
607             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
608         } \
609     }
610
611     if (zeros_left < 0) {
612         av_log(h->avctx, AV_LOG_ERROR,
613                "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
614         return AVERROR_INVALIDDATA;
615     }
616
617     if (h->pixel_shift) {
618         STORE_BLOCK(int32_t)
619     } else {
620         STORE_BLOCK(int16_t)
621     }
622
623     return 0;
624 }
625
626 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
627     int i4x4, i8x8;
628     int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
629     if(IS_INTRA16x16(mb_type)){
630         AV_ZERO128(h->mb_luma_dc[p]+0);
631         AV_ZERO128(h->mb_luma_dc[p]+8);
632         AV_ZERO128(h->mb_luma_dc[p]+16);
633         AV_ZERO128(h->mb_luma_dc[p]+24);
634         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
635             return -1; //FIXME continue if partitioned and other return -1 too
636         }
637
638         assert((cbp&15) == 0 || (cbp&15) == 15);
639
640         if(cbp&15){
641             for(i8x8=0; i8x8<4; i8x8++){
642                 for(i4x4=0; i4x4<4; i4x4++){
643                     const int index= i4x4 + 4*i8x8 + p*16;
644                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
645                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
646                         return -1;
647                     }
648                 }
649             }
650             return 0xf;
651         }else{
652             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
653             return 0;
654         }
655     }else{
656         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
657         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
658         int new_cbp = 0;
659         for(i8x8=0; i8x8<4; i8x8++){
660             if(cbp & (1<<i8x8)){
661                 if(IS_8x8DCT(mb_type)){
662                     int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
663                     uint8_t *nnz;
664                     for(i4x4=0; i4x4<4; i4x4++){
665                         const int index= i4x4 + 4*i8x8 + p*16;
666                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
667                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
668                             return -1;
669                     }
670                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
671                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
672                     new_cbp |= !!nnz[0] << i8x8;
673                 }else{
674                     for(i4x4=0; i4x4<4; i4x4++){
675                         const int index= i4x4 + 4*i8x8 + p*16;
676                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
677                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
678                             return -1;
679                         }
680                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
681                     }
682                 }
683             }else{
684                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
685                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
686             }
687         }
688         return new_cbp;
689     }
690 }
691
692 int ff_h264_decode_mb_cavlc(H264Context *h){
693     int mb_xy;
694     int partition_count;
695     unsigned int mb_type, cbp;
696     int dct8x8_allowed= h->pps.transform_8x8_mode;
697     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
698     const int pixel_shift = h->pixel_shift;
699
700     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
701
702     tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
703     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
704                 down the code */
705     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
706         if(h->mb_skip_run==-1)
707             h->mb_skip_run= get_ue_golomb(&h->gb);
708
709         if (h->mb_skip_run--) {
710             if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
711                 if(h->mb_skip_run==0)
712                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
713             }
714             decode_mb_skip(h);
715             return 0;
716         }
717     }
718     if (FRAME_MBAFF(h)) {
719         if( (h->mb_y&1) == 0 )
720             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
721     }
722
723     h->prev_mb_skipped= 0;
724
725     mb_type= get_ue_golomb(&h->gb);
726     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
727         if(mb_type < 23){
728             partition_count= b_mb_type_info[mb_type].partition_count;
729             mb_type=         b_mb_type_info[mb_type].type;
730         }else{
731             mb_type -= 23;
732             goto decode_intra_mb;
733         }
734     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
735         if(mb_type < 5){
736             partition_count= p_mb_type_info[mb_type].partition_count;
737             mb_type=         p_mb_type_info[mb_type].type;
738         }else{
739             mb_type -= 5;
740             goto decode_intra_mb;
741         }
742     }else{
743        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
744         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
745             mb_type--;
746 decode_intra_mb:
747         if(mb_type > 25){
748             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
749             return -1;
750         }
751         partition_count=0;
752         cbp= i_mb_type_info[mb_type].cbp;
753         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
754         mb_type= i_mb_type_info[mb_type].type;
755     }
756
757     if(MB_FIELD(h))
758         mb_type |= MB_TYPE_INTERLACED;
759
760     h->slice_table[ mb_xy ]= h->slice_num;
761
762     if(IS_INTRA_PCM(mb_type)){
763         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
764                             h->sps.bit_depth_luma;
765
766         // We assume these blocks are very rare so we do not optimize it.
767         h->intra_pcm_ptr = align_get_bits(&h->gb);
768         skip_bits_long(&h->gb, mb_size);
769
770         // In deblocking, the quantizer is 0
771         h->cur_pic.qscale_table[mb_xy] = 0;
772         // All coeffs are present
773         memset(h->non_zero_count[mb_xy], 16, 48);
774
775         h->cur_pic.mb_type[mb_xy] = mb_type;
776         return 0;
777     }
778
779     fill_decode_neighbors(h, mb_type);
780     fill_decode_caches(h, mb_type);
781
782     //mb_pred
783     if(IS_INTRA(mb_type)){
784         int pred_mode;
785 //            init_top_left_availability(h);
786         if(IS_INTRA4x4(mb_type)){
787             int i;
788             int di = 1;
789             if(dct8x8_allowed && get_bits1(&h->gb)){
790                 mb_type |= MB_TYPE_8x8DCT;
791                 di = 4;
792             }
793
794 //                fill_intra4x4_pred_table(h);
795             for(i=0; i<16; i+=di){
796                 int mode= pred_intra_mode(h, i);
797
798                 if(!get_bits1(&h->gb)){
799                     const int rem_mode= get_bits(&h->gb, 3);
800                     mode = rem_mode + (rem_mode >= mode);
801                 }
802
803                 if(di==4)
804                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
805                 else
806                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
807             }
808             write_back_intra_pred_mode(h);
809             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
810                 return -1;
811         }else{
812             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
813             if(h->intra16x16_pred_mode < 0)
814                 return -1;
815         }
816         if(decode_chroma){
817             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
818             if(pred_mode < 0)
819                 return -1;
820             h->chroma_pred_mode= pred_mode;
821         } else {
822             h->chroma_pred_mode = DC_128_PRED8x8;
823         }
824     }else if(partition_count==4){
825         int i, j, sub_partition_count[4], list, ref[2][4];
826
827         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
828             for(i=0; i<4; i++){
829                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
830                 if(h->sub_mb_type[i] >=13){
831                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
832                     return -1;
833                 }
834                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
835                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
836             }
837             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
838                 ff_h264_pred_direct_motion(h, &mb_type);
839                 h->ref_cache[0][scan8[4]] =
840                 h->ref_cache[1][scan8[4]] =
841                 h->ref_cache[0][scan8[12]] =
842                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
843             }
844         }else{
845             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
846             for(i=0; i<4; i++){
847                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
848                 if(h->sub_mb_type[i] >=4){
849                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
850                     return -1;
851                 }
852                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
853                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
854             }
855         }
856
857         for(list=0; list<h->list_count; list++){
858             int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF(h);
859             for(i=0; i<4; i++){
860                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
861                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
862                     unsigned int tmp;
863                     if(ref_count == 1){
864                         tmp= 0;
865                     }else if(ref_count == 2){
866                         tmp= get_bits1(&h->gb)^1;
867                     }else{
868                         tmp= get_ue_golomb_31(&h->gb);
869                         if(tmp>=ref_count){
870                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
871                             return -1;
872                         }
873                     }
874                     ref[list][i]= tmp;
875                 }else{
876                  //FIXME
877                     ref[list][i] = -1;
878                 }
879             }
880         }
881
882         if(dct8x8_allowed)
883             dct8x8_allowed = get_dct8x8_allowed(h);
884
885         for(list=0; list<h->list_count; list++){
886             for(i=0; i<4; i++){
887                 if(IS_DIRECT(h->sub_mb_type[i])) {
888                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
889                     continue;
890                 }
891                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
892                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
893
894                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
895                     const int sub_mb_type= h->sub_mb_type[i];
896                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
897                     for(j=0; j<sub_partition_count[i]; j++){
898                         int mx, my;
899                         const int index= 4*i + block_width*j;
900                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
901                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
902                         mx += get_se_golomb(&h->gb);
903                         my += get_se_golomb(&h->gb);
904                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
905
906                         if(IS_SUB_8X8(sub_mb_type)){
907                             mv_cache[ 1 ][0]=
908                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
909                             mv_cache[ 1 ][1]=
910                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
911                         }else if(IS_SUB_8X4(sub_mb_type)){
912                             mv_cache[ 1 ][0]= mx;
913                             mv_cache[ 1 ][1]= my;
914                         }else if(IS_SUB_4X8(sub_mb_type)){
915                             mv_cache[ 8 ][0]= mx;
916                             mv_cache[ 8 ][1]= my;
917                         }
918                         mv_cache[ 0 ][0]= mx;
919                         mv_cache[ 0 ][1]= my;
920                     }
921                 }else{
922                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
923                     p[0] = p[1]=
924                     p[8] = p[9]= 0;
925                 }
926             }
927         }
928     }else if(IS_DIRECT(mb_type)){
929         ff_h264_pred_direct_motion(h, &mb_type);
930         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
931     }else{
932         int list, mx, my, i;
933          //FIXME we should set ref_idx_l? to 0 if we use that later ...
934         if(IS_16X16(mb_type)){
935             for(list=0; list<h->list_count; list++){
936                     unsigned int val;
937                     if(IS_DIR(mb_type, 0, list)){
938                         int rc = h->ref_count[list] << MB_MBAFF(h);
939                         if (rc == 1) {
940                             val= 0;
941                         } else if (rc == 2) {
942                             val= get_bits1(&h->gb)^1;
943                         }else{
944                             val= get_ue_golomb_31(&h->gb);
945                             if (val >= rc) {
946                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
947                                 return -1;
948                             }
949                         }
950                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
951                     }
952             }
953             for(list=0; list<h->list_count; list++){
954                 if(IS_DIR(mb_type, 0, list)){
955                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
956                     mx += get_se_golomb(&h->gb);
957                     my += get_se_golomb(&h->gb);
958                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
959
960                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
961                 }
962             }
963         }
964         else if(IS_16X8(mb_type)){
965             for(list=0; list<h->list_count; list++){
966                     for(i=0; i<2; i++){
967                         unsigned int val;
968                         if(IS_DIR(mb_type, i, list)){
969                             int rc = h->ref_count[list] << MB_MBAFF(h);
970                             if (rc == 1) {
971                                 val= 0;
972                             } else if (rc == 2) {
973                                 val= get_bits1(&h->gb)^1;
974                             }else{
975                                 val= get_ue_golomb_31(&h->gb);
976                                 if (val >= rc) {
977                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
978                                     return -1;
979                                 }
980                             }
981                         }else
982                             val= LIST_NOT_USED&0xFF;
983                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
984                     }
985             }
986             for(list=0; list<h->list_count; list++){
987                 for(i=0; i<2; i++){
988                     unsigned int val;
989                     if(IS_DIR(mb_type, i, list)){
990                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
991                         mx += get_se_golomb(&h->gb);
992                         my += get_se_golomb(&h->gb);
993                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
994
995                         val= pack16to32(mx,my);
996                     }else
997                         val=0;
998                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
999                 }
1000             }
1001         }else{
1002             assert(IS_8X16(mb_type));
1003             for(list=0; list<h->list_count; list++){
1004                     for(i=0; i<2; i++){
1005                         unsigned int val;
1006                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1007                             int rc = h->ref_count[list] << MB_MBAFF(h);
1008                             if (rc == 1) {
1009                                 val= 0;
1010                             } else if (rc == 2) {
1011                                 val= get_bits1(&h->gb)^1;
1012                             }else{
1013                                 val= get_ue_golomb_31(&h->gb);
1014                                 if (val >= rc) {
1015                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1016                                     return -1;
1017                                 }
1018                             }
1019                         }else
1020                             val= LIST_NOT_USED&0xFF;
1021                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1022                     }
1023             }
1024             for(list=0; list<h->list_count; list++){
1025                 for(i=0; i<2; i++){
1026                     unsigned int val;
1027                     if(IS_DIR(mb_type, i, list)){
1028                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1029                         mx += get_se_golomb(&h->gb);
1030                         my += get_se_golomb(&h->gb);
1031                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1032
1033                         val= pack16to32(mx,my);
1034                     }else
1035                         val=0;
1036                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1037                 }
1038             }
1039         }
1040     }
1041
1042     if(IS_INTER(mb_type))
1043         write_back_motion(h, mb_type);
1044
1045     if(!IS_INTRA16x16(mb_type)){
1046         cbp= get_ue_golomb(&h->gb);
1047
1048         if(decode_chroma){
1049             if(cbp > 47){
1050                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1051                 return -1;
1052             }
1053             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1054             else                     cbp= golomb_to_inter_cbp   [cbp];
1055         }else{
1056             if(cbp > 15){
1057                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1058                 return -1;
1059             }
1060             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1061             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1062         }
1063     }
1064
1065     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1066         mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1067     }
1068     h->cbp=
1069     h->cbp_table[mb_xy]= cbp;
1070     h->cur_pic.mb_type[mb_xy] = mb_type;
1071
1072     if(cbp || IS_INTRA16x16(mb_type)){
1073         int i4x4, i8x8, chroma_idx;
1074         int dquant;
1075         int ret;
1076         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1077         const uint8_t *scan, *scan8x8;
1078         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1079
1080         if(IS_INTERLACED(mb_type)){
1081             scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1082             scan= h->qscale ? h->field_scan : h->field_scan_q0;
1083         }else{
1084             scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1085             scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1086         }
1087
1088         dquant= get_se_golomb(&h->gb);
1089
1090         h->qscale += dquant;
1091
1092         if(((unsigned)h->qscale) > max_qp){
1093             if(h->qscale<0) h->qscale+= max_qp+1;
1094             else            h->qscale-= max_qp+1;
1095             if(((unsigned)h->qscale) > max_qp){
1096                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1097                 return -1;
1098             }
1099         }
1100
1101         h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1102         h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1103
1104         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1105             return -1;
1106         }
1107         h->cbp_table[mb_xy] |= ret << 12;
1108         if (CHROMA444(h)) {
1109             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1110                 return -1;
1111             }
1112             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1113                 return -1;
1114             }
1115         } else if (CHROMA422(h)) {
1116             if(cbp&0x30){
1117                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1118                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1119                                         CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1120                                         NULL, 8) < 0) {
1121                         return -1;
1122                     }
1123             }
1124
1125             if(cbp&0x20){
1126                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1127                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1128                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1129                     for (i8x8 = 0; i8x8 < 2; i8x8++) {
1130                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1131                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1132                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1133                                 return -1;
1134                             mb += 16 << pixel_shift;
1135                         }
1136                     }
1137                 }
1138             }else{
1139                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1140                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1141             }
1142         } else /* yuv420 */ {
1143             if(cbp&0x30){
1144                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1145                     if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1146                         return -1;
1147                     }
1148             }
1149
1150             if(cbp&0x20){
1151                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1152                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1153                     for(i4x4=0; i4x4<4; i4x4++){
1154                         const int index= 16 + 16*chroma_idx + i4x4;
1155                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1156                             return -1;
1157                         }
1158                     }
1159                 }
1160             }else{
1161                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1162                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1163             }
1164         }
1165     }else{
1166         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1167         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1168         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1169     }
1170     h->cur_pic.qscale_table[mb_xy] = h->qscale;
1171     write_back_non_zero_count(h);
1172
1173     return 0;
1174 }