]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/*
 * 16-entry lookup table. Judging by the name it maps a decoded Golomb code
 * number to the coded block pattern of an inter macroblock when no chroma is
 * coded ("gray").
 * NOTE(review): the lookup site is not in this part of the file -- confirm
 * against the caller.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};
44
/*
 * 16-entry lookup table. Judging by the name it is the intra 4x4 counterpart
 * of golomb_to_inter_cbp_gray: Golomb code number -> coded block pattern when
 * no chroma is coded.
 * NOTE(review): the lookup site is not in this part of the file -- confirm
 * against the caller.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
48
/*
 * Code lengths (in bits) of the coeff_token VLC that decode_residual() uses
 * for blocks with max_coeff == 4.
 *
 * decode_residual() splits the decoded token as total_coeff = token >> 2 and
 * trailing_ones = token & 3, so each row of four below is one total_coeff
 * value (0..4) and each column one trailing_ones value (0..3) -- assuming
 * init_vlc() uses the array index as the decoded symbol (TODO confirm).
 * Entries of 0 sit where trailing_ones would exceed total_coeff; presumably
 * init_vlc() treats a zero length as "no code" -- confirm.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/*
 * Code words matching chroma_dc_coeff_token_len entry for entry; both arrays
 * are handed together to init_vlc() in ff_h264_decode_init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};
64
/*
 * Code lengths (in bits) of the coeff_token VLC that decode_residual() uses
 * for blocks with max_coeff <= 8 other than max_coeff == 4.
 *
 * Same layout as chroma_dc_coeff_token_len: the decoded token is split into
 * total_coeff = token >> 2 and trailing_ones = token & 3, giving one row of
 * four per total_coeff value (0..8) -- assuming init_vlc() uses the array
 * index as the decoded symbol (TODO confirm).
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

/*
 * Code words matching chroma422_dc_coeff_token_len entry for entry; both
 * arrays are handed together to init_vlc() in ff_h264_decode_init_vlc().
 */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,   0,  0, 0,
 15,   1,  0, 0,
 14,  13,  1, 0,
  7,  12, 11, 1,
  6,   5, 10, 1,
  7,   6,  4, 9,
  7,   6,  5, 8,
  7,   6,  5, 4,
  7,   5,  4, 4,
};
88
/*
 * Code lengths (in bits) of the four coeff_token VLCs used for blocks with
 * max_coeff > 8. decode_residual() picks one of the four tables through
 * coeff_token_table_index[], indexed by the value pred_non_zero_count()
 * returns.
 *
 * Within a table the decoded token is split into total_coeff = token >> 2
 * and trailing_ones = token & 3, i.e. every group of four values below is
 * one total_coeff (0..16) -- assuming init_vlc() uses the array index as the
 * decoded symbol (TODO confirm).
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/*
 * Code words matching coeff_token_len entry for entry; table i of both
 * arrays is handed to init_vlc() for coeff_token_vlc[i].
 */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
150
/*
 * Code lengths (in bits) of the total_zeros VLCs for blocks with
 * max_coeff > 8.
 *
 * ff_h264_decode_init_vlc() builds total_zeros_vlc[i] from row i for
 * i = 0..14, and decode_residual() looks the VLC up as
 * (total_zeros_vlc-1)[total_coeff], so row i belongs to total_coeff == i+1.
 * The decoded value is used directly as zeros_left, so the column index is
 * presumably the number of zeros (assuming init_vlc() uses the array index
 * as the symbol -- TODO confirm).
 * Only 15 rows are spelled out; the remaining row and the unlisted tail of
 * each row are zero-initialised.
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/*
 * Code words matching total_zeros_len entry for entry; row i of both arrays
 * is handed to init_vlc() for total_zeros_vlc[i].
 */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};
186
/*
 * Code lengths (in bits) of the total_zeros VLCs used when max_coeff == 4.
 * decode_residual() looks the VLC up as
 * (chroma_dc_total_zeros_vlc-1)[total_coeff], so row i belongs to
 * total_coeff == i+1.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/* Code words matching chroma_dc_total_zeros_len entry for entry. */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};
198
/*
 * Code lengths (in bits) of the total_zeros VLCs used when max_coeff <= 8
 * but not 4. decode_residual() looks the VLC up as
 * (chroma422_dc_total_zeros_vlc-1)[total_coeff], so row i belongs to
 * total_coeff == i+1. Unlisted trailing entries of a row are
 * zero-initialised.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/* Code words matching chroma422_dc_total_zeros_len entry for entry. */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
218
/*
 * Code lengths (in bits) of the run_before VLCs.
 *
 * Rows 0..5 feed run_vlc[0..5] (first 7 entries of each row); the decoder
 * looks them up as (run_vlc-1)[zeros_left] while zeros_left < 7, so row i
 * belongs to zeros_left == i+1. Row 6 (16 entries passed to init_vlc) feeds
 * run7_vlc, which is used whenever zeros_left >= 7.
 * Unlisted trailing entries of a row are zero-initialised.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/* Code words matching run_len entry for entry. */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
238
/*
 * VLC decoder objects and their statically allocated backing storage.
 * ff_h264_decode_init_vlc() points every VLC's .table at the matching array
 * below and stores the matching *_size constant in .table_allocated before
 * calling init_vlc() with INIT_VLC_USE_NEW_STATIC.
 */

/* The four coeff_token VLCs share one array; each gets a slice whose length
 * is listed in coeff_token_vlc_tables_size[] (the slices sum to the array
 * size, which the init function asserts). */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Level lookup table, indexed [suffix_length][next LEVEL_TAB_BITS bits of
 * the stream]. Element [0] is either the decoded level or, when >= 100, an
 * escape value of 100 + prefix; element [1] is the number of bits the
 * decoder skips for that entry. Filled by init_cavlc_level_tab().
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* Bit widths passed to init_vlc() and get_vlc2() for each VLC above. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6
282
283 /**
284  * gets the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length, mask;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
308
309             mask= -(level_code&1);
310             level_code= (((2+level_code)>>1) ^ mask) - mask;
311             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
312                 cavlc_level_tab[suffix_length][i][0]= level_code;
313                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
314             }else if(prefix + 1 <= LEVEL_TAB_BITS){
315                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
316                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
317             }else{
318                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
319                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
320             }
321         }
322     }
323 }
324
325 av_cold void ff_h264_decode_init_vlc(void){
326     static int done = 0;
327
328     if (!done) {
329         int i;
330         int offset;
331         done = 1;
332
333         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
334         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
335         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
336                  &chroma_dc_coeff_token_len [0], 1, 1,
337                  &chroma_dc_coeff_token_bits[0], 1, 1,
338                  INIT_VLC_USE_NEW_STATIC);
339
340         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
341         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
342         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
343                  &chroma422_dc_coeff_token_len [0], 1, 1,
344                  &chroma422_dc_coeff_token_bits[0], 1, 1,
345                  INIT_VLC_USE_NEW_STATIC);
346
347         offset = 0;
348         for(i=0; i<4; i++){
349             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
350             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
351             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
352                      &coeff_token_len [i][0], 1, 1,
353                      &coeff_token_bits[i][0], 1, 1,
354                      INIT_VLC_USE_NEW_STATIC);
355             offset += coeff_token_vlc_tables_size[i];
356         }
357         /*
358          * This is a one time safety check to make sure that
359          * the packed static coeff_token_vlc table sizes
360          * were initialized correctly.
361          */
362         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
363
364         for(i=0; i<3; i++){
365             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
366             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
367             init_vlc(&chroma_dc_total_zeros_vlc[i],
368                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
369                      &chroma_dc_total_zeros_len [i][0], 1, 1,
370                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
371                      INIT_VLC_USE_NEW_STATIC);
372         }
373
374         for(i=0; i<7; i++){
375             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
376             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
377             init_vlc(&chroma422_dc_total_zeros_vlc[i],
378                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
379                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
380                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
381                      INIT_VLC_USE_NEW_STATIC);
382         }
383
384         for(i=0; i<15; i++){
385             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
386             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
387             init_vlc(&total_zeros_vlc[i],
388                      TOTAL_ZEROS_VLC_BITS, 16,
389                      &total_zeros_len [i][0], 1, 1,
390                      &total_zeros_bits[i][0], 1, 1,
391                      INIT_VLC_USE_NEW_STATIC);
392         }
393
394         for(i=0; i<6; i++){
395             run_vlc[i].table = run_vlc_tables[i];
396             run_vlc[i].table_allocated = run_vlc_tables_size;
397             init_vlc(&run_vlc[i],
398                      RUN_VLC_BITS, 7,
399                      &run_len [i][0], 1, 1,
400                      &run_bits[i][0], 1, 1,
401                      INIT_VLC_USE_NEW_STATIC);
402         }
403         run7_vlc.table = run7_vlc_table,
404         run7_vlc.table_allocated = run7_vlc_table_size;
405         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
406                  &run_len [6][0], 1, 1,
407                  &run_bits[6][0], 1, 1,
408                  INIT_VLC_USE_NEW_STATIC);
409
410         init_cavlc_level_tab();
411     }
412 }
413
414 /**
415  *
416  */
417 static inline int get_level_prefix(GetBitContext *gb){
418     unsigned int buf;
419     int log;
420
421     OPEN_READER(re, gb);
422     UPDATE_CACHE(re, gb);
423     buf=GET_CACHE(re, gb);
424
425     log= 32 - av_log2(buf);
426 #ifdef TRACE
427     print_bin(buf>>(32-log), log);
428     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
429 #endif
430
431     LAST_SKIP_BITS(re, gb, log);
432     CLOSE_READER(re, gb);
433
434     return log-1;
435 }
436
437 /**
438  * decodes a residual block.
439  * @param n block index
440  * @param scantable scantable
441  * @param max_coeff number of coefficients in the block
442  * @return <0 if an error occurred
443  */
444 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
445     MpegEncContext * const s = &h->s;
446     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447     int level[16];
448     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449
450     //FIXME put trailing_onex into the context
451
452     if(max_coeff <= 8){
453         if (max_coeff == 4)
454             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455         else
456             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457         total_coeff= coeff_token>>2;
458     }else{
459         if(n >= LUMA_DC_BLOCK_INDEX){
460             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462             total_coeff= coeff_token>>2;
463         }else{
464             total_coeff= pred_non_zero_count(h, n);
465             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466             total_coeff= coeff_token>>2;
467         }
468     }
469     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470
471     //FIXME set last_non_zero?
472
473     if(total_coeff==0)
474         return 0;
475     if(total_coeff > (unsigned)max_coeff) {
476         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
477         return -1;
478     }
479
480     trailing_ones= coeff_token&3;
481     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482     assert(total_coeff<=16);
483
484     i = show_bits(gb, 3);
485     skip_bits(gb, trailing_ones);
486     level[0] = 1-((i&4)>>1);
487     level[1] = 1-((i&2)   );
488     level[2] = 1-((i&1)<<1);
489
490     if(trailing_ones<total_coeff) {
491         int mask, prefix;
492         int suffix_length = total_coeff > 10 & trailing_ones < 3;
493         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495
496         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
497         if(level_code >= 100){
498             prefix= level_code - 100;
499             if(prefix == LEVEL_TAB_BITS)
500                 prefix += get_level_prefix(gb);
501
502             //first coefficient has suffix_length equal to 0 or 1
503             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504                 if(suffix_length)
505                     level_code= (prefix<<1) + get_bits1(gb); //part
506                 else
507                     level_code= prefix; //part
508             }else if(prefix==14){
509                 if(suffix_length)
510                     level_code= (prefix<<1) + get_bits1(gb); //part
511                 else
512                     level_code= prefix + get_bits(gb, 4); //part
513             }else{
514                 level_code= 30 + get_bits(gb, prefix-3); //part
515                 if(prefix>=16){
516                     if(prefix > 25+3){
517                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
518                         return -1;
519                     }
520                     level_code += (1<<(prefix-3))-4096;
521                 }
522             }
523
524             if(trailing_ones < 3) level_code += 2;
525
526             suffix_length = 2;
527             mask= -(level_code&1);
528             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
529         }else{
530             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
531
532             suffix_length = 1 + (level_code + 3U > 6U);
533             level[trailing_ones]= level_code;
534         }
535
536         //remaining coefficients have suffix_length > 0
537         for(i=trailing_ones+1;i<total_coeff;i++) {
538             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
539             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
540             level_code= cavlc_level_tab[suffix_length][bitsi][0];
541
542             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
543             if(level_code >= 100){
544                 prefix= level_code - 100;
545                 if(prefix == LEVEL_TAB_BITS){
546                     prefix += get_level_prefix(gb);
547                 }
548                 if(prefix<15){
549                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
550                 }else{
551                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
552                     if(prefix>=16)
553                         level_code += (1<<(prefix-3))-4096;
554                 }
555                 mask= -(level_code&1);
556                 level_code= (((2+level_code)>>1) ^ mask) - mask;
557             }
558             level[i]= level_code;
559             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
560         }
561     }
562
563     if(total_coeff == max_coeff)
564         zeros_left=0;
565     else{
566         if (max_coeff <= 8) {
567             if (max_coeff == 4)
568                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
569                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
570             else
571                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
572                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
573         } else {
574             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
575         }
576     }
577
578 #define STORE_BLOCK(type) \
579     scantable += zeros_left + total_coeff - 1; \
580     if(n >= LUMA_DC_BLOCK_INDEX){ \
581         ((type*)block)[*scantable] = level[0]; \
582         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
583             if(zeros_left < 7) \
584                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
585             else \
586                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
587             zeros_left -= run_before; \
588             scantable -= 1 + run_before; \
589             ((type*)block)[*scantable]= level[i]; \
590         } \
591         for(;i<total_coeff;i++) { \
592             scantable--; \
593             ((type*)block)[*scantable]= level[i]; \
594         } \
595     }else{ \
596         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
597         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
598             if(zeros_left < 7) \
599                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
600             else \
601                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
602             zeros_left -= run_before; \
603             scantable -= 1 + run_before; \
604             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
605         } \
606         for(;i<total_coeff;i++) { \
607             scantable--; \
608             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
609         } \
610     }
611
612     if (h->pixel_shift) {
613         STORE_BLOCK(int32_t)
614     } else {
615         STORE_BLOCK(int16_t)
616     }
617
618     if(zeros_left<0){
619         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
620         return -1;
621     }
622
623     return 0;
624 }
625
626 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
627     int i4x4, i8x8;
628     MpegEncContext * const s = &h->s;
629     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
630     if(IS_INTRA16x16(mb_type)){
631         AV_ZERO128(h->mb_luma_dc[p]+0);
632         AV_ZERO128(h->mb_luma_dc[p]+8);
633         AV_ZERO128(h->mb_luma_dc[p]+16);
634         AV_ZERO128(h->mb_luma_dc[p]+24);
635         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
636             return -1; //FIXME continue if partitioned and other return -1 too
637         }
638
639         assert((cbp&15) == 0 || (cbp&15) == 15);
640
641         if(cbp&15){
642             for(i8x8=0; i8x8<4; i8x8++){
643                 for(i4x4=0; i4x4<4; i4x4++){
644                     const int index= i4x4 + 4*i8x8 + p*16;
645                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
646                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
647                         return -1;
648                     }
649                 }
650             }
651             return 0xf;
652         }else{
653             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
654             return 0;
655         }
656     }else{
657         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
658         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
659         int new_cbp = 0;
660         for(i8x8=0; i8x8<4; i8x8++){
661             if(cbp & (1<<i8x8)){
662                 if(IS_8x8DCT(mb_type)){
663                     DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
664                     uint8_t *nnz;
665                     for(i4x4=0; i4x4<4; i4x4++){
666                         const int index= i4x4 + 4*i8x8 + p*16;
667                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
668                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
669                             return -1;
670                     }
671                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
672                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
673                     new_cbp |= !!nnz[0] << i8x8;
674                 }else{
675                     for(i4x4=0; i4x4<4; i4x4++){
676                         const int index= i4x4 + 4*i8x8 + p*16;
677                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
678                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
679                             return -1;
680                         }
681                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
682                     }
683                 }
684             }else{
685                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
686                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
687             }
688         }
689         return new_cbp;
690     }
691 }
692
693 int ff_h264_decode_mb_cavlc(H264Context *h){
694     MpegEncContext * const s = &h->s;
695     int mb_xy;
696     int partition_count;
697     unsigned int mb_type, cbp;
698     int dct8x8_allowed= h->pps.transform_8x8_mode;
699     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
700     const int pixel_shift = h->pixel_shift;
701
702     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
703
704     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
705     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
706                 down the code */
707     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
708         if(s->mb_skip_run==-1)
709             s->mb_skip_run= get_ue_golomb(&s->gb);
710
711         if (s->mb_skip_run--) {
712             if(FRAME_MBAFF && (s->mb_y&1) == 0){
713                 if(s->mb_skip_run==0)
714                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
715             }
716             decode_mb_skip(h);
717             return 0;
718         }
719     }
720     if(FRAME_MBAFF){
721         if( (s->mb_y&1) == 0 )
722             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
723     }
724
725     h->prev_mb_skipped= 0;
726
727     mb_type= get_ue_golomb(&s->gb);
728     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
729         if(mb_type < 23){
730             partition_count= b_mb_type_info[mb_type].partition_count;
731             mb_type=         b_mb_type_info[mb_type].type;
732         }else{
733             mb_type -= 23;
734             goto decode_intra_mb;
735         }
736     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
737         if(mb_type < 5){
738             partition_count= p_mb_type_info[mb_type].partition_count;
739             mb_type=         p_mb_type_info[mb_type].type;
740         }else{
741             mb_type -= 5;
742             goto decode_intra_mb;
743         }
744     }else{
745        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
746         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
747             mb_type--;
748 decode_intra_mb:
749         if(mb_type > 25){
750             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
751             return -1;
752         }
753         partition_count=0;
754         cbp= i_mb_type_info[mb_type].cbp;
755         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
756         mb_type= i_mb_type_info[mb_type].type;
757     }
758
759     if(MB_FIELD)
760         mb_type |= MB_TYPE_INTERLACED;
761
762     h->slice_table[ mb_xy ]= h->slice_num;
763
764     if(IS_INTRA_PCM(mb_type)){
765         unsigned int x;
766         static const uint16_t mb_sizes[4] = {256,384,512,768};
767         const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
768
769         // We assume these blocks are very rare so we do not optimize it.
770         align_get_bits(&s->gb);
771
772         // The pixels are stored in the same order as levels in h->mb array.
773         for(x=0; x < mb_size; x++){
774             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
775         }
776
777         // In deblocking, the quantizer is 0
778         s->current_picture.f.qscale_table[mb_xy] = 0;
779         // All coeffs are present
780         memset(h->non_zero_count[mb_xy], 16, 48);
781
782         s->current_picture.f.mb_type[mb_xy] = mb_type;
783         return 0;
784     }
785
786     if(MB_MBAFF){
787         h->ref_count[0] <<= 1;
788         h->ref_count[1] <<= 1;
789     }
790
791     fill_decode_neighbors(h, mb_type);
792     fill_decode_caches(h, mb_type);
793
794     //mb_pred
795     if(IS_INTRA(mb_type)){
796         int pred_mode;
797 //            init_top_left_availability(h);
798         if(IS_INTRA4x4(mb_type)){
799             int i;
800             int di = 1;
801             if(dct8x8_allowed && get_bits1(&s->gb)){
802                 mb_type |= MB_TYPE_8x8DCT;
803                 di = 4;
804             }
805
806 //                fill_intra4x4_pred_table(h);
807             for(i=0; i<16; i+=di){
808                 int mode= pred_intra_mode(h, i);
809
810                 if(!get_bits1(&s->gb)){
811                     const int rem_mode= get_bits(&s->gb, 3);
812                     mode = rem_mode + (rem_mode >= mode);
813                 }
814
815                 if(di==4)
816                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
817                 else
818                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
819             }
820             write_back_intra_pred_mode(h);
821             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
822                 return -1;
823         }else{
824             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
825             if(h->intra16x16_pred_mode < 0)
826                 return -1;
827         }
828         if(decode_chroma){
829             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
830             if(pred_mode < 0)
831                 return -1;
832             h->chroma_pred_mode= pred_mode;
833         } else {
834             h->chroma_pred_mode = DC_128_PRED8x8;
835         }
836     }else if(partition_count==4){
837         int i, j, sub_partition_count[4], list, ref[2][4];
838
839         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
840             for(i=0; i<4; i++){
841                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
842                 if(h->sub_mb_type[i] >=13){
843                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
844                     return -1;
845                 }
846                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
847                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
848             }
849             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
850                 ff_h264_pred_direct_motion(h, &mb_type);
851                 h->ref_cache[0][scan8[4]] =
852                 h->ref_cache[1][scan8[4]] =
853                 h->ref_cache[0][scan8[12]] =
854                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
855             }
856         }else{
857             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
858             for(i=0; i<4; i++){
859                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
860                 if(h->sub_mb_type[i] >=4){
861                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
862                     return -1;
863                 }
864                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
865                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
866             }
867         }
868
869         for(list=0; list<h->list_count; list++){
870             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
871             for(i=0; i<4; i++){
872                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
873                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
874                     unsigned int tmp;
875                     if(ref_count == 1){
876                         tmp= 0;
877                     }else if(ref_count == 2){
878                         tmp= get_bits1(&s->gb)^1;
879                     }else{
880                         tmp= get_ue_golomb_31(&s->gb);
881                         if(tmp>=ref_count){
882                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
883                             return -1;
884                         }
885                     }
886                     ref[list][i]= tmp;
887                 }else{
888                  //FIXME
889                     ref[list][i] = -1;
890                 }
891             }
892         }
893
894         if(dct8x8_allowed)
895             dct8x8_allowed = get_dct8x8_allowed(h);
896
897         for(list=0; list<h->list_count; list++){
898             for(i=0; i<4; i++){
899                 if(IS_DIRECT(h->sub_mb_type[i])) {
900                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
901                     continue;
902                 }
903                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
904                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
905
906                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
907                     const int sub_mb_type= h->sub_mb_type[i];
908                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
909                     for(j=0; j<sub_partition_count[i]; j++){
910                         int mx, my;
911                         const int index= 4*i + block_width*j;
912                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
913                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
914                         mx += get_se_golomb(&s->gb);
915                         my += get_se_golomb(&s->gb);
916                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
917
918                         if(IS_SUB_8X8(sub_mb_type)){
919                             mv_cache[ 1 ][0]=
920                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
921                             mv_cache[ 1 ][1]=
922                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
923                         }else if(IS_SUB_8X4(sub_mb_type)){
924                             mv_cache[ 1 ][0]= mx;
925                             mv_cache[ 1 ][1]= my;
926                         }else if(IS_SUB_4X8(sub_mb_type)){
927                             mv_cache[ 8 ][0]= mx;
928                             mv_cache[ 8 ][1]= my;
929                         }
930                         mv_cache[ 0 ][0]= mx;
931                         mv_cache[ 0 ][1]= my;
932                     }
933                 }else{
934                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
935                     p[0] = p[1]=
936                     p[8] = p[9]= 0;
937                 }
938             }
939         }
940     }else if(IS_DIRECT(mb_type)){
941         ff_h264_pred_direct_motion(h, &mb_type);
942         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
943     }else{
944         int list, mx, my, i;
945          //FIXME we should set ref_idx_l? to 0 if we use that later ...
946         if(IS_16X16(mb_type)){
947             for(list=0; list<h->list_count; list++){
948                     unsigned int val;
949                     if(IS_DIR(mb_type, 0, list)){
950                         if(h->ref_count[list]==1){
951                             val= 0;
952                         }else if(h->ref_count[list]==2){
953                             val= get_bits1(&s->gb)^1;
954                         }else{
955                             val= get_ue_golomb_31(&s->gb);
956                             if(val >= h->ref_count[list]){
957                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
958                                 return -1;
959                             }
960                         }
961                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
962                     }
963             }
964             for(list=0; list<h->list_count; list++){
965                 if(IS_DIR(mb_type, 0, list)){
966                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
967                     mx += get_se_golomb(&s->gb);
968                     my += get_se_golomb(&s->gb);
969                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
970
971                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
972                 }
973             }
974         }
975         else if(IS_16X8(mb_type)){
976             for(list=0; list<h->list_count; list++){
977                     for(i=0; i<2; i++){
978                         unsigned int val;
979                         if(IS_DIR(mb_type, i, list)){
980                             if(h->ref_count[list] == 1){
981                                 val= 0;
982                             }else if(h->ref_count[list] == 2){
983                                 val= get_bits1(&s->gb)^1;
984                             }else{
985                                 val= get_ue_golomb_31(&s->gb);
986                                 if(val >= h->ref_count[list]){
987                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
988                                     return -1;
989                                 }
990                             }
991                         }else
992                             val= LIST_NOT_USED&0xFF;
993                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
994                     }
995             }
996             for(list=0; list<h->list_count; list++){
997                 for(i=0; i<2; i++){
998                     unsigned int val;
999                     if(IS_DIR(mb_type, i, list)){
1000                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1001                         mx += get_se_golomb(&s->gb);
1002                         my += get_se_golomb(&s->gb);
1003                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1004
1005                         val= pack16to32(mx,my);
1006                     }else
1007                         val=0;
1008                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1009                 }
1010             }
1011         }else{
1012             assert(IS_8X16(mb_type));
1013             for(list=0; list<h->list_count; list++){
1014                     for(i=0; i<2; i++){
1015                         unsigned int val;
1016                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1017                             if(h->ref_count[list]==1){
1018                                 val= 0;
1019                             }else if(h->ref_count[list]==2){
1020                                 val= get_bits1(&s->gb)^1;
1021                             }else{
1022                                 val= get_ue_golomb_31(&s->gb);
1023                                 if(val >= h->ref_count[list]){
1024                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1025                                     return -1;
1026                                 }
1027                             }
1028                         }else
1029                             val= LIST_NOT_USED&0xFF;
1030                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1031                     }
1032             }
1033             for(list=0; list<h->list_count; list++){
1034                 for(i=0; i<2; i++){
1035                     unsigned int val;
1036                     if(IS_DIR(mb_type, i, list)){
1037                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1038                         mx += get_se_golomb(&s->gb);
1039                         my += get_se_golomb(&s->gb);
1040                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1041
1042                         val= pack16to32(mx,my);
1043                     }else
1044                         val=0;
1045                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1046                 }
1047             }
1048         }
1049     }
1050
1051     if(IS_INTER(mb_type))
1052         write_back_motion(h, mb_type);
1053
1054     if(!IS_INTRA16x16(mb_type)){
1055         cbp= get_ue_golomb(&s->gb);
1056
1057         if(decode_chroma){
1058             if(cbp > 47){
1059                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1060                 return -1;
1061             }
1062             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1063             else                     cbp= golomb_to_inter_cbp   [cbp];
1064         }else{
1065             if(cbp > 15){
1066                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1067                 return -1;
1068             }
1069             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1070             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1071         }
1072     }
1073
1074     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1075         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1076     }
1077     h->cbp=
1078     h->cbp_table[mb_xy]= cbp;
1079     s->current_picture.f.mb_type[mb_xy] = mb_type;
1080
1081     if(cbp || IS_INTRA16x16(mb_type)){
1082         int i4x4, i8x8, chroma_idx;
1083         int dquant;
1084         int ret;
1085         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1086         const uint8_t *scan, *scan8x8;
1087         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1088
1089         if(IS_INTERLACED(mb_type)){
1090             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1091             scan= s->qscale ? h->field_scan : h->field_scan_q0;
1092         }else{
1093             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1094             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1095         }
1096
1097         dquant= get_se_golomb(&s->gb);
1098
1099         s->qscale += dquant;
1100
1101         if(((unsigned)s->qscale) > max_qp){
1102             if(s->qscale<0) s->qscale+= max_qp+1;
1103             else            s->qscale-= max_qp+1;
1104             if(((unsigned)s->qscale) > max_qp){
1105                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1106                 return -1;
1107             }
1108         }
1109
1110         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1111         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1112
1113         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1114             return -1;
1115         }
1116         h->cbp_table[mb_xy] |= ret << 12;
1117         if(CHROMA444){
1118             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1119                 return -1;
1120             }
1121             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1122                 return -1;
1123             }
1124         } else {
1125             const int num_c8x8 = h->sps.chroma_format_idc;
1126
1127             if(cbp&0x30){
1128                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1129                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1130                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1131                                         CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1132                                         NULL, 4*num_c8x8) < 0) {
1133                         return -1;
1134                     }
1135             }
1136
1137             if(cbp&0x20){
1138                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1139                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1140                     DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1141                     for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1142                         for (i4x4=0; i4x4<4; i4x4++) {
1143                             const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1144                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1145                                 return -1;
1146                             mb += 16<<pixel_shift;
1147                         }
1148                     }
1149                 }
1150             }else{
1151                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1152                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1153             }
1154         }
1155     }else{
1156         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1157         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1158         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1159     }
1160     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1161     write_back_non_zero_count(h);
1162
1163     if(MB_MBAFF){
1164         h->ref_count[0] >>= 1;
1165         h->ref_count[1] >>= 1;
1166     }
1167
1168     return 0;
1169 }
1170