]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
libcdio: support recent cdio-paranoia
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/* 16-entry byte lookup table. Judging by its name it translates a
 * Golomb-coded index into a coded block pattern for inter macroblocks when
 * no chroma is coded ("gray"); the code that reads it is outside this
 * excerpt -- TODO confirm against the caller. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
43 };
44
/* 16-entry byte lookup table, presumably the intra 4x4 counterpart of the
 * table above (same index -> coded block pattern idea); its users are not
 * visible in this excerpt -- TODO confirm. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
47 };
48
/* Code lengths used to build chroma_dc_coeff_token_vlc, the coeff_token VLC
 * that decode_residual() reads when max_coeff == 4.
 * Five rows of four entries; decode_residual() splits the decoded symbol as
 * total_coeff = symbol >> 2 and trailing_ones = symbol & 3, so each row is
 * one total_coeff value and each column one trailing_ones value (assuming
 * init_vlc() numbers symbols by table index -- confirm).
 * A length of 0 presumably marks a combination that has no code. */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
50  2, 0, 0, 0,
51  6, 1, 0, 0,
52  6, 6, 3, 0,
53  6, 7, 7, 6,
54  6, 8, 8, 7,
55 };
56
/* Code words matching chroma_dc_coeff_token_len entry for entry; both arrays
 * are handed to the same init_vlc() call. */
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
58  1, 0, 0, 0,
59  7, 1, 0, 0,
60  4, 6, 1, 0,
61  3, 3, 2, 5,
62  2, 3, 2, 0,
63 };
64
/* Code lengths used to build chroma422_dc_coeff_token_vlc, the coeff_token
 * VLC that decode_residual() reads when max_coeff <= 8 but is not 4.
 * Nine rows of four entries, laid out like chroma_dc_coeff_token_len:
 * row = symbol >> 2 (total_coeff), column = symbol & 3 (trailing_ones). */
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
66   1,  0,  0,  0,
67   7,  2,  0,  0,
68   7,  7,  3,  0,
69   9,  7,  7,  5,
70   9,  9,  7,  6,
71  10, 10,  9,  7,
72  11, 11, 10,  7,
73  12, 12, 11, 10,
74  13, 12, 12, 11,
75 };
76
/* Code words matching chroma422_dc_coeff_token_len entry for entry. */
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
78   1,   0,  0, 0,
79  15,   1,  0, 0,
80  14,  13,  1, 0,
81   7,  12, 11, 1,
82   6,   5, 10, 1,
83   7,   6,  4, 9,
84   7,   6,  5, 8,
85   7,   6,  5, 4,
86   7,   5,  4, 4,
87 };
88
/* Code lengths for the four coeff_token VLCs used for blocks with more than
 * 8 coefficients. decode_residual() picks one of the four tables from the
 * predicted non-zero count (via its coeff_token_table_index[] map).
 * Each table holds 17 groups of four entries; the decoded symbol is split as
 * total_coeff = symbol >> 2 and trailing_ones = symbol & 3.
 * A length of 0 presumably marks a combination that has no code. */
89 static const uint8_t coeff_token_len[4][4*17]={
90 {
91      1, 0, 0, 0,
92      6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
93     11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
94     14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
95     16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
96 },
97 {
98      2, 0, 0, 0,
99      6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
100      8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
101     12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
102     13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
103 },
104 {
105      4, 0, 0, 0,
106      6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
107      7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
108      8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
109     10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
110 },
111 {
112      6, 0, 0, 0,
113      6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
114      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
115      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
116      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
117 }
118 };
119
/* Code words matching coeff_token_len entry for entry. The fourth table uses
 * a constant length of 6 bits for every valid entry. */
120 static const uint8_t coeff_token_bits[4][4*17]={
121 {
122      1, 0, 0, 0,
123      5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
124      7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
125     15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
126     15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
127 },
128 {
129      3, 0, 0, 0,
130     11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
131      4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
132     15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
133     11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
134 },
135 {
136     15, 0, 0, 0,
137     15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
138     11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
139     11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
140     13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
141 },
142 {
143      3, 0, 0, 0,
144      0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
145     16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
146     32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
147     48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
148 }
149 };
150
/* Code lengths for the total_zeros VLCs of blocks with more than 8
 * coefficients. Row r feeds total_zeros_vlc[r], which decode_residual()
 * selects with total_coeff - 1; only the first 15 rows are turned into VLCs.
 * The column index is the decoded zeros_left value. Rows get shorter as
 * total_coeff grows; the omitted trailing entries are implicitly 0. */
151 static const uint8_t total_zeros_len[16][16]= {
152     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155     {5,3,4,4,3,3,3,4,3,4,5,5,5},
156     {4,4,4,3,3,3,3,3,4,5,4,5},
157     {6,5,3,3,3,3,3,3,4,3,6},
158     {6,5,3,3,3,2,3,4,3,6},
159     {6,4,5,3,2,2,3,3,6},
160     {6,6,4,2,2,3,2,5},
161     {5,5,3,2,2,2,4},
162     {4,4,3,3,1,3},
163     {4,4,2,1,3},
164     {3,3,1,2},
165     {2,2,1},
166     {1,1},
167 };
168
/* Code words matching total_zeros_len entry for entry. */
169 static const uint8_t total_zeros_bits[16][16]= {
170     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173     {3,7,5,4,6,5,4,3,3,2,2,1,0},
174     {5,4,3,7,6,5,4,3,2,1,1,0},
175     {1,1,7,6,5,4,3,2,1,1,0},
176     {1,1,5,4,3,3,2,1,1,0},
177     {1,1,1,3,3,2,2,1,0},
178     {1,0,1,3,2,1,1,1},
179     {1,0,1,3,2,1,1},
180     {0,1,1,2,1,3},
181     {0,1,1,1,1},
182     {0,1,1,1},
183     {0,1,1},
184     {0,1},
185 };
186
/* Code lengths for the total_zeros VLCs used when max_coeff == 4.
 * Row r feeds chroma_dc_total_zeros_vlc[r], selected in decode_residual()
 * with total_coeff - 1; the column index is the decoded zeros_left value. */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
188     { 1, 2, 3, 3,},
189     { 1, 2, 2, 0,},
190     { 1, 1, 0, 0,},
191 };
192
/* Code words matching chroma_dc_total_zeros_len entry for entry. */
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
194     { 1, 1, 1, 0,},
195     { 1, 1, 0, 0,},
196     { 1, 0, 0, 0,},
197 };
198
/* Code lengths for the total_zeros VLCs used when max_coeff <= 8 but is
 * not 4. Row r feeds chroma422_dc_total_zeros_vlc[r], selected in
 * decode_residual() with total_coeff - 1; the column index is the decoded
 * zeros_left value. Omitted trailing entries are implicitly 0. */
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200     { 1, 3, 3, 4, 4, 4, 5, 5 },
201     { 3, 2, 3, 3, 3, 3, 3 },
202     { 3, 3, 2, 2, 3, 3 },
203     { 3, 2, 2, 2, 3 },
204     { 2, 2, 2, 2 },
205     { 2, 2, 1 },
206     { 1, 1 },
207 };
208
/* Code words matching chroma422_dc_total_zeros_len entry for entry. */
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210     { 1, 2, 3, 2, 3, 1, 1, 0 },
211     { 0, 1, 1, 4, 5, 6, 7 },
212     { 0, 1, 1, 2, 6, 7 },
213     { 6, 0, 1, 2, 7 },
214     { 0, 1, 2, 3 },
215     { 0, 1, 1 },
216     { 0, 1 },
217 };
218
/* Code lengths for the run_before VLCs. Rows 0..5 feed run_vlc[0..5], which
 * decode_residual() selects with zeros_left - 1 while zeros_left < 7; row 6
 * feeds run7_vlc, used for every zeros_left >= 7. The column index is the
 * decoded run_before value. Omitted trailing entries are implicitly 0. */
219 static const uint8_t run_len[7][16]={
220     {1,1},
221     {1,2,2},
222     {2,2,2,2},
223     {2,2,2,3,3},
224     {2,2,3,3,3,3},
225     {2,3,3,3,3,3,3},
226     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
227 };
228
/* Code words matching run_len entry for entry. */
229 static const uint8_t run_bits[7][16]={
230     {1,0},
231     {1,1,0},
232     {3,2,1,0},
233     {3,2,1,1,0},
234     {3,2,3,2,1,0},
235     {3,0,1,3,2,5,4},
236     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
237 };
238
/* Runtime VLC objects together with the statically allocated storage that
 * ff_h264_decode_init_vlc() attaches to them (.table / .table_allocated)
 * before calling init_vlc() with INIT_VLC_USE_NEW_STATIC. Each *_size
 * constant is the element count of the matching storage array. */

/* The four coeff_token tables are packed back to back in one array; the
 * per-table sizes below are used both as table_allocated and as the running
 * offset into the packed array. */
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262
/* run_vlc[] serves zeros_left 1..6, run7_vlc serves zeros_left >= 7
 * (see the STORE_BLOCK macro in decode_residual()). */
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270
/* Direct lookup for coefficient levels, indexed by [suffix_length][next
 * LEVEL_TAB_BITS bits of the stream]. Element [0] is either the decoded
 * level or, when >= 100, an escape carrying 100 + prefix; element [1] is the
 * number of bits to skip. Filled by init_cavlc_level_tab(). */
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273
/* Lookup widths passed as the "bits" argument to both init_vlc() and
 * get_vlc2() for the corresponding VLC. */
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS           8
277 #define TOTAL_ZEROS_VLC_BITS           9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS                   3
281 #define RUN7_VLC_BITS                  6
282
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325
326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410
411         init_cavlc_level_tab();
412     }
413 }
414
415 /**
416  *
417  */
418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434
435     return log-1;
436 }
437
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
445 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     MpegEncContext * const s = &h->s;
447     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448     int level[16];
449     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
450
451     //FIXME put trailing_onex into the context
452
453     if(max_coeff <= 8){
454         if (max_coeff == 4)
455             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
456         else
457             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458         total_coeff= coeff_token>>2;
459     }else{
460         if(n >= LUMA_DC_BLOCK_INDEX){
461             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
462             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463             total_coeff= coeff_token>>2;
464         }else{
465             total_coeff= pred_non_zero_count(h, n);
466             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467             total_coeff= coeff_token>>2;
468         }
469     }
470     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
471
472     //FIXME set last_non_zero?
473
474     if(total_coeff==0)
475         return 0;
476     if(total_coeff > (unsigned)max_coeff) {
477         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
478         return -1;
479     }
480
481     trailing_ones= coeff_token&3;
482     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483     assert(total_coeff<=16);
484
485     i = show_bits(gb, 3);
486     skip_bits(gb, trailing_ones);
487     level[0] = 1-((i&4)>>1);
488     level[1] = 1-((i&2)   );
489     level[2] = 1-((i&1)<<1);
490
491     if(trailing_ones<total_coeff) {
492         int mask, prefix;
493         int suffix_length = total_coeff > 10 & trailing_ones < 3;
494         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
496
497         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
498         if(level_code >= 100){
499             prefix= level_code - 100;
500             if(prefix == LEVEL_TAB_BITS)
501                 prefix += get_level_prefix(gb);
502
503             //first coefficient has suffix_length equal to 0 or 1
504             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
505                 if(suffix_length)
506                     level_code= (prefix<<1) + get_bits1(gb); //part
507                 else
508                     level_code= prefix; //part
509             }else if(prefix==14){
510                 if(suffix_length)
511                     level_code= (prefix<<1) + get_bits1(gb); //part
512                 else
513                     level_code= prefix + get_bits(gb, 4); //part
514             }else{
515                 level_code= 30 + get_bits(gb, prefix-3); //part
516                 if(prefix>=16){
517                     if(prefix > 25+3){
518                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
519                         return -1;
520                     }
521                     level_code += (1<<(prefix-3))-4096;
522                 }
523             }
524
525             if(trailing_ones < 3) level_code += 2;
526
527             suffix_length = 2;
528             mask= -(level_code&1);
529             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530         }else{
531             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532
533             suffix_length = 1 + (level_code + 3U > 6U);
534             level[trailing_ones]= level_code;
535         }
536
537         //remaining coefficients have suffix_length > 0
538         for(i=trailing_ones+1;i<total_coeff;i++) {
539             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541             level_code= cavlc_level_tab[suffix_length][bitsi][0];
542
543             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544             if(level_code >= 100){
545                 prefix= level_code - 100;
546                 if(prefix == LEVEL_TAB_BITS){
547                     prefix += get_level_prefix(gb);
548                 }
549                 if(prefix<15){
550                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551                 }else{
552                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
553                     if(prefix>=16)
554                         level_code += (1<<(prefix-3))-4096;
555                 }
556                 mask= -(level_code&1);
557                 level_code= (((2+level_code)>>1) ^ mask) - mask;
558             }
559             level[i]= level_code;
560             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
561         }
562     }
563
564     if(total_coeff == max_coeff)
565         zeros_left=0;
566     else{
567         if (max_coeff <= 8) {
568             if (max_coeff == 4)
569                 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
570                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
571             else
572                 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
573                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
574         } else {
575             zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
576         }
577     }
578
579 #define STORE_BLOCK(type) \
580     scantable += zeros_left + total_coeff - 1; \
581     if(n >= LUMA_DC_BLOCK_INDEX){ \
582         ((type*)block)[*scantable] = level[0]; \
583         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
584             if(zeros_left < 7) \
585                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
586             else \
587                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
588             zeros_left -= run_before; \
589             scantable -= 1 + run_before; \
590             ((type*)block)[*scantable]= level[i]; \
591         } \
592         for(;i<total_coeff;i++) { \
593             scantable--; \
594             ((type*)block)[*scantable]= level[i]; \
595         } \
596     }else{ \
597         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
598         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
599             if(zeros_left < 7) \
600                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
601             else \
602                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
603             zeros_left -= run_before; \
604             scantable -= 1 + run_before; \
605             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
606         } \
607         for(;i<total_coeff;i++) { \
608             scantable--; \
609             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610         } \
611     }
612
613     if (zeros_left < 0) {
614         av_log(h->s.avctx, AV_LOG_ERROR,
615                "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
616         return AVERROR_INVALIDDATA;
617     }
618
619     if (h->pixel_shift) {
620         STORE_BLOCK(int32_t)
621     } else {
622         STORE_BLOCK(int16_t)
623     }
624
625     return 0;
626 }
627
628 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
629     int i4x4, i8x8;
630     MpegEncContext * const s = &h->s;
631     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
632     if(IS_INTRA16x16(mb_type)){
633         AV_ZERO128(h->mb_luma_dc[p]+0);
634         AV_ZERO128(h->mb_luma_dc[p]+8);
635         AV_ZERO128(h->mb_luma_dc[p]+16);
636         AV_ZERO128(h->mb_luma_dc[p]+24);
637         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
638             return -1; //FIXME continue if partitioned and other return -1 too
639         }
640
641         assert((cbp&15) == 0 || (cbp&15) == 15);
642
643         if(cbp&15){
644             for(i8x8=0; i8x8<4; i8x8++){
645                 for(i4x4=0; i4x4<4; i4x4++){
646                     const int index= i4x4 + 4*i8x8 + p*16;
647                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
648                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
649                         return -1;
650                     }
651                 }
652             }
653             return 0xf;
654         }else{
655             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
656             return 0;
657         }
658     }else{
659         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
660         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
661         int new_cbp = 0;
662         for(i8x8=0; i8x8<4; i8x8++){
663             if(cbp & (1<<i8x8)){
664                 if(IS_8x8DCT(mb_type)){
665                     DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
666                     uint8_t *nnz;
667                     for(i4x4=0; i4x4<4; i4x4++){
668                         const int index= i4x4 + 4*i8x8 + p*16;
669                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
670                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
671                             return -1;
672                     }
673                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
674                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
675                     new_cbp |= !!nnz[0] << i8x8;
676                 }else{
677                     for(i4x4=0; i4x4<4; i4x4++){
678                         const int index= i4x4 + 4*i8x8 + p*16;
679                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
680                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
681                             return -1;
682                         }
683                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
684                     }
685                 }
686             }else{
687                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
688                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
689             }
690         }
691         return new_cbp;
692     }
693 }
694
695 int ff_h264_decode_mb_cavlc(H264Context *h){
696     MpegEncContext * const s = &h->s;
697     int mb_xy;
698     int partition_count;
699     unsigned int mb_type, cbp;
700     int dct8x8_allowed= h->pps.transform_8x8_mode;
701     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
702     const int pixel_shift = h->pixel_shift;
703
704     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
705
706     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
707     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
708                 down the code */
709     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
710         if(s->mb_skip_run==-1)
711             s->mb_skip_run= get_ue_golomb(&s->gb);
712
713         if (s->mb_skip_run--) {
714             if(FRAME_MBAFF && (s->mb_y&1) == 0){
715                 if(s->mb_skip_run==0)
716                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
717             }
718             decode_mb_skip(h);
719             return 0;
720         }
721     }
722     if(FRAME_MBAFF){
723         if( (s->mb_y&1) == 0 )
724             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
725     }
726
727     h->prev_mb_skipped= 0;
728
729     mb_type= get_ue_golomb(&s->gb);
730     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
731         if(mb_type < 23){
732             partition_count= b_mb_type_info[mb_type].partition_count;
733             mb_type=         b_mb_type_info[mb_type].type;
734         }else{
735             mb_type -= 23;
736             goto decode_intra_mb;
737         }
738     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
739         if(mb_type < 5){
740             partition_count= p_mb_type_info[mb_type].partition_count;
741             mb_type=         p_mb_type_info[mb_type].type;
742         }else{
743             mb_type -= 5;
744             goto decode_intra_mb;
745         }
746     }else{
747        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
748         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
749             mb_type--;
750 decode_intra_mb:
751         if(mb_type > 25){
752             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
753             return -1;
754         }
755         partition_count=0;
756         cbp= i_mb_type_info[mb_type].cbp;
757         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
758         mb_type= i_mb_type_info[mb_type].type;
759     }
760
761     if(MB_FIELD)
762         mb_type |= MB_TYPE_INTERLACED;
763
764     h->slice_table[ mb_xy ]= h->slice_num;
765
766     if(IS_INTRA_PCM(mb_type)){
767         unsigned int x;
768         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
769                             h->sps.bit_depth_luma >> 3;
770
771         // We assume these blocks are very rare so we do not optimize it.
772         align_get_bits(&s->gb);
773
774         // The pixels are stored in the same order as levels in h->mb array.
775         for(x=0; x < mb_size; x++){
776             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
777         }
778
779         // In deblocking, the quantizer is 0
780         s->current_picture.f.qscale_table[mb_xy] = 0;
781         // All coeffs are present
782         memset(h->non_zero_count[mb_xy], 16, 48);
783
784         s->current_picture.f.mb_type[mb_xy] = mb_type;
785         return 0;
786     }
787
788     fill_decode_neighbors(h, mb_type);
789     fill_decode_caches(h, mb_type);
790
791     //mb_pred
792     if(IS_INTRA(mb_type)){
793         int pred_mode;
794 //            init_top_left_availability(h);
795         if(IS_INTRA4x4(mb_type)){
796             int i;
797             int di = 1;
798             if(dct8x8_allowed && get_bits1(&s->gb)){
799                 mb_type |= MB_TYPE_8x8DCT;
800                 di = 4;
801             }
802
803 //                fill_intra4x4_pred_table(h);
804             for(i=0; i<16; i+=di){
805                 int mode= pred_intra_mode(h, i);
806
807                 if(!get_bits1(&s->gb)){
808                     const int rem_mode= get_bits(&s->gb, 3);
809                     mode = rem_mode + (rem_mode >= mode);
810                 }
811
812                 if(di==4)
813                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
814                 else
815                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
816             }
817             write_back_intra_pred_mode(h);
818             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
819                 return -1;
820         }else{
821             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
822             if(h->intra16x16_pred_mode < 0)
823                 return -1;
824         }
825         if(decode_chroma){
826             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
827             if(pred_mode < 0)
828                 return -1;
829             h->chroma_pred_mode= pred_mode;
830         } else {
831             h->chroma_pred_mode = DC_128_PRED8x8;
832         }
833     }else if(partition_count==4){
834         int i, j, sub_partition_count[4], list, ref[2][4];
835
836         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
837             for(i=0; i<4; i++){
838                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
839                 if(h->sub_mb_type[i] >=13){
840                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
841                     return -1;
842                 }
843                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
844                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
845             }
846             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
847                 ff_h264_pred_direct_motion(h, &mb_type);
848                 h->ref_cache[0][scan8[4]] =
849                 h->ref_cache[1][scan8[4]] =
850                 h->ref_cache[0][scan8[12]] =
851                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
852             }
853         }else{
854             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
855             for(i=0; i<4; i++){
856                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
857                 if(h->sub_mb_type[i] >=4){
858                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
859                     return -1;
860                 }
861                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
862                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
863             }
864         }
865
866         for(list=0; list<h->list_count; list++){
867             int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF;
868             for(i=0; i<4; i++){
869                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
870                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
871                     unsigned int tmp;
872                     if(ref_count == 1){
873                         tmp= 0;
874                     }else if(ref_count == 2){
875                         tmp= get_bits1(&s->gb)^1;
876                     }else{
877                         tmp= get_ue_golomb_31(&s->gb);
878                         if(tmp>=ref_count){
879                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
880                             return -1;
881                         }
882                     }
883                     ref[list][i]= tmp;
884                 }else{
885                  //FIXME
886                     ref[list][i] = -1;
887                 }
888             }
889         }
890
891         if(dct8x8_allowed)
892             dct8x8_allowed = get_dct8x8_allowed(h);
893
894         for(list=0; list<h->list_count; list++){
895             for(i=0; i<4; i++){
896                 if(IS_DIRECT(h->sub_mb_type[i])) {
897                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
898                     continue;
899                 }
900                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
901                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
902
903                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
904                     const int sub_mb_type= h->sub_mb_type[i];
905                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
906                     for(j=0; j<sub_partition_count[i]; j++){
907                         int mx, my;
908                         const int index= 4*i + block_width*j;
909                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
910                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
911                         mx += get_se_golomb(&s->gb);
912                         my += get_se_golomb(&s->gb);
913                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
914
915                         if(IS_SUB_8X8(sub_mb_type)){
916                             mv_cache[ 1 ][0]=
917                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
918                             mv_cache[ 1 ][1]=
919                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
920                         }else if(IS_SUB_8X4(sub_mb_type)){
921                             mv_cache[ 1 ][0]= mx;
922                             mv_cache[ 1 ][1]= my;
923                         }else if(IS_SUB_4X8(sub_mb_type)){
924                             mv_cache[ 8 ][0]= mx;
925                             mv_cache[ 8 ][1]= my;
926                         }
927                         mv_cache[ 0 ][0]= mx;
928                         mv_cache[ 0 ][1]= my;
929                     }
930                 }else{
931                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
932                     p[0] = p[1]=
933                     p[8] = p[9]= 0;
934                 }
935             }
936         }
937     }else if(IS_DIRECT(mb_type)){
938         ff_h264_pred_direct_motion(h, &mb_type);
939         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
940     }else{
941         int list, mx, my, i;
942          //FIXME we should set ref_idx_l? to 0 if we use that later ...
943         if(IS_16X16(mb_type)){
944             for(list=0; list<h->list_count; list++){
945                     unsigned int val;
946                     if(IS_DIR(mb_type, 0, list)){
947                         int rc = h->ref_count[list] << MB_MBAFF;
948                         if (rc == 1) {
949                             val= 0;
950                         } else if (rc == 2) {
951                             val= get_bits1(&s->gb)^1;
952                         }else{
953                             val= get_ue_golomb_31(&s->gb);
954                             if (val >= rc) {
955                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
956                                 return -1;
957                             }
958                         }
959                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
960                     }
961             }
962             for(list=0; list<h->list_count; list++){
963                 if(IS_DIR(mb_type, 0, list)){
964                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
965                     mx += get_se_golomb(&s->gb);
966                     my += get_se_golomb(&s->gb);
967                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
968
969                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
970                 }
971             }
972         }
973         else if(IS_16X8(mb_type)){
974             for(list=0; list<h->list_count; list++){
975                     for(i=0; i<2; i++){
976                         unsigned int val;
977                         if(IS_DIR(mb_type, i, list)){
978                             int rc = h->ref_count[list] << MB_MBAFF;
979                             if (rc == 1) {
980                                 val= 0;
981                             } else if (rc == 2) {
982                                 val= get_bits1(&s->gb)^1;
983                             }else{
984                                 val= get_ue_golomb_31(&s->gb);
985                                 if (val >= rc) {
986                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
987                                     return -1;
988                                 }
989                             }
990                         }else
991                             val= LIST_NOT_USED&0xFF;
992                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
993                     }
994             }
995             for(list=0; list<h->list_count; list++){
996                 for(i=0; i<2; i++){
997                     unsigned int val;
998                     if(IS_DIR(mb_type, i, list)){
999                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1000                         mx += get_se_golomb(&s->gb);
1001                         my += get_se_golomb(&s->gb);
1002                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1003
1004                         val= pack16to32(mx,my);
1005                     }else
1006                         val=0;
1007                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1008                 }
1009             }
1010         }else{
1011             assert(IS_8X16(mb_type));
1012             for(list=0; list<h->list_count; list++){
1013                     for(i=0; i<2; i++){
1014                         unsigned int val;
1015                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1016                             int rc = h->ref_count[list] << MB_MBAFF;
1017                             if (rc == 1) {
1018                                 val= 0;
1019                             } else if (rc == 2) {
1020                                 val= get_bits1(&s->gb)^1;
1021                             }else{
1022                                 val= get_ue_golomb_31(&s->gb);
1023                                 if (val >= rc) {
1024                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1025                                     return -1;
1026                                 }
1027                             }
1028                         }else
1029                             val= LIST_NOT_USED&0xFF;
1030                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1031                     }
1032             }
1033             for(list=0; list<h->list_count; list++){
1034                 for(i=0; i<2; i++){
1035                     unsigned int val;
1036                     if(IS_DIR(mb_type, i, list)){
1037                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1038                         mx += get_se_golomb(&s->gb);
1039                         my += get_se_golomb(&s->gb);
1040                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1041
1042                         val= pack16to32(mx,my);
1043                     }else
1044                         val=0;
1045                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1046                 }
1047             }
1048         }
1049     }
1050
1051     if(IS_INTER(mb_type))
1052         write_back_motion(h, mb_type);
1053
1054     if(!IS_INTRA16x16(mb_type)){
1055         cbp= get_ue_golomb(&s->gb);
1056
1057         if(decode_chroma){
1058             if(cbp > 47){
1059                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1060                 return -1;
1061             }
1062             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1063             else                     cbp= golomb_to_inter_cbp   [cbp];
1064         }else{
1065             if(cbp > 15){
1066                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1067                 return -1;
1068             }
1069             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1070             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1071         }
1072     }
1073
1074     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1075         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1076     }
1077     h->cbp=
1078     h->cbp_table[mb_xy]= cbp;
1079     s->current_picture.f.mb_type[mb_xy] = mb_type;
1080
1081     if(cbp || IS_INTRA16x16(mb_type)){
1082         int i4x4, i8x8, chroma_idx;
1083         int dquant;
1084         int ret;
1085         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1086         const uint8_t *scan, *scan8x8;
1087         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1088
1089         if(IS_INTERLACED(mb_type)){
1090             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1091             scan= s->qscale ? h->field_scan : h->field_scan_q0;
1092         }else{
1093             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1094             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1095         }
1096
1097         dquant= get_se_golomb(&s->gb);
1098
1099         s->qscale += dquant;
1100
1101         if(((unsigned)s->qscale) > max_qp){
1102             if(s->qscale<0) s->qscale+= max_qp+1;
1103             else            s->qscale-= max_qp+1;
1104             if(((unsigned)s->qscale) > max_qp){
1105                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1106                 return -1;
1107             }
1108         }
1109
1110         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1111         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1112
1113         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1114             return -1;
1115         }
1116         h->cbp_table[mb_xy] |= ret << 12;
1117         if(CHROMA444){
1118             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1119                 return -1;
1120             }
1121             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1122                 return -1;
1123             }
1124         } else if (CHROMA422) {
1125             if(cbp&0x30){
1126                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1127                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1128                                         CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1129                                         NULL, 8) < 0) {
1130                         return -1;
1131                     }
1132             }
1133
1134             if(cbp&0x20){
1135                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1136                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1137                     DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1138                     for (i8x8 = 0; i8x8 < 2; i8x8++) {
1139                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1140                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1141                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1142                                 return -1;
1143                             mb += 16 << pixel_shift;
1144                         }
1145                     }
1146                 }
1147             }else{
1148                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1149                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1150             }
1151         } else /* yuv420 */ {
1152             if(cbp&0x30){
1153                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1154                     if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1155                         return -1;
1156                     }
1157             }
1158
1159             if(cbp&0x20){
1160                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1161                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1162                     for(i4x4=0; i4x4<4; i4x4++){
1163                         const int index= 16 + 16*chroma_idx + i4x4;
1164                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1165                             return -1;
1166                         }
1167                     }
1168                 }
1169             }else{
1170                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1171                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1172             }
1173         }
1174     }else{
1175         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1176         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1177         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1178     }
1179     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1180     write_back_non_zero_count(h);
1181
1182     return 0;
1183 }