]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
Merge commit '88bd7fdc821aaa0cbcf44cf075c62aaa42121e3f'
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29 #define UNCHECKED_BITSTREAM_READER 1
30
31 #include "internal.h"
32 #include "avcodec.h"
33 #include "mpegvideo.h"
34 #include "h264.h"
35 #include "h264data.h" // FIXME FIXME FIXME
36 #include "h264_mvpred.h"
37 #include "golomb.h"
38 #include "libavutil/avassert.h"
39
40
/*
 * Lookup tables with 16 entries each whose values are 4-bit patterns (0..15).
 * NOTE(review): judging by the names they translate a Golomb code number into
 * a coded block pattern for gray (luma-only) content, for inter and intra 4x4
 * macroblocks respectively; the code that reads them is outside this excerpt,
 * so confirm against the caller.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,  8,  3,  5, 10,
    12, 15,  7, 11, 13, 14,  6,  9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11, 13, 14,  3,  5,
    10, 12,  1,  2,  4,  8,  6,  9,
};
48
/*
 * Code lengths and code words of the coeff_token VLCs used for chroma DC
 * blocks. Entries are grouped four per row; decode_residual() splits the
 * decoded symbol into total_coeff (symbol >> 2, the row) and trailing_ones
 * (symbol & 3, the column).
 * A length of 0 marks a combination that has no code (trailing_ones cannot
 * exceed total_coeff) -- presumably skipped by init_vlc(), TODO confirm.
 */

/* Chroma DC with 4 coefficients: rows are total_coeff 0..4. */
static const uint8_t chroma_dc_coeff_token_len[4*5] = {
    2, 0, 0, 0, /* total_coeff 0 */
    6, 1, 0, 0, /* total_coeff 1 */
    6, 6, 3, 0, /* total_coeff 2 */
    6, 7, 7, 6, /* total_coeff 3 */
    6, 8, 8, 7, /* total_coeff 4 */
};

static const uint8_t chroma_dc_coeff_token_bits[4*5] = {
    1, 0, 0, 0, /* total_coeff 0 */
    7, 1, 0, 0, /* total_coeff 1 */
    4, 6, 1, 0, /* total_coeff 2 */
    3, 3, 2, 5, /* total_coeff 3 */
    2, 3, 2, 0, /* total_coeff 4 */
};

/* Chroma DC with 8 coefficients (4:2:2): rows are total_coeff 0..8. */
static const uint8_t chroma422_dc_coeff_token_len[4*9] = {
     1,  0,  0,  0, /* total_coeff 0 */
     7,  2,  0,  0, /* total_coeff 1 */
     7,  7,  3,  0, /* total_coeff 2 */
     9,  7,  7,  5, /* total_coeff 3 */
     9,  9,  7,  6, /* total_coeff 4 */
    10, 10,  9,  7, /* total_coeff 5 */
    11, 11, 10,  7, /* total_coeff 6 */
    12, 12, 11, 10, /* total_coeff 7 */
    13, 12, 12, 11, /* total_coeff 8 */
};

static const uint8_t chroma422_dc_coeff_token_bits[4*9] = {
     1,  0,  0,  0, /* total_coeff 0 */
    15,  1,  0,  0, /* total_coeff 1 */
    14, 13,  1,  0, /* total_coeff 2 */
     7, 12, 11,  1, /* total_coeff 3 */
     6,  5, 10,  1, /* total_coeff 4 */
     7,  6,  4,  9, /* total_coeff 5 */
     7,  6,  5,  8, /* total_coeff 6 */
     7,  6,  5,  4, /* total_coeff 7 */
     7,  5,  4,  4, /* total_coeff 8 */
};
88
/*
 * Code lengths and code words of the four coeff_token VLCs used for luma and
 * chroma AC blocks. The first index selects the VLC; decode_residual() picks
 * it through coeff_token_table_index[] from the predicted number of non-zero
 * coefficients. Within one VLC the entries are grouped four per total_coeff
 * (0..16), the position inside a group being trailing_ones.
 * A length of 0 marks a combination without a code.
 */
static const uint8_t coeff_token_len[4][4*17] = {
    { /* VLC 0 */
         1,  0,  0,  0,                                                 /* total_coeff 0      */
         6,  2,  0,  0,   8,  6,  3,  0,   9,  8,  7,  5,  10,  9,  8,  6, /* total_coeff 1..4   */
        11, 10,  9,  7,  13, 11, 10,  8,  13, 13, 11,  9,  13, 13, 13, 10, /* total_coeff 5..8   */
        14, 14, 13, 11,  14, 14, 14, 13,  15, 15, 14, 14,  15, 15, 15, 14, /* total_coeff 9..12  */
        16, 15, 15, 15,  16, 16, 16, 15,  16, 16, 16, 16,  16, 16, 16, 16, /* total_coeff 13..16 */
    },
    { /* VLC 1 */
         2,  0,  0,  0,                                                 /* total_coeff 0      */
         6,  2,  0,  0,   6,  5,  3,  0,   7,  6,  6,  4,   8,  6,  6,  4, /* total_coeff 1..4   */
         8,  7,  7,  5,   9,  8,  8,  6,  11,  9,  9,  6,  11, 11, 11,  7, /* total_coeff 5..8   */
        12, 11, 11,  9,  12, 12, 12, 11,  12, 12, 12, 11,  13, 13, 13, 12, /* total_coeff 9..12  */
        13, 13, 13, 13,  13, 14, 13, 13,  14, 14, 14, 13,  14, 14, 14, 14, /* total_coeff 13..16 */
    },
    { /* VLC 2 */
         4,  0,  0,  0,                                                 /* total_coeff 0      */
         6,  4,  0,  0,   6,  5,  4,  0,   6,  5,  5,  4,   7,  5,  5,  4, /* total_coeff 1..4   */
         7,  5,  5,  4,   7,  6,  6,  4,   7,  6,  6,  4,   8,  7,  7,  5, /* total_coeff 5..8   */
         8,  8,  7,  6,   9,  8,  8,  7,   9,  9,  8,  8,   9,  9,  9,  8, /* total_coeff 9..12  */
        10,  9,  9,  9,  10, 10, 10, 10,  10, 10, 10, 10,  10, 10, 10, 10, /* total_coeff 13..16 */
    },
    { /* VLC 3: every existing code is 6 bits long */
         6,  0,  0,  0,                                                 /* total_coeff 0      */
         6,  6,  0,  0,   6,  6,  6,  0,   6,  6,  6,  6,   6,  6,  6,  6, /* total_coeff 1..4   */
         6,  6,  6,  6,   6,  6,  6,  6,   6,  6,  6,  6,   6,  6,  6,  6, /* total_coeff 5..8   */
         6,  6,  6,  6,   6,  6,  6,  6,   6,  6,  6,  6,   6,  6,  6,  6, /* total_coeff 9..12  */
         6,  6,  6,  6,   6,  6,  6,  6,   6,  6,  6,  6,   6,  6,  6,  6, /* total_coeff 13..16 */
    },
};

static const uint8_t coeff_token_bits[4][4*17] = {
    { /* VLC 0 */
         1,  0,  0,  0,                                                 /* total_coeff 0      */
         5,  1,  0,  0,   7,  4,  1,  0,   7,  6,  5,  3,   7,  6,  5,  3, /* total_coeff 1..4   */
         7,  6,  5,  4,  15,  6,  5,  4,  11, 14,  5,  4,   8, 10, 13,  4, /* total_coeff 5..8   */
        15, 14,  9,  4,  11, 10, 13, 12,  15, 14,  9, 12,  11, 10, 13,  8, /* total_coeff 9..12  */
        15,  1,  9, 12,  11, 14, 13,  8,   7, 10,  9, 12,   4,  6,  5,  8, /* total_coeff 13..16 */
    },
    { /* VLC 1 */
         3,  0,  0,  0,                                                 /* total_coeff 0      */
        11,  2,  0,  0,   7,  7,  3,  0,   7, 10,  9,  5,   7,  6,  5,  4, /* total_coeff 1..4   */
         4,  6,  5,  6,   7,  6,  5,  8,  15,  6,  5,  4,  11, 14, 13,  4, /* total_coeff 5..8   */
        15, 10,  9,  4,  11, 14, 13, 12,   8, 10,  9,  8,  15, 14, 13, 12, /* total_coeff 9..12  */
        11, 10,  9, 12,   7, 11,  6,  8,   9,  8, 10,  1,   7,  6,  5,  4, /* total_coeff 13..16 */
    },
    { /* VLC 2 */
        15,  0,  0,  0,                                                 /* total_coeff 0      */
        15, 14,  0,  0,  11, 15, 13,  0,   8, 12, 14, 12,  15, 10, 11, 11, /* total_coeff 1..4   */
        11,  8,  9, 10,   9, 14, 13,  9,   8, 10,  9,  8,  15, 14, 13, 13, /* total_coeff 5..8   */
        11, 14, 10, 12,  15, 10, 13, 12,  11, 14,  9, 12,   8, 10, 13,  8, /* total_coeff 9..12  */
        13,  7,  9, 12,   9, 12, 11, 10,   5,  8,  7,  6,   1,  4,  3,  2, /* total_coeff 13..16 */
    },
    { /* VLC 3 */
         3,  0,  0,  0,                                                 /* total_coeff 0      */
         0,  1,  0,  0,   4,  5,  6,  0,   8,  9, 10, 11,  12, 13, 14, 15, /* total_coeff 1..4   */
        16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31, /* total_coeff 5..8   */
        32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47, /* total_coeff 9..12  */
        48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63, /* total_coeff 13..16 */
    },
};
150
/*
 * Code lengths and code words of the total_zeros VLCs for blocks with up to
 * 16 coefficients. Row r belongs to total_coeff == r + 1 and the column is
 * the decoded total_zeros value. Only rows 0..14 are populated (and only
 * those are turned into VLCs by ff_h264_decode_init_vlc()); unlisted entries
 * are implicitly zero.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 }, /* total_coeff  1 */
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },    /* total_coeff  2 */
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },       /* total_coeff  3 */
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },          /* total_coeff  4 */
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },             /* total_coeff  5 */
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },                /* total_coeff  6 */
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },                   /* total_coeff  7 */
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },                      /* total_coeff  8 */
    { 6, 6, 4, 2, 2, 3, 2, 5 },                         /* total_coeff  9 */
    { 5, 5, 3, 2, 2, 2, 4 },                            /* total_coeff 10 */
    { 4, 4, 3, 3, 1, 3 },                               /* total_coeff 11 */
    { 4, 4, 2, 1, 3 },                                  /* total_coeff 12 */
    { 3, 3, 1, 2 },                                     /* total_coeff 13 */
    { 2, 2, 1 },                                        /* total_coeff 14 */
    { 1, 1 },                                           /* total_coeff 15 */
};

static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 }, /* total_coeff  1 */
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },    /* total_coeff  2 */
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },       /* total_coeff  3 */
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },          /* total_coeff  4 */
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },             /* total_coeff  5 */
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                /* total_coeff  6 */
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },                   /* total_coeff  7 */
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },                      /* total_coeff  8 */
    { 1, 0, 1, 3, 2, 1, 1, 1 },                         /* total_coeff  9 */
    { 1, 0, 1, 3, 2, 1, 1 },                            /* total_coeff 10 */
    { 0, 1, 1, 2, 1, 3 },                               /* total_coeff 11 */
    { 0, 1, 1, 1, 1 },                                  /* total_coeff 12 */
    { 0, 1, 1, 1 },                                     /* total_coeff 13 */
    { 0, 1, 1 },                                        /* total_coeff 14 */
    { 0, 1 },                                           /* total_coeff 15 */
};
186
/*
 * Code lengths and code words of the total_zeros VLCs for chroma DC blocks.
 * Row r belongs to total_coeff == r + 1, the column is the decoded
 * total_zeros value; unlisted or zero-length entries have no code.
 */

/* Chroma DC with 4 coefficients: total_coeff 1..3. */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 }, /* total_coeff 1 */
    { 1, 2, 2, 0 }, /* total_coeff 2 */
    { 1, 1, 0, 0 }, /* total_coeff 3 */
};

static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 }, /* total_coeff 1 */
    { 1, 1, 0, 0 }, /* total_coeff 2 */
    { 1, 0, 0, 0 }, /* total_coeff 3 */
};

/* Chroma DC with 8 coefficients (4:2:2): total_coeff 1..7. */
static const uint8_t chroma422_dc_total_zeros_len[7][8] = {
    { 1, 3, 3, 4, 4, 4, 5, 5 }, /* total_coeff 1 */
    { 3, 2, 3, 3, 3, 3, 3 },    /* total_coeff 2 */
    { 3, 3, 2, 2, 3, 3 },       /* total_coeff 3 */
    { 3, 2, 2, 2, 3 },          /* total_coeff 4 */
    { 2, 2, 2, 2 },             /* total_coeff 5 */
    { 2, 2, 1 },                /* total_coeff 6 */
    { 1, 1 },                   /* total_coeff 7 */
};

static const uint8_t chroma422_dc_total_zeros_bits[7][8] = {
    { 1, 2, 3, 2, 3, 1, 1, 0 }, /* total_coeff 1 */
    { 0, 1, 1, 4, 5, 6, 7 },    /* total_coeff 2 */
    { 0, 1, 1, 2, 6, 7 },       /* total_coeff 3 */
    { 6, 0, 1, 2, 7 },          /* total_coeff 4 */
    { 0, 1, 2, 3 },             /* total_coeff 5 */
    { 0, 1, 1 },                /* total_coeff 6 */
    { 0, 1 },                   /* total_coeff 7 */
};
218
/*
 * Code lengths and code words of the run_before VLCs; the column is the
 * decoded run_before value. Rows 0..5 feed run_vlc[0..5], which
 * decode_residual() selects while 1..6 zeros are left; row 6 feeds run7_vlc,
 * used when 7 or more zeros are left. Unlisted entries are implicitly zero.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },                                                /* zeros_left 1 */
    { 1, 2, 2 },                                             /* zeros_left 2 */
    { 2, 2, 2, 2 },                                          /* zeros_left 3 */
    { 2, 2, 2, 3, 3 },                                       /* zeros_left 4 */
    { 2, 2, 3, 3, 3, 3 },                                    /* zeros_left 5 */
    { 2, 3, 3, 3, 3, 3, 3 },                                 /* zeros_left 6 */
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },       /* zeros_left >= 7 */
};

static const uint8_t run_bits[7][16] = {
    { 1, 0 },                                                /* zeros_left 1 */
    { 1, 1, 0 },                                             /* zeros_left 2 */
    { 3, 2, 1, 0 },                                          /* zeros_left 3 */
    { 3, 2, 1, 1, 0 },                                       /* zeros_left 4 */
    { 3, 2, 3, 2, 1, 0 },                                    /* zeros_left 5 */
    { 3, 0, 1, 3, 2, 5, 4 },                                 /* zeros_left 6 */
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },         /* zeros_left >= 7 */
};
238
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS           8
277 #define TOTAL_ZEROS_VLC_BITS           9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS                   3
281 #define RUN7_VLC_BITS                  6
282
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325
326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410
411         init_cavlc_level_tab();
412     }
413 }
414
415 /**
416  *
417  */
418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434
435     return log-1;
436 }
437
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     MpegEncContext * const s = &h->s;
447     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448     int level[16];
449     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
450
451     //FIXME put trailing_onex into the context
452
453     if(max_coeff <= 8){
454         if (max_coeff == 4)
455             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
456         else
457             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458         total_coeff= coeff_token>>2;
459     }else{
460         if(n >= LUMA_DC_BLOCK_INDEX){
461             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
462             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463             total_coeff= coeff_token>>2;
464         }else{
465             total_coeff= pred_non_zero_count(h, n);
466             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467             total_coeff= coeff_token>>2;
468         }
469     }
470     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
471
472     //FIXME set last_non_zero?
473
474     if(total_coeff==0)
475         return 0;
476     if(total_coeff > (unsigned)max_coeff) {
477         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
478         return -1;
479     }
480
481     trailing_ones= coeff_token&3;
482     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483     av_assert2(total_coeff<=16);
484
485     i = show_bits(gb, 3);
486     skip_bits(gb, trailing_ones);
487     level[0] = 1-((i&4)>>1);
488     level[1] = 1-((i&2)   );
489     level[2] = 1-((i&1)<<1);
490
491     if(trailing_ones<total_coeff) {
492         int mask, prefix;
493         int suffix_length = total_coeff > 10 & trailing_ones < 3;
494         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
496
497         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
498         if(level_code >= 100){
499             prefix= level_code - 100;
500             if(prefix == LEVEL_TAB_BITS)
501                 prefix += get_level_prefix(gb);
502
503             //first coefficient has suffix_length equal to 0 or 1
504             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
505                 if(suffix_length)
506                     level_code= (prefix<<1) + get_bits1(gb); //part
507                 else
508                     level_code= prefix; //part
509             }else if(prefix==14){
510                 if(suffix_length)
511                     level_code= (prefix<<1) + get_bits1(gb); //part
512                 else
513                     level_code= prefix + get_bits(gb, 4); //part
514             }else{
515                 level_code= 30;
516                 if(prefix>=16){
517                     if(prefix > 25+3){
518                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
519                         return -1;
520                     }
521                     level_code += (1<<(prefix-3))-4096;
522                 }
523                 level_code += get_bits(gb, prefix-3); //part
524             }
525
526             if(trailing_ones < 3) level_code += 2;
527
528             suffix_length = 2;
529             mask= -(level_code&1);
530             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
531         }else{
532             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
533
534             suffix_length = 1 + (level_code + 3U > 6U);
535             level[trailing_ones]= level_code;
536         }
537
538         //remaining coefficients have suffix_length > 0
539         for(i=trailing_ones+1;i<total_coeff;i++) {
540             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
541             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
542             level_code= cavlc_level_tab[suffix_length][bitsi][0];
543
544             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
545             if(level_code >= 100){
546                 prefix= level_code - 100;
547                 if(prefix == LEVEL_TAB_BITS){
548                     prefix += get_level_prefix(gb);
549                 }
550                 if(prefix<15){
551                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
552                 }else{
553                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
554                     if(prefix>=16)
555                         level_code += (1<<(prefix-3))-4096;
556                 }
557                 mask= -(level_code&1);
558                 level_code= (((2+level_code)>>1) ^ mask) - mask;
559             }
560             level[i]= level_code;
561             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
562         }
563     }
564
565     if(total_coeff == max_coeff)
566         zeros_left=0;
567     else{
568         if (max_coeff <= 8) {
569             if (max_coeff == 4)
570                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
571                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
572             else
573                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
574                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
575         } else {
576             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
577         }
578     }
579
580 #define STORE_BLOCK(type) \
581     scantable += zeros_left + total_coeff - 1; \
582     if(n >= LUMA_DC_BLOCK_INDEX){ \
583         ((type*)block)[*scantable] = level[0]; \
584         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
585             if(zeros_left < 7) \
586                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
587             else \
588                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
589             zeros_left -= run_before; \
590             scantable -= 1 + run_before; \
591             ((type*)block)[*scantable]= level[i]; \
592         } \
593         for(;i<total_coeff;i++) { \
594             scantable--; \
595             ((type*)block)[*scantable]= level[i]; \
596         } \
597     }else{ \
598         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
599         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
600             if(zeros_left < 7) \
601                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
602             else \
603                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
604             zeros_left -= run_before; \
605             scantable -= 1 + run_before; \
606             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
607         } \
608         for(;i<total_coeff;i++) { \
609             scantable--; \
610             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
611         } \
612     }
613
614     if (h->pixel_shift) {
615         STORE_BLOCK(int32_t)
616     } else {
617         STORE_BLOCK(int16_t)
618     }
619
620     if(zeros_left<0){
621         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
622         return -1;
623     }
624
625     return 0;
626 }
627
628 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
629     int i4x4, i8x8;
630     MpegEncContext * const s = &h->s;
631     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
632     if(IS_INTRA16x16(mb_type)){
633         AV_ZERO128(h->mb_luma_dc[p]+0);
634         AV_ZERO128(h->mb_luma_dc[p]+8);
635         AV_ZERO128(h->mb_luma_dc[p]+16);
636         AV_ZERO128(h->mb_luma_dc[p]+24);
637         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
638             return -1; //FIXME continue if partitioned and other return -1 too
639         }
640
641         av_assert2((cbp&15) == 0 || (cbp&15) == 15);
642
643         if(cbp&15){
644             for(i8x8=0; i8x8<4; i8x8++){
645                 for(i4x4=0; i4x4<4; i4x4++){
646                     const int index= i4x4 + 4*i8x8 + p*16;
647                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
648                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
649                         return -1;
650                     }
651                 }
652             }
653             return 0xf;
654         }else{
655             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
656             return 0;
657         }
658     }else{
659         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
660         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
661         int new_cbp = 0;
662         for(i8x8=0; i8x8<4; i8x8++){
663             if(cbp & (1<<i8x8)){
664                 if(IS_8x8DCT(mb_type)){
665                     int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
666                     uint8_t *nnz;
667                     for(i4x4=0; i4x4<4; i4x4++){
668                         const int index= i4x4 + 4*i8x8 + p*16;
669                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
670                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
671                             return -1;
672                     }
673                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
674                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
675                     new_cbp |= !!nnz[0] << i8x8;
676                 }else{
677                     for(i4x4=0; i4x4<4; i4x4++){
678                         const int index= i4x4 + 4*i8x8 + p*16;
679                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
680                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
681                             return -1;
682                         }
683                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
684                     }
685                 }
686             }else{
687                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
688                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
689             }
690         }
691         return new_cbp;
692     }
693 }
694
695 int ff_h264_decode_mb_cavlc(H264Context *h){
696     MpegEncContext * const s = &h->s;
697     int mb_xy;
698     int partition_count;
699     unsigned int mb_type, cbp;
700     int dct8x8_allowed= h->pps.transform_8x8_mode;
701     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
702     const int pixel_shift = h->pixel_shift;
703     unsigned local_ref_count[2];
704
705     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
706
707     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
708     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
709                 down the code */
710     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
711         if(s->mb_skip_run==-1)
712             s->mb_skip_run= get_ue_golomb(&s->gb);
713
714         if (s->mb_skip_run--) {
715             if(FRAME_MBAFF && (s->mb_y&1) == 0){
716                 if(s->mb_skip_run==0)
717                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
718             }
719             decode_mb_skip(h);
720             return 0;
721         }
722     }
723     if(FRAME_MBAFF){
724         if( (s->mb_y&1) == 0 )
725             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
726     }
727
728     h->prev_mb_skipped= 0;
729
730     mb_type= get_ue_golomb(&s->gb);
731     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
732         if(mb_type < 23){
733             partition_count= b_mb_type_info[mb_type].partition_count;
734             mb_type=         b_mb_type_info[mb_type].type;
735         }else{
736             mb_type -= 23;
737             goto decode_intra_mb;
738         }
739     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
740         if(mb_type < 5){
741             partition_count= p_mb_type_info[mb_type].partition_count;
742             mb_type=         p_mb_type_info[mb_type].type;
743         }else{
744             mb_type -= 5;
745             goto decode_intra_mb;
746         }
747     }else{
748        av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
749         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
750             mb_type--;
751 decode_intra_mb:
752         if(mb_type > 25){
753             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
754             return -1;
755         }
756         partition_count=0;
757         cbp= i_mb_type_info[mb_type].cbp;
758         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
759         mb_type= i_mb_type_info[mb_type].type;
760     }
761
762     if(MB_FIELD)
763         mb_type |= MB_TYPE_INTERLACED;
764
765     h->slice_table[ mb_xy ]= h->slice_num;
766
767     if(IS_INTRA_PCM(mb_type)){
768         unsigned int x;
769         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
770                             h->sps.bit_depth_luma >> 3;
771
772         // We assume these blocks are very rare so we do not optimize it.
773         align_get_bits(&s->gb);
774
775         // The pixels are stored in the same order as levels in h->mb array.
776         for(x=0; x < mb_size; x++){
777             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
778         }
779
780         // In deblocking, the quantizer is 0
781         s->current_picture.f.qscale_table[mb_xy] = 0;
782         // All coeffs are present
783         memset(h->non_zero_count[mb_xy], 16, 48);
784
785         s->current_picture.f.mb_type[mb_xy] = mb_type;
786         return 0;
787     }
788
789     local_ref_count[0] = h->ref_count[0] << MB_MBAFF;
790     local_ref_count[1] = h->ref_count[1] << MB_MBAFF;
791
792     fill_decode_neighbors(h, mb_type);
793     fill_decode_caches(h, mb_type);
794
795     //mb_pred
796     if(IS_INTRA(mb_type)){
797         int pred_mode;
798 //            init_top_left_availability(h);
799         if(IS_INTRA4x4(mb_type)){
800             int i;
801             int di = 1;
802             if(dct8x8_allowed && get_bits1(&s->gb)){
803                 mb_type |= MB_TYPE_8x8DCT;
804                 di = 4;
805             }
806
807 //                fill_intra4x4_pred_table(h);
808             for(i=0; i<16; i+=di){
809                 int mode= pred_intra_mode(h, i);
810
811                 if(!get_bits1(&s->gb)){
812                     const int rem_mode= get_bits(&s->gb, 3);
813                     mode = rem_mode + (rem_mode >= mode);
814                 }
815
816                 if(di==4)
817                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
818                 else
819                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
820             }
821             write_back_intra_pred_mode(h);
822             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
823                 return -1;
824         }else{
825             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
826             if(h->intra16x16_pred_mode < 0)
827                 return -1;
828         }
829         if(decode_chroma){
830             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
831             if(pred_mode < 0)
832                 return -1;
833             h->chroma_pred_mode= pred_mode;
834         } else {
835             h->chroma_pred_mode = DC_128_PRED8x8;
836         }
837     }else if(partition_count==4){
838         int i, j, sub_partition_count[4], list, ref[2][4];
839
840         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
841             for(i=0; i<4; i++){
842                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
843                 if(h->sub_mb_type[i] >=13){
844                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
845                     return -1;
846                 }
847                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
848                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
849             }
850             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
851                 ff_h264_pred_direct_motion(h, &mb_type);
852                 h->ref_cache[0][scan8[4]] =
853                 h->ref_cache[1][scan8[4]] =
854                 h->ref_cache[0][scan8[12]] =
855                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
856             }
857         }else{
858             av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
859             for(i=0; i<4; i++){
860                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
861                 if(h->sub_mb_type[i] >=4){
862                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
863                     return -1;
864                 }
865                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
866                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
867             }
868         }
869
870         for(list=0; list<h->list_count; list++){
871             int ref_count= IS_REF0(mb_type) ? 1 : local_ref_count[list];
872             for(i=0; i<4; i++){
873                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
874                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
875                     unsigned int tmp;
876                     if(ref_count == 1){
877                         tmp= 0;
878                     }else if(ref_count == 2){
879                         tmp= get_bits1(&s->gb)^1;
880                     }else{
881                         tmp= get_ue_golomb_31(&s->gb);
882                         if(tmp>=ref_count){
883                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
884                             return -1;
885                         }
886                     }
887                     ref[list][i]= tmp;
888                 }else{
889                  //FIXME
890                     ref[list][i] = -1;
891                 }
892             }
893         }
894
895         if(dct8x8_allowed)
896             dct8x8_allowed = get_dct8x8_allowed(h);
897
898         for(list=0; list<h->list_count; list++){
899             for(i=0; i<4; i++){
900                 if(IS_DIRECT(h->sub_mb_type[i])) {
901                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
902                     continue;
903                 }
904                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
905                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
906
907                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
908                     const int sub_mb_type= h->sub_mb_type[i];
909                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
910                     for(j=0; j<sub_partition_count[i]; j++){
911                         int mx, my;
912                         const int index= 4*i + block_width*j;
913                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
914                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
915                         mx += get_se_golomb(&s->gb);
916                         my += get_se_golomb(&s->gb);
917                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
918
919                         if(IS_SUB_8X8(sub_mb_type)){
920                             mv_cache[ 1 ][0]=
921                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
922                             mv_cache[ 1 ][1]=
923                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
924                         }else if(IS_SUB_8X4(sub_mb_type)){
925                             mv_cache[ 1 ][0]= mx;
926                             mv_cache[ 1 ][1]= my;
927                         }else if(IS_SUB_4X8(sub_mb_type)){
928                             mv_cache[ 8 ][0]= mx;
929                             mv_cache[ 8 ][1]= my;
930                         }
931                         mv_cache[ 0 ][0]= mx;
932                         mv_cache[ 0 ][1]= my;
933                     }
934                 }else{
935                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
936                     p[0] = p[1]=
937                     p[8] = p[9]= 0;
938                 }
939             }
940         }
941     }else if(IS_DIRECT(mb_type)){
942         ff_h264_pred_direct_motion(h, &mb_type);
943         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
944     }else{
945         int list, mx, my, i;
946          //FIXME we should set ref_idx_l? to 0 if we use that later ...
947         if(IS_16X16(mb_type)){
948             for(list=0; list<h->list_count; list++){
949                     unsigned int val;
950                     if(IS_DIR(mb_type, 0, list)){
951                         if(local_ref_count[list]==1){
952                             val= 0;
953                         }else if(local_ref_count[list]==2){
954                             val= get_bits1(&s->gb)^1;
955                         }else{
956                             val= get_ue_golomb_31(&s->gb);
957                             if(val >= local_ref_count[list]){
958                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
959                                 return -1;
960                             }
961                         }
962                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
963                     }
964             }
965             for(list=0; list<h->list_count; list++){
966                 if(IS_DIR(mb_type, 0, list)){
967                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
968                     mx += get_se_golomb(&s->gb);
969                     my += get_se_golomb(&s->gb);
970                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
971
972                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
973                 }
974             }
975         }
976         else if(IS_16X8(mb_type)){
977             for(list=0; list<h->list_count; list++){
978                     for(i=0; i<2; i++){
979                         unsigned int val;
980                         if(IS_DIR(mb_type, i, list)){
981                             if(local_ref_count[list] == 1){
982                                 val= 0;
983                             }else if(local_ref_count[list] == 2){
984                                 val= get_bits1(&s->gb)^1;
985                             }else{
986                                 val= get_ue_golomb_31(&s->gb);
987                                 if(val >= local_ref_count[list]){
988                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
989                                     return -1;
990                                 }
991                             }
992                         }else
993                             val= LIST_NOT_USED&0xFF;
994                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
995                     }
996             }
997             for(list=0; list<h->list_count; list++){
998                 for(i=0; i<2; i++){
999                     unsigned int val;
1000                     if(IS_DIR(mb_type, i, list)){
1001                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1002                         mx += get_se_golomb(&s->gb);
1003                         my += get_se_golomb(&s->gb);
1004                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1005
1006                         val= pack16to32(mx,my);
1007                     }else
1008                         val=0;
1009                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1010                 }
1011             }
1012         }else{
1013             av_assert2(IS_8X16(mb_type));
1014             for(list=0; list<h->list_count; list++){
1015                     for(i=0; i<2; i++){
1016                         unsigned int val;
1017                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1018                             if(local_ref_count[list]==1){
1019                                 val= 0;
1020                             }else if(local_ref_count[list]==2){
1021                                 val= get_bits1(&s->gb)^1;
1022                             }else{
1023                                 val= get_ue_golomb_31(&s->gb);
1024                                 if(val >= local_ref_count[list]){
1025                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1026                                     return -1;
1027                                 }
1028                             }
1029                         }else
1030                             val= LIST_NOT_USED&0xFF;
1031                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1032                     }
1033             }
1034             for(list=0; list<h->list_count; list++){
1035                 for(i=0; i<2; i++){
1036                     unsigned int val;
1037                     if(IS_DIR(mb_type, i, list)){
1038                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1039                         mx += get_se_golomb(&s->gb);
1040                         my += get_se_golomb(&s->gb);
1041                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1042
1043                         val= pack16to32(mx,my);
1044                     }else
1045                         val=0;
1046                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1047                 }
1048             }
1049         }
1050     }
1051
1052     if(IS_INTER(mb_type))
1053         write_back_motion(h, mb_type);
1054
1055     if(!IS_INTRA16x16(mb_type)){
1056         cbp= get_ue_golomb(&s->gb);
1057
1058         if(decode_chroma){
1059             if(cbp > 47){
1060                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1061                 return -1;
1062             }
1063             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1064             else                     cbp= golomb_to_inter_cbp   [cbp];
1065         }else{
1066             if(cbp > 15){
1067                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1068                 return -1;
1069             }
1070             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1071             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1072         }
1073     } else {
1074         if (!decode_chroma && cbp>15) {
1075             av_log(s->avctx, AV_LOG_ERROR, "gray chroma\n");
1076             return AVERROR_INVALIDDATA;
1077         }
1078     }
1079
1080     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1081         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1082     }
1083     h->cbp=
1084     h->cbp_table[mb_xy]= cbp;
1085     s->current_picture.f.mb_type[mb_xy] = mb_type;
1086
1087     if(cbp || IS_INTRA16x16(mb_type)){
1088         int i4x4, i8x8, chroma_idx;
1089         int dquant;
1090         int ret;
1091         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1092         const uint8_t *scan, *scan8x8;
1093         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1094
1095         if(IS_INTERLACED(mb_type)){
1096             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1097             scan= s->qscale ? h->field_scan : h->field_scan_q0;
1098         }else{
1099             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1100             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1101         }
1102
1103         dquant= get_se_golomb(&s->gb);
1104
1105         s->qscale += dquant;
1106
1107         if(((unsigned)s->qscale) > max_qp){
1108             if(s->qscale<0) s->qscale+= max_qp+1;
1109             else            s->qscale-= max_qp+1;
1110             if(((unsigned)s->qscale) > max_qp){
1111                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1112                 return -1;
1113             }
1114         }
1115
1116         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1117         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1118
1119         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1120             return -1;
1121         }
1122         h->cbp_table[mb_xy] |= ret << 12;
1123         if(CHROMA444){
1124             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1125                 return -1;
1126             }
1127             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1128                 return -1;
1129             }
1130         } else {
1131             const int num_c8x8 = h->sps.chroma_format_idc;
1132
1133             if(cbp&0x30){
1134                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1135                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1136                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1137                                         CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1138                                         NULL, 4*num_c8x8) < 0) {
1139                         return -1;
1140                     }
1141             }
1142
1143             if(cbp&0x20){
1144                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1145                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1146                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1147                     for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1148                         for (i4x4=0; i4x4<4; i4x4++) {
1149                             const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1150                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1151                                 return -1;
1152                             mb += 16<<pixel_shift;
1153                         }
1154                     }
1155                 }
1156             }else{
1157                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1158                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1159             }
1160         }
1161     }else{
1162         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1163         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1164         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1165     }
1166     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1167     write_back_non_zero_count(h);
1168
1169     return 0;
1170 }