]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
Merge commit 'ffb0af7f17eb0da86e9b140e86a1404d3c6c9e79'
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC(h) 0
29 #define UNCHECKED_BITSTREAM_READER 1
30
31 #include "internal.h"
32 #include "avcodec.h"
33 #include "mpegvideo.h"
34 #include "h264.h"
35 #include "h264data.h" // FIXME FIXME FIXME
36 #include "h264_mvpred.h"
37 #include "golomb.h"
38 #include "libavutil/avassert.h"
39
40
/*
 * Lookup tables from a 4-bit code number to a coded block pattern.
 * NOTE(review): the users of these two tables are not visible in this part
 * of the file; presumably they serve gray (luma only) streams, one for
 * inter and one for intra 4x4 macroblocks -- confirm against the caller.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,  8,  3,  5, 10, 12, 15,  7, 11, 13, 14,  6,  9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11, 13, 14,  3,  5, 10, 12,  1,  2,  4,  8,  6,  9,
};
48
/*
 * Code lengths and code words of the coeff_token VLC for chroma DC blocks.
 * One row of four entries per total_coeff value; the column is the number
 * of trailing ones (the decoded symbol is split as total_coeff = sym >> 2,
 * trailing_ones = sym & 3).
 * Entries with length 0 are presumably unused combinations.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5] = {
    2, 0, 0, 0,
    6, 1, 0, 0,
    6, 6, 3, 0,
    6, 7, 7, 6,
    6, 8, 8, 7,
};

static const uint8_t chroma_dc_coeff_token_bits[4*5] = {
    1, 0, 0, 0,
    7, 1, 0, 0,
    4, 6, 1, 0,
    3, 3, 2, 5,
    2, 3, 2, 0,
};

/* Same layout as above for the 4:2:2 chroma DC coeff_token VLC (9 rows). */
static const uint8_t chroma422_dc_coeff_token_len[4*9] = {
     1,  0,  0,  0,
     7,  2,  0,  0,
     7,  7,  3,  0,
     9,  7,  7,  5,
     9,  9,  7,  6,
    10, 10,  9,  7,
    11, 11, 10,  7,
    12, 12, 11, 10,
    13, 12, 12, 11,
};

static const uint8_t chroma422_dc_coeff_token_bits[4*9] = {
     1,  0,  0,  0,
    15,  1,  0,  0,
    14, 13,  1,  0,
     7, 12, 11,  1,
     6,  5, 10,  1,
     7,  6,  4,  9,
     7,  6,  5,  8,
     7,  6,  5,  4,
     7,  5,  4,  4,
};
88
/*
 * Code lengths of the four coeff_token VLCs used for regular blocks.
 * Within each table there are four entries per total_coeff value (0..16),
 * one for each trailing_ones count (the decoded symbol is split as
 * total_coeff = sym >> 2, trailing_ones = sym & 3).
 * Entries with length 0 are presumably unused combinations.
 */
static const uint8_t coeff_token_len[4][4*17] = {
    { /* table 0 */
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
    },
    { /* table 1 */
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
    },
    { /* table 2 */
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
    },
    { /* table 3: every used code is 6 bits long */
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
    }
};

/* Code words matching coeff_token_len, same indexing. */
static const uint8_t coeff_token_bits[4][4*17] = {
    { /* table 0 */
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
    },
    { /* table 1 */
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
    },
    { /* table 2 */
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
    },
    { /* table 3 */
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
    }
};
150
/*
 * Code lengths of the total_zeros VLCs for regular blocks.
 * Row r is the table used when total_coeff == r + 1; only the first 15
 * rows are populated and turned into VLCs, the 16th stays zero.
 * The column is the coded symbol, which the decoder uses as zeros_left.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },
    { 6, 6, 4, 2, 2, 3, 2, 5 },
    { 5, 5, 3, 2, 2, 2, 4 },
    { 4, 4, 3, 3, 1, 3 },
    { 4, 4, 2, 1, 3 },
    { 3, 3, 1, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/* Code words matching total_zeros_len, same indexing. */
static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },
    { 1, 0, 1, 3, 2, 1, 1, 1 },
    { 1, 0, 1, 3, 2, 1, 1 },
    { 0, 1, 1, 2, 1, 3 },
    { 0, 1, 1, 1, 1 },
    { 0, 1, 1, 1 },
    { 0, 1, 1 },
    { 0, 1 },
};
186
/*
 * total_zeros VLCs for chroma DC blocks (4 coefficients).
 * Row r is the table used when total_coeff == r + 1, the column is the
 * coded symbol, which the decoder uses as zeros_left.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3, },
    { 1, 2, 2, 0, },
    { 1, 1, 0, 0, },
};

static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0, },
    { 1, 1, 0, 0, },
    { 1, 0, 0, 0, },
};

/* Same layout for 4:2:2 chroma DC blocks (8 coefficients). */
static const uint8_t chroma422_dc_total_zeros_len[7][8] = {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

static const uint8_t chroma422_dc_total_zeros_bits[7][8] = {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
218
/*
 * run_before VLCs.
 * Rows 0..5 are the tables used when zeros_left is 1..6, row 6 is the
 * table used for zeros_left >= 7. The column is the coded symbol, which
 * the decoder uses as run_before.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },
    { 1, 2, 2 },
    { 2, 2, 2, 2 },
    { 2, 2, 2, 3, 3 },
    { 2, 2, 3, 3, 3, 3 },
    { 2, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
};

/* Code words matching run_len, same indexing. */
static const uint8_t run_bits[7][16] = {
    { 1, 0 },
    { 1, 1, 0 },
    { 3, 2, 1, 0 },
    { 3, 2, 1, 1, 0 },
    { 3, 2, 3, 2, 1, 0 },
    { 3, 0, 1, 3, 2, 5, 4 },
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
};
238
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS           8
277 #define TOTAL_ZEROS_VLC_BITS           9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS                   3
281 #define RUN7_VLC_BITS                  6
282
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325
326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410
411         init_cavlc_level_tab();
412     }
413 }
414
415 /**
416  *
417  */
418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434
435     return log-1;
436 }
437
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447     int level[16];
448     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449
450     //FIXME put trailing_onex into the context
451
452     if(max_coeff <= 8){
453         if (max_coeff == 4)
454             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455         else
456             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457         total_coeff= coeff_token>>2;
458     }else{
459         if(n >= LUMA_DC_BLOCK_INDEX){
460             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462             total_coeff= coeff_token>>2;
463         }else{
464             total_coeff= pred_non_zero_count(h, n);
465             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466             total_coeff= coeff_token>>2;
467         }
468     }
469     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470
471     //FIXME set last_non_zero?
472
473     if(total_coeff==0)
474         return 0;
475     if(total_coeff > (unsigned)max_coeff) {
476         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
477         return -1;
478     }
479
480     trailing_ones= coeff_token&3;
481     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482     av_assert2(total_coeff<=16);
483
484     i = show_bits(gb, 3);
485     skip_bits(gb, trailing_ones);
486     level[0] = 1-((i&4)>>1);
487     level[1] = 1-((i&2)   );
488     level[2] = 1-((i&1)<<1);
489
490     if(trailing_ones<total_coeff) {
491         int mask, prefix;
492         int suffix_length = total_coeff > 10 & trailing_ones < 3;
493         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495
496         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
497         if(level_code >= 100){
498             prefix= level_code - 100;
499             if(prefix == LEVEL_TAB_BITS)
500                 prefix += get_level_prefix(gb);
501
502             //first coefficient has suffix_length equal to 0 or 1
503             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504                 if(suffix_length)
505                     level_code= (prefix<<1) + get_bits1(gb); //part
506                 else
507                     level_code= prefix; //part
508             }else if(prefix==14){
509                 if(suffix_length)
510                     level_code= (prefix<<1) + get_bits1(gb); //part
511                 else
512                     level_code= prefix + get_bits(gb, 4); //part
513             }else{
514                 level_code= 30;
515                 if(prefix>=16){
516                     if(prefix > 25+3){
517                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
518                         return -1;
519                     }
520                     level_code += (1<<(prefix-3))-4096;
521                 }
522                 level_code += get_bits(gb, prefix-3); //part
523             }
524
525             if(trailing_ones < 3) level_code += 2;
526
527             suffix_length = 2;
528             mask= -(level_code&1);
529             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530         }else{
531             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532
533             suffix_length = 1 + (level_code + 3U > 6U);
534             level[trailing_ones]= level_code;
535         }
536
537         //remaining coefficients have suffix_length > 0
538         for(i=trailing_ones+1;i<total_coeff;i++) {
539             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541             level_code= cavlc_level_tab[suffix_length][bitsi][0];
542
543             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544             if(level_code >= 100){
545                 prefix= level_code - 100;
546                 if(prefix == LEVEL_TAB_BITS){
547                     prefix += get_level_prefix(gb);
548                 }
549                 if(prefix<15){
550                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551                 }else{
552                     level_code = 15<<suffix_length;
553                     if (prefix>=16) {
554                         if(prefix > 25+3){
555                             av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
556                             return AVERROR_INVALIDDATA;
557                         }
558                         level_code += (1<<(prefix-3))-4096;
559                     }
560                     level_code += get_bits(gb, prefix-3);
561                 }
562                 mask= -(level_code&1);
563                 level_code= (((2+level_code)>>1) ^ mask) - mask;
564             }
565             level[i]= level_code;
566             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
567         }
568     }
569
570     if(total_coeff == max_coeff)
571         zeros_left=0;
572     else{
573         if (max_coeff <= 8) {
574             if (max_coeff == 4)
575                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
576                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
577             else
578                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
579                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
580         } else {
581             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
582         }
583     }
584
585 #define STORE_BLOCK(type) \
586     scantable += zeros_left + total_coeff - 1; \
587     if(n >= LUMA_DC_BLOCK_INDEX){ \
588         ((type*)block)[*scantable] = level[0]; \
589         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
590             if(zeros_left < 7) \
591                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
592             else \
593                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
594             zeros_left -= run_before; \
595             scantable -= 1 + run_before; \
596             ((type*)block)[*scantable]= level[i]; \
597         } \
598         for(;i<total_coeff;i++) { \
599             scantable--; \
600             ((type*)block)[*scantable]= level[i]; \
601         } \
602     }else{ \
603         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
604         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
605             if(zeros_left < 7) \
606                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
607             else \
608                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
609             zeros_left -= run_before; \
610             scantable -= 1 + run_before; \
611             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
612         } \
613         for(;i<total_coeff;i++) { \
614             scantable--; \
615             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
616         } \
617     }
618
619     if (h->pixel_shift) {
620         STORE_BLOCK(int32_t)
621     } else {
622         STORE_BLOCK(int16_t)
623     }
624
625     if(zeros_left<0){
626         av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
627         return -1;
628     }
629
630     return 0;
631 }
632
633 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
634     int i4x4, i8x8;
635     int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
636     if(IS_INTRA16x16(mb_type)){
637         AV_ZERO128(h->mb_luma_dc[p]+0);
638         AV_ZERO128(h->mb_luma_dc[p]+8);
639         AV_ZERO128(h->mb_luma_dc[p]+16);
640         AV_ZERO128(h->mb_luma_dc[p]+24);
641         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
642             return -1; //FIXME continue if partitioned and other return -1 too
643         }
644
645         av_assert2((cbp&15) == 0 || (cbp&15) == 15);
646
647         if(cbp&15){
648             for(i8x8=0; i8x8<4; i8x8++){
649                 for(i4x4=0; i4x4<4; i4x4++){
650                     const int index= i4x4 + 4*i8x8 + p*16;
651                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
652                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
653                         return -1;
654                     }
655                 }
656             }
657             return 0xf;
658         }else{
659             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
660             return 0;
661         }
662     }else{
663         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
664         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
665         int new_cbp = 0;
666         for(i8x8=0; i8x8<4; i8x8++){
667             if(cbp & (1<<i8x8)){
668                 if(IS_8x8DCT(mb_type)){
669                     int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
670                     uint8_t *nnz;
671                     for(i4x4=0; i4x4<4; i4x4++){
672                         const int index= i4x4 + 4*i8x8 + p*16;
673                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
674                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
675                             return -1;
676                     }
677                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
678                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
679                     new_cbp |= !!nnz[0] << i8x8;
680                 }else{
681                     for(i4x4=0; i4x4<4; i4x4++){
682                         const int index= i4x4 + 4*i8x8 + p*16;
683                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
684                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
685                             return -1;
686                         }
687                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
688                     }
689                 }
690             }else{
691                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
692                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
693             }
694         }
695         return new_cbp;
696     }
697 }
698
699 int ff_h264_decode_mb_cavlc(H264Context *h){
700     int mb_xy;
701     int partition_count;
702     unsigned int mb_type, cbp;
703     int dct8x8_allowed= h->pps.transform_8x8_mode;
704     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
705     const int pixel_shift = h->pixel_shift;
706     unsigned local_ref_count[2];
707
708     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
709
710     tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
711     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
712                 down the code */
713     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
714         if(h->mb_skip_run==-1)
715             h->mb_skip_run= get_ue_golomb_long(&h->gb);
716
717         if (h->mb_skip_run--) {
718             if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
719                 if(h->mb_skip_run==0)
720                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
721             }
722             decode_mb_skip(h);
723             return 0;
724         }
725     }
726     if (FRAME_MBAFF(h)) {
727         if( (h->mb_y&1) == 0 )
728             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
729     }
730
731     h->prev_mb_skipped= 0;
732
733     mb_type= get_ue_golomb(&h->gb);
734     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
735         if(mb_type < 23){
736             partition_count= b_mb_type_info[mb_type].partition_count;
737             mb_type=         b_mb_type_info[mb_type].type;
738         }else{
739             mb_type -= 23;
740             goto decode_intra_mb;
741         }
742     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
743         if(mb_type < 5){
744             partition_count= p_mb_type_info[mb_type].partition_count;
745             mb_type=         p_mb_type_info[mb_type].type;
746         }else{
747             mb_type -= 5;
748             goto decode_intra_mb;
749         }
750     }else{
751        av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
752         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
753             mb_type--;
754 decode_intra_mb:
755         if(mb_type > 25){
756             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
757             return -1;
758         }
759         partition_count=0;
760         cbp= i_mb_type_info[mb_type].cbp;
761         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
762         mb_type= i_mb_type_info[mb_type].type;
763     }
764
765     if(MB_FIELD(h))
766         mb_type |= MB_TYPE_INTERLACED;
767
768     h->slice_table[ mb_xy ]= h->slice_num;
769
770     if(IS_INTRA_PCM(mb_type)){
771         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
772                             h->sps.bit_depth_luma;
773
774         // We assume these blocks are very rare so we do not optimize it.
775         h->intra_pcm_ptr = align_get_bits(&h->gb);
776         skip_bits_long(&h->gb, mb_size);
777
778         // In deblocking, the quantizer is 0
779         h->cur_pic.qscale_table[mb_xy] = 0;
780         // All coeffs are present
781         memset(h->non_zero_count[mb_xy], 16, 48);
782
783         h->cur_pic.mb_type[mb_xy] = mb_type;
784         return 0;
785     }
786
787     local_ref_count[0] = h->ref_count[0] << MB_MBAFF(h);
788     local_ref_count[1] = h->ref_count[1] << MB_MBAFF(h);
789
790     fill_decode_neighbors(h, mb_type);
791     fill_decode_caches(h, mb_type);
792
793     //mb_pred
794     if(IS_INTRA(mb_type)){
795         int pred_mode;
796 //            init_top_left_availability(h);
797         if(IS_INTRA4x4(mb_type)){
798             int i;
799             int di = 1;
800             if(dct8x8_allowed && get_bits1(&h->gb)){
801                 mb_type |= MB_TYPE_8x8DCT;
802                 di = 4;
803             }
804
805 //                fill_intra4x4_pred_table(h);
806             for(i=0; i<16; i+=di){
807                 int mode= pred_intra_mode(h, i);
808
809                 if(!get_bits1(&h->gb)){
810                     const int rem_mode= get_bits(&h->gb, 3);
811                     mode = rem_mode + (rem_mode >= mode);
812                 }
813
814                 if(di==4)
815                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
816                 else
817                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
818             }
819             write_back_intra_pred_mode(h);
820             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
821                 return -1;
822         }else{
823             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
824             if(h->intra16x16_pred_mode < 0)
825                 return -1;
826         }
827         if(decode_chroma){
828             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
829             if(pred_mode < 0)
830                 return -1;
831             h->chroma_pred_mode= pred_mode;
832         } else {
833             h->chroma_pred_mode = DC_128_PRED8x8;
834         }
835     }else if(partition_count==4){
836         int i, j, sub_partition_count[4], list, ref[2][4];
837
838         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
839             for(i=0; i<4; i++){
840                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
841                 if(h->sub_mb_type[i] >=13){
842                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
843                     return -1;
844                 }
845                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
846                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
847             }
848             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
849                 ff_h264_pred_direct_motion(h, &mb_type);
850                 h->ref_cache[0][scan8[4]] =
851                 h->ref_cache[1][scan8[4]] =
852                 h->ref_cache[0][scan8[12]] =
853                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
854             }
855         }else{
856             av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
857             for(i=0; i<4; i++){
858                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
859                 if(h->sub_mb_type[i] >=4){
860                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
861                     return -1;
862                 }
863                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
864                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
865             }
866         }
867
868         for(list=0; list<h->list_count; list++){
869             int ref_count = IS_REF0(mb_type) ? 1 : local_ref_count[list];
870             for(i=0; i<4; i++){
871                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
872                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
873                     unsigned int tmp;
874                     if(ref_count == 1){
875                         tmp= 0;
876                     }else if(ref_count == 2){
877                         tmp= get_bits1(&h->gb)^1;
878                     }else{
879                         tmp= get_ue_golomb_31(&h->gb);
880                         if(tmp>=ref_count){
881                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
882                             return -1;
883                         }
884                     }
885                     ref[list][i]= tmp;
886                 }else{
887                  //FIXME
888                     ref[list][i] = -1;
889                 }
890             }
891         }
892
893         if(dct8x8_allowed)
894             dct8x8_allowed = get_dct8x8_allowed(h);
895
896         for(list=0; list<h->list_count; list++){
897             for(i=0; i<4; i++){
898                 if(IS_DIRECT(h->sub_mb_type[i])) {
899                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
900                     continue;
901                 }
902                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
903                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
904
905                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
906                     const int sub_mb_type= h->sub_mb_type[i];
907                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
908                     for(j=0; j<sub_partition_count[i]; j++){
909                         int mx, my;
910                         const int index= 4*i + block_width*j;
911                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
912                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
913                         mx += get_se_golomb(&h->gb);
914                         my += get_se_golomb(&h->gb);
915                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
916
917                         if(IS_SUB_8X8(sub_mb_type)){
918                             mv_cache[ 1 ][0]=
919                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
920                             mv_cache[ 1 ][1]=
921                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
922                         }else if(IS_SUB_8X4(sub_mb_type)){
923                             mv_cache[ 1 ][0]= mx;
924                             mv_cache[ 1 ][1]= my;
925                         }else if(IS_SUB_4X8(sub_mb_type)){
926                             mv_cache[ 8 ][0]= mx;
927                             mv_cache[ 8 ][1]= my;
928                         }
929                         mv_cache[ 0 ][0]= mx;
930                         mv_cache[ 0 ][1]= my;
931                     }
932                 }else{
933                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
934                     p[0] = p[1]=
935                     p[8] = p[9]= 0;
936                 }
937             }
938         }
939     }else if(IS_DIRECT(mb_type)){
940         ff_h264_pred_direct_motion(h, &mb_type);
941         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
942     }else{
943         int list, mx, my, i;
944          //FIXME we should set ref_idx_l? to 0 if we use that later ...
945         if(IS_16X16(mb_type)){
946             for(list=0; list<h->list_count; list++){
947                     unsigned int val;
948                     if(IS_DIR(mb_type, 0, list)){
949                         if(local_ref_count[list]==1){
950                             val= 0;
951                         } else if(local_ref_count[list]==2){
952                             val= get_bits1(&h->gb)^1;
953                         }else{
954                             val= get_ue_golomb_31(&h->gb);
955                             if (val >= local_ref_count[list]){
956                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
957                                 return -1;
958                             }
959                         }
960                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
961                     }
962             }
963             for(list=0; list<h->list_count; list++){
964                 if(IS_DIR(mb_type, 0, list)){
965                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
966                     mx += get_se_golomb(&h->gb);
967                     my += get_se_golomb(&h->gb);
968                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
969
970                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
971                 }
972             }
973         }
974         else if(IS_16X8(mb_type)){
975             for(list=0; list<h->list_count; list++){
976                     for(i=0; i<2; i++){
977                         unsigned int val;
978                         if(IS_DIR(mb_type, i, list)){
979                             if(local_ref_count[list] == 1) {
980                                 val= 0;
981                             } else if(local_ref_count[list] == 2) {
982                                 val= get_bits1(&h->gb)^1;
983                             }else{
984                                 val= get_ue_golomb_31(&h->gb);
985                                 if (val >= local_ref_count[list]){
986                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
987                                     return -1;
988                                 }
989                             }
990                         }else
991                             val= LIST_NOT_USED&0xFF;
992                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
993                     }
994             }
995             for(list=0; list<h->list_count; list++){
996                 for(i=0; i<2; i++){
997                     unsigned int val;
998                     if(IS_DIR(mb_type, i, list)){
999                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1000                         mx += get_se_golomb(&h->gb);
1001                         my += get_se_golomb(&h->gb);
1002                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1003
1004                         val= pack16to32(mx,my);
1005                     }else
1006                         val=0;
1007                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1008                 }
1009             }
1010         }else{
1011             av_assert2(IS_8X16(mb_type));
1012             for(list=0; list<h->list_count; list++){
1013                     for(i=0; i<2; i++){
1014                         unsigned int val;
1015                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1016                             if(local_ref_count[list]==1){
1017                                 val= 0;
1018                             } else if(local_ref_count[list]==2){
1019                                 val= get_bits1(&h->gb)^1;
1020                             }else{
1021                                 val= get_ue_golomb_31(&h->gb);
1022                                 if (val >= local_ref_count[list]){
1023                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1024                                     return -1;
1025                                 }
1026                             }
1027                         }else
1028                             val= LIST_NOT_USED&0xFF;
1029                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1030                     }
1031             }
1032             for(list=0; list<h->list_count; list++){
1033                 for(i=0; i<2; i++){
1034                     unsigned int val;
1035                     if(IS_DIR(mb_type, i, list)){
1036                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1037                         mx += get_se_golomb(&h->gb);
1038                         my += get_se_golomb(&h->gb);
1039                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1040
1041                         val= pack16to32(mx,my);
1042                     }else
1043                         val=0;
1044                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1045                 }
1046             }
1047         }
1048     }
1049
1050     if(IS_INTER(mb_type))
1051         write_back_motion(h, mb_type);
1052
1053     if(!IS_INTRA16x16(mb_type)){
1054         cbp= get_ue_golomb(&h->gb);
1055
1056         if(decode_chroma){
1057             if(cbp > 47){
1058                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1059                 return -1;
1060             }
1061             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1062             else                     cbp= golomb_to_inter_cbp   [cbp];
1063         }else{
1064             if(cbp > 15){
1065                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1066                 return -1;
1067             }
1068             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1069             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1070         }
1071     } else {
1072         if (!decode_chroma && cbp>15) {
1073             av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1074             return AVERROR_INVALIDDATA;
1075         }
1076     }
1077
1078     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1079         mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1080     }
1081     h->cbp=
1082     h->cbp_table[mb_xy]= cbp;
1083     h->cur_pic.mb_type[mb_xy] = mb_type;
1084
1085     if(cbp || IS_INTRA16x16(mb_type)){
1086         int i4x4, i8x8, chroma_idx;
1087         int dquant;
1088         int ret;
1089         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1090         const uint8_t *scan, *scan8x8;
1091         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1092
1093         if(IS_INTERLACED(mb_type)){
1094             scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1095             scan= h->qscale ? h->field_scan : h->field_scan_q0;
1096         }else{
1097             scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1098             scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1099         }
1100
1101         dquant= get_se_golomb(&h->gb);
1102
1103         h->qscale += dquant;
1104
1105         if(((unsigned)h->qscale) > max_qp){
1106             if(h->qscale<0) h->qscale+= max_qp+1;
1107             else            h->qscale-= max_qp+1;
1108             if(((unsigned)h->qscale) > max_qp){
1109                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1110                 return -1;
1111             }
1112         }
1113
1114         h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1115         h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1116
1117         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1118             return -1;
1119         }
1120         h->cbp_table[mb_xy] |= ret << 12;
1121         if (CHROMA444(h)) {
1122             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1123                 return -1;
1124             }
1125             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1126                 return -1;
1127             }
1128         } else {
1129             const int num_c8x8 = h->sps.chroma_format_idc;
1130
1131             if(cbp&0x30){
1132                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1133                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1134                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1135                                         CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
1136                                         NULL, 4*num_c8x8) < 0) {
1137                         return -1;
1138                     }
1139             }
1140
1141             if(cbp&0x20){
1142                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1143                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1144                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1145                     for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1146                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1147                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1148                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1149                                 return -1;
1150                             mb += 16 << pixel_shift;
1151                         }
1152                     }
1153                 }
1154             }else{
1155                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1156                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1157             }
1158         }
1159     }else{
1160         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1161         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1162         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1163     }
1164     h->cur_pic.qscale_table[mb_xy] = h->qscale;
1165     write_back_non_zero_count(h);
1166
1167     return 0;
1168 }