]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
timecode: support >24h timecode.
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29 #define UNCHECKED_BITSTREAM_READER 1
30
31 #include "internal.h"
32 #include "avcodec.h"
33 #include "mpegvideo.h"
34 #include "h264.h"
35 #include "h264data.h" // FIXME FIXME FIXME
36 #include "h264_mvpred.h"
37 #include "golomb.h"
38
39 //#undef NDEBUG
40 #include <assert.h>
41
/* 16-entry lookup table named for translating a Golomb code number into a
 * coded block pattern for inter macroblocks.
 * NOTE(review): the lookup site is outside the visible part of this file;
 * the "_gray" suffix presumably refers to monochrome (no chroma) streams --
 * confirm against the caller. */
42 static const uint8_t golomb_to_inter_cbp_gray[16]={
43  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
44 };
45
/* Counterpart of golomb_to_inter_cbp_gray, named for intra 4x4 macroblocks.
 * NOTE(review): usage is not visible here -- confirm against the caller. */
46 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
47 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
48 };
49
/* Code lengths used to build chroma_dc_coeff_token_vlc in
 * ff_h264_decode_init_vlc() (20 codes, paired with
 * chroma_dc_coeff_token_bits below).
 * decode_residual() splits the decoded symbol into total_coeff (symbol >> 2)
 * and trailing_ones (symbol & 3), so each row of four presumably corresponds
 * to one total_coeff value -- assumes init_vlc() numbers symbols by table
 * position, TODO confirm. A length of 0 presumably marks an unused slot. */
50 static const uint8_t chroma_dc_coeff_token_len[4*5]={
51  2, 0, 0, 0,
52  6, 1, 0, 0,
53  6, 6, 3, 0,
54  6, 7, 7, 6,
55  6, 8, 8, 7,
56 };
57
/* Code bit patterns matching chroma_dc_coeff_token_len entry for entry. */
58 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
59  1, 0, 0, 0,
60  7, 1, 0, 0,
61  4, 6, 1, 0,
62  3, 3, 2, 5,
63  2, 3, 2, 0,
64 };
65
/* Code lengths used to build chroma422_dc_coeff_token_vlc in
 * ff_h264_decode_init_vlc() (36 codes). decode_residual() selects that VLC
 * when max_coeff <= 8 but max_coeff != 4. Same row layout as the table
 * above. */
66 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
67   1,  0,  0,  0,
68   7,  2,  0,  0,
69   7,  7,  3,  0,
70   9,  7,  7,  5,
71   9,  9,  7,  6,
72  10, 10,  9,  7,
73  11, 11, 10,  7,
74  12, 12, 11, 10,
75  13, 12, 12, 11,
76 };
77
/* Code bit patterns matching chroma422_dc_coeff_token_len entry for entry. */
78 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
79   1,   0,  0, 0,
80  15,   1,  0, 0,
81  14,  13,  1, 0,
82   7,  12, 11, 1,
83   6,   5, 10, 1,
84   7,   6,  4, 9,
85   7,   6,  5, 8,
86   7,   6,  5, 4,
87   7,   5,  4, 4,
88 };
89
/* Code lengths for the four coeff_token VLCs built in
 * ff_h264_decode_init_vlc() (68 codes each). The first index selects the
 * VLC; decode_residual() picks it through coeff_token_table_index[] from the
 * predicted non-zero count. Within one table the decoded symbol is split
 * into total_coeff (symbol >> 2) and trailing_ones (symbol & 3), so each
 * group of four presumably belongs to one total_coeff value -- assumes
 * init_vlc() numbers symbols by table position, TODO confirm. */
90 static const uint8_t coeff_token_len[4][4*17]={
91 {
92      1, 0, 0, 0,
93      6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
94     11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
95     14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
96     16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
97 },
98 {
99      2, 0, 0, 0,
100      6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
101      8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
102     12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
103     13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
104 },
105 {
106      4, 0, 0, 0,
107      6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
108      7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
109      8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
110     10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
111 },
112 {
113      6, 0, 0, 0,
114      6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
115      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
116      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
117      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
118 }
119 };
120
/* Code bit patterns matching coeff_token_len entry for entry. */
121 static const uint8_t coeff_token_bits[4][4*17]={
122 {
123      1, 0, 0, 0,
124      5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
125      7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
126     15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
127     15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
128 },
129 {
130      3, 0, 0, 0,
131     11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
132      4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
133     15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
134     11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
135 },
136 {
137     15, 0, 0, 0,
138     15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
139     11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
140     11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
141     13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
142 },
143 {
144      3, 0, 0, 0,
145      0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
146     16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
147     32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
148     48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
149 }
150 };
151
/* Code lengths for the total_zeros VLCs. ff_h264_decode_init_vlc() builds
 * total_zeros_vlc[i] from row i for i = 0..14 (16 codes per row; only 15 of
 * the 16 declared rows carry initializers). decode_residual() selects the
 * VLC for total_coeff - 1 and stores the decoded value in zeros_left, so the
 * column presumably is the zeros_left value -- assumes init_vlc() numbers
 * symbols by position, TODO confirm. Rows get shorter as the row index
 * grows; the omitted trailing entries are zero-initialized. */
152 static const uint8_t total_zeros_len[16][16]= {
153     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
154     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
155     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
156     {5,3,4,4,3,3,3,4,3,4,5,5,5},
157     {4,4,4,3,3,3,3,3,4,5,4,5},
158     {6,5,3,3,3,3,3,3,4,3,6},
159     {6,5,3,3,3,2,3,4,3,6},
160     {6,4,5,3,2,2,3,3,6},
161     {6,6,4,2,2,3,2,5},
162     {5,5,3,2,2,2,4},
163     {4,4,3,3,1,3},
164     {4,4,2,1,3},
165     {3,3,1,2},
166     {2,2,1},
167     {1,1},
168 };
169
/* Code bit patterns matching total_zeros_len entry for entry. */
170 static const uint8_t total_zeros_bits[16][16]= {
171     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
172     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
173     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
174     {3,7,5,4,6,5,4,3,3,2,2,1,0},
175     {5,4,3,7,6,5,4,3,2,1,1,0},
176     {1,1,7,6,5,4,3,2,1,1,0},
177     {1,1,5,4,3,3,2,1,1,0},
178     {1,1,1,3,3,2,2,1,0},
179     {1,0,1,3,2,1,1,1},
180     {1,0,1,3,2,1,1},
181     {0,1,1,2,1,3},
182     {0,1,1,1,1},
183     {0,1,1,1},
184     {0,1,1},
185     {0,1},
186 };
187
/* Code lengths for chroma_dc_total_zeros_vlc[0..2], built in
 * ff_h264_decode_init_vlc() with 4 codes per row. decode_residual() uses the
 * VLC for total_coeff - 1 when max_coeff == 4 and stores the result in
 * zeros_left. */
188 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
189     { 1, 2, 3, 3,},
190     { 1, 2, 2, 0,},
191     { 1, 1, 0, 0,},
192 };
193
/* Code bit patterns matching chroma_dc_total_zeros_len entry for entry. */
194 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
195     { 1, 1, 1, 0,},
196     { 1, 1, 0, 0,},
197     { 1, 0, 0, 0,},
198 };
199
/* Code lengths for chroma422_dc_total_zeros_vlc[0..6], built in
 * ff_h264_decode_init_vlc() with 8 codes per row. decode_residual() uses the
 * VLC for total_coeff - 1 when max_coeff <= 8 but max_coeff != 4. Omitted
 * trailing entries are zero-initialized. */
200 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
201     { 1, 3, 3, 4, 4, 4, 5, 5 },
202     { 3, 2, 3, 3, 3, 3, 3 },
203     { 3, 3, 2, 2, 3, 3 },
204     { 3, 2, 2, 2, 3 },
205     { 2, 2, 2, 2 },
206     { 2, 2, 1 },
207     { 1, 1 },
208 };
209
/* Code bit patterns matching chroma422_dc_total_zeros_len entry for entry. */
210 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
211     { 1, 2, 3, 2, 3, 1, 1, 0 },
212     { 0, 1, 1, 4, 5, 6, 7 },
213     { 0, 1, 1, 2, 6, 7 },
214     { 6, 0, 1, 2, 7 },
215     { 0, 1, 2, 3 },
216     { 0, 1, 1 },
217     { 0, 1 },
218 };
219
/* Code lengths for the run_before VLCs. ff_h264_decode_init_vlc() builds
 * run_vlc[i] from rows 0..5 (7 codes each) and run7_vlc from row 6
 * (16 codes). decode_residual() uses the run_vlc entry for zeros_left - 1
 * while zeros_left < 7 and run7_vlc otherwise. Omitted trailing entries are
 * zero-initialized. */
220 static const uint8_t run_len[7][16]={
221     {1,1},
222     {1,2,2},
223     {2,2,2,2},
224     {2,2,2,3,3},
225     {2,2,3,3,3,3},
226     {2,3,3,3,3,3,3},
227     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
228 };
229
/* Code bit patterns matching run_len entry for entry. */
230 static const uint8_t run_bits[7][16]={
231     {1,0},
232     {1,1,0},
233     {3,2,1,0},
234     {3,2,1,1,0},
235     {3,2,3,2,1,0},
236     {3,0,1,3,2,5,4},
237     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
238 };
239
/* Static VLC decoders and their backing storage. ff_h264_decode_init_vlc()
 * points each VLC's .table at the matching array below and records the
 * matching *_size constant in .table_allocated before calling init_vlc(). */

/* The four coeff_token VLCs share one packed array; the init code carves it
 * up at running offsets of coeff_token_vlc_tables_size[i] entries and
 * asserts that the sizes add up to the array length. */
240 static VLC coeff_token_vlc[4];
241 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
242 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
243
244 static VLC chroma_dc_coeff_token_vlc;
245 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
246 static const int chroma_dc_coeff_token_vlc_table_size = 256;
247
248 static VLC chroma422_dc_coeff_token_vlc;
249 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
250 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
251
252 static VLC total_zeros_vlc[15];
253 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
254 static const int total_zeros_vlc_tables_size = 512;
255
256 static VLC chroma_dc_total_zeros_vlc[3];
257 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
258 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
259
260 static VLC chroma422_dc_total_zeros_vlc[7];
261 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
262 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
263
264 static VLC run_vlc[6];
265 static VLC_TYPE run_vlc_tables[6][8][2];
266 static const int run_vlc_tables_size = 8;
267
268 static VLC run7_vlc;
269 static VLC_TYPE run7_vlc_table[96][2];
270 static const int run7_vlc_table_size = 96;
271
/* Number of upcoming bitstream bits used to index cavlc_level_tab. */
272 #define LEVEL_TAB_BITS 8
/* Level lookup filled by init_cavlc_level_tab(), indexed
 * [suffix_length][next LEVEL_TAB_BITS bits]. Element [0] is either a decoded
 * signed level or an escape value >= 100 (prefix + 100) that tells
 * decode_residual() to finish decoding by hand; element [1] is the number of
 * bits to skip. */
273 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
274
/* Bit widths handed to init_vlc() when the tables are built and to
 * get_vlc2() when they are read. */
275 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
276 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
277 #define COEFF_TOKEN_VLC_BITS           8
278 #define TOTAL_ZEROS_VLC_BITS           9
279 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
280 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
281 #define RUN_VLC_BITS                   3
282 #define RUN7_VLC_BITS                  6
283
284 /**
285  * Get the predicted number of non-zero coefficients.
286  * @param n block index
287  */
288 static inline int pred_non_zero_count(H264Context *h, int n){
289     const int index8= scan8[n];
290     const int left= h->non_zero_count_cache[index8 - 1];
291     const int top = h->non_zero_count_cache[index8 - 8];
292     int i= left + top;
293
294     if(i<64) i= (i+1)>>1;
295
296     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
297
298     return i&31;
299 }
300
301 static av_cold void init_cavlc_level_tab(void){
302     int suffix_length;
303     unsigned int i;
304
305     for(suffix_length=0; suffix_length<7; suffix_length++){
306         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
307             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
308
309             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
310                 int level_code = (prefix << suffix_length) +
311                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
312                 int mask = -(level_code&1);
313                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
314                 cavlc_level_tab[suffix_length][i][0]= level_code;
315                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
316             }else if(prefix + 1 <= LEVEL_TAB_BITS){
317                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
318                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
319             }else{
320                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
321                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
322             }
323         }
324     }
325 }
326
327 av_cold void ff_h264_decode_init_vlc(void){
328     static int done = 0;
329
330     if (!done) {
331         int i;
332         int offset;
333         done = 1;
334
335         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
336         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
337         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
338                  &chroma_dc_coeff_token_len [0], 1, 1,
339                  &chroma_dc_coeff_token_bits[0], 1, 1,
340                  INIT_VLC_USE_NEW_STATIC);
341
342         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
343         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
344         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
345                  &chroma422_dc_coeff_token_len [0], 1, 1,
346                  &chroma422_dc_coeff_token_bits[0], 1, 1,
347                  INIT_VLC_USE_NEW_STATIC);
348
349         offset = 0;
350         for(i=0; i<4; i++){
351             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
352             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
353             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
354                      &coeff_token_len [i][0], 1, 1,
355                      &coeff_token_bits[i][0], 1, 1,
356                      INIT_VLC_USE_NEW_STATIC);
357             offset += coeff_token_vlc_tables_size[i];
358         }
359         /*
360          * This is a one time safety check to make sure that
361          * the packed static coeff_token_vlc table sizes
362          * were initialized correctly.
363          */
364         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
365
366         for(i=0; i<3; i++){
367             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
368             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
369             init_vlc(&chroma_dc_total_zeros_vlc[i],
370                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
371                      &chroma_dc_total_zeros_len [i][0], 1, 1,
372                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
373                      INIT_VLC_USE_NEW_STATIC);
374         }
375
376         for(i=0; i<7; i++){
377             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
378             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
379             init_vlc(&chroma422_dc_total_zeros_vlc[i],
380                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
381                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
382                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383                      INIT_VLC_USE_NEW_STATIC);
384         }
385
386         for(i=0; i<15; i++){
387             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
388             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
389             init_vlc(&total_zeros_vlc[i],
390                      TOTAL_ZEROS_VLC_BITS, 16,
391                      &total_zeros_len [i][0], 1, 1,
392                      &total_zeros_bits[i][0], 1, 1,
393                      INIT_VLC_USE_NEW_STATIC);
394         }
395
396         for(i=0; i<6; i++){
397             run_vlc[i].table = run_vlc_tables[i];
398             run_vlc[i].table_allocated = run_vlc_tables_size;
399             init_vlc(&run_vlc[i],
400                      RUN_VLC_BITS, 7,
401                      &run_len [i][0], 1, 1,
402                      &run_bits[i][0], 1, 1,
403                      INIT_VLC_USE_NEW_STATIC);
404         }
405         run7_vlc.table = run7_vlc_table,
406         run7_vlc.table_allocated = run7_vlc_table_size;
407         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
408                  &run_len [6][0], 1, 1,
409                  &run_bits[6][0], 1, 1,
410                  INIT_VLC_USE_NEW_STATIC);
411
412         init_cavlc_level_tab();
413     }
414 }
415
416 /**
417  *
418  */
419 static inline int get_level_prefix(GetBitContext *gb){
420     unsigned int buf;
421     int log;
422
423     OPEN_READER(re, gb);
424     UPDATE_CACHE(re, gb);
425     buf=GET_CACHE(re, gb);
426
427     log= 32 - av_log2(buf);
428 #ifdef TRACE
429     print_bin(buf>>(32-log), log);
430     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
431 #endif
432
433     LAST_SKIP_BITS(re, gb, log);
434     CLOSE_READER(re, gb);
435
436     return log-1;
437 }
438
439 /**
440  * Decode a residual block.
441  * @param n block index
442  * @param scantable scantable
443  * @param max_coeff number of coefficients in the block
444  * @return <0 if an error occurred
445  */
446 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
447     MpegEncContext * const s = &h->s;
448     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
449     int level[16];
450     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
451
452     //FIXME put trailing_onex into the context
453
454     if(max_coeff <= 8){
455         if (max_coeff == 4)
456             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
457         else
458             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
459         total_coeff= coeff_token>>2;
460     }else{
461         if(n >= LUMA_DC_BLOCK_INDEX){
462             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
463             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
464             total_coeff= coeff_token>>2;
465         }else{
466             total_coeff= pred_non_zero_count(h, n);
467             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
468             total_coeff= coeff_token>>2;
469         }
470     }
471     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
472
473     //FIXME set last_non_zero?
474
475     if(total_coeff==0)
476         return 0;
477     if(total_coeff > (unsigned)max_coeff) {
478         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
479         return -1;
480     }
481
482     trailing_ones= coeff_token&3;
483     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
484     assert(total_coeff<=16);
485
486     i = show_bits(gb, 3);
487     skip_bits(gb, trailing_ones);
488     level[0] = 1-((i&4)>>1);
489     level[1] = 1-((i&2)   );
490     level[2] = 1-((i&1)<<1);
491
492     if(trailing_ones<total_coeff) {
493         int mask, prefix;
494         int suffix_length = total_coeff > 10 & trailing_ones < 3;
495         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
496         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
497
498         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
499         if(level_code >= 100){
500             prefix= level_code - 100;
501             if(prefix == LEVEL_TAB_BITS)
502                 prefix += get_level_prefix(gb);
503
504             //first coefficient has suffix_length equal to 0 or 1
505             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
506                 if(suffix_length)
507                     level_code= (prefix<<1) + get_bits1(gb); //part
508                 else
509                     level_code= prefix; //part
510             }else if(prefix==14){
511                 if(suffix_length)
512                     level_code= (prefix<<1) + get_bits1(gb); //part
513                 else
514                     level_code= prefix + get_bits(gb, 4); //part
515             }else{
516                 level_code= 30 + get_bits(gb, prefix-3); //part
517                 if(prefix>=16){
518                     if(prefix > 25+3){
519                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
520                         return -1;
521                     }
522                     level_code += (1<<(prefix-3))-4096;
523                 }
524             }
525
526             if(trailing_ones < 3) level_code += 2;
527
528             suffix_length = 2;
529             mask= -(level_code&1);
530             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
531         }else{
532             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
533
534             suffix_length = 1 + (level_code + 3U > 6U);
535             level[trailing_ones]= level_code;
536         }
537
538         //remaining coefficients have suffix_length > 0
539         for(i=trailing_ones+1;i<total_coeff;i++) {
540             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
541             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
542             level_code= cavlc_level_tab[suffix_length][bitsi][0];
543
544             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
545             if(level_code >= 100){
546                 prefix= level_code - 100;
547                 if(prefix == LEVEL_TAB_BITS){
548                     prefix += get_level_prefix(gb);
549                 }
550                 if(prefix<15){
551                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
552                 }else{
553                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
554                     if(prefix>=16)
555                         level_code += (1<<(prefix-3))-4096;
556                 }
557                 mask= -(level_code&1);
558                 level_code= (((2+level_code)>>1) ^ mask) - mask;
559             }
560             level[i]= level_code;
561             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
562         }
563     }
564
565     if(total_coeff == max_coeff)
566         zeros_left=0;
567     else{
568         if (max_coeff <= 8) {
569             if (max_coeff == 4)
570                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
571                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
572             else
573                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
574                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
575         } else {
576             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
577         }
578     }
579
580 #define STORE_BLOCK(type) \
581     scantable += zeros_left + total_coeff - 1; \
582     if(n >= LUMA_DC_BLOCK_INDEX){ \
583         ((type*)block)[*scantable] = level[0]; \
584         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
585             if(zeros_left < 7) \
586                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
587             else \
588                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
589             zeros_left -= run_before; \
590             scantable -= 1 + run_before; \
591             ((type*)block)[*scantable]= level[i]; \
592         } \
593         for(;i<total_coeff;i++) { \
594             scantable--; \
595             ((type*)block)[*scantable]= level[i]; \
596         } \
597     }else{ \
598         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
599         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
600             if(zeros_left < 7) \
601                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
602             else \
603                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
604             zeros_left -= run_before; \
605             scantable -= 1 + run_before; \
606             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
607         } \
608         for(;i<total_coeff;i++) { \
609             scantable--; \
610             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
611         } \
612     }
613
614     if (h->pixel_shift) {
615         STORE_BLOCK(int32_t)
616     } else {
617         STORE_BLOCK(int16_t)
618     }
619
620     if(zeros_left<0){
621         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
622         return -1;
623     }
624
625     return 0;
626 }
627
628 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
629     int i4x4, i8x8;
630     MpegEncContext * const s = &h->s;
631     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
632     if(IS_INTRA16x16(mb_type)){
633         AV_ZERO128(h->mb_luma_dc[p]+0);
634         AV_ZERO128(h->mb_luma_dc[p]+8);
635         AV_ZERO128(h->mb_luma_dc[p]+16);
636         AV_ZERO128(h->mb_luma_dc[p]+24);
637         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
638             return -1; //FIXME continue if partitioned and other return -1 too
639         }
640
641         assert((cbp&15) == 0 || (cbp&15) == 15);
642
643         if(cbp&15){
644             for(i8x8=0; i8x8<4; i8x8++){
645                 for(i4x4=0; i4x4<4; i4x4++){
646                     const int index= i4x4 + 4*i8x8 + p*16;
647                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
648                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
649                         return -1;
650                     }
651                 }
652             }
653             return 0xf;
654         }else{
655             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
656             return 0;
657         }
658     }else{
659         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
660         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
661         int new_cbp = 0;
662         for(i8x8=0; i8x8<4; i8x8++){
663             if(cbp & (1<<i8x8)){
664                 if(IS_8x8DCT(mb_type)){
665                     DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
666                     uint8_t *nnz;
667                     for(i4x4=0; i4x4<4; i4x4++){
668                         const int index= i4x4 + 4*i8x8 + p*16;
669                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
670                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
671                             return -1;
672                     }
673                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
674                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
675                     new_cbp |= !!nnz[0] << i8x8;
676                 }else{
677                     for(i4x4=0; i4x4<4; i4x4++){
678                         const int index= i4x4 + 4*i8x8 + p*16;
679                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
680                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
681                             return -1;
682                         }
683                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
684                     }
685                 }
686             }else{
687                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
688                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
689             }
690         }
691         return new_cbp;
692     }
693 }
694
695 int ff_h264_decode_mb_cavlc(H264Context *h){
696     MpegEncContext * const s = &h->s;
697     int mb_xy;
698     int partition_count;
699     unsigned int mb_type, cbp;
700     int dct8x8_allowed= h->pps.transform_8x8_mode;
701     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
702     const int pixel_shift = h->pixel_shift;
703
704     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
705
706     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
707     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
708                 down the code */
709     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
710         if(s->mb_skip_run==-1)
711             s->mb_skip_run= get_ue_golomb(&s->gb);
712
713         if (s->mb_skip_run--) {
714             if(FRAME_MBAFF && (s->mb_y&1) == 0){
715                 if(s->mb_skip_run==0)
716                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
717             }
718             decode_mb_skip(h);
719             return 0;
720         }
721     }
722     if(FRAME_MBAFF){
723         if( (s->mb_y&1) == 0 )
724             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
725     }
726
727     h->prev_mb_skipped= 0;
728
729     mb_type= get_ue_golomb(&s->gb);
730     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
731         if(mb_type < 23){
732             partition_count= b_mb_type_info[mb_type].partition_count;
733             mb_type=         b_mb_type_info[mb_type].type;
734         }else{
735             mb_type -= 23;
736             goto decode_intra_mb;
737         }
738     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
739         if(mb_type < 5){
740             partition_count= p_mb_type_info[mb_type].partition_count;
741             mb_type=         p_mb_type_info[mb_type].type;
742         }else{
743             mb_type -= 5;
744             goto decode_intra_mb;
745         }
746     }else{
747        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
748         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
749             mb_type--;
750 decode_intra_mb:
751         if(mb_type > 25){
752             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
753             return -1;
754         }
755         partition_count=0;
756         cbp= i_mb_type_info[mb_type].cbp;
757         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
758         mb_type= i_mb_type_info[mb_type].type;
759     }
760
761     if(MB_FIELD)
762         mb_type |= MB_TYPE_INTERLACED;
763
764     h->slice_table[ mb_xy ]= h->slice_num;
765
766     if(IS_INTRA_PCM(mb_type)){
767         unsigned int x;
768         static const uint16_t mb_sizes[4] = {256,384,512,768};
769         const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
770
771         // We assume these blocks are very rare so we do not optimize it.
772         align_get_bits(&s->gb);
773
774         // The pixels are stored in the same order as levels in h->mb array.
775         for(x=0; x < mb_size; x++){
776             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
777         }
778
779         // In deblocking, the quantizer is 0
780         s->current_picture.f.qscale_table[mb_xy] = 0;
781         // All coeffs are present
782         memset(h->non_zero_count[mb_xy], 16, 48);
783
784         s->current_picture.f.mb_type[mb_xy] = mb_type;
785         return 0;
786     }
787
788     if(MB_MBAFF){
789         h->ref_count[0] <<= 1;
790         h->ref_count[1] <<= 1;
791     }
792
793     fill_decode_neighbors(h, mb_type);
794     fill_decode_caches(h, mb_type);
795
796     //mb_pred
797     if(IS_INTRA(mb_type)){
798         int pred_mode;
799 //            init_top_left_availability(h);
800         if(IS_INTRA4x4(mb_type)){
801             int i;
802             int di = 1;
803             if(dct8x8_allowed && get_bits1(&s->gb)){
804                 mb_type |= MB_TYPE_8x8DCT;
805                 di = 4;
806             }
807
808 //                fill_intra4x4_pred_table(h);
809             for(i=0; i<16; i+=di){
810                 int mode= pred_intra_mode(h, i);
811
812                 if(!get_bits1(&s->gb)){
813                     const int rem_mode= get_bits(&s->gb, 3);
814                     mode = rem_mode + (rem_mode >= mode);
815                 }
816
817                 if(di==4)
818                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
819                 else
820                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
821             }
822             write_back_intra_pred_mode(h);
823             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
824                 return -1;
825         }else{
826             h->intra16x16_pred_mode= ff_h264_check_intra16x16_pred_mode(h, h->intra16x16_pred_mode);
827             if(h->intra16x16_pred_mode < 0)
828                 return -1;
829         }
830         if(decode_chroma){
831             pred_mode= ff_h264_check_intra_chroma_pred_mode(h, get_ue_golomb_31(&s->gb));
832             if(pred_mode < 0)
833                 return -1;
834             h->chroma_pred_mode= pred_mode;
835         } else {
836             h->chroma_pred_mode = DC_128_PRED8x8;
837         }
838     }else if(partition_count==4){
839         int i, j, sub_partition_count[4], list, ref[2][4];
840
841         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
842             for(i=0; i<4; i++){
843                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
844                 if(h->sub_mb_type[i] >=13){
845                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
846                     return -1;
847                 }
848                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
849                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
850             }
851             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
852                 ff_h264_pred_direct_motion(h, &mb_type);
853                 h->ref_cache[0][scan8[4]] =
854                 h->ref_cache[1][scan8[4]] =
855                 h->ref_cache[0][scan8[12]] =
856                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
857             }
858         }else{
859             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
860             for(i=0; i<4; i++){
861                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
862                 if(h->sub_mb_type[i] >=4){
863                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
864                     return -1;
865                 }
866                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
867                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
868             }
869         }
870
871         for(list=0; list<h->list_count; list++){
872             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
873             for(i=0; i<4; i++){
874                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
875                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
876                     unsigned int tmp;
877                     if(ref_count == 1){
878                         tmp= 0;
879                     }else if(ref_count == 2){
880                         tmp= get_bits1(&s->gb)^1;
881                     }else{
882                         tmp= get_ue_golomb_31(&s->gb);
883                         if(tmp>=ref_count){
884                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
885                             return -1;
886                         }
887                     }
888                     ref[list][i]= tmp;
889                 }else{
890                  //FIXME
891                     ref[list][i] = -1;
892                 }
893             }
894         }
895
896         if(dct8x8_allowed)
897             dct8x8_allowed = get_dct8x8_allowed(h);
898
899         for(list=0; list<h->list_count; list++){
900             for(i=0; i<4; i++){
901                 if(IS_DIRECT(h->sub_mb_type[i])) {
902                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
903                     continue;
904                 }
905                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
906                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
907
908                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
909                     const int sub_mb_type= h->sub_mb_type[i];
910                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
911                     for(j=0; j<sub_partition_count[i]; j++){
912                         int mx, my;
913                         const int index= 4*i + block_width*j;
914                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
915                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
916                         mx += get_se_golomb(&s->gb);
917                         my += get_se_golomb(&s->gb);
918                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
919
920                         if(IS_SUB_8X8(sub_mb_type)){
921                             mv_cache[ 1 ][0]=
922                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
923                             mv_cache[ 1 ][1]=
924                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
925                         }else if(IS_SUB_8X4(sub_mb_type)){
926                             mv_cache[ 1 ][0]= mx;
927                             mv_cache[ 1 ][1]= my;
928                         }else if(IS_SUB_4X8(sub_mb_type)){
929                             mv_cache[ 8 ][0]= mx;
930                             mv_cache[ 8 ][1]= my;
931                         }
932                         mv_cache[ 0 ][0]= mx;
933                         mv_cache[ 0 ][1]= my;
934                     }
935                 }else{
936                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
937                     p[0] = p[1]=
938                     p[8] = p[9]= 0;
939                 }
940             }
941         }
942     }else if(IS_DIRECT(mb_type)){
943         ff_h264_pred_direct_motion(h, &mb_type);
944         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
945     }else{
946         int list, mx, my, i;
947          //FIXME we should set ref_idx_l? to 0 if we use that later ...
948         if(IS_16X16(mb_type)){
949             for(list=0; list<h->list_count; list++){
950                     unsigned int val;
951                     if(IS_DIR(mb_type, 0, list)){
952                         if(h->ref_count[list]==1){
953                             val= 0;
954                         }else if(h->ref_count[list]==2){
955                             val= get_bits1(&s->gb)^1;
956                         }else{
957                             val= get_ue_golomb_31(&s->gb);
958                             if(val >= h->ref_count[list]){
959                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
960                                 return -1;
961                             }
962                         }
963                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
964                     }
965             }
966             for(list=0; list<h->list_count; list++){
967                 if(IS_DIR(mb_type, 0, list)){
968                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
969                     mx += get_se_golomb(&s->gb);
970                     my += get_se_golomb(&s->gb);
971                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
972
973                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
974                 }
975             }
976         }
977         else if(IS_16X8(mb_type)){
978             for(list=0; list<h->list_count; list++){
979                     for(i=0; i<2; i++){
980                         unsigned int val;
981                         if(IS_DIR(mb_type, i, list)){
982                             if(h->ref_count[list] == 1){
983                                 val= 0;
984                             }else if(h->ref_count[list] == 2){
985                                 val= get_bits1(&s->gb)^1;
986                             }else{
987                                 val= get_ue_golomb_31(&s->gb);
988                                 if(val >= h->ref_count[list]){
989                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
990                                     return -1;
991                                 }
992                             }
993                         }else
994                             val= LIST_NOT_USED&0xFF;
995                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
996                     }
997             }
998             for(list=0; list<h->list_count; list++){
999                 for(i=0; i<2; i++){
1000                     unsigned int val;
1001                     if(IS_DIR(mb_type, i, list)){
1002                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1003                         mx += get_se_golomb(&s->gb);
1004                         my += get_se_golomb(&s->gb);
1005                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1006
1007                         val= pack16to32(mx,my);
1008                     }else
1009                         val=0;
1010                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1011                 }
1012             }
1013         }else{
1014             assert(IS_8X16(mb_type));
1015             for(list=0; list<h->list_count; list++){
1016                     for(i=0; i<2; i++){
1017                         unsigned int val;
1018                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1019                             if(h->ref_count[list]==1){
1020                                 val= 0;
1021                             }else if(h->ref_count[list]==2){
1022                                 val= get_bits1(&s->gb)^1;
1023                             }else{
1024                                 val= get_ue_golomb_31(&s->gb);
1025                                 if(val >= h->ref_count[list]){
1026                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1027                                     return -1;
1028                                 }
1029                             }
1030                         }else
1031                             val= LIST_NOT_USED&0xFF;
1032                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1033                     }
1034             }
1035             for(list=0; list<h->list_count; list++){
1036                 for(i=0; i<2; i++){
1037                     unsigned int val;
1038                     if(IS_DIR(mb_type, i, list)){
1039                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1040                         mx += get_se_golomb(&s->gb);
1041                         my += get_se_golomb(&s->gb);
1042                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1043
1044                         val= pack16to32(mx,my);
1045                     }else
1046                         val=0;
1047                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1048                 }
1049             }
1050         }
1051     }
1052
1053     if(IS_INTER(mb_type))
1054         write_back_motion(h, mb_type);
1055
1056     if(!IS_INTRA16x16(mb_type)){
1057         cbp= get_ue_golomb(&s->gb);
1058
1059         if(decode_chroma){
1060             if(cbp > 47){
1061                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1062                 return -1;
1063             }
1064             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1065             else                     cbp= golomb_to_inter_cbp   [cbp];
1066         }else{
1067             if(cbp > 15){
1068                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1069                 return -1;
1070             }
1071             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1072             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1073         }
1074     }
1075
1076     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1077         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1078     }
1079     h->cbp=
1080     h->cbp_table[mb_xy]= cbp;
1081     s->current_picture.f.mb_type[mb_xy] = mb_type;
1082
1083     if(cbp || IS_INTRA16x16(mb_type)){
1084         int i4x4, i8x8, chroma_idx;
1085         int dquant;
1086         int ret;
1087         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1088         const uint8_t *scan, *scan8x8;
1089         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1090
1091         if(IS_INTERLACED(mb_type)){
1092             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1093             scan= s->qscale ? h->field_scan : h->field_scan_q0;
1094         }else{
1095             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1096             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1097         }
1098
1099         dquant= get_se_golomb(&s->gb);
1100
1101         s->qscale += dquant;
1102
1103         if(((unsigned)s->qscale) > max_qp){
1104             if(s->qscale<0) s->qscale+= max_qp+1;
1105             else            s->qscale-= max_qp+1;
1106             if(((unsigned)s->qscale) > max_qp){
1107                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1108                 return -1;
1109             }
1110         }
1111
1112         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1113         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1114
1115         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1116             return -1;
1117         }
1118         h->cbp_table[mb_xy] |= ret << 12;
1119         if(CHROMA444){
1120             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1121                 return -1;
1122             }
1123             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1124                 return -1;
1125             }
1126         } else {
1127             const int num_c8x8 = h->sps.chroma_format_idc;
1128
1129             if(cbp&0x30){
1130                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1131                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1132                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1133                                         CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1134                                         NULL, 4*num_c8x8) < 0) {
1135                         return -1;
1136                     }
1137             }
1138
1139             if(cbp&0x20){
1140                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1141                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1142                     DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1143                     for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1144                         for (i4x4=0; i4x4<4; i4x4++) {
1145                             const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1146                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1147                                 return -1;
1148                             mb += 16<<pixel_shift;
1149                         }
1150                     }
1151                 }
1152             }else{
1153                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1154                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1155             }
1156         }
1157     }else{
1158         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1159         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1160         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1161     }
1162     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1163     write_back_non_zero_count(h);
1164
1165     if(MB_MBAFF){
1166         h->ref_count[0] >>= 1;
1167         h->ref_count[1] >>= 1;
1168     }
1169
1170     return 0;
1171 }
1172