]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29 #define UNCHECKED_BITSTREAM_READER 1
30
31 #include "internal.h"
32 #include "avcodec.h"
33 #include "mpegvideo.h"
34 #include "h264.h"
35 #include "h264data.h" // FIXME FIXME FIXME
36 #include "h264_mvpred.h"
37 #include "golomb.h"
38
39 //#undef NDEBUG
40 #include <assert.h>
41
/*
 * Two 16-entry lookup tables, each a permutation of the values 0..15.
 * They are not referenced in the visible part of this file; judging by the
 * names they presumably translate a decoded Golomb code number into the
 * coded block pattern of inter / intra4x4 macroblocks for gray (luma only)
 * content -- TODO confirm against the macroblock decoder.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
49
/*
 * Code lengths and code words used to build chroma_dc_coeff_token_vlc
 * (the coeff_token VLC read by decode_residual() when max_coeff == 4).
 * The symbol index is 4*total_coeff + trailing_ones: decode_residual() splits
 * the decoded symbol with ">>2" and "&3". Each row below is one total_coeff
 * value (0..4), each column one trailing_ones value (0..3).
 * Combinations with trailing_ones > total_coeff have length 0, i.e. no code
 * (presumably skipped by init_vlc -- TODO confirm).
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};
65
/*
 * Code lengths and code words used to build chroma422_dc_coeff_token_vlc
 * (the coeff_token VLC read by decode_residual() when max_coeff <= 8 but
 * not 4). Same layout as the chroma_dc tables: symbol index is
 * 4*total_coeff + trailing_ones, one row per total_coeff (0..8) and one
 * column per trailing_ones (0..3); length 0 marks combinations without a code.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,   0,  0, 0,
 15,   1,  0, 0,
 14,  13,  1, 0,
  7,  12, 11, 1,
  6,   5, 10, 1,
  7,   6,  4, 9,
  7,   6,  5, 8,
  7,   6,  5, 4,
  7,   5,  4, 4,
};
89
/*
 * Code lengths and code words for the four general coeff_token VLCs
 * (coeff_token_vlc[0..3]). decode_residual() picks one of the four tables
 * from the predicted number of non-zero coefficients via its local
 * coeff_token_table_index[] array.
 * Within each table the symbol index is 4*total_coeff + trailing_ones
 * (total_coeff 0..16, trailing_ones 0..3); each group of four values below is
 * one total_coeff. Length 0 marks combinations that have no code.
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/* Code words matching coeff_token_len entry for entry. */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
151
/*
 * Code lengths and code words for the total_zeros VLCs of 4x4 blocks.
 * Row i feeds total_zeros_vlc[i]; decode_residual() selects the VLC for
 * total_coeff-1, so row i belongs to total_coeff == i+1. The column is the
 * decoded zeros_left value.
 * Only 15 of the 16 declared rows are initialised and only 15 VLCs are built;
 * missing trailing entries of a row are implicitly zero (length 0, no code).
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/* Code words matching total_zeros_len entry for entry. */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};
187
/*
 * total_zeros code lengths / code words for chroma DC blocks.
 * The [3][4] pair builds chroma_dc_total_zeros_vlc[0..2] (used when
 * max_coeff == 4), the [7][8] pair builds chroma422_dc_total_zeros_vlc[0..6]
 * (used for the other max_coeff <= 8 case). As for the 4x4 tables, row i is
 * selected for total_coeff == i+1 and the column is the decoded zeros_left;
 * entries with length 0 (explicit or implicit) carry no code.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};

static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
219
/*
 * Code lengths and code words for the run_before VLCs.
 * Rows 0..5 build run_vlc[0..5] (7 symbols each); decode_residual() uses
 * them while zeros_left is 1..6, picking the row zeros_left-1.
 * Row 6 builds run7_vlc (16 symbols), used once zeros_left is 7 or more.
 * The column is the decoded run_before value; implicit zero entries have
 * length 0 and therefore no code.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
239
/*
 * Static storage for the VLC lookup tables built by ff_h264_decode_init_vlc().
 * Every VLC has a fixed-size VLC_TYPE array plus a *_size constant that the
 * init code stores in VLC.table_allocated before calling init_vlc().
 */

/* The four coeff_token tables are packed back to back into one array;
 * coeff_token_vlc_tables_size[] gives the slice length of each. */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run7 codes are up to 11 bits long but RUN7_VLC_BITS is 6; the table has 96
 * entries rather than 1<<6, presumably to hold second-level sub-tables
 * -- TODO confirm against init_vlc. */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Level lookup table filled by init_cavlc_level_tab(), indexed by
 * [suffix_length][next LEVEL_TAB_BITS bits of the stream]:
 *   [0] is the decoded level, or 100 + prefix when the code does not fit,
 *   [1] is the number of bits to skip.
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* First-level lookup widths; passed both to init_vlc() when the tables are
 * built and to get_vlc2() when they are read. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6
283
284 /**
285  * Get the predicted number of non-zero coefficients.
286  * @param n block index
287  */
288 static inline int pred_non_zero_count(H264Context *h, int n){
289     const int index8= scan8[n];
290     const int left= h->non_zero_count_cache[index8 - 1];
291     const int top = h->non_zero_count_cache[index8 - 8];
292     int i= left + top;
293
294     if(i<64) i= (i+1)>>1;
295
296     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
297
298     return i&31;
299 }
300
301 static av_cold void init_cavlc_level_tab(void){
302     int suffix_length;
303     unsigned int i;
304
305     for(suffix_length=0; suffix_length<7; suffix_length++){
306         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
307             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
308
309             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
310                 int level_code = (prefix << suffix_length) +
311                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
312                 int mask = -(level_code&1);
313                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
314                 cavlc_level_tab[suffix_length][i][0]= level_code;
315                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
316             }else if(prefix + 1 <= LEVEL_TAB_BITS){
317                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
318                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
319             }else{
320                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
321                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
322             }
323         }
324     }
325 }
326
327 av_cold void ff_h264_decode_init_vlc(void){
328     static int done = 0;
329
330     if (!done) {
331         int i;
332         int offset;
333         done = 1;
334
335         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
336         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
337         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
338                  &chroma_dc_coeff_token_len [0], 1, 1,
339                  &chroma_dc_coeff_token_bits[0], 1, 1,
340                  INIT_VLC_USE_NEW_STATIC);
341
342         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
343         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
344         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
345                  &chroma422_dc_coeff_token_len [0], 1, 1,
346                  &chroma422_dc_coeff_token_bits[0], 1, 1,
347                  INIT_VLC_USE_NEW_STATIC);
348
349         offset = 0;
350         for(i=0; i<4; i++){
351             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
352             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
353             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
354                      &coeff_token_len [i][0], 1, 1,
355                      &coeff_token_bits[i][0], 1, 1,
356                      INIT_VLC_USE_NEW_STATIC);
357             offset += coeff_token_vlc_tables_size[i];
358         }
359         /*
360          * This is a one time safety check to make sure that
361          * the packed static coeff_token_vlc table sizes
362          * were initialized correctly.
363          */
364         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
365
366         for(i=0; i<3; i++){
367             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
368             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
369             init_vlc(&chroma_dc_total_zeros_vlc[i],
370                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
371                      &chroma_dc_total_zeros_len [i][0], 1, 1,
372                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
373                      INIT_VLC_USE_NEW_STATIC);
374         }
375
376         for(i=0; i<7; i++){
377             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
378             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
379             init_vlc(&chroma422_dc_total_zeros_vlc[i],
380                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
381                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
382                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383                      INIT_VLC_USE_NEW_STATIC);
384         }
385
386         for(i=0; i<15; i++){
387             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
388             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
389             init_vlc(&total_zeros_vlc[i],
390                      TOTAL_ZEROS_VLC_BITS, 16,
391                      &total_zeros_len [i][0], 1, 1,
392                      &total_zeros_bits[i][0], 1, 1,
393                      INIT_VLC_USE_NEW_STATIC);
394         }
395
396         for(i=0; i<6; i++){
397             run_vlc[i].table = run_vlc_tables[i];
398             run_vlc[i].table_allocated = run_vlc_tables_size;
399             init_vlc(&run_vlc[i],
400                      RUN_VLC_BITS, 7,
401                      &run_len [i][0], 1, 1,
402                      &run_bits[i][0], 1, 1,
403                      INIT_VLC_USE_NEW_STATIC);
404         }
405         run7_vlc.table = run7_vlc_table,
406         run7_vlc.table_allocated = run7_vlc_table_size;
407         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
408                  &run_len [6][0], 1, 1,
409                  &run_bits[6][0], 1, 1,
410                  INIT_VLC_USE_NEW_STATIC);
411
412         init_cavlc_level_tab();
413     }
414 }
415
/**
 * Read a unary-coded level prefix from the bitstream.
 *
 * Looks at the bit reader cache, locates the most significant set bit and
 * consumes everything up to and including it.
 * Assumes GET_CACHE() yields the upcoming bits left-aligned in a 32-bit word
 * and av_log2() returns the index of the highest set bit -- TODO confirm
 * against get_bits.h / libavutil.
 *
 * @param gb bitstream reader; advanced by the number of bits consumed
 * @return number of bits consumed minus one (under the assumptions above, the
 *         count of zero bits preceding the terminating 1 bit)
 */
static inline int get_level_prefix(GetBitContext *gb){
    unsigned int buf;
    int log;

    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    buf=GET_CACHE(re, gb);

    // log = number of bits up to and including the first set bit
    log= 32 - av_log2(buf);
#ifdef TRACE
    print_bin(buf>>(32-log), log);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
#endif

    LAST_SKIP_BITS(re, gb, log);
    CLOSE_READER(re, gb);

    return log-1;
}
438
439 /**
440  * Decode a residual block.
441  * @param n block index
442  * @param scantable scantable
443  * @param max_coeff number of coefficients in the block
444  * @return <0 if an error occurred
445  */
446 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
447     MpegEncContext * const s = &h->s;
448     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
449     int level[16];
450     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
451
452     //FIXME put trailing_onex into the context
453
454     if(max_coeff <= 8){
455         if (max_coeff == 4)
456             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
457         else
458             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
459         total_coeff= coeff_token>>2;
460     }else{
461         if(n >= LUMA_DC_BLOCK_INDEX){
462             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
463             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
464             total_coeff= coeff_token>>2;
465         }else{
466             total_coeff= pred_non_zero_count(h, n);
467             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
468             total_coeff= coeff_token>>2;
469         }
470     }
471     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
472
473     //FIXME set last_non_zero?
474
475     if(total_coeff==0)
476         return 0;
477     if(total_coeff > (unsigned)max_coeff) {
478         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
479         return -1;
480     }
481
482     trailing_ones= coeff_token&3;
483     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
484     assert(total_coeff<=16);
485
486     i = show_bits(gb, 3);
487     skip_bits(gb, trailing_ones);
488     level[0] = 1-((i&4)>>1);
489     level[1] = 1-((i&2)   );
490     level[2] = 1-((i&1)<<1);
491
492     if(trailing_ones<total_coeff) {
493         int mask, prefix;
494         int suffix_length = total_coeff > 10 & trailing_ones < 3;
495         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
496         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
497
498         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
499         if(level_code >= 100){
500             prefix= level_code - 100;
501             if(prefix == LEVEL_TAB_BITS)
502                 prefix += get_level_prefix(gb);
503
504             //first coefficient has suffix_length equal to 0 or 1
505             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
506                 if(suffix_length)
507                     level_code= (prefix<<1) + get_bits1(gb); //part
508                 else
509                     level_code= prefix; //part
510             }else if(prefix==14){
511                 if(suffix_length)
512                     level_code= (prefix<<1) + get_bits1(gb); //part
513                 else
514                     level_code= prefix + get_bits(gb, 4); //part
515             }else{
516                 level_code= 30;
517                 if(prefix>=16){
518                     if(prefix > 25+3){
519                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
520                         return -1;
521                     }
522                     level_code += (1<<(prefix-3))-4096;
523                 }
524                 level_code += get_bits(gb, prefix-3); //part
525             }
526
527             if(trailing_ones < 3) level_code += 2;
528
529             suffix_length = 2;
530             mask= -(level_code&1);
531             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
532         }else{
533             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
534
535             suffix_length = 1 + (level_code + 3U > 6U);
536             level[trailing_ones]= level_code;
537         }
538
539         //remaining coefficients have suffix_length > 0
540         for(i=trailing_ones+1;i<total_coeff;i++) {
541             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
542             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
543             level_code= cavlc_level_tab[suffix_length][bitsi][0];
544
545             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
546             if(level_code >= 100){
547                 prefix= level_code - 100;
548                 if(prefix == LEVEL_TAB_BITS){
549                     prefix += get_level_prefix(gb);
550                 }
551                 if(prefix<15){
552                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
553                 }else{
554                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
555                     if(prefix>=16)
556                         level_code += (1<<(prefix-3))-4096;
557                 }
558                 mask= -(level_code&1);
559                 level_code= (((2+level_code)>>1) ^ mask) - mask;
560             }
561             level[i]= level_code;
562             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
563         }
564     }
565
566     if(total_coeff == max_coeff)
567         zeros_left=0;
568     else{
569         if (max_coeff <= 8) {
570             if (max_coeff == 4)
571                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
572                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
573             else
574                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
575                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
576         } else {
577             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
578         }
579     }
580
581 #define STORE_BLOCK(type) \
582     scantable += zeros_left + total_coeff - 1; \
583     if(n >= LUMA_DC_BLOCK_INDEX){ \
584         ((type*)block)[*scantable] = level[0]; \
585         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
586             if(zeros_left < 7) \
587                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
588             else \
589                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
590             zeros_left -= run_before; \
591             scantable -= 1 + run_before; \
592             ((type*)block)[*scantable]= level[i]; \
593         } \
594         for(;i<total_coeff;i++) { \
595             scantable--; \
596             ((type*)block)[*scantable]= level[i]; \
597         } \
598     }else{ \
599         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
600         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
601             if(zeros_left < 7) \
602                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
603             else \
604                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
605             zeros_left -= run_before; \
606             scantable -= 1 + run_before; \
607             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
608         } \
609         for(;i<total_coeff;i++) { \
610             scantable--; \
611             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
612         } \
613     }
614
615     if (h->pixel_shift) {
616         STORE_BLOCK(int32_t)
617     } else {
618         STORE_BLOCK(int16_t)
619     }
620
621     if(zeros_left<0){
622         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
623         return -1;
624     }
625
626     return 0;
627 }
628
629 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
630     int i4x4, i8x8;
631     MpegEncContext * const s = &h->s;
632     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
633     if(IS_INTRA16x16(mb_type)){
634         AV_ZERO128(h->mb_luma_dc[p]+0);
635         AV_ZERO128(h->mb_luma_dc[p]+8);
636         AV_ZERO128(h->mb_luma_dc[p]+16);
637         AV_ZERO128(h->mb_luma_dc[p]+24);
638         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
639             return -1; //FIXME continue if partitioned and other return -1 too
640         }
641
642         assert((cbp&15) == 0 || (cbp&15) == 15);
643
644         if(cbp&15){
645             for(i8x8=0; i8x8<4; i8x8++){
646                 for(i4x4=0; i4x4<4; i4x4++){
647                     const int index= i4x4 + 4*i8x8 + p*16;
648                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
649                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
650                         return -1;
651                     }
652                 }
653             }
654             return 0xf;
655         }else{
656             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
657             return 0;
658         }
659     }else{
660         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
661         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
662         int new_cbp = 0;
663         for(i8x8=0; i8x8<4; i8x8++){
664             if(cbp & (1<<i8x8)){
665                 if(IS_8x8DCT(mb_type)){
666                     DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
667                     uint8_t *nnz;
668                     for(i4x4=0; i4x4<4; i4x4++){
669                         const int index= i4x4 + 4*i8x8 + p*16;
670                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
671                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
672                             return -1;
673                     }
674                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
675                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
676                     new_cbp |= !!nnz[0] << i8x8;
677                 }else{
678                     for(i4x4=0; i4x4<4; i4x4++){
679                         const int index= i4x4 + 4*i8x8 + p*16;
680                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
681                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
682                             return -1;
683                         }
684                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
685                     }
686                 }
687             }else{
688                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
689                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
690             }
691         }
692         return new_cbp;
693     }
694 }
695
696 int ff_h264_decode_mb_cavlc(H264Context *h){
697     MpegEncContext * const s = &h->s;
698     int mb_xy;
699     int partition_count;
700     unsigned int mb_type, cbp;
701     int dct8x8_allowed= h->pps.transform_8x8_mode;
702     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
703     const int pixel_shift = h->pixel_shift;
704
705     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
706
707     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
708     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
709                 down the code */
710     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
711         if(s->mb_skip_run==-1)
712             s->mb_skip_run= get_ue_golomb(&s->gb);
713
714         if (s->mb_skip_run--) {
715             if(FRAME_MBAFF && (s->mb_y&1) == 0){
716                 if(s->mb_skip_run==0)
717                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
718             }
719             decode_mb_skip(h);
720             return 0;
721         }
722     }
723     if(FRAME_MBAFF){
724         if( (s->mb_y&1) == 0 )
725             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
726     }
727
728     h->prev_mb_skipped= 0;
729
730     mb_type= get_ue_golomb(&s->gb);
731     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
732         if(mb_type < 23){
733             partition_count= b_mb_type_info[mb_type].partition_count;
734             mb_type=         b_mb_type_info[mb_type].type;
735         }else{
736             mb_type -= 23;
737             goto decode_intra_mb;
738         }
739     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
740         if(mb_type < 5){
741             partition_count= p_mb_type_info[mb_type].partition_count;
742             mb_type=         p_mb_type_info[mb_type].type;
743         }else{
744             mb_type -= 5;
745             goto decode_intra_mb;
746         }
747     }else{
748        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
749         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
750             mb_type--;
751 decode_intra_mb:
752         if(mb_type > 25){
753             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
754             return -1;
755         }
756         partition_count=0;
757         cbp= i_mb_type_info[mb_type].cbp;
758         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
759         mb_type= i_mb_type_info[mb_type].type;
760     }
761
762     if(MB_FIELD)
763         mb_type |= MB_TYPE_INTERLACED;
764
765     h->slice_table[ mb_xy ]= h->slice_num;
766
767     if(IS_INTRA_PCM(mb_type)){
768         unsigned int x;
769         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
770                             h->sps.bit_depth_luma >> 3;
771
772         // We assume these blocks are very rare so we do not optimize it.
773         align_get_bits(&s->gb);
774
775         // The pixels are stored in the same order as levels in h->mb array.
776         for(x=0; x < mb_size; x++){
777             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
778         }
779
780         // In deblocking, the quantizer is 0
781         s->current_picture.f.qscale_table[mb_xy] = 0;
782         // All coeffs are present
783         memset(h->non_zero_count[mb_xy], 16, 48);
784
785         s->current_picture.f.mb_type[mb_xy] = mb_type;
786         return 0;
787     }
788
789     if(MB_MBAFF){
790         h->ref_count[0] <<= 1;
791         h->ref_count[1] <<= 1;
792     }
793
794     fill_decode_neighbors(h, mb_type);
795     fill_decode_caches(h, mb_type);
796
797     //mb_pred
798     if(IS_INTRA(mb_type)){
799         int pred_mode;
800 //            init_top_left_availability(h);
801         if(IS_INTRA4x4(mb_type)){
802             int i;
803             int di = 1;
804             if(dct8x8_allowed && get_bits1(&s->gb)){
805                 mb_type |= MB_TYPE_8x8DCT;
806                 di = 4;
807             }
808
809 //                fill_intra4x4_pred_table(h);
810             for(i=0; i<16; i+=di){
811                 int mode= pred_intra_mode(h, i);
812
813                 if(!get_bits1(&s->gb)){
814                     const int rem_mode= get_bits(&s->gb, 3);
815                     mode = rem_mode + (rem_mode >= mode);
816                 }
817
818                 if(di==4)
819                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
820                 else
821                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
822             }
823             write_back_intra_pred_mode(h);
824             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
825                 return -1;
826         }else{
827             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
828             if(h->intra16x16_pred_mode < 0)
829                 return -1;
830         }
831         if(decode_chroma){
832             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
833             if(pred_mode < 0)
834                 return -1;
835             h->chroma_pred_mode= pred_mode;
836         } else {
837             h->chroma_pred_mode = DC_128_PRED8x8;
838         }
839     }else if(partition_count==4){
840         int i, j, sub_partition_count[4], list, ref[2][4];
841
842         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
843             for(i=0; i<4; i++){
844                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
845                 if(h->sub_mb_type[i] >=13){
846                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
847                     return -1;
848                 }
849                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
850                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
851             }
852             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
853                 ff_h264_pred_direct_motion(h, &mb_type);
854                 h->ref_cache[0][scan8[4]] =
855                 h->ref_cache[1][scan8[4]] =
856                 h->ref_cache[0][scan8[12]] =
857                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
858             }
859         }else{
860             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
861             for(i=0; i<4; i++){
862                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
863                 if(h->sub_mb_type[i] >=4){
864                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
865                     return -1;
866                 }
867                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
868                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
869             }
870         }
871
872         for(list=0; list<h->list_count; list++){
873             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
874             for(i=0; i<4; i++){
875                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
876                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
877                     unsigned int tmp;
878                     if(ref_count == 1){
879                         tmp= 0;
880                     }else if(ref_count == 2){
881                         tmp= get_bits1(&s->gb)^1;
882                     }else{
883                         tmp= get_ue_golomb_31(&s->gb);
884                         if(tmp>=ref_count){
885                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
886                             return -1;
887                         }
888                     }
889                     ref[list][i]= tmp;
890                 }else{
891                  //FIXME
892                     ref[list][i] = -1;
893                 }
894             }
895         }
896
897         if(dct8x8_allowed)
898             dct8x8_allowed = get_dct8x8_allowed(h);
899
900         for(list=0; list<h->list_count; list++){
901             for(i=0; i<4; i++){
902                 if(IS_DIRECT(h->sub_mb_type[i])) {
903                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
904                     continue;
905                 }
906                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
907                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
908
909                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
910                     const int sub_mb_type= h->sub_mb_type[i];
911                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
912                     for(j=0; j<sub_partition_count[i]; j++){
913                         int mx, my;
914                         const int index= 4*i + block_width*j;
915                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
916                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
917                         mx += get_se_golomb(&s->gb);
918                         my += get_se_golomb(&s->gb);
919                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
920
921                         if(IS_SUB_8X8(sub_mb_type)){
922                             mv_cache[ 1 ][0]=
923                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
924                             mv_cache[ 1 ][1]=
925                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
926                         }else if(IS_SUB_8X4(sub_mb_type)){
927                             mv_cache[ 1 ][0]= mx;
928                             mv_cache[ 1 ][1]= my;
929                         }else if(IS_SUB_4X8(sub_mb_type)){
930                             mv_cache[ 8 ][0]= mx;
931                             mv_cache[ 8 ][1]= my;
932                         }
933                         mv_cache[ 0 ][0]= mx;
934                         mv_cache[ 0 ][1]= my;
935                     }
936                 }else{
937                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
938                     p[0] = p[1]=
939                     p[8] = p[9]= 0;
940                 }
941             }
942         }
943     }else if(IS_DIRECT(mb_type)){
944         ff_h264_pred_direct_motion(h, &mb_type);
945         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
946     }else{
947         int list, mx, my, i;
948          //FIXME we should set ref_idx_l? to 0 if we use that later ...
949         if(IS_16X16(mb_type)){
950             for(list=0; list<h->list_count; list++){
951                     unsigned int val;
952                     if(IS_DIR(mb_type, 0, list)){
953                         if(h->ref_count[list]==1){
954                             val= 0;
955                         }else if(h->ref_count[list]==2){
956                             val= get_bits1(&s->gb)^1;
957                         }else{
958                             val= get_ue_golomb_31(&s->gb);
959                             if(val >= h->ref_count[list]){
960                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
961                                 return -1;
962                             }
963                         }
964                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
965                     }
966             }
967             for(list=0; list<h->list_count; list++){
968                 if(IS_DIR(mb_type, 0, list)){
969                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
970                     mx += get_se_golomb(&s->gb);
971                     my += get_se_golomb(&s->gb);
972                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
973
974                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
975                 }
976             }
977         }
978         else if(IS_16X8(mb_type)){
979             for(list=0; list<h->list_count; list++){
980                     for(i=0; i<2; i++){
981                         unsigned int val;
982                         if(IS_DIR(mb_type, i, list)){
983                             if(h->ref_count[list] == 1){
984                                 val= 0;
985                             }else if(h->ref_count[list] == 2){
986                                 val= get_bits1(&s->gb)^1;
987                             }else{
988                                 val= get_ue_golomb_31(&s->gb);
989                                 if(val >= h->ref_count[list]){
990                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
991                                     return -1;
992                                 }
993                             }
994                         }else
995                             val= LIST_NOT_USED&0xFF;
996                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
997                     }
998             }
999             for(list=0; list<h->list_count; list++){
1000                 for(i=0; i<2; i++){
1001                     unsigned int val;
1002                     if(IS_DIR(mb_type, i, list)){
1003                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1004                         mx += get_se_golomb(&s->gb);
1005                         my += get_se_golomb(&s->gb);
1006                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1007
1008                         val= pack16to32(mx,my);
1009                     }else
1010                         val=0;
1011                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1012                 }
1013             }
1014         }else{
1015             assert(IS_8X16(mb_type));
1016             for(list=0; list<h->list_count; list++){
1017                     for(i=0; i<2; i++){
1018                         unsigned int val;
1019                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1020                             if(h->ref_count[list]==1){
1021                                 val= 0;
1022                             }else if(h->ref_count[list]==2){
1023                                 val= get_bits1(&s->gb)^1;
1024                             }else{
1025                                 val= get_ue_golomb_31(&s->gb);
1026                                 if(val >= h->ref_count[list]){
1027                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1028                                     return -1;
1029                                 }
1030                             }
1031                         }else
1032                             val= LIST_NOT_USED&0xFF;
1033                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1034                     }
1035             }
1036             for(list=0; list<h->list_count; list++){
1037                 for(i=0; i<2; i++){
1038                     unsigned int val;
1039                     if(IS_DIR(mb_type, i, list)){
1040                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1041                         mx += get_se_golomb(&s->gb);
1042                         my += get_se_golomb(&s->gb);
1043                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1044
1045                         val= pack16to32(mx,my);
1046                     }else
1047                         val=0;
1048                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1049                 }
1050             }
1051         }
1052     }
1053
1054     if(IS_INTER(mb_type))
1055         write_back_motion(h, mb_type);
1056
1057     if(!IS_INTRA16x16(mb_type)){
1058         cbp= get_ue_golomb(&s->gb);
1059
1060         if(decode_chroma){
1061             if(cbp > 47){
1062                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1063                 return -1;
1064             }
1065             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1066             else                     cbp= golomb_to_inter_cbp   [cbp];
1067         }else{
1068             if(cbp > 15){
1069                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1070                 return -1;
1071             }
1072             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1073             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1074         }
1075     }
1076
1077     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1078         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1079     }
1080     h->cbp=
1081     h->cbp_table[mb_xy]= cbp;
1082     s->current_picture.f.mb_type[mb_xy] = mb_type;
1083
1084     if(cbp || IS_INTRA16x16(mb_type)){
1085         int i4x4, i8x8, chroma_idx;
1086         int dquant;
1087         int ret;
1088         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1089         const uint8_t *scan, *scan8x8;
1090         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1091
1092         if(IS_INTERLACED(mb_type)){
1093             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1094             scan= s->qscale ? h->field_scan : h->field_scan_q0;
1095         }else{
1096             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1097             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1098         }
1099
1100         dquant= get_se_golomb(&s->gb);
1101
1102         s->qscale += dquant;
1103
1104         if(((unsigned)s->qscale) > max_qp){
1105             if(s->qscale<0) s->qscale+= max_qp+1;
1106             else            s->qscale-= max_qp+1;
1107             if(((unsigned)s->qscale) > max_qp){
1108                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1109                 return -1;
1110             }
1111         }
1112
1113         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1114         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1115
1116         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1117             return -1;
1118         }
1119         h->cbp_table[mb_xy] |= ret << 12;
1120         if(CHROMA444){
1121             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1122                 return -1;
1123             }
1124             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1125                 return -1;
1126             }
1127         } else {
1128             const int num_c8x8 = h->sps.chroma_format_idc;
1129
1130             if(cbp&0x30){
1131                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1132                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1133                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1134                                         CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
1135                                         NULL, 4*num_c8x8) < 0) {
1136                         return -1;
1137                     }
1138             }
1139
1140             if(cbp&0x20){
1141                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1142                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1143                     DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1144                     for (i8x8=0; i8x8<num_c8x8; i8x8++) {
1145                         for (i4x4=0; i4x4<4; i4x4++) {
1146                             const int index= 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1147                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1148                                 return -1;
1149                             mb += 16<<pixel_shift;
1150                         }
1151                     }
1152                 }
1153             }else{
1154                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1155                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1156             }
1157         }
1158     }else{
1159         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1160         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1161         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1162     }
1163     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1164     write_back_non_zero_count(h);
1165
1166     if(MB_MBAFF){
1167         h->ref_count[0] >>= 1;
1168         h->ref_count[1] >>= 1;
1169     }
1170
1171     return 0;
1172 }