]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
ra288dec: set channel layout
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/**
 * 16-entry coded-block-pattern lookup, inter variant.
 * NOTE(review): the lookup site is not in this part of the file; going by the
 * name, the index is an Exp-Golomb code number and the table serves streams
 * without chroma ("gray") -- confirm at the caller.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,     /* index  0.. 3 */
     8,  3,  5, 10,     /* index  4.. 7 */
    12, 15,  7, 11,     /* index  8..11 */
    13, 14,  6,  9,     /* index 12..15 */
};
44
/**
 * 16-entry coded-block-pattern lookup, intra 4x4 variant.
 * NOTE(review): the lookup site is not in this part of the file; going by the
 * name, the index is an Exp-Golomb code number and the table serves streams
 * without chroma ("gray") -- confirm at the caller.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11,     /* index  0.. 3 */
    13, 14,  3,  5,     /* index  4.. 7 */
    10, 12,  1,  2,     /* index  8..11 */
     4,  8,  6,  9,     /* index 12..15 */
};
48
/**
 * Code lengths of the chroma DC coeff_token codebook (4 coefficients).
 *
 * Laid out as five rows of four entries. The rows line up with
 * total_coeff 0..4 and the columns with trailing_ones 0..3; a length of 0
 * fills the slots where the column index would exceed the row index.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5] = {
    2, 0, 0, 0,     /* total_coeff 0 */
    6, 1, 0, 0,     /* total_coeff 1 */
    6, 6, 3, 0,     /* total_coeff 2 */
    6, 7, 7, 6,     /* total_coeff 3 */
    6, 8, 8, 7,     /* total_coeff 4 */
};

/**
 * Code bit patterns matching chroma_dc_coeff_token_len entry by entry.
 */
static const uint8_t chroma_dc_coeff_token_bits[4*5] = {
    1, 0, 0, 0,     /* total_coeff 0 */
    7, 1, 0, 0,     /* total_coeff 1 */
    4, 6, 1, 0,     /* total_coeff 2 */
    3, 3, 2, 5,     /* total_coeff 3 */
    2, 3, 2, 0,     /* total_coeff 4 */
};
64
/**
 * Code lengths of the 4:2:2 chroma DC coeff_token codebook (8 coefficients).
 *
 * Laid out as nine rows of four entries. The rows line up with
 * total_coeff 0..8 and the columns with trailing_ones 0..3; a length of 0
 * fills the slots where the column index would exceed the row index.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9] = {
     1,  0,  0,  0,     /* total_coeff 0 */
     7,  2,  0,  0,     /* total_coeff 1 */
     7,  7,  3,  0,     /* total_coeff 2 */
     9,  7,  7,  5,     /* total_coeff 3 */
     9,  9,  7,  6,     /* total_coeff 4 */
    10, 10,  9,  7,     /* total_coeff 5 */
    11, 11, 10,  7,     /* total_coeff 6 */
    12, 12, 11, 10,     /* total_coeff 7 */
    13, 12, 12, 11,     /* total_coeff 8 */
};

/**
 * Code bit patterns matching chroma422_dc_coeff_token_len entry by entry.
 */
static const uint8_t chroma422_dc_coeff_token_bits[4*9] = {
     1,  0,  0,  0,     /* total_coeff 0 */
    15,  1,  0,  0,     /* total_coeff 1 */
    14, 13,  1,  0,     /* total_coeff 2 */
     7, 12, 11,  1,     /* total_coeff 3 */
     6,  5, 10,  1,     /* total_coeff 4 */
     7,  6,  4,  9,     /* total_coeff 5 */
     7,  6,  5,  8,     /* total_coeff 6 */
     7,  6,  5,  4,     /* total_coeff 7 */
     7,  5,  4,  4,     /* total_coeff 8 */
};
88
/**
 * Code lengths (in bits) of the four coeff_token codebooks.
 *
 * First index: codebook. Each codebook holds 4*17 entries arranged as
 * 17 groups of four; the groups line up with total_coeff 0..16 and the
 * position inside a group with trailing_ones 0..3. A length of 0 fills the
 * slots where trailing_ones would exceed total_coeff.
 */
static const uint8_t coeff_token_len[4][4*17] = {
    {   /* codebook 0 */
         1,  0,  0,  0,                                                         /* total_coeff 0      */
         6,  2,  0,  0,    8,  6,  3,  0,    9,  8,  7,  5,   10,  9,  8,  6,   /* total_coeff 1..4   */
        11, 10,  9,  7,   13, 11, 10,  8,   13, 13, 11,  9,   13, 13, 13, 10,   /* total_coeff 5..8   */
        14, 14, 13, 11,   14, 14, 14, 13,   15, 15, 14, 14,   15, 15, 15, 14,   /* total_coeff 9..12  */
        16, 15, 15, 15,   16, 16, 16, 15,   16, 16, 16, 16,   16, 16, 16, 16,   /* total_coeff 13..16 */
    },
    {   /* codebook 1 */
         2,  0,  0,  0,                                                         /* total_coeff 0      */
         6,  2,  0,  0,    6,  5,  3,  0,    7,  6,  6,  4,    8,  6,  6,  4,   /* total_coeff 1..4   */
         8,  7,  7,  5,    9,  8,  8,  6,   11,  9,  9,  6,   11, 11, 11,  7,   /* total_coeff 5..8   */
        12, 11, 11,  9,   12, 12, 12, 11,   12, 12, 12, 11,   13, 13, 13, 12,   /* total_coeff 9..12  */
        13, 13, 13, 13,   13, 14, 13, 13,   14, 14, 14, 13,   14, 14, 14, 14,   /* total_coeff 13..16 */
    },
    {   /* codebook 2 */
         4,  0,  0,  0,                                                         /* total_coeff 0      */
         6,  4,  0,  0,    6,  5,  4,  0,    6,  5,  5,  4,    7,  5,  5,  4,   /* total_coeff 1..4   */
         7,  5,  5,  4,    7,  6,  6,  4,    7,  6,  6,  4,    8,  7,  7,  5,   /* total_coeff 5..8   */
         8,  8,  7,  6,    9,  8,  8,  7,    9,  9,  8,  8,    9,  9,  9,  8,   /* total_coeff 9..12  */
        10,  9,  9,  9,   10, 10, 10, 10,   10, 10, 10, 10,   10, 10, 10, 10,   /* total_coeff 13..16 */
    },
    {   /* codebook 3: every used code is 6 bits long */
         6,  0,  0,  0,                                                         /* total_coeff 0      */
         6,  6,  0,  0,    6,  6,  6,  0,    6,  6,  6,  6,    6,  6,  6,  6,   /* total_coeff 1..4   */
         6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,   /* total_coeff 5..8   */
         6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,   /* total_coeff 9..12  */
         6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,   /* total_coeff 13..16 */
    },
};
119
/**
 * Code bit patterns of the four coeff_token codebooks.
 *
 * Same shape as the matching code-length table: first index is the
 * codebook, then 17 groups of four entries, the groups lining up with
 * total_coeff 0..16 and the position inside a group with trailing_ones 0..3.
 */
static const uint8_t coeff_token_bits[4][4*17] = {
    {   /* codebook 0 */
         1,  0,  0,  0,                                                         /* total_coeff 0      */
         5,  1,  0,  0,    7,  4,  1,  0,    7,  6,  5,  3,    7,  6,  5,  3,   /* total_coeff 1..4   */
         7,  6,  5,  4,   15,  6,  5,  4,   11, 14,  5,  4,    8, 10, 13,  4,   /* total_coeff 5..8   */
        15, 14,  9,  4,   11, 10, 13, 12,   15, 14,  9, 12,   11, 10, 13,  8,   /* total_coeff 9..12  */
        15,  1,  9, 12,   11, 14, 13,  8,    7, 10,  9, 12,    4,  6,  5,  8,   /* total_coeff 13..16 */
    },
    {   /* codebook 1 */
         3,  0,  0,  0,                                                         /* total_coeff 0      */
        11,  2,  0,  0,    7,  7,  3,  0,    7, 10,  9,  5,    7,  6,  5,  4,   /* total_coeff 1..4   */
         4,  6,  5,  6,    7,  6,  5,  8,   15,  6,  5,  4,   11, 14, 13,  4,   /* total_coeff 5..8   */
        15, 10,  9,  4,   11, 14, 13, 12,    8, 10,  9,  8,   15, 14, 13, 12,   /* total_coeff 9..12  */
        11, 10,  9, 12,    7, 11,  6,  8,    9,  8, 10,  1,    7,  6,  5,  4,   /* total_coeff 13..16 */
    },
    {   /* codebook 2 */
        15,  0,  0,  0,                                                         /* total_coeff 0      */
        15, 14,  0,  0,   11, 15, 13,  0,    8, 12, 14, 12,   15, 10, 11, 11,   /* total_coeff 1..4   */
        11,  8,  9, 10,    9, 14, 13,  9,    8, 10,  9,  8,   15, 14, 13, 13,   /* total_coeff 5..8   */
        11, 14, 10, 12,   15, 10, 13, 12,   11, 14,  9, 12,    8, 10, 13,  8,   /* total_coeff 9..12  */
        13,  7,  9, 12,    9, 12, 11, 10,    5,  8,  7,  6,    1,  4,  3,  2,   /* total_coeff 13..16 */
    },
    {   /* codebook 3 */
         3,  0,  0,  0,                                                         /* total_coeff 0      */
         0,  1,  0,  0,    4,  5,  6,  0,    8,  9, 10, 11,   12, 13, 14, 15,   /* total_coeff 1..4   */
        16, 17, 18, 19,   20, 21, 22, 23,   24, 25, 26, 27,   28, 29, 30, 31,   /* total_coeff 5..8   */
        32, 33, 34, 35,   36, 37, 38, 39,   40, 41, 42, 43,   44, 45, 46, 47,   /* total_coeff 9..12  */
        48, 49, 50, 51,   52, 53, 54, 55,   56, 57, 58, 59,   60, 61, 62, 63,   /* total_coeff 13..16 */
    },
};
150
/**
 * Code lengths of the total_zeros codebooks.
 *
 * Row r is the codebook used when total_coeff == r + 1; the column lines up
 * with the decoded total_zeros value. Only 15 of the 16 declared rows are
 * given; the remaining row and all missing trailing columns are
 * zero-initialised.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },    /* total_coeff  1 */
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },       /* total_coeff  2 */
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },          /* total_coeff  3 */
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },             /* total_coeff  4 */
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },                /* total_coeff  5 */
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },                   /* total_coeff  6 */
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },                      /* total_coeff  7 */
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },                         /* total_coeff  8 */
    { 6, 6, 4, 2, 2, 3, 2, 5 },                            /* total_coeff  9 */
    { 5, 5, 3, 2, 2, 2, 4 },                               /* total_coeff 10 */
    { 4, 4, 3, 3, 1, 3 },                                  /* total_coeff 11 */
    { 4, 4, 2, 1, 3 },                                     /* total_coeff 12 */
    { 3, 3, 1, 2 },                                        /* total_coeff 13 */
    { 2, 2, 1 },                                           /* total_coeff 14 */
    { 1, 1 },                                              /* total_coeff 15 */
};
168
/**
 * Code bit patterns of the total_zeros codebooks.
 *
 * Row r is the codebook used when total_coeff == r + 1; the column lines up
 * with the decoded total_zeros value. Only 15 of the 16 declared rows are
 * given; the remaining row and all missing trailing columns are
 * zero-initialised.
 */
static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },    /* total_coeff  1 */
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },       /* total_coeff  2 */
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },          /* total_coeff  3 */
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },             /* total_coeff  4 */
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                /* total_coeff  5 */
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                   /* total_coeff  6 */
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },                      /* total_coeff  7 */
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },                         /* total_coeff  8 */
    { 1, 0, 1, 3, 2, 1, 1, 1 },                            /* total_coeff  9 */
    { 1, 0, 1, 3, 2, 1, 1 },                               /* total_coeff 10 */
    { 0, 1, 1, 2, 1, 3 },                                  /* total_coeff 11 */
    { 0, 1, 1, 1, 1 },                                     /* total_coeff 12 */
    { 0, 1, 1, 1 },                                        /* total_coeff 13 */
    { 0, 1, 1 },                                           /* total_coeff 14 */
    { 0, 1 },                                              /* total_coeff 15 */
};
186
/**
 * Code lengths of the chroma DC total_zeros codebooks (4 coefficients).
 * Row r is the codebook used when total_coeff == r + 1; the column lines up
 * with the decoded total_zeros value.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 },     /* total_coeff 1 */
    { 1, 2, 2, 0 },     /* total_coeff 2 */
    { 1, 1, 0, 0 },     /* total_coeff 3 */
};

/**
 * Code bit patterns matching chroma_dc_total_zeros_len entry by entry.
 */
static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 },     /* total_coeff 1 */
    { 1, 1, 0, 0 },     /* total_coeff 2 */
    { 1, 0, 0, 0 },     /* total_coeff 3 */
};
198
/**
 * Code lengths of the 4:2:2 chroma DC total_zeros codebooks (8 coefficients).
 * Row r is the codebook used when total_coeff == r + 1; the column lines up
 * with the decoded total_zeros value. Missing trailing columns are
 * zero-initialised.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8] = {
    { 1, 3, 3, 4, 4, 4, 5, 5 },     /* total_coeff 1 */
    { 3, 2, 3, 3, 3, 3, 3 },        /* total_coeff 2 */
    { 3, 3, 2, 2, 3, 3 },           /* total_coeff 3 */
    { 3, 2, 2, 2, 3 },              /* total_coeff 4 */
    { 2, 2, 2, 2 },                 /* total_coeff 5 */
    { 2, 2, 1 },                    /* total_coeff 6 */
    { 1, 1 },                       /* total_coeff 7 */
};

/**
 * Code bit patterns matching chroma422_dc_total_zeros_len entry by entry.
 */
static const uint8_t chroma422_dc_total_zeros_bits[7][8] = {
    { 1, 2, 3, 2, 3, 1, 1, 0 },     /* total_coeff 1 */
    { 0, 1, 1, 4, 5, 6, 7 },        /* total_coeff 2 */
    { 0, 1, 1, 2, 6, 7 },           /* total_coeff 3 */
    { 6, 0, 1, 2, 7 },              /* total_coeff 4 */
    { 0, 1, 2, 3 },                 /* total_coeff 5 */
    { 0, 1, 1 },                    /* total_coeff 6 */
    { 0, 1 },                       /* total_coeff 7 */
};
218
/**
 * Code lengths of the run_before codebooks.
 *
 * Rows 0..5 are the codebooks used while 1..6 zeros remain (row = zeros
 * left - 1); row 6 is the codebook used once 7 or more zeros remain. The
 * column lines up with the decoded run value. Missing trailing columns are
 * zero-initialised.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },                                                   /* zeros_left 1  */
    { 1, 2, 2 },                                                /* zeros_left 2  */
    { 2, 2, 2, 2 },                                             /* zeros_left 3  */
    { 2, 2, 2, 3, 3 },                                          /* zeros_left 4  */
    { 2, 2, 3, 3, 3, 3 },                                       /* zeros_left 5  */
    { 2, 3, 3, 3, 3, 3, 3 },                                    /* zeros_left 6  */
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },          /* zeros_left 7+ */
};

/**
 * Code bit patterns matching run_len entry by entry.
 */
static const uint8_t run_bits[7][16] = {
    { 1, 0 },                                                   /* zeros_left 1  */
    { 1, 1, 0 },                                                /* zeros_left 2  */
    { 3, 2, 1, 0 },                                             /* zeros_left 3  */
    { 3, 2, 1, 1, 0 },                                          /* zeros_left 4  */
    { 3, 2, 3, 2, 1, 0 },                                       /* zeros_left 5  */
    { 3, 0, 1, 3, 2, 5, 4 },                                    /* zeros_left 6  */
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },            /* zeros_left 7+ */
};
238
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS           8
277 #define TOTAL_ZEROS_VLC_BITS           9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS                   3
281 #define RUN7_VLC_BITS                  6
282
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325
326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410
411         init_cavlc_level_tab();
412     }
413 }
414
415 /**
416  *
417  */
418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434
435     return log-1;
436 }
437
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
445 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     MpegEncContext * const s = &h->s;
447     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448     int level[16];
449     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
450
451     //FIXME put trailing_onex into the context
452
453     if(max_coeff <= 8){
454         if (max_coeff == 4)
455             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
456         else
457             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458         total_coeff= coeff_token>>2;
459     }else{
460         if(n >= LUMA_DC_BLOCK_INDEX){
461             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
462             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463             total_coeff= coeff_token>>2;
464         }else{
465             total_coeff= pred_non_zero_count(h, n);
466             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467             total_coeff= coeff_token>>2;
468         }
469     }
470     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
471
472     //FIXME set last_non_zero?
473
474     if(total_coeff==0)
475         return 0;
476     if(total_coeff > (unsigned)max_coeff) {
477         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
478         return -1;
479     }
480
481     trailing_ones= coeff_token&3;
482     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483     assert(total_coeff<=16);
484
485     i = show_bits(gb, 3);
486     skip_bits(gb, trailing_ones);
487     level[0] = 1-((i&4)>>1);
488     level[1] = 1-((i&2)   );
489     level[2] = 1-((i&1)<<1);
490
491     if(trailing_ones<total_coeff) {
492         int mask, prefix;
493         int suffix_length = total_coeff > 10 & trailing_ones < 3;
494         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
496
497         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
498         if(level_code >= 100){
499             prefix= level_code - 100;
500             if(prefix == LEVEL_TAB_BITS)
501                 prefix += get_level_prefix(gb);
502
503             //first coefficient has suffix_length equal to 0 or 1
504             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
505                 if(suffix_length)
506                     level_code= (prefix<<1) + get_bits1(gb); //part
507                 else
508                     level_code= prefix; //part
509             }else if(prefix==14){
510                 if(suffix_length)
511                     level_code= (prefix<<1) + get_bits1(gb); //part
512                 else
513                     level_code= prefix + get_bits(gb, 4); //part
514             }else{
515                 level_code= 30 + get_bits(gb, prefix-3); //part
516                 if(prefix>=16){
517                     if(prefix > 25+3){
518                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
519                         return -1;
520                     }
521                     level_code += (1<<(prefix-3))-4096;
522                 }
523             }
524
525             if(trailing_ones < 3) level_code += 2;
526
527             suffix_length = 2;
528             mask= -(level_code&1);
529             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530         }else{
531             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532
533             suffix_length = 1 + (level_code + 3U > 6U);
534             level[trailing_ones]= level_code;
535         }
536
537         //remaining coefficients have suffix_length > 0
538         for(i=trailing_ones+1;i<total_coeff;i++) {
539             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541             level_code= cavlc_level_tab[suffix_length][bitsi][0];
542
543             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544             if(level_code >= 100){
545                 prefix= level_code - 100;
546                 if(prefix == LEVEL_TAB_BITS){
547                     prefix += get_level_prefix(gb);
548                 }
549                 if(prefix<15){
550                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551                 }else{
552                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
553                     if(prefix>=16)
554                         level_code += (1<<(prefix-3))-4096;
555                 }
556                 mask= -(level_code&1);
557                 level_code= (((2+level_code)>>1) ^ mask) - mask;
558             }
559             level[i]= level_code;
560             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
561         }
562     }
563
564     if(total_coeff == max_coeff)
565         zeros_left=0;
566     else{
567         if (max_coeff <= 8) {
568             if (max_coeff == 4)
569                 zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
570                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
571             else
572                 zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
573                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
574         } else {
575             zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
576         }
577     }
578
579 #define STORE_BLOCK(type) \
580     scantable += zeros_left + total_coeff - 1; \
581     if(n >= LUMA_DC_BLOCK_INDEX){ \
582         ((type*)block)[*scantable] = level[0]; \
583         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
584             if(zeros_left < 7) \
585                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
586             else \
587                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
588             zeros_left -= run_before; \
589             scantable -= 1 + run_before; \
590             ((type*)block)[*scantable]= level[i]; \
591         } \
592         for(;i<total_coeff;i++) { \
593             scantable--; \
594             ((type*)block)[*scantable]= level[i]; \
595         } \
596     }else{ \
597         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
598         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
599             if(zeros_left < 7) \
600                 run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
601             else \
602                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
603             zeros_left -= run_before; \
604             scantable -= 1 + run_before; \
605             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
606         } \
607         for(;i<total_coeff;i++) { \
608             scantable--; \
609             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610         } \
611     }
612
613     if (h->pixel_shift) {
614         STORE_BLOCK(int32_t)
615     } else {
616         STORE_BLOCK(int16_t)
617     }
618
619     if(zeros_left<0){
620         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
621         return -1;
622     }
623
624     return 0;
625 }
626
627 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
628     int i4x4, i8x8;
629     MpegEncContext * const s = &h->s;
630     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
631     if(IS_INTRA16x16(mb_type)){
632         AV_ZERO128(h->mb_luma_dc[p]+0);
633         AV_ZERO128(h->mb_luma_dc[p]+8);
634         AV_ZERO128(h->mb_luma_dc[p]+16);
635         AV_ZERO128(h->mb_luma_dc[p]+24);
636         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
637             return -1; //FIXME continue if partitioned and other return -1 too
638         }
639
640         assert((cbp&15) == 0 || (cbp&15) == 15);
641
642         if(cbp&15){
643             for(i8x8=0; i8x8<4; i8x8++){
644                 for(i4x4=0; i4x4<4; i4x4++){
645                     const int index= i4x4 + 4*i8x8 + p*16;
646                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
647                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
648                         return -1;
649                     }
650                 }
651             }
652             return 0xf;
653         }else{
654             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
655             return 0;
656         }
657     }else{
658         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
659         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
660         int new_cbp = 0;
661         for(i8x8=0; i8x8<4; i8x8++){
662             if(cbp & (1<<i8x8)){
663                 if(IS_8x8DCT(mb_type)){
664                     DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
665                     uint8_t *nnz;
666                     for(i4x4=0; i4x4<4; i4x4++){
667                         const int index= i4x4 + 4*i8x8 + p*16;
668                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
669                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
670                             return -1;
671                     }
672                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
673                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
674                     new_cbp |= !!nnz[0] << i8x8;
675                 }else{
676                     for(i4x4=0; i4x4<4; i4x4++){
677                         const int index= i4x4 + 4*i8x8 + p*16;
678                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
679                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
680                             return -1;
681                         }
682                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
683                     }
684                 }
685             }else{
686                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
687                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
688             }
689         }
690         return new_cbp;
691     }
692 }
693
694 int ff_h264_decode_mb_cavlc(H264Context *h){
695     MpegEncContext * const s = &h->s;
696     int mb_xy;
697     int partition_count;
698     unsigned int mb_type, cbp;
699     int dct8x8_allowed= h->pps.transform_8x8_mode;
700     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
701     const int pixel_shift = h->pixel_shift;
702
703     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
704
705     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
706     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
707                 down the code */
708     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
709         if(s->mb_skip_run==-1)
710             s->mb_skip_run= get_ue_golomb(&s->gb);
711
712         if (s->mb_skip_run--) {
713             if(FRAME_MBAFF && (s->mb_y&1) == 0){
714                 if(s->mb_skip_run==0)
715                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
716             }
717             decode_mb_skip(h);
718             return 0;
719         }
720     }
721     if(FRAME_MBAFF){
722         if( (s->mb_y&1) == 0 )
723             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
724     }
725
726     h->prev_mb_skipped= 0;
727
728     mb_type= get_ue_golomb(&s->gb);
729     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
730         if(mb_type < 23){
731             partition_count= b_mb_type_info[mb_type].partition_count;
732             mb_type=         b_mb_type_info[mb_type].type;
733         }else{
734             mb_type -= 23;
735             goto decode_intra_mb;
736         }
737     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
738         if(mb_type < 5){
739             partition_count= p_mb_type_info[mb_type].partition_count;
740             mb_type=         p_mb_type_info[mb_type].type;
741         }else{
742             mb_type -= 5;
743             goto decode_intra_mb;
744         }
745     }else{
746        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
747         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
748             mb_type--;
749 decode_intra_mb:
750         if(mb_type > 25){
751             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
752             return -1;
753         }
754         partition_count=0;
755         cbp= i_mb_type_info[mb_type].cbp;
756         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
757         mb_type= i_mb_type_info[mb_type].type;
758     }
759
760     if(MB_FIELD)
761         mb_type |= MB_TYPE_INTERLACED;
762
763     h->slice_table[ mb_xy ]= h->slice_num;
764
765     if(IS_INTRA_PCM(mb_type)){
766         unsigned int x;
767         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
768                             h->sps.bit_depth_luma >> 3;
769
770         // We assume these blocks are very rare so we do not optimize it.
771         align_get_bits(&s->gb);
772
773         // The pixels are stored in the same order as levels in h->mb array.
774         for(x=0; x < mb_size; x++){
775             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
776         }
777
778         // In deblocking, the quantizer is 0
779         s->current_picture.f.qscale_table[mb_xy] = 0;
780         // All coeffs are present
781         memset(h->non_zero_count[mb_xy], 16, 48);
782
783         s->current_picture.f.mb_type[mb_xy] = mb_type;
784         return 0;
785     }
786
787     fill_decode_neighbors(h, mb_type);
788     fill_decode_caches(h, mb_type);
789
790     //mb_pred
791     if(IS_INTRA(mb_type)){
792         int pred_mode;
793 //            init_top_left_availability(h);
794         if(IS_INTRA4x4(mb_type)){
795             int i;
796             int di = 1;
797             if(dct8x8_allowed && get_bits1(&s->gb)){
798                 mb_type |= MB_TYPE_8x8DCT;
799                 di = 4;
800             }
801
802 //                fill_intra4x4_pred_table(h);
803             for(i=0; i<16; i+=di){
804                 int mode= pred_intra_mode(h, i);
805
806                 if(!get_bits1(&s->gb)){
807                     const int rem_mode= get_bits(&s->gb, 3);
808                     mode = rem_mode + (rem_mode >= mode);
809                 }
810
811                 if(di==4)
812                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
813                 else
814                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
815             }
816             write_back_intra_pred_mode(h);
817             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
818                 return -1;
819         }else{
820             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
821             if(h->intra16x16_pred_mode < 0)
822                 return -1;
823         }
824         if(decode_chroma){
825             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
826             if(pred_mode < 0)
827                 return -1;
828             h->chroma_pred_mode= pred_mode;
829         } else {
830             h->chroma_pred_mode = DC_128_PRED8x8;
831         }
832     }else if(partition_count==4){
833         int i, j, sub_partition_count[4], list, ref[2][4];
834
835         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
836             for(i=0; i<4; i++){
837                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
838                 if(h->sub_mb_type[i] >=13){
839                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
840                     return -1;
841                 }
842                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
843                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
844             }
845             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
846                 ff_h264_pred_direct_motion(h, &mb_type);
847                 h->ref_cache[0][scan8[4]] =
848                 h->ref_cache[1][scan8[4]] =
849                 h->ref_cache[0][scan8[12]] =
850                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
851             }
852         }else{
853             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
854             for(i=0; i<4; i++){
855                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
856                 if(h->sub_mb_type[i] >=4){
857                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
858                     return -1;
859                 }
860                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
861                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
862             }
863         }
864
865         for(list=0; list<h->list_count; list++){
866             int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF;
867             for(i=0; i<4; i++){
868                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
869                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
870                     unsigned int tmp;
871                     if(ref_count == 1){
872                         tmp= 0;
873                     }else if(ref_count == 2){
874                         tmp= get_bits1(&s->gb)^1;
875                     }else{
876                         tmp= get_ue_golomb_31(&s->gb);
877                         if(tmp>=ref_count){
878                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
879                             return -1;
880                         }
881                     }
882                     ref[list][i]= tmp;
883                 }else{
884                  //FIXME
885                     ref[list][i] = -1;
886                 }
887             }
888         }
889
890         if(dct8x8_allowed)
891             dct8x8_allowed = get_dct8x8_allowed(h);
892
893         for(list=0; list<h->list_count; list++){
894             for(i=0; i<4; i++){
895                 if(IS_DIRECT(h->sub_mb_type[i])) {
896                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
897                     continue;
898                 }
899                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
900                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
901
902                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
903                     const int sub_mb_type= h->sub_mb_type[i];
904                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
905                     for(j=0; j<sub_partition_count[i]; j++){
906                         int mx, my;
907                         const int index= 4*i + block_width*j;
908                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
909                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
910                         mx += get_se_golomb(&s->gb);
911                         my += get_se_golomb(&s->gb);
912                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
913
914                         if(IS_SUB_8X8(sub_mb_type)){
915                             mv_cache[ 1 ][0]=
916                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
917                             mv_cache[ 1 ][1]=
918                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
919                         }else if(IS_SUB_8X4(sub_mb_type)){
920                             mv_cache[ 1 ][0]= mx;
921                             mv_cache[ 1 ][1]= my;
922                         }else if(IS_SUB_4X8(sub_mb_type)){
923                             mv_cache[ 8 ][0]= mx;
924                             mv_cache[ 8 ][1]= my;
925                         }
926                         mv_cache[ 0 ][0]= mx;
927                         mv_cache[ 0 ][1]= my;
928                     }
929                 }else{
930                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
931                     p[0] = p[1]=
932                     p[8] = p[9]= 0;
933                 }
934             }
935         }
936     }else if(IS_DIRECT(mb_type)){
937         ff_h264_pred_direct_motion(h, &mb_type);
938         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
939     }else{
940         int list, mx, my, i;
941          //FIXME we should set ref_idx_l? to 0 if we use that later ...
942         if(IS_16X16(mb_type)){
943             for(list=0; list<h->list_count; list++){
944                     unsigned int val;
945                     if(IS_DIR(mb_type, 0, list)){
946                         int rc = h->ref_count[list] << MB_MBAFF;
947                         if (rc == 1) {
948                             val= 0;
949                         } else if (rc == 2) {
950                             val= get_bits1(&s->gb)^1;
951                         }else{
952                             val= get_ue_golomb_31(&s->gb);
953                             if (val >= rc) {
954                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
955                                 return -1;
956                             }
957                         }
958                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
959                     }
960             }
961             for(list=0; list<h->list_count; list++){
962                 if(IS_DIR(mb_type, 0, list)){
963                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
964                     mx += get_se_golomb(&s->gb);
965                     my += get_se_golomb(&s->gb);
966                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
967
968                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
969                 }
970             }
971         }
972         else if(IS_16X8(mb_type)){
973             for(list=0; list<h->list_count; list++){
974                     for(i=0; i<2; i++){
975                         unsigned int val;
976                         if(IS_DIR(mb_type, i, list)){
977                             int rc = h->ref_count[list] << MB_MBAFF;
978                             if (rc == 1) {
979                                 val= 0;
980                             } else if (rc == 2) {
981                                 val= get_bits1(&s->gb)^1;
982                             }else{
983                                 val= get_ue_golomb_31(&s->gb);
984                                 if (val >= rc) {
985                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
986                                     return -1;
987                                 }
988                             }
989                         }else
990                             val= LIST_NOT_USED&0xFF;
991                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
992                     }
993             }
994             for(list=0; list<h->list_count; list++){
995                 for(i=0; i<2; i++){
996                     unsigned int val;
997                     if(IS_DIR(mb_type, i, list)){
998                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
999                         mx += get_se_golomb(&s->gb);
1000                         my += get_se_golomb(&s->gb);
1001                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1002
1003                         val= pack16to32(mx,my);
1004                     }else
1005                         val=0;
1006                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1007                 }
1008             }
1009         }else{
1010             assert(IS_8X16(mb_type));
1011             for(list=0; list<h->list_count; list++){
1012                     for(i=0; i<2; i++){
1013                         unsigned int val;
1014                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1015                             int rc = h->ref_count[list] << MB_MBAFF;
1016                             if (rc == 1) {
1017                                 val= 0;
1018                             } else if (rc == 2) {
1019                                 val= get_bits1(&s->gb)^1;
1020                             }else{
1021                                 val= get_ue_golomb_31(&s->gb);
1022                                 if (val >= rc) {
1023                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1024                                     return -1;
1025                                 }
1026                             }
1027                         }else
1028                             val= LIST_NOT_USED&0xFF;
1029                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1030                     }
1031             }
1032             for(list=0; list<h->list_count; list++){
1033                 for(i=0; i<2; i++){
1034                     unsigned int val;
1035                     if(IS_DIR(mb_type, i, list)){
1036                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1037                         mx += get_se_golomb(&s->gb);
1038                         my += get_se_golomb(&s->gb);
1039                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1040
1041                         val= pack16to32(mx,my);
1042                     }else
1043                         val=0;
1044                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1045                 }
1046             }
1047         }
1048     }
1049
1050     if(IS_INTER(mb_type))
1051         write_back_motion(h, mb_type);
1052
1053     if(!IS_INTRA16x16(mb_type)){
1054         cbp= get_ue_golomb(&s->gb);
1055
1056         if(decode_chroma){
1057             if(cbp > 47){
1058                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1059                 return -1;
1060             }
1061             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1062             else                     cbp= golomb_to_inter_cbp   [cbp];
1063         }else{
1064             if(cbp > 15){
1065                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1066                 return -1;
1067             }
1068             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1069             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1070         }
1071     }
1072
1073     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1074         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1075     }
1076     h->cbp=
1077     h->cbp_table[mb_xy]= cbp;
1078     s->current_picture.f.mb_type[mb_xy] = mb_type;
1079
1080     if(cbp || IS_INTRA16x16(mb_type)){
1081         int i4x4, i8x8, chroma_idx;
1082         int dquant;
1083         int ret;
1084         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1085         const uint8_t *scan, *scan8x8;
1086         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1087
1088         if(IS_INTERLACED(mb_type)){
1089             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1090             scan= s->qscale ? h->field_scan : h->field_scan_q0;
1091         }else{
1092             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1093             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1094         }
1095
1096         dquant= get_se_golomb(&s->gb);
1097
1098         s->qscale += dquant;
1099
1100         if(((unsigned)s->qscale) > max_qp){
1101             if(s->qscale<0) s->qscale+= max_qp+1;
1102             else            s->qscale-= max_qp+1;
1103             if(((unsigned)s->qscale) > max_qp){
1104                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1105                 return -1;
1106             }
1107         }
1108
1109         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1110         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1111
1112         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1113             return -1;
1114         }
1115         h->cbp_table[mb_xy] |= ret << 12;
1116         if(CHROMA444){
1117             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1118                 return -1;
1119             }
1120             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1121                 return -1;
1122             }
1123         } else if (CHROMA422) {
1124             if(cbp&0x30){
1125                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1126                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1127                                         CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
1128                                         NULL, 8) < 0) {
1129                         return -1;
1130                     }
1131             }
1132
1133             if(cbp&0x20){
1134                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1135                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1136                     DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1137                     for (i8x8 = 0; i8x8 < 2; i8x8++) {
1138                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1139                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1140                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1141                                 return -1;
1142                             mb += 16 << pixel_shift;
1143                         }
1144                     }
1145                 }
1146             }else{
1147                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1148                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1149             }
1150         } else /* yuv420 */ {
1151             if(cbp&0x30){
1152                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1153                     if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1154                         return -1;
1155                     }
1156             }
1157
1158             if(cbp&0x20){
1159                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1160                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1161                     for(i4x4=0; i4x4<4; i4x4++){
1162                         const int index= 16 + 16*chroma_idx + i4x4;
1163                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1164                             return -1;
1165                         }
1166                     }
1167                 }
1168             }else{
1169                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1170                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1171             }
1172         }
1173     }else{
1174         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1175         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1176         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1177     }
1178     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1179     write_back_non_zero_count(h);
1180
1181     return 0;
1182 }