]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
avcodec/vc1: Simplify code setting and using extend_x/y
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC(h) 0
29 #define UNCHECKED_BITSTREAM_READER 1
30
31 #include "internal.h"
32 #include "avcodec.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37 #include "mpegutils.h"
38 #include "libavutil/avassert.h"
39
40
/*
 * Lookup table with 16 entries that is a permutation of the values 0..15.
 * NOTE(review): not referenced in the visible part of this file; judging by
 * the name it presumably maps the Golomb-coded coded_block_pattern index of
 * an inter macroblock to a 4-bit luma CBP for gray (no chroma) content --
 * confirm against its users.
 */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
43 };
44
/*
 * Same layout as golomb_to_inter_cbp_gray (a permutation of 0..15).
 * NOTE(review): presumably the intra 4x4 counterpart -- confirm against its
 * users, which are outside the visible part of this file.
 */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
47 };
48
/*
 * Code lengths (in bits) handed to init_vlc() to build
 * chroma_dc_coeff_token_vlc, the coeff_token table that decode_residual()
 * uses when max_coeff == 4.
 * One row of four entries per total_coeff value (0..4); the column is
 * trailing_ones, i.e. index = 4*total_coeff + trailing_ones, which matches
 * how decode_residual() splits the decoded symbol (>>2 and &3).
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code -- confirm against init_vlc().
 */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
50  2, 0, 0, 0,
51  6, 1, 0, 0,
52  6, 6, 3, 0,
53  6, 7, 7, 6,
54  6, 8, 8, 7,
55 };
56
/*
 * Code words matching chroma_dc_coeff_token_len entry for entry (same
 * 4*total_coeff + trailing_ones indexing).
 */
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
58  1, 0, 0, 0,
59  7, 1, 0, 0,
60  4, 6, 1, 0,
61  3, 3, 2, 5,
62  2, 3, 2, 0,
63 };
64
/*
 * Code lengths (in bits) handed to init_vlc() to build
 * chroma422_dc_coeff_token_vlc, the coeff_token table that decode_residual()
 * uses when max_coeff <= 8 but not 4.
 * One row of four entries per total_coeff value (0..8); the column is
 * trailing_ones (index = 4*total_coeff + trailing_ones).
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code -- confirm against init_vlc().
 */
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
66   1,  0,  0,  0,
67   7,  2,  0,  0,
68   7,  7,  3,  0,
69   9,  7,  7,  5,
70   9,  9,  7,  6,
71  10, 10,  9,  7,
72  11, 11, 10,  7,
73  12, 12, 11, 10,
74  13, 12, 12, 11,
75 };
76
/*
 * Code words matching chroma422_dc_coeff_token_len entry for entry.
 */
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
78   1,   0,  0, 0,
79  15,   1,  0, 0,
80  14,  13,  1, 0,
81   7,  12, 11, 1,
82   6,   5, 10, 1,
83   7,   6,  4, 9,
84   7,   6,  5, 8,
85   7,   6,  5, 4,
86   7,   5,  4, 4,
87 };
88
/*
 * Code lengths (in bits) for the four general coeff_token VLCs built into
 * coeff_token_vlc[0..3] by ff_h264_decode_init_vlc().
 * The first index selects the table; decode_residual() picks it through
 * coeff_token_table_index[] from the predicted non-zero count.
 * Within a table there are four entries per total_coeff value (0..16), the
 * column being trailing_ones (index = 4*total_coeff + trailing_ones).
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code -- confirm against init_vlc().
 */
89 static const uint8_t coeff_token_len[4][4*17]={
90 {
91      1, 0, 0, 0,
92      6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
93     11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
94     14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
95     16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
96 },
97 {
98      2, 0, 0, 0,
99      6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
100      8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
101     12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
102     13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
103 },
104 {
105      4, 0, 0, 0,
106      6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
107      7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
108      8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
109     10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
110 },
111 {
112      6, 0, 0, 0,
113      6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
114      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
115      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
116      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
117 }
118 };
119
/*
 * Code words matching coeff_token_len entry for entry (same table selection
 * and 4*total_coeff + trailing_ones indexing).
 */
120 static const uint8_t coeff_token_bits[4][4*17]={
121 {
122      1, 0, 0, 0,
123      5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
124      7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
125     15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
126     15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
127 },
128 {
129      3, 0, 0, 0,
130     11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
131      4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
132     15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
133     11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
134 },
135 {
136     15, 0, 0, 0,
137     15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
138     11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
139     11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
140     13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
141 },
142 {
143      3, 0, 0, 0,
144      0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
145     16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
146     32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
147     48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
148 }
149 };
150
/*
 * Code lengths (in bits) for the total_zeros VLCs.
 * Row r is used by ff_h264_decode_init_vlc() to build total_zeros_vlc[r];
 * decode_residual() selects the VLC for total_coeff - 1. Only 15 of the 16
 * declared rows are initialised and used; missing trailing entries are
 * implicitly zero.
 * NOTE(review): the column index is presumably the decoded total_zeros
 * value, and a length of 0 an unused code -- confirm against init_vlc().
 */
151 static const uint8_t total_zeros_len[16][16]= {
152     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155     {5,3,4,4,3,3,3,4,3,4,5,5,5},
156     {4,4,4,3,3,3,3,3,4,5,4,5},
157     {6,5,3,3,3,3,3,3,4,3,6},
158     {6,5,3,3,3,2,3,4,3,6},
159     {6,4,5,3,2,2,3,3,6},
160     {6,6,4,2,2,3,2,5},
161     {5,5,3,2,2,2,4},
162     {4,4,3,3,1,3},
163     {4,4,2,1,3},
164     {3,3,1,2},
165     {2,2,1},
166     {1,1},
167 };
168
/*
 * Code words matching total_zeros_len entry for entry (same row/column
 * layout).
 */
169 static const uint8_t total_zeros_bits[16][16]= {
170     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173     {3,7,5,4,6,5,4,3,3,2,2,1,0},
174     {5,4,3,7,6,5,4,3,2,1,1,0},
175     {1,1,7,6,5,4,3,2,1,1,0},
176     {1,1,5,4,3,3,2,1,1,0},
177     {1,1,1,3,3,2,2,1,0},
178     {1,0,1,3,2,1,1,1},
179     {1,0,1,3,2,1,1},
180     {0,1,1,2,1,3},
181     {0,1,1,1,1},
182     {0,1,1,1},
183     {0,1,1},
184     {0,1},
185 };
186
/*
 * Code lengths (in bits) for the chroma DC total_zeros VLCs used by
 * decode_residual() when max_coeff == 4.
 * Row r builds chroma_dc_total_zeros_vlc[r], which is selected for
 * total_coeff - 1.
 * NOTE(review): the column index is presumably the decoded total_zeros
 * value, and a length of 0 an unused code -- confirm against init_vlc().
 */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
188     { 1, 2, 3, 3,},
189     { 1, 2, 2, 0,},
190     { 1, 1, 0, 0,},
191 };
192
/*
 * Code words matching chroma_dc_total_zeros_len entry for entry.
 */
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
194     { 1, 1, 1, 0,},
195     { 1, 1, 0, 0,},
196     { 1, 0, 0, 0,},
197 };
198
/*
 * Code lengths (in bits) for the chroma DC total_zeros VLCs used by
 * decode_residual() when max_coeff <= 8 but not 4.
 * Row r builds chroma422_dc_total_zeros_vlc[r], which is selected for
 * total_coeff - 1. Missing trailing entries are implicitly zero.
 * NOTE(review): the column index is presumably the decoded total_zeros
 * value, and a length of 0 an unused code -- confirm against init_vlc().
 */
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200     { 1, 3, 3, 4, 4, 4, 5, 5 },
201     { 3, 2, 3, 3, 3, 3, 3 },
202     { 3, 3, 2, 2, 3, 3 },
203     { 3, 2, 2, 2, 3 },
204     { 2, 2, 2, 2 },
205     { 2, 2, 1 },
206     { 1, 1 },
207 };
208
/*
 * Code words matching chroma422_dc_total_zeros_len entry for entry.
 */
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210     { 1, 2, 3, 2, 3, 1, 1, 0 },
211     { 0, 1, 1, 4, 5, 6, 7 },
212     { 0, 1, 1, 2, 6, 7 },
213     { 6, 0, 1, 2, 7 },
214     { 0, 1, 2, 3 },
215     { 0, 1, 1 },
216     { 0, 1 },
217 };
218
/*
 * Code lengths (in bits) for the run_before VLCs.
 * Rows 0..5 build run_vlc[0..5] (7 codes each), which decode_residual() uses
 * for zeros_left 1..6 (row zeros_left - 1). Row 6 builds run7_vlc (16 codes),
 * used when zeros_left >= 7. Missing trailing entries are implicitly zero.
 * NOTE(review): the column index is presumably the decoded run_before value,
 * and a length of 0 an unused code -- confirm against init_vlc().
 */
219 static const uint8_t run_len[7][16]={
220     {1,1},
221     {1,2,2},
222     {2,2,2,2},
223     {2,2,2,3,3},
224     {2,2,3,3,3,3},
225     {2,3,3,3,3,3,3},
226     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
227 };
228
/*
 * Code words matching run_len entry for entry.
 */
229 static const uint8_t run_bits[7][16]={
230     {1,0},
231     {1,1,0},
232     {3,2,1,0},
233     {3,2,1,1,0},
234     {3,2,3,2,1,0},
235     {3,0,1,3,2,5,4},
236     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
237 };
238
/*
 * Statically allocated VLC objects and their backing tables. They are filled
 * once by ff_h264_decode_init_vlc() using INIT_VLC_USE_NEW_STATIC; each
 * *_size constant is written to the matching VLC's table_allocated field.
 */

/* Four general coeff_token VLCs packed back to back into one array; the
 * per-table sizes must add up to the array length (asserted at init). */
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242
/* coeff_token VLC used by decode_residual() when max_coeff == 4. */
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246
/* coeff_token VLC used by decode_residual() when max_coeff <= 8 but not 4. */
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250
/* total_zeros VLCs, one per total_coeff value 1..15 (entry total_coeff-1). */
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254
/* total_zeros VLCs for the max_coeff == 4 case (entry total_coeff-1). */
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258
/* total_zeros VLCs for the other max_coeff <= 8 case (entry total_coeff-1). */
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262
/* run_before VLCs for zeros_left 1..6 (entry zeros_left-1). */
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266
/* run_before VLC for zeros_left >= 7. */
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270
/*
 * Direct lookup for coefficient levels, filled by init_cavlc_level_tab().
 * Indexed by [suffix_length][next LEVEL_TAB_BITS bits of the stream];
 * element [0] is the decoded level, or 100 + prefix when the code does not
 * fit into LEVEL_TAB_BITS bits; element [1] is the number of bits to skip.
 */
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273
/* First-stage lookup widths (in bits) passed to init_vlc() and get_vlc2()
 * for the tables above. */
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS           8
277 #define TOTAL_ZEROS_VLC_BITS           9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS                   3
281 #define RUN7_VLC_BITS                  6
282
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292
293     if(i<64) i= (i+1)>>1;
294
295     tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296
297     return i&31;
298 }
299
300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325
326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410
411         init_cavlc_level_tab();
412     }
413 }
414
415 /**
416  *
417  */
418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434
435     return log-1;
436 }
437
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447     int level[16];
448     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449
450     //FIXME put trailing_onex into the context
451
452     if(max_coeff <= 8){
453         if (max_coeff == 4)
454             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455         else
456             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457         total_coeff= coeff_token>>2;
458     }else{
459         if(n >= LUMA_DC_BLOCK_INDEX){
460             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462             total_coeff= coeff_token>>2;
463         }else{
464             total_coeff= pred_non_zero_count(h, n);
465             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466             total_coeff= coeff_token>>2;
467         }
468     }
469     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470
471     //FIXME set last_non_zero?
472
473     if(total_coeff==0)
474         return 0;
475     if(total_coeff > (unsigned)max_coeff) {
476         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
477         return -1;
478     }
479
480     trailing_ones= coeff_token&3;
481     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482     av_assert2(total_coeff<=16);
483
484     i = show_bits(gb, 3);
485     skip_bits(gb, trailing_ones);
486     level[0] = 1-((i&4)>>1);
487     level[1] = 1-((i&2)   );
488     level[2] = 1-((i&1)<<1);
489
490     if(trailing_ones<total_coeff) {
491         int mask, prefix;
492         int suffix_length = total_coeff > 10 & trailing_ones < 3;
493         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495
496         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
497         if(level_code >= 100){
498             prefix= level_code - 100;
499             if(prefix == LEVEL_TAB_BITS)
500                 prefix += get_level_prefix(gb);
501
502             //first coefficient has suffix_length equal to 0 or 1
503             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504                 if(suffix_length)
505                     level_code= (prefix<<1) + get_bits1(gb); //part
506                 else
507                     level_code= prefix; //part
508             }else if(prefix==14){
509                 if(suffix_length)
510                     level_code= (prefix<<1) + get_bits1(gb); //part
511                 else
512                     level_code= prefix + get_bits(gb, 4); //part
513             }else{
514                 level_code= 30;
515                 if(prefix>=16){
516                     if(prefix > 25+3){
517                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
518                         return -1;
519                     }
520                     level_code += (1<<(prefix-3))-4096;
521                 }
522                 level_code += get_bits(gb, prefix-3); //part
523             }
524
525             if(trailing_ones < 3) level_code += 2;
526
527             suffix_length = 2;
528             mask= -(level_code&1);
529             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530         }else{
531             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532
533             suffix_length = 1 + (level_code + 3U > 6U);
534             level[trailing_ones]= level_code;
535         }
536
537         //remaining coefficients have suffix_length > 0
538         for(i=trailing_ones+1;i<total_coeff;i++) {
539             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541             level_code= cavlc_level_tab[suffix_length][bitsi][0];
542
543             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544             if(level_code >= 100){
545                 prefix= level_code - 100;
546                 if(prefix == LEVEL_TAB_BITS){
547                     prefix += get_level_prefix(gb);
548                 }
549                 if(prefix<15){
550                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551                 }else{
552                     level_code = 15<<suffix_length;
553                     if (prefix>=16) {
554                         if(prefix > 25+3){
555                             av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
556                             return AVERROR_INVALIDDATA;
557                         }
558                         level_code += (1<<(prefix-3))-4096;
559                     }
560                     level_code += get_bits(gb, prefix-3);
561                 }
562                 mask= -(level_code&1);
563                 level_code= (((2+level_code)>>1) ^ mask) - mask;
564             }
565             level[i]= level_code;
566             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
567         }
568     }
569
570     if(total_coeff == max_coeff)
571         zeros_left=0;
572     else{
573         if (max_coeff <= 8) {
574             if (max_coeff == 4)
575                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
576                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
577             else
578                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
579                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
580         } else {
581             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
582         }
583     }
584
585 #define STORE_BLOCK(type) \
586     scantable += zeros_left + total_coeff - 1; \
587     if(n >= LUMA_DC_BLOCK_INDEX){ \
588         ((type*)block)[*scantable] = level[0]; \
589         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
590             if(zeros_left < 7) \
591                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
592             else \
593                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
594             zeros_left -= run_before; \
595             scantable -= 1 + run_before; \
596             ((type*)block)[*scantable]= level[i]; \
597         } \
598         for(;i<total_coeff;i++) { \
599             scantable--; \
600             ((type*)block)[*scantable]= level[i]; \
601         } \
602     }else{ \
603         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
604         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
605             if(zeros_left < 7) \
606                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
607             else \
608                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
609             zeros_left -= run_before; \
610             scantable -= 1 + run_before; \
611             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
612         } \
613         for(;i<total_coeff;i++) { \
614             scantable--; \
615             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
616         } \
617     }
618
619     if (h->pixel_shift) {
620         STORE_BLOCK(int32_t)
621     } else {
622         STORE_BLOCK(int16_t)
623     }
624
625     if(zeros_left<0){
626         av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
627         return -1;
628     }
629
630     return 0;
631 }
632
633 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
634     int i4x4, i8x8;
635     int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
636     if(IS_INTRA16x16(mb_type)){
637         AV_ZERO128(h->mb_luma_dc[p]+0);
638         AV_ZERO128(h->mb_luma_dc[p]+8);
639         AV_ZERO128(h->mb_luma_dc[p]+16);
640         AV_ZERO128(h->mb_luma_dc[p]+24);
641         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
642             return -1; //FIXME continue if partitioned and other return -1 too
643         }
644
645         av_assert2((cbp&15) == 0 || (cbp&15) == 15);
646
647         if(cbp&15){
648             for(i8x8=0; i8x8<4; i8x8++){
649                 for(i4x4=0; i4x4<4; i4x4++){
650                     const int index= i4x4 + 4*i8x8 + p*16;
651                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
652                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
653                         return -1;
654                     }
655                 }
656             }
657             return 0xf;
658         }else{
659             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
660             return 0;
661         }
662     }else{
663         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
664         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
665         int new_cbp = 0;
666         for(i8x8=0; i8x8<4; i8x8++){
667             if(cbp & (1<<i8x8)){
668                 if(IS_8x8DCT(mb_type)){
669                     int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
670                     uint8_t *nnz;
671                     for(i4x4=0; i4x4<4; i4x4++){
672                         const int index= i4x4 + 4*i8x8 + p*16;
673                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
674                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
675                             return -1;
676                     }
677                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
678                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
679                     new_cbp |= !!nnz[0] << i8x8;
680                 }else{
681                     for(i4x4=0; i4x4<4; i4x4++){
682                         const int index= i4x4 + 4*i8x8 + p*16;
683                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
684                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
685                             return -1;
686                         }
687                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
688                     }
689                 }
690             }else{
691                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
692                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
693             }
694         }
695         return new_cbp;
696     }
697 }
698
699 int ff_h264_decode_mb_cavlc(H264Context *h){
700     int mb_xy;
701     int partition_count;
702     unsigned int mb_type, cbp;
703     int dct8x8_allowed= h->pps.transform_8x8_mode;
704     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
705     const int pixel_shift = h->pixel_shift;
706     unsigned local_ref_count[2];
707
708     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
709
710     tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
711     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
712                 down the code */
713     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
714         if(h->mb_skip_run==-1)
715             h->mb_skip_run= get_ue_golomb_long(&h->gb);
716
717         if (h->mb_skip_run--) {
718             if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
719                 if(h->mb_skip_run==0)
720                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
721             }
722             decode_mb_skip(h);
723             return 0;
724         }
725     }
726     if (FRAME_MBAFF(h)) {
727         if( (h->mb_y&1) == 0 )
728             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
729     }
730
731     h->prev_mb_skipped= 0;
732
733     mb_type= get_ue_golomb(&h->gb);
734     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
735         if(mb_type < 23){
736             partition_count= b_mb_type_info[mb_type].partition_count;
737             mb_type=         b_mb_type_info[mb_type].type;
738         }else{
739             mb_type -= 23;
740             goto decode_intra_mb;
741         }
742     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
743         if(mb_type < 5){
744             partition_count= p_mb_type_info[mb_type].partition_count;
745             mb_type=         p_mb_type_info[mb_type].type;
746         }else{
747             mb_type -= 5;
748             goto decode_intra_mb;
749         }
750     }else{
751        av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
752         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
753             mb_type--;
754 decode_intra_mb:
755         if(mb_type > 25){
756             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
757             return -1;
758         }
759         partition_count=0;
760         cbp= i_mb_type_info[mb_type].cbp;
761         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
762         mb_type= i_mb_type_info[mb_type].type;
763     }
764
765     if(MB_FIELD(h))
766         mb_type |= MB_TYPE_INTERLACED;
767
768     h->slice_table[ mb_xy ]= h->slice_num;
769
770     if(IS_INTRA_PCM(mb_type)){
771         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
772                             h->sps.bit_depth_luma;
773
774         // We assume these blocks are very rare so we do not optimize it.
775         h->intra_pcm_ptr = align_get_bits(&h->gb);
776         if (get_bits_left(&h->gb) < mb_size) {
777             av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
778             return AVERROR_INVALIDDATA;
779         }
780         skip_bits_long(&h->gb, mb_size);
781
782         // In deblocking, the quantizer is 0
783         h->cur_pic.qscale_table[mb_xy] = 0;
784         // All coeffs are present
785         memset(h->non_zero_count[mb_xy], 16, 48);
786
787         h->cur_pic.mb_type[mb_xy] = mb_type;
788         return 0;
789     }
790
791     local_ref_count[0] = h->ref_count[0] << MB_MBAFF(h);
792     local_ref_count[1] = h->ref_count[1] << MB_MBAFF(h);
793
794     fill_decode_neighbors(h, mb_type);
795     fill_decode_caches(h, mb_type);
796
797     //mb_pred
798     if(IS_INTRA(mb_type)){
799         int pred_mode;
800 //            init_top_left_availability(h);
801         if(IS_INTRA4x4(mb_type)){
802             int i;
803             int di = 1;
804             if(dct8x8_allowed && get_bits1(&h->gb)){
805                 mb_type |= MB_TYPE_8x8DCT;
806                 di = 4;
807             }
808
809 //                fill_intra4x4_pred_table(h);
810             for(i=0; i<16; i+=di){
811                 int mode= pred_intra_mode(h, i);
812
813                 if(!get_bits1(&h->gb)){
814                     const int rem_mode= get_bits(&h->gb, 3);
815                     mode = rem_mode + (rem_mode >= mode);
816                 }
817
818                 if(di==4)
819                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
820                 else
821                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
822             }
823             write_back_intra_pred_mode(h);
824             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
825                 return -1;
826         }else{
827             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
828             if(h->intra16x16_pred_mode < 0)
829                 return -1;
830         }
831         if(decode_chroma){
832             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
833             if(pred_mode < 0)
834                 return -1;
835             h->chroma_pred_mode= pred_mode;
836         } else {
837             h->chroma_pred_mode = DC_128_PRED8x8;
838         }
839     }else if(partition_count==4){
840         int i, j, sub_partition_count[4], list, ref[2][4];
841
842         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
843             for(i=0; i<4; i++){
844                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
845                 if(h->sub_mb_type[i] >=13){
846                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
847                     return -1;
848                 }
849                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
850                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
851             }
852             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
853                 ff_h264_pred_direct_motion(h, &mb_type);
854                 h->ref_cache[0][scan8[4]] =
855                 h->ref_cache[1][scan8[4]] =
856                 h->ref_cache[0][scan8[12]] =
857                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
858             }
859         }else{
860             av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
861             for(i=0; i<4; i++){
862                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
863                 if(h->sub_mb_type[i] >=4){
864                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
865                     return -1;
866                 }
867                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
868                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
869             }
870         }
871
872         for(list=0; list<h->list_count; list++){
873             int ref_count = IS_REF0(mb_type) ? 1 : local_ref_count[list];
874             for(i=0; i<4; i++){
875                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
876                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
877                     unsigned int tmp;
878                     if(ref_count == 1){
879                         tmp= 0;
880                     }else if(ref_count == 2){
881                         tmp= get_bits1(&h->gb)^1;
882                     }else{
883                         tmp= get_ue_golomb_31(&h->gb);
884                         if(tmp>=ref_count){
885                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
886                             return -1;
887                         }
888                     }
889                     ref[list][i]= tmp;
890                 }else{
891                  //FIXME
892                     ref[list][i] = -1;
893                 }
894             }
895         }
896
897         if(dct8x8_allowed)
898             dct8x8_allowed = get_dct8x8_allowed(h);
899
900         for(list=0; list<h->list_count; list++){
901             for(i=0; i<4; i++){
902                 if(IS_DIRECT(h->sub_mb_type[i])) {
903                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
904                     continue;
905                 }
906                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
907                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
908
909                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
910                     const int sub_mb_type= h->sub_mb_type[i];
911                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
912                     for(j=0; j<sub_partition_count[i]; j++){
913                         int mx, my;
914                         const int index= 4*i + block_width*j;
915                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
916                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
917                         mx += get_se_golomb(&h->gb);
918                         my += get_se_golomb(&h->gb);
919                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
920
921                         if(IS_SUB_8X8(sub_mb_type)){
922                             mv_cache[ 1 ][0]=
923                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
924                             mv_cache[ 1 ][1]=
925                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
926                         }else if(IS_SUB_8X4(sub_mb_type)){
927                             mv_cache[ 1 ][0]= mx;
928                             mv_cache[ 1 ][1]= my;
929                         }else if(IS_SUB_4X8(sub_mb_type)){
930                             mv_cache[ 8 ][0]= mx;
931                             mv_cache[ 8 ][1]= my;
932                         }
933                         mv_cache[ 0 ][0]= mx;
934                         mv_cache[ 0 ][1]= my;
935                     }
936                 }else{
937                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
938                     p[0] = p[1]=
939                     p[8] = p[9]= 0;
940                 }
941             }
942         }
943     }else if(IS_DIRECT(mb_type)){
944         ff_h264_pred_direct_motion(h, &mb_type);
945         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
946     }else{
947         int list, mx, my, i;
948          //FIXME we should set ref_idx_l? to 0 if we use that later ...
949         if(IS_16X16(mb_type)){
950             for(list=0; list<h->list_count; list++){
951                     unsigned int val;
952                     if(IS_DIR(mb_type, 0, list)){
953                         if(local_ref_count[list]==1){
954                             val= 0;
955                         } else if(local_ref_count[list]==2){
956                             val= get_bits1(&h->gb)^1;
957                         }else{
958                             val= get_ue_golomb_31(&h->gb);
959                             if (val >= local_ref_count[list]){
960                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
961                                 return -1;
962                             }
963                         }
964                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
965                     }
966             }
967             for(list=0; list<h->list_count; list++){
968                 if(IS_DIR(mb_type, 0, list)){
969                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
970                     mx += get_se_golomb(&h->gb);
971                     my += get_se_golomb(&h->gb);
972                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
973
974                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
975                 }
976             }
977         }
978         else if(IS_16X8(mb_type)){
979             for(list=0; list<h->list_count; list++){
980                     for(i=0; i<2; i++){
981                         unsigned int val;
982                         if(IS_DIR(mb_type, i, list)){
983                             if(local_ref_count[list] == 1) {
984                                 val= 0;
985                             } else if(local_ref_count[list] == 2) {
986                                 val= get_bits1(&h->gb)^1;
987                             }else{
988                                 val= get_ue_golomb_31(&h->gb);
989                                 if (val >= local_ref_count[list]){
990                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
991                                     return -1;
992                                 }
993                             }
994                         }else
995                             val= LIST_NOT_USED&0xFF;
996                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
997                     }
998             }
999             for(list=0; list<h->list_count; list++){
1000                 for(i=0; i<2; i++){
1001                     unsigned int val;
1002                     if(IS_DIR(mb_type, i, list)){
1003                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1004                         mx += get_se_golomb(&h->gb);
1005                         my += get_se_golomb(&h->gb);
1006                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1007
1008                         val= pack16to32(mx,my);
1009                     }else
1010                         val=0;
1011                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1012                 }
1013             }
1014         }else{
1015             av_assert2(IS_8X16(mb_type));
1016             for(list=0; list<h->list_count; list++){
1017                     for(i=0; i<2; i++){
1018                         unsigned int val;
1019                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1020                             if(local_ref_count[list]==1){
1021                                 val= 0;
1022                             } else if(local_ref_count[list]==2){
1023                                 val= get_bits1(&h->gb)^1;
1024                             }else{
1025                                 val= get_ue_golomb_31(&h->gb);
1026                                 if (val >= local_ref_count[list]){
1027                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1028                                     return -1;
1029                                 }
1030                             }
1031                         }else
1032                             val= LIST_NOT_USED&0xFF;
1033                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1034                     }
1035             }
1036             for(list=0; list<h->list_count; list++){
1037                 for(i=0; i<2; i++){
1038                     unsigned int val;
1039                     if(IS_DIR(mb_type, i, list)){
1040                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1041                         mx += get_se_golomb(&h->gb);
1042                         my += get_se_golomb(&h->gb);
1043                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1044
1045                         val= pack16to32(mx,my);
1046                     }else
1047                         val=0;
1048                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1049                 }
1050             }
1051         }
1052     }
1053
1054     if(IS_INTER(mb_type))
1055         write_back_motion(h, mb_type);
1056
1057     if(!IS_INTRA16x16(mb_type)){
1058         cbp= get_ue_golomb(&h->gb);
1059
1060         if(decode_chroma){
1061             if(cbp > 47){
1062                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1063                 return -1;
1064             }
1065             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1066             else                     cbp= golomb_to_inter_cbp   [cbp];
1067         }else{
1068             if(cbp > 15){
1069                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1070                 return -1;
1071             }
1072             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1073             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1074         }
1075     } else {
1076         if (!decode_chroma && cbp>15) {
1077             av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1078             return AVERROR_INVALIDDATA;
1079         }
1080     }
1081
1082     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1083         mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1084     }
1085     h->cbp=
1086     h->cbp_table[mb_xy]= cbp;
1087     h->cur_pic.mb_type[mb_xy] = mb_type;
1088
1089     if(cbp || IS_INTRA16x16(mb_type)){
1090         int i4x4, i8x8, chroma_idx;
1091         int dquant;
1092         int ret;
1093         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1094         const uint8_t *scan, *scan8x8;
1095         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1096
1097         if(IS_INTERLACED(mb_type)){
1098             scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1099             scan= h->qscale ? h->field_scan : h->field_scan_q0;
1100         }else{
1101             scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1102             scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1103         }
1104
1105         dquant= get_se_golomb(&h->gb);
1106
1107         h->qscale += dquant;
1108
1109         if(((unsigned)h->qscale) > max_qp){
1110             if(h->qscale<0) h->qscale+= max_qp+1;
1111             else            h->qscale-= max_qp+1;
1112             if(((unsigned)h->qscale) > max_qp){
1113                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1114                 return -1;
1115             }
1116         }
1117
1118         h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1119         h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1120
1121         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1122             return -1;
1123         }
1124         h->cbp_table[mb_xy] |= ret << 12;
1125         if (CHROMA444(h)) {
1126             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1127                 return -1;
1128             }
1129             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1130                 return -1;
1131             }
1132         } else {
1133             const int num_c8x8 = h->sps.chroma_format_idc;
1134
1135             if(cbp&0x30){
1136                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1137                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1138                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1139                                         CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
1140                                         NULL, 4*num_c8x8) < 0) {
1141                         return -1;
1142                     }
1143             }
1144
1145             if(cbp&0x20){
1146                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1147                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1148                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1149                     for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1150                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1151                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1152                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1153                                 return -1;
1154                             mb += 16 << pixel_shift;
1155                         }
1156                     }
1157                 }
1158             }else{
1159                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1160                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1161             }
1162         }
1163     }else{
1164         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1165         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1166         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1167     }
1168     h->cur_pic.qscale_table[mb_xy] = h->qscale;
1169     write_back_non_zero_count(h);
1170
1171     return 0;
1172 }