]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
remove impossible condition from msrle_decode_pal4()
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/*
 * 16-entry remapping tables indexed by a decoded code number.
 * NOTE(review): judging by the names these map a Golomb code number to a
 * coded block pattern for inter / intra4x4 macroblocks of gray (luma only)
 * streams; the users are not visible in this part of the file -- confirm.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,  8,  3,  5, 10,
    12, 15,  7, 11, 13, 14,  6,  9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11, 13, 14,  3,  5,
    10, 12,  1,  2,  4,  8,  6,  9,
};
48
/*
 * Code lengths (in bits) and code words of the chroma DC coeff_token VLC.
 * Both tables are handed to init_vlc() to build chroma_dc_coeff_token_vlc.
 * The entry index is 4*total_coeff + trailing_ones (decode_residual() splits
 * the decoded symbol with >>2 and &3), i.e. one row per total_coeff 0..4.
 * NOTE(review): entries with length 0 presumably mark combinations that have
 * no code (trailing_ones > total_coeff) -- confirm against init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_len[4*5] = {
    2, 0, 0, 0,     /* total_coeff 0 */
    6, 1, 0, 0,     /* total_coeff 1 */
    6, 6, 3, 0,     /* total_coeff 2 */
    6, 7, 7, 6,     /* total_coeff 3 */
    6, 8, 8, 7,     /* total_coeff 4 */
};

static const uint8_t chroma_dc_coeff_token_bits[4*5] = {
    1, 0, 0, 0,     /* total_coeff 0 */
    7, 1, 0, 0,     /* total_coeff 1 */
    4, 6, 1, 0,     /* total_coeff 2 */
    3, 3, 2, 5,     /* total_coeff 3 */
    2, 3, 2, 0,     /* total_coeff 4 */
};
64
/*
 * Code lengths (in bits) of the four coeff_token VLCs.
 * coeff_token_len[t] / coeff_token_bits[t] are handed to init_vlc() to build
 * coeff_token_vlc[t]; decode_residual() picks t from the predicted number of
 * non-zero coefficients. Within a table the entry index is
 * 4*total_coeff + trailing_ones (total_coeff 0..16).
 */
static const uint8_t coeff_token_len[4][4*17] = {
    {   /* table 0 */
         1, 0, 0, 0,                                                        /* total_coeff 0      */
         6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,        /* total_coeff 1..4   */
        11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,        /* total_coeff 5..8   */
        14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,        /* total_coeff 9..12  */
        16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,        /* total_coeff 13..16 */
    },
    {   /* table 1 */
         2, 0, 0, 0,
         6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
         8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
        12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
        13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
    },
    {   /* table 2 */
         4, 0, 0, 0,
         6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
         7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
         8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
        10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
    },
    {   /* table 3: every used code is 6 bits long */
         6, 0, 0, 0,
         6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
    }
};

/*
 * Code words matching coeff_token_len, same indexing
 * (table, then 4*total_coeff + trailing_ones).
 */
static const uint8_t coeff_token_bits[4][4*17] = {
    {   /* table 0 */
         1, 0, 0, 0,                                                        /* total_coeff 0      */
         5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,        /* total_coeff 1..4   */
         7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,        /* total_coeff 5..8   */
        15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,        /* total_coeff 9..12  */
        15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,        /* total_coeff 13..16 */
    },
    {   /* table 1 */
         3, 0, 0, 0,
        11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
         4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
        15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
        11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
    },
    {   /* table 2 */
        15, 0, 0, 0,
        15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
        11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
        11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
        13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
    },
    {   /* table 3 */
         3, 0, 0, 0,
         0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
        16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
        32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
        48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
    }
};
126
/*
 * Code lengths (in bits) of the total_zeros VLCs.
 * Row r is handed to init_vlc() to build total_zeros_vlc[r], which
 * decode_residual() uses when total_coeff == r+1. Only 15 of the 16 rows
 * are initialized explicitly; the rest of the array is zero.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },   /* total_coeff  1 */
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },      /* total_coeff  2 */
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },         /* total_coeff  3 */
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },            /* total_coeff  4 */
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },               /* total_coeff  5 */
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },                  /* total_coeff  6 */
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },                     /* total_coeff  7 */
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },                        /* total_coeff  8 */
    { 6, 6, 4, 2, 2, 3, 2, 5 },                           /* total_coeff  9 */
    { 5, 5, 3, 2, 2, 2, 4 },                              /* total_coeff 10 */
    { 4, 4, 3, 3, 1, 3 },                                 /* total_coeff 11 */
    { 4, 4, 2, 1, 3 },                                    /* total_coeff 12 */
    { 3, 3, 1, 2 },                                       /* total_coeff 13 */
    { 2, 2, 1 },                                          /* total_coeff 14 */
    { 1, 1 },                                             /* total_coeff 15 */
};

/*
 * Code words matching total_zeros_len, same row/column indexing.
 */
static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },   /* total_coeff  1 */
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },      /* total_coeff  2 */
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },         /* total_coeff  3 */
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },            /* total_coeff  4 */
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },               /* total_coeff  5 */
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                  /* total_coeff  6 */
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },                     /* total_coeff  7 */
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },                        /* total_coeff  8 */
    { 1, 0, 1, 3, 2, 1, 1, 1 },                           /* total_coeff  9 */
    { 1, 0, 1, 3, 2, 1, 1 },                              /* total_coeff 10 */
    { 0, 1, 1, 2, 1, 3 },                                 /* total_coeff 11 */
    { 0, 1, 1, 1, 1 },                                    /* total_coeff 12 */
    { 0, 1, 1, 1 },                                       /* total_coeff 13 */
    { 0, 1, 1 },                                          /* total_coeff 14 */
    { 0, 1 },                                             /* total_coeff 15 */
};
162
/*
 * Code lengths (in bits) and code words of the chroma DC total_zeros VLCs.
 * Row r is handed to init_vlc() to build chroma_dc_total_zeros_vlc[r], which
 * decode_residual() uses for chroma DC blocks with total_coeff == r+1.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 },     /* total_coeff 1 */
    { 1, 2, 2, 0 },     /* total_coeff 2 */
    { 1, 1, 0, 0 },     /* total_coeff 3 */
};

static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 },     /* total_coeff 1 */
    { 1, 1, 0, 0 },     /* total_coeff 2 */
    { 1, 0, 0, 0 },     /* total_coeff 3 */
};
174
/*
 * Code lengths (in bits) and code words of the run_before VLCs.
 * Rows 0..5 are handed to init_vlc() (7 codes each) to build run_vlc[0..5],
 * which decode_residual() uses while zeros_left is 1..6; row 6 (16 codes)
 * builds run7_vlc, used while zeros_left is 7 or more.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },                                                   /* zeros_left 1  */
    { 1, 2, 2 },                                                /* zeros_left 2  */
    { 2, 2, 2, 2 },                                             /* zeros_left 3  */
    { 2, 2, 2, 3, 3 },                                          /* zeros_left 4  */
    { 2, 2, 3, 3, 3, 3 },                                       /* zeros_left 5  */
    { 2, 3, 3, 3, 3, 3, 3 },                                    /* zeros_left 6  */
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },          /* zeros_left 7+ */
};

static const uint8_t run_bits[7][16] = {
    { 1, 0 },                                                   /* zeros_left 1  */
    { 1, 1, 0 },                                                /* zeros_left 2  */
    { 3, 2, 1, 0 },                                             /* zeros_left 3  */
    { 3, 2, 1, 1, 0 },                                          /* zeros_left 4  */
    { 3, 2, 3, 2, 1, 0 },                                       /* zeros_left 5  */
    { 3, 0, 1, 3, 2, 5, 4 },                                    /* zeros_left 6  */
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },            /* zeros_left 7+ */
};
194
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
198
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
202
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
206
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
210
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
214
215 static VLC run7_vlc;
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
218
219 #define LEVEL_TAB_BITS 8
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
221
222
223 /**
224  * gets the predicted number of non-zero coefficients.
225  * @param n block index
226  */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228     const int index8= scan8[n];
229     const int left= h->non_zero_count_cache[index8 - 1];
230     const int top = h->non_zero_count_cache[index8 - 8];
231     int i= left + top;
232
233     if(i<64) i= (i+1)>>1;
234
235     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
236
237     return i&31;
238 }
239
240 static av_cold void init_cavlc_level_tab(void){
241     int suffix_length, mask;
242     unsigned int i;
243
244     for(suffix_length=0; suffix_length<7; suffix_length++){
245         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
246             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
248
249             mask= -(level_code&1);
250             level_code= (((2+level_code)>>1) ^ mask) - mask;
251             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252                 cavlc_level_tab[suffix_length][i][0]= level_code;
253                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
254             }else if(prefix + 1 <= LEVEL_TAB_BITS){
255                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
257             }else{
258                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
260             }
261         }
262     }
263 }
264
265 av_cold void ff_h264_decode_init_vlc(void){
266     static int done = 0;
267
268     if (!done) {
269         int i;
270         int offset;
271         done = 1;
272
273         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276                  &chroma_dc_coeff_token_len [0], 1, 1,
277                  &chroma_dc_coeff_token_bits[0], 1, 1,
278                  INIT_VLC_USE_NEW_STATIC);
279
280         offset = 0;
281         for(i=0; i<4; i++){
282             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285                      &coeff_token_len [i][0], 1, 1,
286                      &coeff_token_bits[i][0], 1, 1,
287                      INIT_VLC_USE_NEW_STATIC);
288             offset += coeff_token_vlc_tables_size[i];
289         }
290         /*
291          * This is a one time safety check to make sure that
292          * the packed static coeff_token_vlc table sizes
293          * were initialized correctly.
294          */
295         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
296
297         for(i=0; i<3; i++){
298             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300             init_vlc(&chroma_dc_total_zeros_vlc[i],
301                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302                      &chroma_dc_total_zeros_len [i][0], 1, 1,
303                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
304                      INIT_VLC_USE_NEW_STATIC);
305         }
306         for(i=0; i<15; i++){
307             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309             init_vlc(&total_zeros_vlc[i],
310                      TOTAL_ZEROS_VLC_BITS, 16,
311                      &total_zeros_len [i][0], 1, 1,
312                      &total_zeros_bits[i][0], 1, 1,
313                      INIT_VLC_USE_NEW_STATIC);
314         }
315
316         for(i=0; i<6; i++){
317             run_vlc[i].table = run_vlc_tables[i];
318             run_vlc[i].table_allocated = run_vlc_tables_size;
319             init_vlc(&run_vlc[i],
320                      RUN_VLC_BITS, 7,
321                      &run_len [i][0], 1, 1,
322                      &run_bits[i][0], 1, 1,
323                      INIT_VLC_USE_NEW_STATIC);
324         }
325         run7_vlc.table = run7_vlc_table,
326         run7_vlc.table_allocated = run7_vlc_table_size;
327         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328                  &run_len [6][0], 1, 1,
329                  &run_bits[6][0], 1, 1,
330                  INIT_VLC_USE_NEW_STATIC);
331
332         init_cavlc_level_tab();
333     }
334 }
335
336 /**
337  *
338  */
339 static inline int get_level_prefix(GetBitContext *gb){
340     unsigned int buf;
341     int log;
342
343     OPEN_READER(re, gb);
344     UPDATE_CACHE(re, gb);
345     buf=GET_CACHE(re, gb);
346
347     log= 32 - av_log2(buf);
348 #ifdef TRACE
349     print_bin(buf>>(32-log), log);
350     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
351 #endif
352
353     LAST_SKIP_BITS(re, gb, log);
354     CLOSE_READER(re, gb);
355
356     return log-1;
357 }
358
359 /**
360  * decodes a residual block.
361  * @param n block index
362  * @param scantable scantable
363  * @param max_coeff number of coefficients in the block
364  * @return <0 if an error occurred
365  */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367     MpegEncContext * const s = &h->s;
368     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
369     int level[16];
370     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
371
372     //FIXME put trailing_onex into the context
373
374     if(n == CHROMA_DC_BLOCK_INDEX){
375         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376         total_coeff= coeff_token>>2;
377     }else{
378         if(n == LUMA_DC_BLOCK_INDEX){
379             total_coeff= pred_non_zero_count(h, 0);
380             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381             total_coeff= coeff_token>>2;
382         }else{
383             total_coeff= pred_non_zero_count(h, n);
384             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385             total_coeff= coeff_token>>2;
386             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
387         }
388     }
389
390     //FIXME set last_non_zero?
391
392     if(total_coeff==0)
393         return 0;
394     if(total_coeff > (unsigned)max_coeff) {
395         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
396         return -1;
397     }
398
399     trailing_ones= coeff_token&3;
400     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401     assert(total_coeff<=16);
402
403     i = show_bits(gb, 3);
404     skip_bits(gb, trailing_ones);
405     level[0] = 1-((i&4)>>1);
406     level[1] = 1-((i&2)   );
407     level[2] = 1-((i&1)<<1);
408
409     if(trailing_ones<total_coeff) {
410         int mask, prefix;
411         int suffix_length = total_coeff > 10 & trailing_ones < 3;
412         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
414
415         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
416         if(level_code >= 100){
417             prefix= level_code - 100;
418             if(prefix == LEVEL_TAB_BITS)
419                 prefix += get_level_prefix(gb);
420
421             //first coefficient has suffix_length equal to 0 or 1
422             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
423                 if(suffix_length)
424                     level_code= (prefix<<1) + get_bits1(gb); //part
425                 else
426                     level_code= prefix; //part
427             }else if(prefix==14){
428                 if(suffix_length)
429                     level_code= (prefix<<1) + get_bits1(gb); //part
430                 else
431                     level_code= prefix + get_bits(gb, 4); //part
432             }else{
433                 level_code= 30 + get_bits(gb, prefix-3); //part
434                 if(prefix>=16){
435                     if(prefix > 25+3){
436                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
437                         return -1;
438                     }
439                     level_code += (1<<(prefix-3))-4096;
440                 }
441             }
442
443             if(trailing_ones < 3) level_code += 2;
444
445             suffix_length = 2;
446             mask= -(level_code&1);
447             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
448         }else{
449             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
450
451             suffix_length = 1 + (level_code + 3U > 6U);
452             level[trailing_ones]= level_code;
453         }
454
455         //remaining coefficients have suffix_length > 0
456         for(i=trailing_ones+1;i<total_coeff;i++) {
457             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
458             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
459             level_code= cavlc_level_tab[suffix_length][bitsi][0];
460
461             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
462             if(level_code >= 100){
463                 prefix= level_code - 100;
464                 if(prefix == LEVEL_TAB_BITS){
465                     prefix += get_level_prefix(gb);
466                 }
467                 if(prefix<15){
468                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
469                 }else{
470                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
471                     if(prefix>=16)
472                         level_code += (1<<(prefix-3))-4096;
473                 }
474                 mask= -(level_code&1);
475                 level_code= (((2+level_code)>>1) ^ mask) - mask;
476             }
477             level[i]= level_code;
478             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
479         }
480     }
481
482     if(total_coeff == max_coeff)
483         zeros_left=0;
484     else{
485         if(n == CHROMA_DC_BLOCK_INDEX)
486             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
487         else
488             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
489     }
490
491     scantable += zeros_left + total_coeff - 1;
492     if(n > 24){
493         block[*scantable] = level[0];
494         for(i=1;i<total_coeff && zeros_left > 0;i++) {
495             if(zeros_left < 7)
496                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
497             else
498                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
499             zeros_left -= run_before;
500             scantable -= 1 + run_before;
501             block[*scantable]= level[i];
502         }
503         for(;i<total_coeff;i++) {
504             scantable--;
505             block[*scantable]= level[i];
506         }
507     }else{
508         block[*scantable] = (level[0] * qmul[*scantable] + 32)>>6;
509         for(i=1;i<total_coeff && zeros_left > 0;i++) {
510             if(zeros_left < 7)
511                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
512             else
513                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
514             zeros_left -= run_before;
515             scantable -= 1 + run_before;
516             block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
517         }
518         for(;i<total_coeff;i++) {
519             scantable--;
520             block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
521         }
522     }
523
524     if(zeros_left<0){
525         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
526         return -1;
527     }
528
529     return 0;
530 }
531
532 int ff_h264_decode_mb_cavlc(H264Context *h){
533     MpegEncContext * const s = &h->s;
534     int mb_xy;
535     int partition_count;
536     unsigned int mb_type, cbp;
537     int dct8x8_allowed= h->pps.transform_8x8_mode;
538
539     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
540
541     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
542     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
543                 down the code */
544     if(h->slice_type_nos != FF_I_TYPE){
545         if(s->mb_skip_run==-1)
546             s->mb_skip_run= get_ue_golomb(&s->gb);
547
548         if (s->mb_skip_run--) {
549             if(FRAME_MBAFF && (s->mb_y&1) == 0){
550                 if(s->mb_skip_run==0)
551                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
552             }
553             decode_mb_skip(h);
554             return 0;
555         }
556     }
557     if(FRAME_MBAFF){
558         if( (s->mb_y&1) == 0 )
559             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
560     }
561
562     h->prev_mb_skipped= 0;
563
564     mb_type= get_ue_golomb(&s->gb);
565     if(h->slice_type_nos == FF_B_TYPE){
566         if(mb_type < 23){
567             partition_count= b_mb_type_info[mb_type].partition_count;
568             mb_type=         b_mb_type_info[mb_type].type;
569         }else{
570             mb_type -= 23;
571             goto decode_intra_mb;
572         }
573     }else if(h->slice_type_nos == FF_P_TYPE){
574         if(mb_type < 5){
575             partition_count= p_mb_type_info[mb_type].partition_count;
576             mb_type=         p_mb_type_info[mb_type].type;
577         }else{
578             mb_type -= 5;
579             goto decode_intra_mb;
580         }
581     }else{
582        assert(h->slice_type_nos == FF_I_TYPE);
583         if(h->slice_type == FF_SI_TYPE && mb_type)
584             mb_type--;
585 decode_intra_mb:
586         if(mb_type > 25){
587             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
588             return -1;
589         }
590         partition_count=0;
591         cbp= i_mb_type_info[mb_type].cbp;
592         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
593         mb_type= i_mb_type_info[mb_type].type;
594     }
595
596     if(MB_FIELD)
597         mb_type |= MB_TYPE_INTERLACED;
598
599     h->slice_table[ mb_xy ]= h->slice_num;
600
601     if(IS_INTRA_PCM(mb_type)){
602         unsigned int x;
603
604         // We assume these blocks are very rare so we do not optimize it.
605         align_get_bits(&s->gb);
606
607         // The pixels are stored in the same order as levels in h->mb array.
608         for(x=0; x < (CHROMA ? 384 : 256); x++){
609             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
610         }
611
612         // In deblocking, the quantizer is 0
613         s->current_picture.qscale_table[mb_xy]= 0;
614         // All coeffs are present
615         memset(h->non_zero_count[mb_xy], 16, 32);
616
617         s->current_picture.mb_type[mb_xy]= mb_type;
618         return 0;
619     }
620
621     if(MB_MBAFF){
622         h->ref_count[0] <<= 1;
623         h->ref_count[1] <<= 1;
624     }
625
626     fill_decode_neighbors(h, mb_type);
627     fill_decode_caches(h, mb_type);
628
629     //mb_pred
630     if(IS_INTRA(mb_type)){
631         int pred_mode;
632 //            init_top_left_availability(h);
633         if(IS_INTRA4x4(mb_type)){
634             int i;
635             int di = 1;
636             if(dct8x8_allowed && get_bits1(&s->gb)){
637                 mb_type |= MB_TYPE_8x8DCT;
638                 di = 4;
639             }
640
641 //                fill_intra4x4_pred_table(h);
642             for(i=0; i<16; i+=di){
643                 int mode= pred_intra_mode(h, i);
644
645                 if(!get_bits1(&s->gb)){
646                     const int rem_mode= get_bits(&s->gb, 3);
647                     mode = rem_mode + (rem_mode >= mode);
648                 }
649
650                 if(di==4)
651                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
652                 else
653                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
654             }
655             ff_h264_write_back_intra_pred_mode(h);
656             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
657                 return -1;
658         }else{
659             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
660             if(h->intra16x16_pred_mode < 0)
661                 return -1;
662         }
663         if(CHROMA){
664             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
665             if(pred_mode < 0)
666                 return -1;
667             h->chroma_pred_mode= pred_mode;
668         }
669     }else if(partition_count==4){
670         int i, j, sub_partition_count[4], list, ref[2][4];
671
672         if(h->slice_type_nos == FF_B_TYPE){
673             for(i=0; i<4; i++){
674                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
675                 if(h->sub_mb_type[i] >=13){
676                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
677                     return -1;
678                 }
679                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
680                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
681             }
682             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
683                 ff_h264_pred_direct_motion(h, &mb_type);
684                 h->ref_cache[0][scan8[4]] =
685                 h->ref_cache[1][scan8[4]] =
686                 h->ref_cache[0][scan8[12]] =
687                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
688             }
689         }else{
690             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
691             for(i=0; i<4; i++){
692                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
693                 if(h->sub_mb_type[i] >=4){
694                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
695                     return -1;
696                 }
697                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
698                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
699             }
700         }
701
702         for(list=0; list<h->list_count; list++){
703             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
704             for(i=0; i<4; i++){
705                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
706                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
707                     unsigned int tmp;
708                     if(ref_count == 1){
709                         tmp= 0;
710                     }else if(ref_count == 2){
711                         tmp= get_bits1(&s->gb)^1;
712                     }else{
713                         tmp= get_ue_golomb_31(&s->gb);
714                         if(tmp>=ref_count){
715                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
716                             return -1;
717                         }
718                     }
719                     ref[list][i]= tmp;
720                 }else{
721                  //FIXME
722                     ref[list][i] = -1;
723                 }
724             }
725         }
726
727         if(dct8x8_allowed)
728             dct8x8_allowed = get_dct8x8_allowed(h);
729
730         for(list=0; list<h->list_count; list++){
731             for(i=0; i<4; i++){
732                 if(IS_DIRECT(h->sub_mb_type[i])) {
733                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
734                     continue;
735                 }
736                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
737                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
738
739                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
740                     const int sub_mb_type= h->sub_mb_type[i];
741                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
742                     for(j=0; j<sub_partition_count[i]; j++){
743                         int mx, my;
744                         const int index= 4*i + block_width*j;
745                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
746                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
747                         mx += get_se_golomb(&s->gb);
748                         my += get_se_golomb(&s->gb);
749                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
750
751                         if(IS_SUB_8X8(sub_mb_type)){
752                             mv_cache[ 1 ][0]=
753                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
754                             mv_cache[ 1 ][1]=
755                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
756                         }else if(IS_SUB_8X4(sub_mb_type)){
757                             mv_cache[ 1 ][0]= mx;
758                             mv_cache[ 1 ][1]= my;
759                         }else if(IS_SUB_4X8(sub_mb_type)){
760                             mv_cache[ 8 ][0]= mx;
761                             mv_cache[ 8 ][1]= my;
762                         }
763                         mv_cache[ 0 ][0]= mx;
764                         mv_cache[ 0 ][1]= my;
765                     }
766                 }else{
767                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
768                     p[0] = p[1]=
769                     p[8] = p[9]= 0;
770                 }
771             }
772         }
773     }else if(IS_DIRECT(mb_type)){
774         ff_h264_pred_direct_motion(h, &mb_type);
775         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
776     }else{
777         int list, mx, my, i;
778          //FIXME we should set ref_idx_l? to 0 if we use that later ...
779         if(IS_16X16(mb_type)){
780             for(list=0; list<h->list_count; list++){
781                     unsigned int val;
782                     if(IS_DIR(mb_type, 0, list)){
783                         if(h->ref_count[list]==1){
784                             val= 0;
785                         }else if(h->ref_count[list]==2){
786                             val= get_bits1(&s->gb)^1;
787                         }else{
788                             val= get_ue_golomb_31(&s->gb);
789                             if(val >= h->ref_count[list]){
790                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
791                                 return -1;
792                             }
793                         }
794                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
795                     }
796             }
797             for(list=0; list<h->list_count; list++){
798                 if(IS_DIR(mb_type, 0, list)){
799                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
800                     mx += get_se_golomb(&s->gb);
801                     my += get_se_golomb(&s->gb);
802                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
803
804                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
805                 }
806             }
807         }
808         else if(IS_16X8(mb_type)){
809             for(list=0; list<h->list_count; list++){
810                     for(i=0; i<2; i++){
811                         unsigned int val;
812                         if(IS_DIR(mb_type, i, list)){
813                             if(h->ref_count[list] == 1){
814                                 val= 0;
815                             }else if(h->ref_count[list] == 2){
816                                 val= get_bits1(&s->gb)^1;
817                             }else{
818                                 val= get_ue_golomb_31(&s->gb);
819                                 if(val >= h->ref_count[list]){
820                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
821                                     return -1;
822                                 }
823                             }
824                         }else
825                             val= LIST_NOT_USED&0xFF;
826                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
827                     }
828             }
829             for(list=0; list<h->list_count; list++){
830                 for(i=0; i<2; i++){
831                     unsigned int val;
832                     if(IS_DIR(mb_type, i, list)){
833                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
834                         mx += get_se_golomb(&s->gb);
835                         my += get_se_golomb(&s->gb);
836                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
837
838                         val= pack16to32(mx,my);
839                     }else
840                         val=0;
841                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
842                 }
843             }
844         }else{
845             assert(IS_8X16(mb_type));
846             for(list=0; list<h->list_count; list++){
847                     for(i=0; i<2; i++){
848                         unsigned int val;
849                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
850                             if(h->ref_count[list]==1){
851                                 val= 0;
852                             }else if(h->ref_count[list]==2){
853                                 val= get_bits1(&s->gb)^1;
854                             }else{
855                                 val= get_ue_golomb_31(&s->gb);
856                                 if(val >= h->ref_count[list]){
857                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
858                                     return -1;
859                                 }
860                             }
861                         }else
862                             val= LIST_NOT_USED&0xFF;
863                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
864                     }
865             }
866             for(list=0; list<h->list_count; list++){
867                 for(i=0; i<2; i++){
868                     unsigned int val;
869                     if(IS_DIR(mb_type, i, list)){
870                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
871                         mx += get_se_golomb(&s->gb);
872                         my += get_se_golomb(&s->gb);
873                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
874
875                         val= pack16to32(mx,my);
876                     }else
877                         val=0;
878                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
879                 }
880             }
881         }
882     }
883
884     if(IS_INTER(mb_type))
885         write_back_motion(h, mb_type);
886
887     if(!IS_INTRA16x16(mb_type)){
888         cbp= get_ue_golomb(&s->gb);
889         if(cbp > 47){
890             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
891             return -1;
892         }
893
894         if(CHROMA){
895             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
896             else                     cbp= golomb_to_inter_cbp   [cbp];
897         }else{
898             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
899             else                     cbp= golomb_to_inter_cbp_gray[cbp];
900         }
901     }
902
903     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
904         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
905     }
906     h->cbp=
907     h->cbp_table[mb_xy]= cbp;
908     s->current_picture.mb_type[mb_xy]= mb_type;
909
910     if(cbp || IS_INTRA16x16(mb_type)){
911         int i8x8, i4x4, chroma_idx;
912         int dquant;
913         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
914         const uint8_t *scan, *scan8x8, *dc_scan;
915
916         if(IS_INTERLACED(mb_type)){
917             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
918             scan= s->qscale ? h->field_scan : h->field_scan_q0;
919             dc_scan= luma_dc_field_scan;
920         }else{
921             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
922             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
923             dc_scan= luma_dc_zigzag_scan;
924         }
925
926         dquant= get_se_golomb(&s->gb);
927
928         s->qscale += dquant;
929
930         if(((unsigned)s->qscale) > 51){
931             if(s->qscale<0) s->qscale+= 52;
932             else            s->qscale-= 52;
933             if(((unsigned)s->qscale) > 51){
934                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
935                 return -1;
936             }
937         }
938
939         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
940         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
941         if(IS_INTRA16x16(mb_type)){
942             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
943                 return -1; //FIXME continue if partitioned and other return -1 too
944             }
945
946             assert((cbp&15) == 0 || (cbp&15) == 15);
947
948             if(cbp&15){
949                 for(i8x8=0; i8x8<4; i8x8++){
950                     for(i4x4=0; i4x4<4; i4x4++){
951                         const int index= i4x4 + 4*i8x8;
952                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
953                             return -1;
954                         }
955                     }
956                 }
957             }else{
958                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
959             }
960         }else{
961             for(i8x8=0; i8x8<4; i8x8++){
962                 if(cbp & (1<<i8x8)){
963                     if(IS_8x8DCT(mb_type)){
964                         DCTELEM *buf = &h->mb[64*i8x8];
965                         uint8_t *nnz;
966                         for(i4x4=0; i4x4<4; i4x4++){
967                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
968                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
969                                 return -1;
970                         }
971                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
972                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
973                     }else{
974                         for(i4x4=0; i4x4<4; i4x4++){
975                             const int index= i4x4 + 4*i8x8;
976
977                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
978                                 return -1;
979                             }
980                         }
981                     }
982                 }else{
983                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
984                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
985                 }
986             }
987         }
988
989         if(cbp&0x30){
990             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
991                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
992                     return -1;
993                 }
994         }
995
996         if(cbp&0x20){
997             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
998                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
999                 for(i4x4=0; i4x4<4; i4x4++){
1000                     const int index= 16 + 4*chroma_idx + i4x4;
1001                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1002                         return -1;
1003                     }
1004                 }
1005             }
1006         }else{
1007             uint8_t * const nnz= &h->non_zero_count_cache[0];
1008             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1009             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1010         }
1011     }else{
1012         uint8_t * const nnz= &h->non_zero_count_cache[0];
1013         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1014         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1015         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1016     }
1017     s->current_picture.qscale_table[mb_xy]= s->qscale;
1018     write_back_non_zero_count(h);
1019
1020     if(MB_MBAFF){
1021         h->ref_count[0] >>= 1;
1022         h->ref_count[1] >>= 1;
1023     }
1024
1025     return 0;
1026 }
1027