]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
More av_cold for infrequently called functions.
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file libavcodec/h264_cavlc.c
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #include "internal.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h" // FIXME FIXME FIXME
33 #include "h264_mvpred.h"
34 #include "golomb.h"
35
36 #if ARCH_X86
37 #include "x86/h264_i386.h"
38 #endif
39
40 //#undef NDEBUG
41 #include <assert.h>
42
/*
 * Coded-block-pattern lookup tables.
 * NOTE(review): judging by the names, each table maps a decoded Exp-Golomb
 * code number to a coded_block_pattern value; the users of these tables are
 * outside this part of the file -- confirm against the macroblock decoder.
 */

/* 48-entry mapping, presumably for intra 4x4 macroblocks (with chroma). */
static const uint8_t golomb_to_intra4x4_cbp[48]={
 47, 31, 15,  0, 23, 27, 29, 30,  7, 11, 13, 14, 39, 43, 45, 46,
 16,  3,  5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44,  1,  2,  4,
  8, 17, 18, 20, 24,  6,  9, 22, 25, 32, 33, 34, 36, 40, 38, 41
};

/* 48-entry mapping, presumably for inter macroblocks (with chroma). */
static const uint8_t golomb_to_inter_cbp[48]={
  0, 16,  1,  2,  4,  8, 32,  3,  5, 10, 12, 15, 47,  7, 11, 13,
 14,  6,  9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
};

/* 16-entry variant; all values are < 16, presumably the luma-only (gray) case. */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

/* 16-entry variant; all values are < 16, presumably the luma-only (gray) case. */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
62
/*
 * Code lengths (in bits) of the chroma DC coeff_token VLC.
 * ff_h264_decode_init_vlc() passes all 4*5 entries to init_vlc(); the decoded
 * symbol is the entry index, which decode_residual() splits into
 * total_coeff (index>>2, one row per value) and trailing_ones (index&3, one
 * column per value).
 * NOTE(review): length 0 presumably marks combinations that have no code --
 * confirm against init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/* Code words matching chroma_dc_coeff_token_len, entry for entry. */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};
78
/*
 * Code lengths (in bits) of the four coeff_token VLCs.
 * Each of the 4 tables holds 4*17 entries; ff_h264_decode_init_vlc() builds
 * coeff_token_vlc[i] from table i. The decoded symbol is the entry index,
 * which decode_residual() splits into total_coeff (index>>2) and
 * trailing_ones (index&3). decode_residual() selects the table from the
 * predicted non-zero count via coeff_token_table_index[].
 * NOTE(review): length 0 presumably marks combinations that have no code --
 * confirm against init_vlc().
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     /* last table: every existing code is 6 bits long */
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};
109
/*
 * Code words of the four coeff_token VLCs, matching coeff_token_len entry
 * for entry (same [table][4*total_coeff + trailing_ones] layout, as implied
 * by how decode_residual() splits the decoded symbol).
 */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     /* last table: fixed-length 6-bit codes (see coeff_token_len) */
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
140
/*
 * Code lengths (in bits) of the total_zeros VLCs.
 * Row r is used to build total_zeros_vlc[r], which decode_residual() selects
 * with r = total_coeff-1; the decoded symbol (column index) becomes
 * zeros_left. Only rows 0..14 are initialised and used; shorter rows are
 * zero-padded by the compiler up to 16 columns.
 * NOTE(review): length 0 presumably marks values that have no code --
 * confirm against init_vlc().
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/* Code words matching total_zeros_len, entry for entry. */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};
176
/*
 * Code lengths (in bits) of the chroma DC total_zeros VLCs.
 * Row r is used to build chroma_dc_total_zeros_vlc[r], which
 * decode_residual() selects with r = total_coeff-1 for the chroma DC block;
 * the decoded symbol (column index) becomes zeros_left.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/* Code words matching chroma_dc_total_zeros_len, entry for entry. */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};
188
/*
 * Code lengths (in bits) of the run_before VLCs.
 * Rows 0..5 build run_vlc[0..5] (first 7 columns only), which
 * decode_residual() selects with zeros_left-1 while zeros_left < 7.
 * Row 6 builds run7_vlc (16 columns), used for zeros_left >= 7.
 * Unlisted columns are zero-padded by the compiler.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/* Code words matching run_len, entry for entry. */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
208
/*
 * Static storage for the VLC decoding tables.
 * ff_h264_decode_init_vlc() points each VLC's .table at the matching array
 * below and sets .table_allocated from the matching *_size constant, so each
 * array dimension and its size constant must stay in sync.
 */

/* The four coeff_token tables are packed back to back into one array;
 * the per-table sizes must add up to the array length (asserted at init). */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* one VLC per total_coeff value 1..15 (indexed by total_coeff-1) */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* one VLC per chroma DC total_coeff value 1..3 (indexed by total_coeff-1) */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* one VLC per zeros_left value 1..6 (indexed by zeros_left-1) */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run_before VLC used when zeros_left >= 7 */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Level lookup table, indexed by [suffix_length][next LEVEL_TAB_BITS bits].
 * Entry [0] is either the decoded level, or 100+prefix when the code does
 * not fit into LEVEL_TAB_BITS bits; entry [1] is the number of bits to skip.
 * Filled by init_cavlc_level_tab().
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
235
236
237 /**
238  * gets the predicted number of non-zero coefficients.
239  * @param n block index
240  */
241 static inline int pred_non_zero_count(H264Context *h, int n){
242     const int index8= scan8[n];
243     const int left= h->non_zero_count_cache[index8 - 1];
244     const int top = h->non_zero_count_cache[index8 - 8];
245     int i= left + top;
246
247     if(i<64) i= (i+1)>>1;
248
249     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
250
251     return i&31;
252 }
253
254 static av_cold void init_cavlc_level_tab(void){
255     int suffix_length, mask;
256     unsigned int i;
257
258     for(suffix_length=0; suffix_length<7; suffix_length++){
259         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
260             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
261             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
262
263             mask= -(level_code&1);
264             level_code= (((2+level_code)>>1) ^ mask) - mask;
265             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
266                 cavlc_level_tab[suffix_length][i][0]= level_code;
267                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
268             }else if(prefix + 1 <= LEVEL_TAB_BITS){
269                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
270                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
271             }else{
272                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
273                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
274             }
275         }
276     }
277 }
278
279 av_cold void ff_h264_decode_init_vlc(void){
280     static int done = 0;
281
282     if (!done) {
283         int i;
284         int offset;
285         done = 1;
286
287         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
288         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
289         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
290                  &chroma_dc_coeff_token_len [0], 1, 1,
291                  &chroma_dc_coeff_token_bits[0], 1, 1,
292                  INIT_VLC_USE_NEW_STATIC);
293
294         offset = 0;
295         for(i=0; i<4; i++){
296             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
297             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
298             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
299                      &coeff_token_len [i][0], 1, 1,
300                      &coeff_token_bits[i][0], 1, 1,
301                      INIT_VLC_USE_NEW_STATIC);
302             offset += coeff_token_vlc_tables_size[i];
303         }
304         /*
305          * This is a one time safety check to make sure that
306          * the packed static coeff_token_vlc table sizes
307          * were initialized correctly.
308          */
309         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
310
311         for(i=0; i<3; i++){
312             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
313             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
314             init_vlc(&chroma_dc_total_zeros_vlc[i],
315                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
316                      &chroma_dc_total_zeros_len [i][0], 1, 1,
317                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
318                      INIT_VLC_USE_NEW_STATIC);
319         }
320         for(i=0; i<15; i++){
321             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
322             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
323             init_vlc(&total_zeros_vlc[i],
324                      TOTAL_ZEROS_VLC_BITS, 16,
325                      &total_zeros_len [i][0], 1, 1,
326                      &total_zeros_bits[i][0], 1, 1,
327                      INIT_VLC_USE_NEW_STATIC);
328         }
329
330         for(i=0; i<6; i++){
331             run_vlc[i].table = run_vlc_tables[i];
332             run_vlc[i].table_allocated = run_vlc_tables_size;
333             init_vlc(&run_vlc[i],
334                      RUN_VLC_BITS, 7,
335                      &run_len [i][0], 1, 1,
336                      &run_bits[i][0], 1, 1,
337                      INIT_VLC_USE_NEW_STATIC);
338         }
339         run7_vlc.table = run7_vlc_table,
340         run7_vlc.table_allocated = run7_vlc_table_size;
341         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
342                  &run_len [6][0], 1, 1,
343                  &run_bits[6][0], 1, 1,
344                  INIT_VLC_USE_NEW_STATIC);
345
346         init_cavlc_level_tab();
347     }
348 }
349
350 /**
351  *
352  */
353 static inline int get_level_prefix(GetBitContext *gb){
354     unsigned int buf;
355     int log;
356
357     OPEN_READER(re, gb);
358     UPDATE_CACHE(re, gb);
359     buf=GET_CACHE(re, gb);
360
361     log= 32 - av_log2(buf);
362 #ifdef TRACE
363     print_bin(buf>>(32-log), log);
364     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
365 #endif
366
367     LAST_SKIP_BITS(re, gb, log);
368     CLOSE_READER(re, gb);
369
370     return log-1;
371 }
372
373 /**
374  * decodes a residual block.
375  * @param n block index
376  * @param scantable scantable
377  * @param max_coeff number of coefficients in the block
378  * @return <0 if an error occurred
379  */
380 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
381     MpegEncContext * const s = &h->s;
382     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
383     int level[16];
384     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
385
386     //FIXME put trailing_onex into the context
387
388     if(n == CHROMA_DC_BLOCK_INDEX){
389         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
390         total_coeff= coeff_token>>2;
391     }else{
392         if(n == LUMA_DC_BLOCK_INDEX){
393             total_coeff= pred_non_zero_count(h, 0);
394             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
395             total_coeff= coeff_token>>2;
396         }else{
397             total_coeff= pred_non_zero_count(h, n);
398             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
399             total_coeff= coeff_token>>2;
400             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
401         }
402     }
403
404     //FIXME set last_non_zero?
405
406     if(total_coeff==0)
407         return 0;
408     if(total_coeff > (unsigned)max_coeff) {
409         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
410         return -1;
411     }
412
413     trailing_ones= coeff_token&3;
414     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
415     assert(total_coeff<=16);
416
417     i = show_bits(gb, 3);
418     skip_bits(gb, trailing_ones);
419     level[0] = 1-((i&4)>>1);
420     level[1] = 1-((i&2)   );
421     level[2] = 1-((i&1)<<1);
422
423     if(trailing_ones<total_coeff) {
424         int mask, prefix;
425         int suffix_length = total_coeff > 10 && trailing_ones < 3;
426         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
427         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
428
429         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
430         if(level_code >= 100){
431             prefix= level_code - 100;
432             if(prefix == LEVEL_TAB_BITS)
433                 prefix += get_level_prefix(gb);
434
435             //first coefficient has suffix_length equal to 0 or 1
436             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
437                 if(suffix_length)
438                     level_code= (prefix<<1) + get_bits1(gb); //part
439                 else
440                     level_code= prefix; //part
441             }else if(prefix==14){
442                 if(suffix_length)
443                     level_code= (prefix<<1) + get_bits1(gb); //part
444                 else
445                     level_code= prefix + get_bits(gb, 4); //part
446             }else{
447                 level_code= 30 + get_bits(gb, prefix-3); //part
448                 if(prefix>=16)
449                     level_code += (1<<(prefix-3))-4096;
450             }
451
452             if(trailing_ones < 3) level_code += 2;
453
454             suffix_length = 2;
455             mask= -(level_code&1);
456             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
457         }else{
458             if(trailing_ones < 3) level_code += (level_code>>31)|1;
459
460             suffix_length = 1;
461             if(level_code + 3U > 6U)
462                 suffix_length++;
463             level[trailing_ones]= level_code;
464         }
465
466         //remaining coefficients have suffix_length > 0
467         for(i=trailing_ones+1;i<total_coeff;i++) {
468             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
469             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
470             level_code= cavlc_level_tab[suffix_length][bitsi][0];
471
472             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
473             if(level_code >= 100){
474                 prefix= level_code - 100;
475                 if(prefix == LEVEL_TAB_BITS){
476                     prefix += get_level_prefix(gb);
477                 }
478                 if(prefix<15){
479                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
480                 }else{
481                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
482                     if(prefix>=16)
483                         level_code += (1<<(prefix-3))-4096;
484                 }
485                 mask= -(level_code&1);
486                 level_code= (((2+level_code)>>1) ^ mask) - mask;
487             }
488             level[i]= level_code;
489
490             if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
491                 suffix_length++;
492         }
493     }
494
495     if(total_coeff == max_coeff)
496         zeros_left=0;
497     else{
498         if(n == CHROMA_DC_BLOCK_INDEX)
499             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
500         else
501             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
502     }
503
504     coeff_num = zeros_left + total_coeff - 1;
505     j = scantable[coeff_num];
506     if(n > 24){
507         block[j] = level[0];
508         for(i=1;i<total_coeff;i++) {
509             if(zeros_left <= 0)
510                 run_before = 0;
511             else if(zeros_left < 7){
512                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
513             }else{
514                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
515             }
516             zeros_left -= run_before;
517             coeff_num -= 1 + run_before;
518             j= scantable[ coeff_num ];
519
520             block[j]= level[i];
521         }
522     }else{
523         block[j] = (level[0] * qmul[j] + 32)>>6;
524         for(i=1;i<total_coeff;i++) {
525             if(zeros_left <= 0)
526                 run_before = 0;
527             else if(zeros_left < 7){
528                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
529             }else{
530                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
531             }
532             zeros_left -= run_before;
533             coeff_num -= 1 + run_before;
534             j= scantable[ coeff_num ];
535
536             block[j]= (level[i] * qmul[j] + 32)>>6;
537         }
538     }
539
540     if(zeros_left<0){
541         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
542         return -1;
543     }
544
545     return 0;
546 }
547
548 int ff_h264_decode_mb_cavlc(H264Context *h){
549     MpegEncContext * const s = &h->s;
550     int mb_xy;
551     int partition_count;
552     unsigned int mb_type, cbp;
553     int dct8x8_allowed= h->pps.transform_8x8_mode;
554
555     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
556
557     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
558     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
559                 down the code */
560     if(h->slice_type_nos != FF_I_TYPE){
561         if(s->mb_skip_run==-1)
562             s->mb_skip_run= get_ue_golomb(&s->gb);
563
564         if (s->mb_skip_run--) {
565             if(FRAME_MBAFF && (s->mb_y&1) == 0){
566                 if(s->mb_skip_run==0)
567                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
568                 else
569                     predict_field_decoding_flag(h);
570             }
571             decode_mb_skip(h);
572             return 0;
573         }
574     }
575     if(FRAME_MBAFF){
576         if( (s->mb_y&1) == 0 )
577             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
578     }
579
580     h->prev_mb_skipped= 0;
581
582     mb_type= get_ue_golomb(&s->gb);
583     if(h->slice_type_nos == FF_B_TYPE){
584         if(mb_type < 23){
585             partition_count= b_mb_type_info[mb_type].partition_count;
586             mb_type=         b_mb_type_info[mb_type].type;
587         }else{
588             mb_type -= 23;
589             goto decode_intra_mb;
590         }
591     }else if(h->slice_type_nos == FF_P_TYPE){
592         if(mb_type < 5){
593             partition_count= p_mb_type_info[mb_type].partition_count;
594             mb_type=         p_mb_type_info[mb_type].type;
595         }else{
596             mb_type -= 5;
597             goto decode_intra_mb;
598         }
599     }else{
600        assert(h->slice_type_nos == FF_I_TYPE);
601         if(h->slice_type == FF_SI_TYPE && mb_type)
602             mb_type--;
603 decode_intra_mb:
604         if(mb_type > 25){
605             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
606             return -1;
607         }
608         partition_count=0;
609         cbp= i_mb_type_info[mb_type].cbp;
610         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
611         mb_type= i_mb_type_info[mb_type].type;
612     }
613
614     if(MB_FIELD)
615         mb_type |= MB_TYPE_INTERLACED;
616
617     h->slice_table[ mb_xy ]= h->slice_num;
618
619     if(IS_INTRA_PCM(mb_type)){
620         unsigned int x;
621
622         // We assume these blocks are very rare so we do not optimize it.
623         align_get_bits(&s->gb);
624
625         // The pixels are stored in the same order as levels in h->mb array.
626         for(x=0; x < (CHROMA ? 384 : 256); x++){
627             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
628         }
629
630         // In deblocking, the quantizer is 0
631         s->current_picture.qscale_table[mb_xy]= 0;
632         // All coeffs are present
633         memset(h->non_zero_count[mb_xy], 16, 16);
634
635         s->current_picture.mb_type[mb_xy]= mb_type;
636         return 0;
637     }
638
639     if(MB_MBAFF){
640         h->ref_count[0] <<= 1;
641         h->ref_count[1] <<= 1;
642     }
643
644     fill_caches(h, mb_type, 0);
645
646     //mb_pred
647     if(IS_INTRA(mb_type)){
648         int pred_mode;
649 //            init_top_left_availability(h);
650         if(IS_INTRA4x4(mb_type)){
651             int i;
652             int di = 1;
653             if(dct8x8_allowed && get_bits1(&s->gb)){
654                 mb_type |= MB_TYPE_8x8DCT;
655                 di = 4;
656             }
657
658 //                fill_intra4x4_pred_table(h);
659             for(i=0; i<16; i+=di){
660                 int mode= pred_intra_mode(h, i);
661
662                 if(!get_bits1(&s->gb)){
663                     const int rem_mode= get_bits(&s->gb, 3);
664                     mode = rem_mode + (rem_mode >= mode);
665                 }
666
667                 if(di==4)
668                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
669                 else
670                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
671             }
672             ff_h264_write_back_intra_pred_mode(h);
673             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
674                 return -1;
675         }else{
676             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
677             if(h->intra16x16_pred_mode < 0)
678                 return -1;
679         }
680         if(CHROMA){
681             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
682             if(pred_mode < 0)
683                 return -1;
684             h->chroma_pred_mode= pred_mode;
685         }
686     }else if(partition_count==4){
687         int i, j, sub_partition_count[4], list, ref[2][4];
688
689         if(h->slice_type_nos == FF_B_TYPE){
690             for(i=0; i<4; i++){
691                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
692                 if(h->sub_mb_type[i] >=13){
693                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
694                     return -1;
695                 }
696                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
697                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
698             }
699             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
700                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
701                 ff_h264_pred_direct_motion(h, &mb_type);
702                 h->ref_cache[0][scan8[4]] =
703                 h->ref_cache[1][scan8[4]] =
704                 h->ref_cache[0][scan8[12]] =
705                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
706             }
707         }else{
708             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
709             for(i=0; i<4; i++){
710                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
711                 if(h->sub_mb_type[i] >=4){
712                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
713                     return -1;
714                 }
715                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
716                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
717             }
718         }
719
720         for(list=0; list<h->list_count; list++){
721             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
722             for(i=0; i<4; i++){
723                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
724                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
725                     unsigned int tmp;
726                     if(ref_count == 1){
727                         tmp= 0;
728                     }else if(ref_count == 2){
729                         tmp= get_bits1(&s->gb)^1;
730                     }else{
731                         tmp= get_ue_golomb_31(&s->gb);
732                         if(tmp>=ref_count){
733                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
734                             return -1;
735                         }
736                     }
737                     ref[list][i]= tmp;
738                 }else{
739                  //FIXME
740                     ref[list][i] = -1;
741                 }
742             }
743         }
744
745         if(dct8x8_allowed)
746             dct8x8_allowed = get_dct8x8_allowed(h);
747
748         for(list=0; list<h->list_count; list++){
749             for(i=0; i<4; i++){
750                 if(IS_DIRECT(h->sub_mb_type[i])) {
751                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
752                     continue;
753                 }
754                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
755                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
756
757                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
758                     const int sub_mb_type= h->sub_mb_type[i];
759                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
760                     for(j=0; j<sub_partition_count[i]; j++){
761                         int mx, my;
762                         const int index= 4*i + block_width*j;
763                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
764                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
765                         mx += get_se_golomb(&s->gb);
766                         my += get_se_golomb(&s->gb);
767                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
768
769                         if(IS_SUB_8X8(sub_mb_type)){
770                             mv_cache[ 1 ][0]=
771                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
772                             mv_cache[ 1 ][1]=
773                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
774                         }else if(IS_SUB_8X4(sub_mb_type)){
775                             mv_cache[ 1 ][0]= mx;
776                             mv_cache[ 1 ][1]= my;
777                         }else if(IS_SUB_4X8(sub_mb_type)){
778                             mv_cache[ 8 ][0]= mx;
779                             mv_cache[ 8 ][1]= my;
780                         }
781                         mv_cache[ 0 ][0]= mx;
782                         mv_cache[ 0 ][1]= my;
783                     }
784                 }else{
785                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
786                     p[0] = p[1]=
787                     p[8] = p[9]= 0;
788                 }
789             }
790         }
791     }else if(IS_DIRECT(mb_type)){
792         ff_h264_pred_direct_motion(h, &mb_type);
793         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
794     }else{
795         int list, mx, my, i;
796          //FIXME we should set ref_idx_l? to 0 if we use that later ...
797         if(IS_16X16(mb_type)){
798             for(list=0; list<h->list_count; list++){
799                     unsigned int val;
800                     if(IS_DIR(mb_type, 0, list)){
801                         if(h->ref_count[list]==1){
802                             val= 0;
803                         }else if(h->ref_count[list]==2){
804                             val= get_bits1(&s->gb)^1;
805                         }else{
806                             val= get_ue_golomb_31(&s->gb);
807                             if(val >= h->ref_count[list]){
808                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
809                                 return -1;
810                             }
811                         }
812                     }else
813                         val= LIST_NOT_USED&0xFF;
814                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
815             }
816             for(list=0; list<h->list_count; list++){
817                 unsigned int val;
818                 if(IS_DIR(mb_type, 0, list)){
819                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
820                     mx += get_se_golomb(&s->gb);
821                     my += get_se_golomb(&s->gb);
822                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
823
824                     val= pack16to32(mx,my);
825                 }else
826                     val=0;
827                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
828             }
829         }
830         else if(IS_16X8(mb_type)){
831             for(list=0; list<h->list_count; list++){
832                     for(i=0; i<2; i++){
833                         unsigned int val;
834                         if(IS_DIR(mb_type, i, list)){
835                             if(h->ref_count[list] == 1){
836                                 val= 0;
837                             }else if(h->ref_count[list] == 2){
838                                 val= get_bits1(&s->gb)^1;
839                             }else{
840                                 val= get_ue_golomb_31(&s->gb);
841                                 if(val >= h->ref_count[list]){
842                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
843                                     return -1;
844                                 }
845                             }
846                         }else
847                             val= LIST_NOT_USED&0xFF;
848                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
849                     }
850             }
851             for(list=0; list<h->list_count; list++){
852                 for(i=0; i<2; i++){
853                     unsigned int val;
854                     if(IS_DIR(mb_type, i, list)){
855                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
856                         mx += get_se_golomb(&s->gb);
857                         my += get_se_golomb(&s->gb);
858                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
859
860                         val= pack16to32(mx,my);
861                     }else
862                         val=0;
863                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
864                 }
865             }
866         }else{
867             assert(IS_8X16(mb_type));
868             for(list=0; list<h->list_count; list++){
869                     for(i=0; i<2; i++){
870                         unsigned int val;
871                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
872                             if(h->ref_count[list]==1){
873                                 val= 0;
874                             }else if(h->ref_count[list]==2){
875                                 val= get_bits1(&s->gb)^1;
876                             }else{
877                                 val= get_ue_golomb_31(&s->gb);
878                                 if(val >= h->ref_count[list]){
879                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
880                                     return -1;
881                                 }
882                             }
883                         }else
884                             val= LIST_NOT_USED&0xFF;
885                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
886                     }
887             }
888             for(list=0; list<h->list_count; list++){
889                 for(i=0; i<2; i++){
890                     unsigned int val;
891                     if(IS_DIR(mb_type, i, list)){
892                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
893                         mx += get_se_golomb(&s->gb);
894                         my += get_se_golomb(&s->gb);
895                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
896
897                         val= pack16to32(mx,my);
898                     }else
899                         val=0;
900                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
901                 }
902             }
903         }
904     }
905
906     if(IS_INTER(mb_type))
907         write_back_motion(h, mb_type);
908
909     if(!IS_INTRA16x16(mb_type)){
910         cbp= get_ue_golomb(&s->gb);
911         if(cbp > 47){
912             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
913             return -1;
914         }
915
916         if(CHROMA){
917             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
918             else                     cbp= golomb_to_inter_cbp   [cbp];
919         }else{
920             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
921             else                     cbp= golomb_to_inter_cbp_gray[cbp];
922         }
923     }
924     h->cbp = cbp;
925
926     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
927         if(get_bits1(&s->gb)){
928             mb_type |= MB_TYPE_8x8DCT;
929             h->cbp_table[mb_xy]= cbp;
930         }
931     }
932     s->current_picture.mb_type[mb_xy]= mb_type;
933
934     if(cbp || IS_INTRA16x16(mb_type)){
935         int i8x8, i4x4, chroma_idx;
936         int dquant;
937         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
938         const uint8_t *scan, *scan8x8, *dc_scan;
939
940 //        fill_non_zero_count_cache(h);
941
942         if(IS_INTERLACED(mb_type)){
943             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
944             scan= s->qscale ? h->field_scan : h->field_scan_q0;
945             dc_scan= luma_dc_field_scan;
946         }else{
947             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
948             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
949             dc_scan= luma_dc_zigzag_scan;
950         }
951
952         dquant= get_se_golomb(&s->gb);
953
954         if( dquant > 25 || dquant < -26 ){
955             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
956             return -1;
957         }
958
959         s->qscale += dquant;
960         if(((unsigned)s->qscale) > 51){
961             if(s->qscale<0) s->qscale+= 52;
962             else            s->qscale-= 52;
963         }
964
965         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
966         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
967         if(IS_INTRA16x16(mb_type)){
968             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
969                 return -1; //FIXME continue if partitioned and other return -1 too
970             }
971
972             assert((cbp&15) == 0 || (cbp&15) == 15);
973
974             if(cbp&15){
975                 for(i8x8=0; i8x8<4; i8x8++){
976                     for(i4x4=0; i4x4<4; i4x4++){
977                         const int index= i4x4 + 4*i8x8;
978                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
979                             return -1;
980                         }
981                     }
982                 }
983             }else{
984                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
985             }
986         }else{
987             for(i8x8=0; i8x8<4; i8x8++){
988                 if(cbp & (1<<i8x8)){
989                     if(IS_8x8DCT(mb_type)){
990                         DCTELEM *buf = &h->mb[64*i8x8];
991                         uint8_t *nnz;
992                         for(i4x4=0; i4x4<4; i4x4++){
993                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
994                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
995                                 return -1;
996                         }
997                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
998                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
999                     }else{
1000                         for(i4x4=0; i4x4<4; i4x4++){
1001                             const int index= i4x4 + 4*i8x8;
1002
1003                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
1004                                 return -1;
1005                             }
1006                         }
1007                     }
1008                 }else{
1009                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
1010                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
1011                 }
1012             }
1013         }
1014
1015         if(cbp&0x30){
1016             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1017                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
1018                     return -1;
1019                 }
1020         }
1021
1022         if(cbp&0x20){
1023             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1024                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1025                 for(i4x4=0; i4x4<4; i4x4++){
1026                     const int index= 16 + 4*chroma_idx + i4x4;
1027                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1028                         return -1;
1029                     }
1030                 }
1031             }
1032         }else{
1033             uint8_t * const nnz= &h->non_zero_count_cache[0];
1034             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1035             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1036         }
1037     }else{
1038         uint8_t * const nnz= &h->non_zero_count_cache[0];
1039         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1040         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1041         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1042     }
1043     s->current_picture.qscale_table[mb_xy]= s->qscale;
1044     write_back_non_zero_count(h);
1045
1046     if(MB_MBAFF){
1047         h->ref_count[0] >>= 1;
1048         h->ref_count[1] >>= 1;
1049     }
1050
1051     return 0;
1052 }
1053