]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
ef92218b4fd7e20e40a6167515f9efce94f8ec20
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file libavcodec/h264_cavlc.c
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/* NOTE(review): not referenced in the visible part of this file. Judging by
 * the name, this maps a ue-golomb code number to a coded block pattern for
 * inter macroblocks of gray (no chroma) streams -- confirm against the user. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
43 };
44
/* NOTE(review): not referenced in the visible part of this file. Judging by
 * the name, the intra 4x4 counterpart of the table above -- confirm. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
47 };
48
/* Code lengths (in bits) of the chroma DC coeff_token VLC, passed to
 * init_vlc() in ff_h264_decode_init_vlc() as 4*5 entries.
 * decode_residual() splits the decoded symbol as total_coeff = sym>>2 and
 * trailing_ones = sym&3, so each row is one total_coeff (0..4) and each
 * column one trailing_ones value (0..3).
 * Entries of length 0 are presumably combinations without a code -- confirm
 * against init_vlc(). */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
50  2, 0, 0, 0,
51  6, 1, 0, 0,
52  6, 6, 3, 0,
53  6, 7, 7, 6,
54  6, 8, 8, 7,
55 };
56
/* Code words matching chroma_dc_coeff_token_len[], same layout. */
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
58  1, 0, 0, 0,
59  7, 1, 0, 0,
60  4, 6, 1, 0,
61  3, 3, 2, 5,
62  2, 3, 2, 0,
63 };
64
/* Code lengths (in bits) of the four coeff_token VLCs used for all blocks
 * except chroma DC. Each table has 4*17 entries; decode_residual() splits the
 * decoded symbol as total_coeff = sym>>2 (0..16) and trailing_ones = sym&3,
 * so every group of four values belongs to one total_coeff.
 * Which of the four tables is used is chosen in decode_residual() from the
 * predicted non-zero count via coeff_token_table_index[]. */
65 static const uint8_t coeff_token_len[4][4*17]={
66 {
67      1, 0, 0, 0,
68      6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
69     11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
70     14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
71     16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
72 },
73 {
74      2, 0, 0, 0,
75      6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
76      8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
77     12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
78     13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
79 },
80 {
81      4, 0, 0, 0,
82      6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
83      7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
84      8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
85     10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
86 },
87 {
88      6, 0, 0, 0,
89      6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
90      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
91      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
92      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
93 }
94 };
95
/* Code words matching coeff_token_len[], same layout. */
96 static const uint8_t coeff_token_bits[4][4*17]={
97 {
98      1, 0, 0, 0,
99      5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
100      7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
101     15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
102     15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
103 },
104 {
105      3, 0, 0, 0,
106     11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
107      4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
108     15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
109     11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
110 },
111 {
112     15, 0, 0, 0,
113     15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
114     11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
115     11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
116     13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
117 },
118 {
119      3, 0, 0, 0,
120      0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
121     16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
122     32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
123     48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
124 }
125 };
126
/* Code lengths (in bits) of the total_zeros VLCs. ff_h264_decode_init_vlc()
 * builds one VLC per row for rows 0..14, each with 16 symbols; the symbol is
 * the total_zeros value. decode_residual() selects row total_coeff-1.
 * Only 15 rows are initialised; the 16th row and the missing tail of each
 * shorter row are zero-filled by the usual C initialisation rules. */
127 static const uint8_t total_zeros_len[16][16]= {
128     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
129     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
130     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
131     {5,3,4,4,3,3,3,4,3,4,5,5,5},
132     {4,4,4,3,3,3,3,3,4,5,4,5},
133     {6,5,3,3,3,3,3,3,4,3,6},
134     {6,5,3,3,3,2,3,4,3,6},
135     {6,4,5,3,2,2,3,3,6},
136     {6,6,4,2,2,3,2,5},
137     {5,5,3,2,2,2,4},
138     {4,4,3,3,1,3},
139     {4,4,2,1,3},
140     {3,3,1,2},
141     {2,2,1},
142     {1,1},
143 };
144
/* Code words matching total_zeros_len[], same layout. */
145 static const uint8_t total_zeros_bits[16][16]= {
146     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
147     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
148     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
149     {3,7,5,4,6,5,4,3,3,2,2,1,0},
150     {5,4,3,7,6,5,4,3,2,1,1,0},
151     {1,1,7,6,5,4,3,2,1,1,0},
152     {1,1,5,4,3,3,2,1,1,0},
153     {1,1,1,3,3,2,2,1,0},
154     {1,0,1,3,2,1,1,1},
155     {1,0,1,3,2,1,1},
156     {0,1,1,2,1,3},
157     {0,1,1,1,1},
158     {0,1,1,1},
159     {0,1,1},
160     {0,1},
161 };
162
/* Code lengths (in bits) of the chroma DC total_zeros VLCs: one VLC per row,
 * 4 symbols each (the total_zeros value). decode_residual() selects row
 * total_coeff-1 for CHROMA_DC_BLOCK_INDEX blocks. */
163 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
164     { 1, 2, 3, 3,},
165     { 1, 2, 2, 0,},
166     { 1, 1, 0, 0,},
167 };
168
/* Code words matching chroma_dc_total_zeros_len[], same layout. */
169 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
170     { 1, 1, 1, 0,},
171     { 1, 1, 0, 0,},
172     { 1, 0, 0, 0,},
173 };
174
/* Code lengths (in bits) of the run_before VLCs.
 * Rows 0..5 become run_vlc[0..5] (7 symbols each); decode_residual() uses
 * row zeros_left-1 while zeros_left is below 7.
 * Row 6 becomes run7_vlc (16 symbols) and is used for zeros_left >= 7. */
175 static const uint8_t run_len[7][16]={
176     {1,1},
177     {1,2,2},
178     {2,2,2,2},
179     {2,2,2,3,3},
180     {2,2,3,3,3,3},
181     {2,3,3,3,3,3,3},
182     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
183 };
184
/* Code words matching run_len[], same layout. */
185 static const uint8_t run_bits[7][16]={
186     {1,0},
187     {1,1,0},
188     {3,2,1,0},
189     {3,2,1,1,0},
190     {3,2,3,2,1,0},
191     {3,0,1,3,2,5,4},
192     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
193 };
194
/* The VLC objects below use statically allocated table storage that is handed
 * to init_vlc() in ff_h264_decode_init_vlc(); each *_size constant is stored
 * in the corresponding VLC's table_allocated field. */

/* The four coeff_token tables are packed back to back in one array; the
 * per-table sizes must add up to the array length (checked by an assert()
 * in ff_h264_decode_init_vlc()). */
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
198
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
202
/* Indexed by total_coeff-1 in decode_residual(). */
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
206
/* Indexed by total_coeff-1 in decode_residual(). */
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
210
/* Indexed by zeros_left-1 in decode_residual(), for zeros_left < 7. */
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
214
/* Used by decode_residual() when zeros_left >= 7. */
215 static VLC run7_vlc;
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
218
/* Level lookup table filled by init_cavlc_level_tab(), indexed as
 * [suffix_length][next LEVEL_TAB_BITS bits of the bitstream]:
 *   [0] is either the decoded level, or a value >= 100 meaning "escape"
 *       (value - 100 is the prefix found so far),
 *   [1] is the number of bits to skip. */
219 #define LEVEL_TAB_BITS 8
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
221
222
223 /**
224  * gets the predicted number of non-zero coefficients.
225  * @param n block index
226  */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228     const int index8= scan8[n];
229     const int left= h->non_zero_count_cache[index8 - 1];
230     const int top = h->non_zero_count_cache[index8 - 8];
231     int i= left + top;
232
233     if(i<64) i= (i+1)>>1;
234
235     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
236
237     return i&31;
238 }
239
240 static av_cold void init_cavlc_level_tab(void){
241     int suffix_length, mask;
242     unsigned int i;
243
244     for(suffix_length=0; suffix_length<7; suffix_length++){
245         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
246             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
248
249             mask= -(level_code&1);
250             level_code= (((2+level_code)>>1) ^ mask) - mask;
251             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252                 cavlc_level_tab[suffix_length][i][0]= level_code;
253                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
254             }else if(prefix + 1 <= LEVEL_TAB_BITS){
255                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
257             }else{
258                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
260             }
261         }
262     }
263 }
264
265 av_cold void ff_h264_decode_init_vlc(void){
266     static int done = 0;
267
268     if (!done) {
269         int i;
270         int offset;
271         done = 1;
272
273         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276                  &chroma_dc_coeff_token_len [0], 1, 1,
277                  &chroma_dc_coeff_token_bits[0], 1, 1,
278                  INIT_VLC_USE_NEW_STATIC);
279
280         offset = 0;
281         for(i=0; i<4; i++){
282             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285                      &coeff_token_len [i][0], 1, 1,
286                      &coeff_token_bits[i][0], 1, 1,
287                      INIT_VLC_USE_NEW_STATIC);
288             offset += coeff_token_vlc_tables_size[i];
289         }
290         /*
291          * This is a one time safety check to make sure that
292          * the packed static coeff_token_vlc table sizes
293          * were initialized correctly.
294          */
295         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
296
297         for(i=0; i<3; i++){
298             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300             init_vlc(&chroma_dc_total_zeros_vlc[i],
301                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302                      &chroma_dc_total_zeros_len [i][0], 1, 1,
303                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
304                      INIT_VLC_USE_NEW_STATIC);
305         }
306         for(i=0; i<15; i++){
307             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309             init_vlc(&total_zeros_vlc[i],
310                      TOTAL_ZEROS_VLC_BITS, 16,
311                      &total_zeros_len [i][0], 1, 1,
312                      &total_zeros_bits[i][0], 1, 1,
313                      INIT_VLC_USE_NEW_STATIC);
314         }
315
316         for(i=0; i<6; i++){
317             run_vlc[i].table = run_vlc_tables[i];
318             run_vlc[i].table_allocated = run_vlc_tables_size;
319             init_vlc(&run_vlc[i],
320                      RUN_VLC_BITS, 7,
321                      &run_len [i][0], 1, 1,
322                      &run_bits[i][0], 1, 1,
323                      INIT_VLC_USE_NEW_STATIC);
324         }
325         run7_vlc.table = run7_vlc_table,
326         run7_vlc.table_allocated = run7_vlc_table_size;
327         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328                  &run_len [6][0], 1, 1,
329                  &run_bits[6][0], 1, 1,
330                  INIT_VLC_USE_NEW_STATIC);
331
332         init_cavlc_level_tab();
333     }
334 }
335
336 /**
337  *
338  */
339 static inline int get_level_prefix(GetBitContext *gb){
340     unsigned int buf;
341     int log;
342
343     OPEN_READER(re, gb);
344     UPDATE_CACHE(re, gb);
345     buf=GET_CACHE(re, gb);
346
347     log= 32 - av_log2(buf);
348 #ifdef TRACE
349     print_bin(buf>>(32-log), log);
350     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
351 #endif
352
353     LAST_SKIP_BITS(re, gb, log);
354     CLOSE_READER(re, gb);
355
356     return log-1;
357 }
358
359 /**
360  * decodes a residual block.
361  * @param n block index
362  * @param scantable scantable
363  * @param max_coeff number of coefficients in the block
364  * @return <0 if an error occurred
365  */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367     MpegEncContext * const s = &h->s;
368     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
369     int level[16];
370     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
371
372     //FIXME put trailing_onex into the context
373
374     if(n == CHROMA_DC_BLOCK_INDEX){
375         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376         total_coeff= coeff_token>>2;
377     }else{
378         if(n == LUMA_DC_BLOCK_INDEX){
379             total_coeff= pred_non_zero_count(h, 0);
380             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381             total_coeff= coeff_token>>2;
382         }else{
383             total_coeff= pred_non_zero_count(h, n);
384             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385             total_coeff= coeff_token>>2;
386             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
387         }
388     }
389
390     //FIXME set last_non_zero?
391
392     if(total_coeff==0)
393         return 0;
394     if(total_coeff > (unsigned)max_coeff) {
395         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
396         return -1;
397     }
398
399     trailing_ones= coeff_token&3;
400     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401     assert(total_coeff<=16);
402
403     i = show_bits(gb, 3);
404     skip_bits(gb, trailing_ones);
405     level[0] = 1-((i&4)>>1);
406     level[1] = 1-((i&2)   );
407     level[2] = 1-((i&1)<<1);
408
409     if(trailing_ones<total_coeff) {
410         int mask, prefix;
411         int suffix_length = total_coeff > 10 & trailing_ones < 3;
412         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
414
415         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
416         if(level_code >= 100){
417             prefix= level_code - 100;
418             if(prefix == LEVEL_TAB_BITS)
419                 prefix += get_level_prefix(gb);
420
421             //first coefficient has suffix_length equal to 0 or 1
422             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
423                 if(suffix_length)
424                     level_code= (prefix<<1) + get_bits1(gb); //part
425                 else
426                     level_code= prefix; //part
427             }else if(prefix==14){
428                 if(suffix_length)
429                     level_code= (prefix<<1) + get_bits1(gb); //part
430                 else
431                     level_code= prefix + get_bits(gb, 4); //part
432             }else{
433                 level_code= 30 + get_bits(gb, prefix-3); //part
434                 if(prefix>=16)
435                     level_code += (1<<(prefix-3))-4096;
436             }
437
438             if(trailing_ones < 3) level_code += 2;
439
440             suffix_length = 2;
441             mask= -(level_code&1);
442             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
443         }else{
444             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
445
446             suffix_length = 1 + (level_code + 3U > 6U);
447             level[trailing_ones]= level_code;
448         }
449
450         //remaining coefficients have suffix_length > 0
451         for(i=trailing_ones+1;i<total_coeff;i++) {
452             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
453             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
454             level_code= cavlc_level_tab[suffix_length][bitsi][0];
455
456             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
457             if(level_code >= 100){
458                 prefix= level_code - 100;
459                 if(prefix == LEVEL_TAB_BITS){
460                     prefix += get_level_prefix(gb);
461                 }
462                 if(prefix<15){
463                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
464                 }else{
465                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
466                     if(prefix>=16)
467                         level_code += (1<<(prefix-3))-4096;
468                 }
469                 mask= -(level_code&1);
470                 level_code= (((2+level_code)>>1) ^ mask) - mask;
471             }
472             level[i]= level_code;
473             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
474         }
475     }
476
477     if(total_coeff == max_coeff)
478         zeros_left=0;
479     else{
480         if(n == CHROMA_DC_BLOCK_INDEX)
481             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
482         else
483             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
484     }
485
486     coeff_num = zeros_left + total_coeff - 1;
487     j = scantable[coeff_num];
488     if(n > 24){
489         block[j] = level[0];
490         for(i=1;i<total_coeff;i++) {
491             if(zeros_left <= 0)
492                 run_before = 0;
493             else if(zeros_left < 7){
494                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
495             }else{
496                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
497             }
498             zeros_left -= run_before;
499             coeff_num -= 1 + run_before;
500             j= scantable[ coeff_num ];
501
502             block[j]= level[i];
503         }
504     }else{
505         block[j] = (level[0] * qmul[j] + 32)>>6;
506         for(i=1;i<total_coeff;i++) {
507             if(zeros_left <= 0)
508                 run_before = 0;
509             else if(zeros_left < 7){
510                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
511             }else{
512                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
513             }
514             zeros_left -= run_before;
515             coeff_num -= 1 + run_before;
516             j= scantable[ coeff_num ];
517
518             block[j]= (level[i] * qmul[j] + 32)>>6;
519         }
520     }
521
522     if(zeros_left<0){
523         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
524         return -1;
525     }
526
527     return 0;
528 }
529
530 int ff_h264_decode_mb_cavlc(H264Context *h){
531     MpegEncContext * const s = &h->s;
532     int mb_xy;
533     int partition_count;
534     unsigned int mb_type, cbp;
535     int dct8x8_allowed= h->pps.transform_8x8_mode;
536
537     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
538
539     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
540     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
541                 down the code */
542     if(h->slice_type_nos != FF_I_TYPE){
543         if(s->mb_skip_run==-1)
544             s->mb_skip_run= get_ue_golomb(&s->gb);
545
546         if (s->mb_skip_run--) {
547             if(FRAME_MBAFF && (s->mb_y&1) == 0){
548                 if(s->mb_skip_run==0)
549                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
550             }
551             decode_mb_skip(h);
552             return 0;
553         }
554     }
555     if(FRAME_MBAFF){
556         if( (s->mb_y&1) == 0 )
557             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
558     }
559
560     h->prev_mb_skipped= 0;
561
562     mb_type= get_ue_golomb(&s->gb);
563     if(h->slice_type_nos == FF_B_TYPE){
564         if(mb_type < 23){
565             partition_count= b_mb_type_info[mb_type].partition_count;
566             mb_type=         b_mb_type_info[mb_type].type;
567         }else{
568             mb_type -= 23;
569             goto decode_intra_mb;
570         }
571     }else if(h->slice_type_nos == FF_P_TYPE){
572         if(mb_type < 5){
573             partition_count= p_mb_type_info[mb_type].partition_count;
574             mb_type=         p_mb_type_info[mb_type].type;
575         }else{
576             mb_type -= 5;
577             goto decode_intra_mb;
578         }
579     }else{
580        assert(h->slice_type_nos == FF_I_TYPE);
581         if(h->slice_type == FF_SI_TYPE && mb_type)
582             mb_type--;
583 decode_intra_mb:
584         if(mb_type > 25){
585             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
586             return -1;
587         }
588         partition_count=0;
589         cbp= i_mb_type_info[mb_type].cbp;
590         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
591         mb_type= i_mb_type_info[mb_type].type;
592     }
593
594     if(MB_FIELD)
595         mb_type |= MB_TYPE_INTERLACED;
596
597     h->slice_table[ mb_xy ]= h->slice_num;
598
599     if(IS_INTRA_PCM(mb_type)){
600         unsigned int x;
601
602         // We assume these blocks are very rare so we do not optimize it.
603         align_get_bits(&s->gb);
604
605         // The pixels are stored in the same order as levels in h->mb array.
606         for(x=0; x < (CHROMA ? 384 : 256); x++){
607             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
608         }
609
610         // In deblocking, the quantizer is 0
611         s->current_picture.qscale_table[mb_xy]= 0;
612         // All coeffs are present
613         memset(h->non_zero_count[mb_xy], 16, 32);
614
615         s->current_picture.mb_type[mb_xy]= mb_type;
616         return 0;
617     }
618
619     if(MB_MBAFF){
620         h->ref_count[0] <<= 1;
621         h->ref_count[1] <<= 1;
622     }
623
624     fill_decode_neighbors(h, mb_type);
625     fill_decode_caches(h, mb_type);
626
627     //mb_pred
628     if(IS_INTRA(mb_type)){
629         int pred_mode;
630 //            init_top_left_availability(h);
631         if(IS_INTRA4x4(mb_type)){
632             int i;
633             int di = 1;
634             if(dct8x8_allowed && get_bits1(&s->gb)){
635                 mb_type |= MB_TYPE_8x8DCT;
636                 di = 4;
637             }
638
639 //                fill_intra4x4_pred_table(h);
640             for(i=0; i<16; i+=di){
641                 int mode= pred_intra_mode(h, i);
642
643                 if(!get_bits1(&s->gb)){
644                     const int rem_mode= get_bits(&s->gb, 3);
645                     mode = rem_mode + (rem_mode >= mode);
646                 }
647
648                 if(di==4)
649                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
650                 else
651                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
652             }
653             ff_h264_write_back_intra_pred_mode(h);
654             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
655                 return -1;
656         }else{
657             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
658             if(h->intra16x16_pred_mode < 0)
659                 return -1;
660         }
661         if(CHROMA){
662             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
663             if(pred_mode < 0)
664                 return -1;
665             h->chroma_pred_mode= pred_mode;
666         }
667     }else if(partition_count==4){
668         int i, j, sub_partition_count[4], list, ref[2][4];
669
670         if(h->slice_type_nos == FF_B_TYPE){
671             for(i=0; i<4; i++){
672                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
673                 if(h->sub_mb_type[i] >=13){
674                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
675                     return -1;
676                 }
677                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
678                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
679             }
680             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
681                 ff_h264_pred_direct_motion(h, &mb_type);
682                 h->ref_cache[0][scan8[4]] =
683                 h->ref_cache[1][scan8[4]] =
684                 h->ref_cache[0][scan8[12]] =
685                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
686             }
687         }else{
688             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
689             for(i=0; i<4; i++){
690                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
691                 if(h->sub_mb_type[i] >=4){
692                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
693                     return -1;
694                 }
695                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
696                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
697             }
698         }
699
700         for(list=0; list<h->list_count; list++){
701             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
702             for(i=0; i<4; i++){
703                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
704                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
705                     unsigned int tmp;
706                     if(ref_count == 1){
707                         tmp= 0;
708                     }else if(ref_count == 2){
709                         tmp= get_bits1(&s->gb)^1;
710                     }else{
711                         tmp= get_ue_golomb_31(&s->gb);
712                         if(tmp>=ref_count){
713                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
714                             return -1;
715                         }
716                     }
717                     ref[list][i]= tmp;
718                 }else{
719                  //FIXME
720                     ref[list][i] = -1;
721                 }
722             }
723         }
724
725         if(dct8x8_allowed)
726             dct8x8_allowed = get_dct8x8_allowed(h);
727
728         for(list=0; list<h->list_count; list++){
729             for(i=0; i<4; i++){
730                 if(IS_DIRECT(h->sub_mb_type[i])) {
731                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
732                     continue;
733                 }
734                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
735                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
736
737                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
738                     const int sub_mb_type= h->sub_mb_type[i];
739                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
740                     for(j=0; j<sub_partition_count[i]; j++){
741                         int mx, my;
742                         const int index= 4*i + block_width*j;
743                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
744                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
745                         mx += get_se_golomb(&s->gb);
746                         my += get_se_golomb(&s->gb);
747                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
748
749                         if(IS_SUB_8X8(sub_mb_type)){
750                             mv_cache[ 1 ][0]=
751                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
752                             mv_cache[ 1 ][1]=
753                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
754                         }else if(IS_SUB_8X4(sub_mb_type)){
755                             mv_cache[ 1 ][0]= mx;
756                             mv_cache[ 1 ][1]= my;
757                         }else if(IS_SUB_4X8(sub_mb_type)){
758                             mv_cache[ 8 ][0]= mx;
759                             mv_cache[ 8 ][1]= my;
760                         }
761                         mv_cache[ 0 ][0]= mx;
762                         mv_cache[ 0 ][1]= my;
763                     }
764                 }else{
765                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
766                     p[0] = p[1]=
767                     p[8] = p[9]= 0;
768                 }
769             }
770         }
771     }else if(IS_DIRECT(mb_type)){
772         ff_h264_pred_direct_motion(h, &mb_type);
773         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
774     }else{
775         int list, mx, my, i;
776          //FIXME we should set ref_idx_l? to 0 if we use that later ...
777         if(IS_16X16(mb_type)){
778             for(list=0; list<h->list_count; list++){
779                     unsigned int val;
780                     if(IS_DIR(mb_type, 0, list)){
781                         if(h->ref_count[list]==1){
782                             val= 0;
783                         }else if(h->ref_count[list]==2){
784                             val= get_bits1(&s->gb)^1;
785                         }else{
786                             val= get_ue_golomb_31(&s->gb);
787                             if(val >= h->ref_count[list]){
788                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
789                                 return -1;
790                             }
791                         }
792                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
793                     }
794             }
795             for(list=0; list<h->list_count; list++){
796                 if(IS_DIR(mb_type, 0, list)){
797                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
798                     mx += get_se_golomb(&s->gb);
799                     my += get_se_golomb(&s->gb);
800                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
801
802                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
803                 }
804             }
805         }
806         else if(IS_16X8(mb_type)){
807             for(list=0; list<h->list_count; list++){
808                     for(i=0; i<2; i++){
809                         unsigned int val;
810                         if(IS_DIR(mb_type, i, list)){
811                             if(h->ref_count[list] == 1){
812                                 val= 0;
813                             }else if(h->ref_count[list] == 2){
814                                 val= get_bits1(&s->gb)^1;
815                             }else{
816                                 val= get_ue_golomb_31(&s->gb);
817                                 if(val >= h->ref_count[list]){
818                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
819                                     return -1;
820                                 }
821                             }
822                         }else
823                             val= LIST_NOT_USED&0xFF;
824                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
825                     }
826             }
827             for(list=0; list<h->list_count; list++){
828                 for(i=0; i<2; i++){
829                     unsigned int val;
830                     if(IS_DIR(mb_type, i, list)){
831                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
832                         mx += get_se_golomb(&s->gb);
833                         my += get_se_golomb(&s->gb);
834                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
835
836                         val= pack16to32(mx,my);
837                     }else
838                         val=0;
839                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
840                 }
841             }
842         }else{
843             assert(IS_8X16(mb_type));
844             for(list=0; list<h->list_count; list++){
845                     for(i=0; i<2; i++){
846                         unsigned int val;
847                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
848                             if(h->ref_count[list]==1){
849                                 val= 0;
850                             }else if(h->ref_count[list]==2){
851                                 val= get_bits1(&s->gb)^1;
852                             }else{
853                                 val= get_ue_golomb_31(&s->gb);
854                                 if(val >= h->ref_count[list]){
855                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
856                                     return -1;
857                                 }
858                             }
859                         }else
860                             val= LIST_NOT_USED&0xFF;
861                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
862                     }
863             }
864             for(list=0; list<h->list_count; list++){
865                 for(i=0; i<2; i++){
866                     unsigned int val;
867                     if(IS_DIR(mb_type, i, list)){
868                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
869                         mx += get_se_golomb(&s->gb);
870                         my += get_se_golomb(&s->gb);
871                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
872
873                         val= pack16to32(mx,my);
874                     }else
875                         val=0;
876                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
877                 }
878             }
879         }
880     }
881
882     if(IS_INTER(mb_type))
883         write_back_motion(h, mb_type);
884
885     if(!IS_INTRA16x16(mb_type)){
886         cbp= get_ue_golomb(&s->gb);
887         if(cbp > 47){
888             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
889             return -1;
890         }
891
892         if(CHROMA){
893             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
894             else                     cbp= golomb_to_inter_cbp   [cbp];
895         }else{
896             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
897             else                     cbp= golomb_to_inter_cbp_gray[cbp];
898         }
899     }
900
901     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
902         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
903     }
904     h->cbp=
905     h->cbp_table[mb_xy]= cbp;
906     s->current_picture.mb_type[mb_xy]= mb_type;
907
908     if(cbp || IS_INTRA16x16(mb_type)){
909         int i8x8, i4x4, chroma_idx;
910         int dquant;
911         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
912         const uint8_t *scan, *scan8x8, *dc_scan;
913
914         if(IS_INTERLACED(mb_type)){
915             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
916             scan= s->qscale ? h->field_scan : h->field_scan_q0;
917             dc_scan= luma_dc_field_scan;
918         }else{
919             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
920             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
921             dc_scan= luma_dc_zigzag_scan;
922         }
923
924         dquant= get_se_golomb(&s->gb);
925
926         s->qscale += dquant;
927
928         if(((unsigned)s->qscale) > 51){
929             if(s->qscale<0) s->qscale+= 52;
930             else            s->qscale-= 52;
931             if(((unsigned)s->qscale) > 51){
932                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
933                 return -1;
934             }
935         }
936
937         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
938         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
939         if(IS_INTRA16x16(mb_type)){
940             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
941                 return -1; //FIXME continue if partitioned and other return -1 too
942             }
943
944             assert((cbp&15) == 0 || (cbp&15) == 15);
945
946             if(cbp&15){
947                 for(i8x8=0; i8x8<4; i8x8++){
948                     for(i4x4=0; i4x4<4; i4x4++){
949                         const int index= i4x4 + 4*i8x8;
950                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
951                             return -1;
952                         }
953                     }
954                 }
955             }else{
956                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
957             }
958         }else{
959             for(i8x8=0; i8x8<4; i8x8++){
960                 if(cbp & (1<<i8x8)){
961                     if(IS_8x8DCT(mb_type)){
962                         DCTELEM *buf = &h->mb[64*i8x8];
963                         uint8_t *nnz;
964                         for(i4x4=0; i4x4<4; i4x4++){
965                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
966                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
967                                 return -1;
968                         }
969                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
970                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
971                     }else{
972                         for(i4x4=0; i4x4<4; i4x4++){
973                             const int index= i4x4 + 4*i8x8;
974
975                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
976                                 return -1;
977                             }
978                         }
979                     }
980                 }else{
981                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
982                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
983                 }
984             }
985         }
986
987         if(cbp&0x30){
988             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
989                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
990                     return -1;
991                 }
992         }
993
994         if(cbp&0x20){
995             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
996                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
997                 for(i4x4=0; i4x4<4; i4x4++){
998                     const int index= 16 + 4*chroma_idx + i4x4;
999                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1000                         return -1;
1001                     }
1002                 }
1003             }
1004         }else{
1005             uint8_t * const nnz= &h->non_zero_count_cache[0];
1006             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1007             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1008         }
1009     }else{
1010         uint8_t * const nnz= &h->non_zero_count_cache[0];
1011         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1012         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1013         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1014     }
1015     s->current_picture.qscale_table[mb_xy]= s->qscale;
1016     write_back_non_zero_count(h);
1017
1018     if(MB_MBAFF){
1019         h->ref_count[0] >>= 1;
1020         h->ref_count[1] >>= 1;
1021     }
1022
1023     return 0;
1024 }
1025