]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
b662ee821fa8874283dfc711b435987fea6cda8f
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file libavcodec/h264_cavlc.c
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/*
 * Two 16-entry lookup tables; each one is a permutation of the values 0..15.
 * NOTE(review): going by the names, they translate a Golomb code number into a
 * 4-bit coded block pattern for inter / intra4x4 macroblocks of streams
 * without chroma ("gray"). The code using them is not part of the reviewed
 * portion of the file -- confirm against the users.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,  8,  3,  5, 10,
    12, 15,  7, 11, 13, 14,  6,  9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11, 13, 14,  3,  5,
    10, 12,  1,  2,  4,  8,  6,  9,
};
48
/*
 * Code lengths and code words of the chroma DC coeff_token VLC.
 * Entry index is 4*total_coeff + trailing_ones (decode_residual() splits the
 * decoded symbol with >>2 and &3), one row per total_coeff value 0..4.
 * Combinations with trailing_ones > total_coeff carry length 0
 * (presumably skipped by init_vlc() -- confirm).
 */
static const uint8_t chroma_dc_coeff_token_len[4*5] = {
    2, 0, 0, 0,    /* total_coeff 0 */
    6, 1, 0, 0,    /* total_coeff 1 */
    6, 6, 3, 0,    /* total_coeff 2 */
    6, 7, 7, 6,    /* total_coeff 3 */
    6, 8, 8, 7,    /* total_coeff 4 */
};

static const uint8_t chroma_dc_coeff_token_bits[4*5] = {
    1, 0, 0, 0,    /* total_coeff 0 */
    7, 1, 0, 0,    /* total_coeff 1 */
    4, 6, 1, 0,    /* total_coeff 2 */
    3, 3, 2, 5,    /* total_coeff 3 */
    2, 3, 2, 0,    /* total_coeff 4 */
};
64
/*
 * Code lengths of the four coeff_token VLCs used for non chroma-DC blocks.
 * First index: VLC number; decode_residual() picks it from the predicted
 * non-zero count (0-1 -> 0, 2-3 -> 1, 4-7 -> 2, 8-16 -> 3).
 * Second index: 4*total_coeff + trailing_ones, total_coeff 0..16.
 * The first line of each table is total_coeff 0, the following lines hold
 * four total_coeff groups each.
 */
static const uint8_t coeff_token_len[4][4*17] = {
    { /* VLC 0 */
         1, 0, 0, 0,
         6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
        11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
        14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
        16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
    },
    { /* VLC 1 */
         2, 0, 0, 0,
         6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
         8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
        12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
        13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
    },
    { /* VLC 2 */
         4, 0, 0, 0,
         6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
         7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
         8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
        10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
    },
    { /* VLC 3: every used code is 6 bits long */
         6, 0, 0, 0,
         6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
    },
};
95
/*
 * Code words of the four coeff_token VLCs; laid out exactly like
 * coeff_token_len (first index: VLC number, second index:
 * 4*total_coeff + trailing_ones). The matching entry of coeff_token_len
 * gives the number of significant bits of each code word.
 */
static const uint8_t coeff_token_bits[4][4*17] = {
    { /* VLC 0 */
         1, 0, 0, 0,
         5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
         7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
        15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
        15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
    },
    { /* VLC 1 */
         3, 0, 0, 0,
        11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
         4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
        15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
        11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
    },
    { /* VLC 2 */
        15, 0, 0, 0,
        15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
        11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
        11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
        13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
    },
    { /* VLC 3 */
         3, 0, 0, 0,
         0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
        16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
        32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
        48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
    },
};
126
/*
 * Code lengths of the total_zeros VLCs.
 * Row r describes the VLC that decode_residual() uses when total_coeff is
 * r+1; the column is the decoded total_zeros value. Only the 15 rows listed
 * here are turned into VLCs by ff_h264_decode_init_vlc(); the 16th row and
 * all entries not spelled out are implicitly zero.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },    /* total_coeff  1 */
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },       /* total_coeff  2 */
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },          /* total_coeff  3 */
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },             /* total_coeff  4 */
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },                /* total_coeff  5 */
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },                   /* total_coeff  6 */
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },                      /* total_coeff  7 */
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },                         /* total_coeff  8 */
    { 6, 6, 4, 2, 2, 3, 2, 5 },                            /* total_coeff  9 */
    { 5, 5, 3, 2, 2, 2, 4 },                               /* total_coeff 10 */
    { 4, 4, 3, 3, 1, 3 },                                  /* total_coeff 11 */
    { 4, 4, 2, 1, 3 },                                     /* total_coeff 12 */
    { 3, 3, 1, 2 },                                        /* total_coeff 13 */
    { 2, 2, 1 },                                           /* total_coeff 14 */
    { 1, 1 },                                              /* total_coeff 15 */
};
144
/*
 * Code words of the total_zeros VLCs, laid out like total_zeros_len:
 * row r belongs to total_coeff r+1, the column is the total_zeros value.
 * Rows and entries without an initializer are implicitly zero.
 */
static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },    /* total_coeff  1 */
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },       /* total_coeff  2 */
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },          /* total_coeff  3 */
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },             /* total_coeff  4 */
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                /* total_coeff  5 */
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                   /* total_coeff  6 */
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },                      /* total_coeff  7 */
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },                         /* total_coeff  8 */
    { 1, 0, 1, 3, 2, 1, 1, 1 },                            /* total_coeff  9 */
    { 1, 0, 1, 3, 2, 1, 1 },                               /* total_coeff 10 */
    { 0, 1, 1, 2, 1, 3 },                                  /* total_coeff 11 */
    { 0, 1, 1, 1, 1 },                                     /* total_coeff 12 */
    { 0, 1, 1, 1 },                                        /* total_coeff 13 */
    { 0, 1, 1 },                                           /* total_coeff 14 */
    { 0, 1 },                                              /* total_coeff 15 */
};
162
/*
 * Code lengths and code words of the chroma DC total_zeros VLCs.
 * Row r is the VLC used by decode_residual() for total_coeff r+1, the column
 * is the total_zeros value.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 },    /* total_coeff 1 */
    { 1, 2, 2, 0 },    /* total_coeff 2 */
    { 1, 1, 0, 0 },    /* total_coeff 3 */
};

static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 },    /* total_coeff 1 */
    { 1, 1, 0, 0 },    /* total_coeff 2 */
    { 1, 0, 0, 0 },    /* total_coeff 3 */
};
174
/*
 * Code lengths and code words of the run_before VLCs.
 * Rows 0..5 become run_vlc[0..5], which decode_residual() selects with
 * zeros_left-1 while zeros_left is 1..6; the last row becomes run7_vlc, used
 * for zeros_left >= 7. The column is the run_before value. Entries without an
 * initializer are implicitly zero.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },                                              /* zeros_left 1 */
    { 1, 2, 2 },                                           /* zeros_left 2 */
    { 2, 2, 2, 2 },                                        /* zeros_left 3 */
    { 2, 2, 2, 3, 3 },                                     /* zeros_left 4 */
    { 2, 2, 3, 3, 3, 3 },                                  /* zeros_left 5 */
    { 2, 3, 3, 3, 3, 3, 3 },                               /* zeros_left 6 */
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },     /* zeros_left 7 and above */
};

static const uint8_t run_bits[7][16] = {
    { 1, 0 },                                              /* zeros_left 1 */
    { 1, 1, 0 },                                           /* zeros_left 2 */
    { 3, 2, 1, 0 },                                        /* zeros_left 3 */
    { 3, 2, 1, 1, 0 },                                     /* zeros_left 4 */
    { 3, 2, 3, 2, 1, 0 },                                  /* zeros_left 5 */
    { 3, 0, 1, 3, 2, 5, 4 },                               /* zeros_left 6 */
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },       /* zeros_left 7 and above */
};
194
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
198
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
202
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
206
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
210
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
214
215 static VLC run7_vlc;
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
218
/* Number of bitstream bits used as index into cavlc_level_tab. */
#define LEVEL_TAB_BITS 8

/*
 * Level lookup table, filled by init_cavlc_level_tab().
 * Indexed as [suffix_length][next LEVEL_TAB_BITS bits of the stream]:
 *   [0] the decoded level, or 100 + prefix when the code does not fit
 *       completely into the index bits,
 *   [1] the number of bits to consume.
 */
static int8_t cavlc_level_tab[7][1 << LEVEL_TAB_BITS][2];
221
222
223 /**
224  * gets the predicted number of non-zero coefficients.
225  * @param n block index
226  */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228     const int index8= scan8[n];
229     const int left= h->non_zero_count_cache[index8 - 1];
230     const int top = h->non_zero_count_cache[index8 - 8];
231     int i= left + top;
232
233     if(i<64) i= (i+1)>>1;
234
235     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
236
237     return i&31;
238 }
239
240 static av_cold void init_cavlc_level_tab(void){
241     int suffix_length, mask;
242     unsigned int i;
243
244     for(suffix_length=0; suffix_length<7; suffix_length++){
245         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
246             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
248
249             mask= -(level_code&1);
250             level_code= (((2+level_code)>>1) ^ mask) - mask;
251             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252                 cavlc_level_tab[suffix_length][i][0]= level_code;
253                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
254             }else if(prefix + 1 <= LEVEL_TAB_BITS){
255                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
257             }else{
258                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
260             }
261         }
262     }
263 }
264
265 av_cold void ff_h264_decode_init_vlc(void){
266     static int done = 0;
267
268     if (!done) {
269         int i;
270         int offset;
271         done = 1;
272
273         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276                  &chroma_dc_coeff_token_len [0], 1, 1,
277                  &chroma_dc_coeff_token_bits[0], 1, 1,
278                  INIT_VLC_USE_NEW_STATIC);
279
280         offset = 0;
281         for(i=0; i<4; i++){
282             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285                      &coeff_token_len [i][0], 1, 1,
286                      &coeff_token_bits[i][0], 1, 1,
287                      INIT_VLC_USE_NEW_STATIC);
288             offset += coeff_token_vlc_tables_size[i];
289         }
290         /*
291          * This is a one time safety check to make sure that
292          * the packed static coeff_token_vlc table sizes
293          * were initialized correctly.
294          */
295         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
296
297         for(i=0; i<3; i++){
298             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300             init_vlc(&chroma_dc_total_zeros_vlc[i],
301                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302                      &chroma_dc_total_zeros_len [i][0], 1, 1,
303                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
304                      INIT_VLC_USE_NEW_STATIC);
305         }
306         for(i=0; i<15; i++){
307             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309             init_vlc(&total_zeros_vlc[i],
310                      TOTAL_ZEROS_VLC_BITS, 16,
311                      &total_zeros_len [i][0], 1, 1,
312                      &total_zeros_bits[i][0], 1, 1,
313                      INIT_VLC_USE_NEW_STATIC);
314         }
315
316         for(i=0; i<6; i++){
317             run_vlc[i].table = run_vlc_tables[i];
318             run_vlc[i].table_allocated = run_vlc_tables_size;
319             init_vlc(&run_vlc[i],
320                      RUN_VLC_BITS, 7,
321                      &run_len [i][0], 1, 1,
322                      &run_bits[i][0], 1, 1,
323                      INIT_VLC_USE_NEW_STATIC);
324         }
325         run7_vlc.table = run7_vlc_table,
326         run7_vlc.table_allocated = run7_vlc_table_size;
327         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328                  &run_len [6][0], 1, 1,
329                  &run_bits[6][0], 1, 1,
330                  INIT_VLC_USE_NEW_STATIC);
331
332         init_cavlc_level_tab();
333     }
334 }
335
336 /**
337  *
338  */
339 static inline int get_level_prefix(GetBitContext *gb){
340     unsigned int buf;
341     int log;
342
343     OPEN_READER(re, gb);
344     UPDATE_CACHE(re, gb);
345     buf=GET_CACHE(re, gb);
346
347     log= 32 - av_log2(buf);
348 #ifdef TRACE
349     print_bin(buf>>(32-log), log);
350     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
351 #endif
352
353     LAST_SKIP_BITS(re, gb, log);
354     CLOSE_READER(re, gb);
355
356     return log-1;
357 }
358
359 /**
360  * decodes a residual block.
361  * @param n block index
362  * @param scantable scantable
363  * @param max_coeff number of coefficients in the block
364  * @return <0 if an error occurred
365  */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367     MpegEncContext * const s = &h->s;
368     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
369     int level[16];
370     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
371
372     //FIXME put trailing_onex into the context
373
374     if(n == CHROMA_DC_BLOCK_INDEX){
375         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376         total_coeff= coeff_token>>2;
377     }else{
378         if(n == LUMA_DC_BLOCK_INDEX){
379             total_coeff= pred_non_zero_count(h, 0);
380             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381             total_coeff= coeff_token>>2;
382         }else{
383             total_coeff= pred_non_zero_count(h, n);
384             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385             total_coeff= coeff_token>>2;
386             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
387         }
388     }
389
390     //FIXME set last_non_zero?
391
392     if(total_coeff==0)
393         return 0;
394     if(total_coeff > (unsigned)max_coeff) {
395         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
396         return -1;
397     }
398
399     trailing_ones= coeff_token&3;
400     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401     assert(total_coeff<=16);
402
403     i = show_bits(gb, 3);
404     skip_bits(gb, trailing_ones);
405     level[0] = 1-((i&4)>>1);
406     level[1] = 1-((i&2)   );
407     level[2] = 1-((i&1)<<1);
408
409     if(trailing_ones<total_coeff) {
410         int mask, prefix;
411         int suffix_length = total_coeff > 10 && trailing_ones < 3;
412         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
414
415         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
416         if(level_code >= 100){
417             prefix= level_code - 100;
418             if(prefix == LEVEL_TAB_BITS)
419                 prefix += get_level_prefix(gb);
420
421             //first coefficient has suffix_length equal to 0 or 1
422             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
423                 if(suffix_length)
424                     level_code= (prefix<<1) + get_bits1(gb); //part
425                 else
426                     level_code= prefix; //part
427             }else if(prefix==14){
428                 if(suffix_length)
429                     level_code= (prefix<<1) + get_bits1(gb); //part
430                 else
431                     level_code= prefix + get_bits(gb, 4); //part
432             }else{
433                 level_code= 30 + get_bits(gb, prefix-3); //part
434                 if(prefix>=16)
435                     level_code += (1<<(prefix-3))-4096;
436             }
437
438             if(trailing_ones < 3) level_code += 2;
439
440             suffix_length = 2;
441             mask= -(level_code&1);
442             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
443         }else{
444             if(trailing_ones < 3) level_code += (level_code>>31)|1;
445
446             suffix_length = 1;
447             if(level_code + 3U > 6U)
448                 suffix_length++;
449             level[trailing_ones]= level_code;
450         }
451
452         //remaining coefficients have suffix_length > 0
453         for(i=trailing_ones+1;i<total_coeff;i++) {
454             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
455             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
456             level_code= cavlc_level_tab[suffix_length][bitsi][0];
457
458             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
459             if(level_code >= 100){
460                 prefix= level_code - 100;
461                 if(prefix == LEVEL_TAB_BITS){
462                     prefix += get_level_prefix(gb);
463                 }
464                 if(prefix<15){
465                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
466                 }else{
467                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
468                     if(prefix>=16)
469                         level_code += (1<<(prefix-3))-4096;
470                 }
471                 mask= -(level_code&1);
472                 level_code= (((2+level_code)>>1) ^ mask) - mask;
473             }
474             level[i]= level_code;
475
476             if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
477                 suffix_length++;
478         }
479     }
480
481     if(total_coeff == max_coeff)
482         zeros_left=0;
483     else{
484         if(n == CHROMA_DC_BLOCK_INDEX)
485             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
486         else
487             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
488     }
489
490     coeff_num = zeros_left + total_coeff - 1;
491     j = scantable[coeff_num];
492     if(n > 24){
493         block[j] = level[0];
494         for(i=1;i<total_coeff;i++) {
495             if(zeros_left <= 0)
496                 run_before = 0;
497             else if(zeros_left < 7){
498                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
499             }else{
500                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
501             }
502             zeros_left -= run_before;
503             coeff_num -= 1 + run_before;
504             j= scantable[ coeff_num ];
505
506             block[j]= level[i];
507         }
508     }else{
509         block[j] = (level[0] * qmul[j] + 32)>>6;
510         for(i=1;i<total_coeff;i++) {
511             if(zeros_left <= 0)
512                 run_before = 0;
513             else if(zeros_left < 7){
514                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
515             }else{
516                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
517             }
518             zeros_left -= run_before;
519             coeff_num -= 1 + run_before;
520             j= scantable[ coeff_num ];
521
522             block[j]= (level[i] * qmul[j] + 32)>>6;
523         }
524     }
525
526     if(zeros_left<0){
527         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
528         return -1;
529     }
530
531     return 0;
532 }
533
534 int ff_h264_decode_mb_cavlc(H264Context *h){
535     MpegEncContext * const s = &h->s;
536     int mb_xy;
537     int partition_count;
538     unsigned int mb_type, cbp;
539     int dct8x8_allowed= h->pps.transform_8x8_mode;
540
541     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
542
543     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
544     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
545                 down the code */
546     if(h->slice_type_nos != FF_I_TYPE){
547         if(s->mb_skip_run==-1)
548             s->mb_skip_run= get_ue_golomb(&s->gb);
549
550         if (s->mb_skip_run--) {
551             if(FRAME_MBAFF && (s->mb_y&1) == 0){
552                 if(s->mb_skip_run==0)
553                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
554                 else
555                     predict_field_decoding_flag(h);
556             }
557             decode_mb_skip(h);
558             return 0;
559         }
560     }
561     if(FRAME_MBAFF){
562         if( (s->mb_y&1) == 0 )
563             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
564     }
565
566     h->prev_mb_skipped= 0;
567
568     mb_type= get_ue_golomb(&s->gb);
569     if(h->slice_type_nos == FF_B_TYPE){
570         if(mb_type < 23){
571             partition_count= b_mb_type_info[mb_type].partition_count;
572             mb_type=         b_mb_type_info[mb_type].type;
573         }else{
574             mb_type -= 23;
575             goto decode_intra_mb;
576         }
577     }else if(h->slice_type_nos == FF_P_TYPE){
578         if(mb_type < 5){
579             partition_count= p_mb_type_info[mb_type].partition_count;
580             mb_type=         p_mb_type_info[mb_type].type;
581         }else{
582             mb_type -= 5;
583             goto decode_intra_mb;
584         }
585     }else{
586        assert(h->slice_type_nos == FF_I_TYPE);
587         if(h->slice_type == FF_SI_TYPE && mb_type)
588             mb_type--;
589 decode_intra_mb:
590         if(mb_type > 25){
591             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
592             return -1;
593         }
594         partition_count=0;
595         cbp= i_mb_type_info[mb_type].cbp;
596         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
597         mb_type= i_mb_type_info[mb_type].type;
598     }
599
600     if(MB_FIELD)
601         mb_type |= MB_TYPE_INTERLACED;
602
603     h->slice_table[ mb_xy ]= h->slice_num;
604
605     if(IS_INTRA_PCM(mb_type)){
606         unsigned int x;
607
608         // We assume these blocks are very rare so we do not optimize it.
609         align_get_bits(&s->gb);
610
611         // The pixels are stored in the same order as levels in h->mb array.
612         for(x=0; x < (CHROMA ? 384 : 256); x++){
613             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
614         }
615
616         // In deblocking, the quantizer is 0
617         s->current_picture.qscale_table[mb_xy]= 0;
618         // All coeffs are present
619         memset(h->non_zero_count[mb_xy], 16, 32);
620
621         s->current_picture.mb_type[mb_xy]= mb_type;
622         return 0;
623     }
624
625     if(MB_MBAFF){
626         h->ref_count[0] <<= 1;
627         h->ref_count[1] <<= 1;
628     }
629
630     fill_decode_caches(h, mb_type);
631
632     //mb_pred
633     if(IS_INTRA(mb_type)){
634         int pred_mode;
635 //            init_top_left_availability(h);
636         if(IS_INTRA4x4(mb_type)){
637             int i;
638             int di = 1;
639             if(dct8x8_allowed && get_bits1(&s->gb)){
640                 mb_type |= MB_TYPE_8x8DCT;
641                 di = 4;
642             }
643
644 //                fill_intra4x4_pred_table(h);
645             for(i=0; i<16; i+=di){
646                 int mode= pred_intra_mode(h, i);
647
648                 if(!get_bits1(&s->gb)){
649                     const int rem_mode= get_bits(&s->gb, 3);
650                     mode = rem_mode + (rem_mode >= mode);
651                 }
652
653                 if(di==4)
654                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
655                 else
656                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
657             }
658             ff_h264_write_back_intra_pred_mode(h);
659             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
660                 return -1;
661         }else{
662             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
663             if(h->intra16x16_pred_mode < 0)
664                 return -1;
665         }
666         if(CHROMA){
667             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
668             if(pred_mode < 0)
669                 return -1;
670             h->chroma_pred_mode= pred_mode;
671         }
672     }else if(partition_count==4){
673         int i, j, sub_partition_count[4], list, ref[2][4];
674
675         if(h->slice_type_nos == FF_B_TYPE){
676             for(i=0; i<4; i++){
677                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
678                 if(h->sub_mb_type[i] >=13){
679                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
680                     return -1;
681                 }
682                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
683                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
684             }
685             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
686                 ff_h264_pred_direct_motion(h, &mb_type);
687                 h->ref_cache[0][scan8[4]] =
688                 h->ref_cache[1][scan8[4]] =
689                 h->ref_cache[0][scan8[12]] =
690                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
691             }
692         }else{
693             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
694             for(i=0; i<4; i++){
695                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
696                 if(h->sub_mb_type[i] >=4){
697                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
698                     return -1;
699                 }
700                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
701                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
702             }
703         }
704
705         for(list=0; list<h->list_count; list++){
706             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
707             for(i=0; i<4; i++){
708                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
709                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
710                     unsigned int tmp;
711                     if(ref_count == 1){
712                         tmp= 0;
713                     }else if(ref_count == 2){
714                         tmp= get_bits1(&s->gb)^1;
715                     }else{
716                         tmp= get_ue_golomb_31(&s->gb);
717                         if(tmp>=ref_count){
718                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
719                             return -1;
720                         }
721                     }
722                     ref[list][i]= tmp;
723                 }else{
724                  //FIXME
725                     ref[list][i] = -1;
726                 }
727             }
728         }
729
730         if(dct8x8_allowed)
731             dct8x8_allowed = get_dct8x8_allowed(h);
732
733         for(list=0; list<h->list_count; list++){
734             for(i=0; i<4; i++){
735                 if(IS_DIRECT(h->sub_mb_type[i])) {
736                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
737                     continue;
738                 }
739                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
740                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
741
742                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
743                     const int sub_mb_type= h->sub_mb_type[i];
744                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
745                     for(j=0; j<sub_partition_count[i]; j++){
746                         int mx, my;
747                         const int index= 4*i + block_width*j;
748                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
749                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
750                         mx += get_se_golomb(&s->gb);
751                         my += get_se_golomb(&s->gb);
752                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
753
754                         if(IS_SUB_8X8(sub_mb_type)){
755                             mv_cache[ 1 ][0]=
756                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
757                             mv_cache[ 1 ][1]=
758                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
759                         }else if(IS_SUB_8X4(sub_mb_type)){
760                             mv_cache[ 1 ][0]= mx;
761                             mv_cache[ 1 ][1]= my;
762                         }else if(IS_SUB_4X8(sub_mb_type)){
763                             mv_cache[ 8 ][0]= mx;
764                             mv_cache[ 8 ][1]= my;
765                         }
766                         mv_cache[ 0 ][0]= mx;
767                         mv_cache[ 0 ][1]= my;
768                     }
769                 }else{
770                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
771                     p[0] = p[1]=
772                     p[8] = p[9]= 0;
773                 }
774             }
775         }
776     }else if(IS_DIRECT(mb_type)){
777         ff_h264_pred_direct_motion(h, &mb_type);
778         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
779     }else{
780         int list, mx, my, i;
781          //FIXME we should set ref_idx_l? to 0 if we use that later ...
782         if(IS_16X16(mb_type)){
783             for(list=0; list<h->list_count; list++){
784                     unsigned int val;
785                     if(IS_DIR(mb_type, 0, list)){
786                         if(h->ref_count[list]==1){
787                             val= 0;
788                         }else if(h->ref_count[list]==2){
789                             val= get_bits1(&s->gb)^1;
790                         }else{
791                             val= get_ue_golomb_31(&s->gb);
792                             if(val >= h->ref_count[list]){
793                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
794                                 return -1;
795                             }
796                         }
797                     }else
798                         val= LIST_NOT_USED&0xFF;
799                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
800             }
801             for(list=0; list<h->list_count; list++){
802                 unsigned int val;
803                 if(IS_DIR(mb_type, 0, list)){
804                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
805                     mx += get_se_golomb(&s->gb);
806                     my += get_se_golomb(&s->gb);
807                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
808
809                     val= pack16to32(mx,my);
810                 }else
811                     val=0;
812                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
813             }
814         }
815         else if(IS_16X8(mb_type)){
816             for(list=0; list<h->list_count; list++){
817                     for(i=0; i<2; i++){
818                         unsigned int val;
819                         if(IS_DIR(mb_type, i, list)){
820                             if(h->ref_count[list] == 1){
821                                 val= 0;
822                             }else if(h->ref_count[list] == 2){
823                                 val= get_bits1(&s->gb)^1;
824                             }else{
825                                 val= get_ue_golomb_31(&s->gb);
826                                 if(val >= h->ref_count[list]){
827                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
828                                     return -1;
829                                 }
830                             }
831                         }else
832                             val= LIST_NOT_USED&0xFF;
833                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
834                     }
835             }
836             for(list=0; list<h->list_count; list++){
837                 for(i=0; i<2; i++){
838                     unsigned int val;
839                     if(IS_DIR(mb_type, i, list)){
840                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
841                         mx += get_se_golomb(&s->gb);
842                         my += get_se_golomb(&s->gb);
843                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
844
845                         val= pack16to32(mx,my);
846                     }else
847                         val=0;
848                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
849                 }
850             }
851         }else{
852             assert(IS_8X16(mb_type));
853             for(list=0; list<h->list_count; list++){
854                     for(i=0; i<2; i++){
855                         unsigned int val;
856                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
857                             if(h->ref_count[list]==1){
858                                 val= 0;
859                             }else if(h->ref_count[list]==2){
860                                 val= get_bits1(&s->gb)^1;
861                             }else{
862                                 val= get_ue_golomb_31(&s->gb);
863                                 if(val >= h->ref_count[list]){
864                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
865                                     return -1;
866                                 }
867                             }
868                         }else
869                             val= LIST_NOT_USED&0xFF;
870                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
871                     }
872             }
873             for(list=0; list<h->list_count; list++){
874                 for(i=0; i<2; i++){
875                     unsigned int val;
876                     if(IS_DIR(mb_type, i, list)){
877                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
878                         mx += get_se_golomb(&s->gb);
879                         my += get_se_golomb(&s->gb);
880                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
881
882                         val= pack16to32(mx,my);
883                     }else
884                         val=0;
885                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
886                 }
887             }
888         }
889     }
890
891     if(IS_INTER(mb_type))
892         write_back_motion(h, mb_type);
893
894     if(!IS_INTRA16x16(mb_type)){
895         cbp= get_ue_golomb(&s->gb);
896         if(cbp > 47){
897             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
898             return -1;
899         }
900
901         if(CHROMA){
902             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
903             else                     cbp= golomb_to_inter_cbp   [cbp];
904         }else{
905             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
906             else                     cbp= golomb_to_inter_cbp_gray[cbp];
907         }
908     }
909
910     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
911         if(get_bits1(&s->gb)){
912             mb_type |= MB_TYPE_8x8DCT;
913         }
914     }
915     h->cbp=
916     h->cbp_table[mb_xy]= cbp;
917     s->current_picture.mb_type[mb_xy]= mb_type;
918
919     if(cbp || IS_INTRA16x16(mb_type)){
920         int i8x8, i4x4, chroma_idx;
921         int dquant;
922         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
923         const uint8_t *scan, *scan8x8, *dc_scan;
924
925 //        fill_non_zero_count_cache(h);
926
927         if(IS_INTERLACED(mb_type)){
928             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
929             scan= s->qscale ? h->field_scan : h->field_scan_q0;
930             dc_scan= luma_dc_field_scan;
931         }else{
932             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
933             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
934             dc_scan= luma_dc_zigzag_scan;
935         }
936
937         dquant= get_se_golomb(&s->gb);
938
939         if( dquant > 25 || dquant < -26 ){
940             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
941             return -1;
942         }
943
944         s->qscale += dquant;
945         if(((unsigned)s->qscale) > 51){
946             if(s->qscale<0) s->qscale+= 52;
947             else            s->qscale-= 52;
948         }
949
950         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
951         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
952         if(IS_INTRA16x16(mb_type)){
953             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
954                 return -1; //FIXME continue if partitioned and other return -1 too
955             }
956
957             assert((cbp&15) == 0 || (cbp&15) == 15);
958
959             if(cbp&15){
960                 for(i8x8=0; i8x8<4; i8x8++){
961                     for(i4x4=0; i4x4<4; i4x4++){
962                         const int index= i4x4 + 4*i8x8;
963                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
964                             return -1;
965                         }
966                     }
967                 }
968             }else{
969                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
970             }
971         }else{
972             for(i8x8=0; i8x8<4; i8x8++){
973                 if(cbp & (1<<i8x8)){
974                     if(IS_8x8DCT(mb_type)){
975                         DCTELEM *buf = &h->mb[64*i8x8];
976                         uint8_t *nnz;
977                         for(i4x4=0; i4x4<4; i4x4++){
978                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
979                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
980                                 return -1;
981                         }
982                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
983                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
984                     }else{
985                         for(i4x4=0; i4x4<4; i4x4++){
986                             const int index= i4x4 + 4*i8x8;
987
988                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
989                                 return -1;
990                             }
991                         }
992                     }
993                 }else{
994                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
995                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
996                 }
997             }
998         }
999
1000         if(cbp&0x30){
1001             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1002                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
1003                     return -1;
1004                 }
1005         }
1006
1007         if(cbp&0x20){
1008             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1009                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1010                 for(i4x4=0; i4x4<4; i4x4++){
1011                     const int index= 16 + 4*chroma_idx + i4x4;
1012                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1013                         return -1;
1014                     }
1015                 }
1016             }
1017         }else{
1018             uint8_t * const nnz= &h->non_zero_count_cache[0];
1019             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1020             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1021         }
1022     }else{
1023         uint8_t * const nnz= &h->non_zero_count_cache[0];
1024         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1025         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1026         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1027     }
1028     s->current_picture.qscale_table[mb_xy]= s->qscale;
1029     write_back_non_zero_count(h);
1030
1031     if(MB_MBAFF){
1032         h->ref_count[0] >>= 1;
1033         h->ref_count[1] >>= 1;
1034     }
1035
1036     return 0;
1037 }
1038