]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
c170e7cc924ae4a0a11efe699e9dc0d1c516d907
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file libavcodec/h264_cavlc.c
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 #if ARCH_X86
39 #include "x86/h264_i386.h"
40 #endif
41
42 //#undef NDEBUG
43 #include <assert.h>
44
/* 16-entry lookup table; not referenced in the visible part of this file.
 * NOTE(review): by its name this presumably maps a Golomb code number to a
 * coded block pattern for inter macroblocks when there is no chroma
 * ("gray") - confirm against the caller. */
45 static const uint8_t golomb_to_inter_cbp_gray[16]={
46  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
47 };
48
/* 16-entry lookup table; not referenced in the visible part of this file.
 * NOTE(review): presumably the intra 4x4 counterpart of the table above -
 * confirm against the caller. */
49 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
50 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
51 };
52
/* Code lengths handed to init_vlc() for chroma_dc_coeff_token_vlc
 * (4*5 entries, see ff_h264_decode_init_vlc()).
 * decode_residual() splits the decoded symbol into total_coeff = symbol>>2
 * and trailing_ones = symbol&3, so each row of four values corresponds to
 * one total_coeff value (assuming init_vlc() uses the array index as the
 * symbol - TODO confirm). */
53 static const uint8_t chroma_dc_coeff_token_len[4*5]={
54  2, 0, 0, 0,
55  6, 1, 0, 0,
56  6, 6, 3, 0,
57  6, 7, 7, 6,
58  6, 8, 8, 7,
59 };
60
/* Code words matching chroma_dc_coeff_token_len[] entry for entry; both
 * arrays are passed together to init_vlc(). */
61 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
62  1, 0, 0, 0,
63  7, 1, 0, 0,
64  4, 6, 1, 0,
65  3, 3, 2, 5,
66  2, 3, 2, 0,
67 };
68
/* Code lengths for the four coeff_token VLCs (4*17 entries each), handed to
 * init_vlc() for coeff_token_vlc[0..3] in ff_h264_decode_init_vlc().
 * decode_residual() chooses the table through coeff_token_table_index[]
 * using the predicted non-zero count, and splits the decoded symbol into
 * total_coeff = symbol>>2 and trailing_ones = symbol&3. */
69 static const uint8_t coeff_token_len[4][4*17]={
70 {
71      1, 0, 0, 0,
72      6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
73     11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
74     14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
75     16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
76 },
77 {
78      2, 0, 0, 0,
79      6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
80      8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
81     12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
82     13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
83 },
84 {
85      4, 0, 0, 0,
86      6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
87      7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
88      8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
89     10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
90 },
91 {
92      6, 0, 0, 0,
93      6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
94      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
95      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
96      6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
97 }
98 };
99
/* Code words matching coeff_token_len[][] entry for entry; both arrays are
 * passed together to init_vlc(). */
100 static const uint8_t coeff_token_bits[4][4*17]={
101 {
102      1, 0, 0, 0,
103      5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
104      7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
105     15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
106     15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
107 },
108 {
109      3, 0, 0, 0,
110     11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
111      4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
112     15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
113     11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
114 },
115 {
116     15, 0, 0, 0,
117     15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
118     11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
119     11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
120     13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
121 },
122 {
123      3, 0, 0, 0,
124      0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
125     16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
126     32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
127     48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
128 }
129 };
130
/* Code lengths for the total_zeros VLCs. Only 15 rows are initialized and
 * only rows 0..14 are handed to init_vlc() (16 entries each) to build
 * total_zeros_vlc[0..14]; decode_residual() selects the table with
 * total_coeff-1. Entries left out of an initializer row are implicitly 0. */
131 static const uint8_t total_zeros_len[16][16]= {
132     {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
133     {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
134     {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
135     {5,3,4,4,3,3,3,4,3,4,5,5,5},
136     {4,4,4,3,3,3,3,3,4,5,4,5},
137     {6,5,3,3,3,3,3,3,4,3,6},
138     {6,5,3,3,3,2,3,4,3,6},
139     {6,4,5,3,2,2,3,3,6},
140     {6,6,4,2,2,3,2,5},
141     {5,5,3,2,2,2,4},
142     {4,4,3,3,1,3},
143     {4,4,2,1,3},
144     {3,3,1,2},
145     {2,2,1},
146     {1,1},
147 };
148
/* Code words matching total_zeros_len[][] entry for entry; both arrays are
 * passed together to init_vlc(). */
149 static const uint8_t total_zeros_bits[16][16]= {
150     {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
151     {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
152     {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
153     {3,7,5,4,6,5,4,3,3,2,2,1,0},
154     {5,4,3,7,6,5,4,3,2,1,1,0},
155     {1,1,7,6,5,4,3,2,1,1,0},
156     {1,1,5,4,3,3,2,1,1,0},
157     {1,1,1,3,3,2,2,1,0},
158     {1,0,1,3,2,1,1,1},
159     {1,0,1,3,2,1,1},
160     {0,1,1,2,1,3},
161     {0,1,1,1,1},
162     {0,1,1,1},
163     {0,1,1},
164     {0,1},
165 };
166
/* Code lengths for the three chroma DC total_zeros VLCs (4 entries each),
 * handed to init_vlc() for chroma_dc_total_zeros_vlc[0..2];
 * decode_residual() selects the table with total_coeff-1. */
167 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
168     { 1, 2, 3, 3,},
169     { 1, 2, 2, 0,},
170     { 1, 1, 0, 0,},
171 };
172
/* Code words matching chroma_dc_total_zeros_len[][] entry for entry. */
173 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
174     { 1, 1, 1, 0,},
175     { 1, 1, 0, 0,},
176     { 1, 0, 0, 0,},
177 };
178
/* Code lengths for the run_before VLCs. Rows 0..5 are handed to init_vlc()
 * with 7 entries each to build run_vlc[0..5]; row 6 is handed over with
 * 16 entries to build run7_vlc. decode_residual() uses run_vlc[zeros_left-1]
 * while zeros_left < 7 and run7_vlc otherwise. Entries left out of an
 * initializer row are implicitly 0. */
179 static const uint8_t run_len[7][16]={
180     {1,1},
181     {1,2,2},
182     {2,2,2,2},
183     {2,2,2,3,3},
184     {2,2,3,3,3,3},
185     {2,3,3,3,3,3,3},
186     {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
187 };
188
/* Code words matching run_len[][] entry for entry. */
189 static const uint8_t run_bits[7][16]={
190     {1,0},
191     {1,1,0},
192     {3,2,1,0},
193     {3,2,1,1,0},
194     {3,2,3,2,1,0},
195     {3,0,1,3,2,5,4},
196     {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
197 };
198
/* Statically allocated VLC objects and their backing tables. Each VLC gets
 * its .table / .table_allocated fields pointed at the matching array and
 * size constant in ff_h264_decode_init_vlc(). */

/* The four coeff_token tables share one array; their individual sizes are
 * listed in coeff_token_vlc_tables_size[] and must add up to the array
 * length (asserted in ff_h264_decode_init_vlc()). */
199 static VLC coeff_token_vlc[4];
200 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
201 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
202
203 static VLC chroma_dc_coeff_token_vlc;
204 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
205 static const int chroma_dc_coeff_token_vlc_table_size = 256;
206
207 static VLC total_zeros_vlc[15];
208 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
209 static const int total_zeros_vlc_tables_size = 512;
210
211 static VLC chroma_dc_total_zeros_vlc[3];
212 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
213 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
214
215 static VLC run_vlc[6];
216 static VLC_TYPE run_vlc_tables[6][8][2];
217 static const int run_vlc_tables_size = 8;
218
219 static VLC run7_vlc;
220 static VLC_TYPE run7_vlc_table[96][2];
221 static const int run7_vlc_table_size = 96;
222
/* Number of bitstream bits used to index cavlc_level_tab[]. */
223 #define LEVEL_TAB_BITS 8
/* Level lookup table filled by init_cavlc_level_tab(), indexed by
 * [suffix_length][next LEVEL_TAB_BITS bits of the stream].
 * Element [0] is either the decoded level or, when it is >= 100, an escape
 * value encoding the prefix as prefix+100; element [1] is the number of
 * bits to skip (see decode_residual()). */
224 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
225
226
227 /**
228  * gets the predicted number of non-zero coefficients.
229  * @param n block index
230  */
231 static inline int pred_non_zero_count(H264Context *h, int n){
232     const int index8= scan8[n];
233     const int left= h->non_zero_count_cache[index8 - 1];
234     const int top = h->non_zero_count_cache[index8 - 8];
235     int i= left + top;
236
237     if(i<64) i= (i+1)>>1;
238
239     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
240
241     return i&31;
242 }
243
244 static av_cold void init_cavlc_level_tab(void){
245     int suffix_length, mask;
246     unsigned int i;
247
248     for(suffix_length=0; suffix_length<7; suffix_length++){
249         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
250             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
251             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
252
253             mask= -(level_code&1);
254             level_code= (((2+level_code)>>1) ^ mask) - mask;
255             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
256                 cavlc_level_tab[suffix_length][i][0]= level_code;
257                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
258             }else if(prefix + 1 <= LEVEL_TAB_BITS){
259                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
260                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
261             }else{
262                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
263                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
264             }
265         }
266     }
267 }
268
269 av_cold void ff_h264_decode_init_vlc(void){
270     static int done = 0;
271
272     if (!done) {
273         int i;
274         int offset;
275         done = 1;
276
277         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
278         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
279         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
280                  &chroma_dc_coeff_token_len [0], 1, 1,
281                  &chroma_dc_coeff_token_bits[0], 1, 1,
282                  INIT_VLC_USE_NEW_STATIC);
283
284         offset = 0;
285         for(i=0; i<4; i++){
286             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
287             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
288             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
289                      &coeff_token_len [i][0], 1, 1,
290                      &coeff_token_bits[i][0], 1, 1,
291                      INIT_VLC_USE_NEW_STATIC);
292             offset += coeff_token_vlc_tables_size[i];
293         }
294         /*
295          * This is a one time safety check to make sure that
296          * the packed static coeff_token_vlc table sizes
297          * were initialized correctly.
298          */
299         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
300
301         for(i=0; i<3; i++){
302             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
303             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
304             init_vlc(&chroma_dc_total_zeros_vlc[i],
305                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
306                      &chroma_dc_total_zeros_len [i][0], 1, 1,
307                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
308                      INIT_VLC_USE_NEW_STATIC);
309         }
310         for(i=0; i<15; i++){
311             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
312             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
313             init_vlc(&total_zeros_vlc[i],
314                      TOTAL_ZEROS_VLC_BITS, 16,
315                      &total_zeros_len [i][0], 1, 1,
316                      &total_zeros_bits[i][0], 1, 1,
317                      INIT_VLC_USE_NEW_STATIC);
318         }
319
320         for(i=0; i<6; i++){
321             run_vlc[i].table = run_vlc_tables[i];
322             run_vlc[i].table_allocated = run_vlc_tables_size;
323             init_vlc(&run_vlc[i],
324                      RUN_VLC_BITS, 7,
325                      &run_len [i][0], 1, 1,
326                      &run_bits[i][0], 1, 1,
327                      INIT_VLC_USE_NEW_STATIC);
328         }
329         run7_vlc.table = run7_vlc_table,
330         run7_vlc.table_allocated = run7_vlc_table_size;
331         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
332                  &run_len [6][0], 1, 1,
333                  &run_bits[6][0], 1, 1,
334                  INIT_VLC_USE_NEW_STATIC);
335
336         init_cavlc_level_tab();
337     }
338 }
339
340 /**
341  *
342  */
343 static inline int get_level_prefix(GetBitContext *gb){
344     unsigned int buf;
345     int log;
346
347     OPEN_READER(re, gb);
348     UPDATE_CACHE(re, gb);
349     buf=GET_CACHE(re, gb);
350
351     log= 32 - av_log2(buf);
352 #ifdef TRACE
353     print_bin(buf>>(32-log), log);
354     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
355 #endif
356
357     LAST_SKIP_BITS(re, gb, log);
358     CLOSE_READER(re, gb);
359
360     return log-1;
361 }
362
363 /**
364  * decodes a residual block.
365  * @param n block index
366  * @param scantable scantable
367  * @param max_coeff number of coefficients in the block
368  * @return <0 if an error occurred
369  */
370 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
371     MpegEncContext * const s = &h->s;
372     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
373     int level[16];
374     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
375
376     //FIXME put trailing_onex into the context
377
378     if(n == CHROMA_DC_BLOCK_INDEX){
379         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
380         total_coeff= coeff_token>>2;
381     }else{
382         if(n == LUMA_DC_BLOCK_INDEX){
383             total_coeff= pred_non_zero_count(h, 0);
384             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385             total_coeff= coeff_token>>2;
386         }else{
387             total_coeff= pred_non_zero_count(h, n);
388             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
389             total_coeff= coeff_token>>2;
390             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
391         }
392     }
393
394     //FIXME set last_non_zero?
395
396     if(total_coeff==0)
397         return 0;
398     if(total_coeff > (unsigned)max_coeff) {
399         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
400         return -1;
401     }
402
403     trailing_ones= coeff_token&3;
404     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
405     assert(total_coeff<=16);
406
407     i = show_bits(gb, 3);
408     skip_bits(gb, trailing_ones);
409     level[0] = 1-((i&4)>>1);
410     level[1] = 1-((i&2)   );
411     level[2] = 1-((i&1)<<1);
412
413     if(trailing_ones<total_coeff) {
414         int mask, prefix;
415         int suffix_length = total_coeff > 10 && trailing_ones < 3;
416         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
417         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
418
419         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
420         if(level_code >= 100){
421             prefix= level_code - 100;
422             if(prefix == LEVEL_TAB_BITS)
423                 prefix += get_level_prefix(gb);
424
425             //first coefficient has suffix_length equal to 0 or 1
426             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
427                 if(suffix_length)
428                     level_code= (prefix<<1) + get_bits1(gb); //part
429                 else
430                     level_code= prefix; //part
431             }else if(prefix==14){
432                 if(suffix_length)
433                     level_code= (prefix<<1) + get_bits1(gb); //part
434                 else
435                     level_code= prefix + get_bits(gb, 4); //part
436             }else{
437                 level_code= 30 + get_bits(gb, prefix-3); //part
438                 if(prefix>=16)
439                     level_code += (1<<(prefix-3))-4096;
440             }
441
442             if(trailing_ones < 3) level_code += 2;
443
444             suffix_length = 2;
445             mask= -(level_code&1);
446             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
447         }else{
448             if(trailing_ones < 3) level_code += (level_code>>31)|1;
449
450             suffix_length = 1;
451             if(level_code + 3U > 6U)
452                 suffix_length++;
453             level[trailing_ones]= level_code;
454         }
455
456         //remaining coefficients have suffix_length > 0
457         for(i=trailing_ones+1;i<total_coeff;i++) {
458             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
459             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
460             level_code= cavlc_level_tab[suffix_length][bitsi][0];
461
462             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
463             if(level_code >= 100){
464                 prefix= level_code - 100;
465                 if(prefix == LEVEL_TAB_BITS){
466                     prefix += get_level_prefix(gb);
467                 }
468                 if(prefix<15){
469                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
470                 }else{
471                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
472                     if(prefix>=16)
473                         level_code += (1<<(prefix-3))-4096;
474                 }
475                 mask= -(level_code&1);
476                 level_code= (((2+level_code)>>1) ^ mask) - mask;
477             }
478             level[i]= level_code;
479
480             if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
481                 suffix_length++;
482         }
483     }
484
485     if(total_coeff == max_coeff)
486         zeros_left=0;
487     else{
488         if(n == CHROMA_DC_BLOCK_INDEX)
489             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
490         else
491             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
492     }
493
494     coeff_num = zeros_left + total_coeff - 1;
495     j = scantable[coeff_num];
496     if(n > 24){
497         block[j] = level[0];
498         for(i=1;i<total_coeff;i++) {
499             if(zeros_left <= 0)
500                 run_before = 0;
501             else if(zeros_left < 7){
502                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
503             }else{
504                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
505             }
506             zeros_left -= run_before;
507             coeff_num -= 1 + run_before;
508             j= scantable[ coeff_num ];
509
510             block[j]= level[i];
511         }
512     }else{
513         block[j] = (level[0] * qmul[j] + 32)>>6;
514         for(i=1;i<total_coeff;i++) {
515             if(zeros_left <= 0)
516                 run_before = 0;
517             else if(zeros_left < 7){
518                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
519             }else{
520                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
521             }
522             zeros_left -= run_before;
523             coeff_num -= 1 + run_before;
524             j= scantable[ coeff_num ];
525
526             block[j]= (level[i] * qmul[j] + 32)>>6;
527         }
528     }
529
530     if(zeros_left<0){
531         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
532         return -1;
533     }
534
535     return 0;
536 }
537
538 int ff_h264_decode_mb_cavlc(H264Context *h){
539     MpegEncContext * const s = &h->s;
540     int mb_xy;
541     int partition_count;
542     unsigned int mb_type, cbp;
543     int dct8x8_allowed= h->pps.transform_8x8_mode;
544
545     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
546
547     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
548     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
549                 down the code */
550     if(h->slice_type_nos != FF_I_TYPE){
551         if(s->mb_skip_run==-1)
552             s->mb_skip_run= get_ue_golomb(&s->gb);
553
554         if (s->mb_skip_run--) {
555             if(FRAME_MBAFF && (s->mb_y&1) == 0){
556                 if(s->mb_skip_run==0)
557                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
558                 else
559                     predict_field_decoding_flag(h);
560             }
561             decode_mb_skip(h);
562             return 0;
563         }
564     }
565     if(FRAME_MBAFF){
566         if( (s->mb_y&1) == 0 )
567             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
568     }
569
570     h->prev_mb_skipped= 0;
571
572     mb_type= get_ue_golomb(&s->gb);
573     if(h->slice_type_nos == FF_B_TYPE){
574         if(mb_type < 23){
575             partition_count= b_mb_type_info[mb_type].partition_count;
576             mb_type=         b_mb_type_info[mb_type].type;
577         }else{
578             mb_type -= 23;
579             goto decode_intra_mb;
580         }
581     }else if(h->slice_type_nos == FF_P_TYPE){
582         if(mb_type < 5){
583             partition_count= p_mb_type_info[mb_type].partition_count;
584             mb_type=         p_mb_type_info[mb_type].type;
585         }else{
586             mb_type -= 5;
587             goto decode_intra_mb;
588         }
589     }else{
590        assert(h->slice_type_nos == FF_I_TYPE);
591         if(h->slice_type == FF_SI_TYPE && mb_type)
592             mb_type--;
593 decode_intra_mb:
594         if(mb_type > 25){
595             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
596             return -1;
597         }
598         partition_count=0;
599         cbp= i_mb_type_info[mb_type].cbp;
600         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
601         mb_type= i_mb_type_info[mb_type].type;
602     }
603
604     if(MB_FIELD)
605         mb_type |= MB_TYPE_INTERLACED;
606
607     h->slice_table[ mb_xy ]= h->slice_num;
608
609     if(IS_INTRA_PCM(mb_type)){
610         unsigned int x;
611
612         // We assume these blocks are very rare so we do not optimize it.
613         align_get_bits(&s->gb);
614
615         // The pixels are stored in the same order as levels in h->mb array.
616         for(x=0; x < (CHROMA ? 384 : 256); x++){
617             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
618         }
619
620         // In deblocking, the quantizer is 0
621         s->current_picture.qscale_table[mb_xy]= 0;
622         // All coeffs are present
623         memset(h->non_zero_count[mb_xy], 16, 16);
624
625         s->current_picture.mb_type[mb_xy]= mb_type;
626         return 0;
627     }
628
629     if(MB_MBAFF){
630         h->ref_count[0] <<= 1;
631         h->ref_count[1] <<= 1;
632     }
633
634     fill_caches(h, mb_type, 0);
635
636     //mb_pred
637     if(IS_INTRA(mb_type)){
638         int pred_mode;
639 //            init_top_left_availability(h);
640         if(IS_INTRA4x4(mb_type)){
641             int i;
642             int di = 1;
643             if(dct8x8_allowed && get_bits1(&s->gb)){
644                 mb_type |= MB_TYPE_8x8DCT;
645                 di = 4;
646             }
647
648 //                fill_intra4x4_pred_table(h);
649             for(i=0; i<16; i+=di){
650                 int mode= pred_intra_mode(h, i);
651
652                 if(!get_bits1(&s->gb)){
653                     const int rem_mode= get_bits(&s->gb, 3);
654                     mode = rem_mode + (rem_mode >= mode);
655                 }
656
657                 if(di==4)
658                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
659                 else
660                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
661             }
662             ff_h264_write_back_intra_pred_mode(h);
663             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
664                 return -1;
665         }else{
666             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
667             if(h->intra16x16_pred_mode < 0)
668                 return -1;
669         }
670         if(CHROMA){
671             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
672             if(pred_mode < 0)
673                 return -1;
674             h->chroma_pred_mode= pred_mode;
675         }
676     }else if(partition_count==4){
677         int i, j, sub_partition_count[4], list, ref[2][4];
678
679         if(h->slice_type_nos == FF_B_TYPE){
680             for(i=0; i<4; i++){
681                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
682                 if(h->sub_mb_type[i] >=13){
683                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
684                     return -1;
685                 }
686                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
687                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
688             }
689             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
690                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
691                 ff_h264_pred_direct_motion(h, &mb_type);
692                 h->ref_cache[0][scan8[4]] =
693                 h->ref_cache[1][scan8[4]] =
694                 h->ref_cache[0][scan8[12]] =
695                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
696             }
697         }else{
698             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
699             for(i=0; i<4; i++){
700                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
701                 if(h->sub_mb_type[i] >=4){
702                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
703                     return -1;
704                 }
705                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
706                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
707             }
708         }
709
710         for(list=0; list<h->list_count; list++){
711             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
712             for(i=0; i<4; i++){
713                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
714                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
715                     unsigned int tmp;
716                     if(ref_count == 1){
717                         tmp= 0;
718                     }else if(ref_count == 2){
719                         tmp= get_bits1(&s->gb)^1;
720                     }else{
721                         tmp= get_ue_golomb_31(&s->gb);
722                         if(tmp>=ref_count){
723                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
724                             return -1;
725                         }
726                     }
727                     ref[list][i]= tmp;
728                 }else{
729                  //FIXME
730                     ref[list][i] = -1;
731                 }
732             }
733         }
734
735         if(dct8x8_allowed)
736             dct8x8_allowed = get_dct8x8_allowed(h);
737
738         for(list=0; list<h->list_count; list++){
739             for(i=0; i<4; i++){
740                 if(IS_DIRECT(h->sub_mb_type[i])) {
741                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
742                     continue;
743                 }
744                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
745                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
746
747                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
748                     const int sub_mb_type= h->sub_mb_type[i];
749                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
750                     for(j=0; j<sub_partition_count[i]; j++){
751                         int mx, my;
752                         const int index= 4*i + block_width*j;
753                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
754                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
755                         mx += get_se_golomb(&s->gb);
756                         my += get_se_golomb(&s->gb);
757                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
758
759                         if(IS_SUB_8X8(sub_mb_type)){
760                             mv_cache[ 1 ][0]=
761                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
762                             mv_cache[ 1 ][1]=
763                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
764                         }else if(IS_SUB_8X4(sub_mb_type)){
765                             mv_cache[ 1 ][0]= mx;
766                             mv_cache[ 1 ][1]= my;
767                         }else if(IS_SUB_4X8(sub_mb_type)){
768                             mv_cache[ 8 ][0]= mx;
769                             mv_cache[ 8 ][1]= my;
770                         }
771                         mv_cache[ 0 ][0]= mx;
772                         mv_cache[ 0 ][1]= my;
773                     }
774                 }else{
775                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
776                     p[0] = p[1]=
777                     p[8] = p[9]= 0;
778                 }
779             }
780         }
781     }else if(IS_DIRECT(mb_type)){
782         ff_h264_pred_direct_motion(h, &mb_type);
783         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
784     }else{
785         int list, mx, my, i;
786          //FIXME we should set ref_idx_l? to 0 if we use that later ...
787         if(IS_16X16(mb_type)){
788             for(list=0; list<h->list_count; list++){
789                     unsigned int val;
790                     if(IS_DIR(mb_type, 0, list)){
791                         if(h->ref_count[list]==1){
792                             val= 0;
793                         }else if(h->ref_count[list]==2){
794                             val= get_bits1(&s->gb)^1;
795                         }else{
796                             val= get_ue_golomb_31(&s->gb);
797                             if(val >= h->ref_count[list]){
798                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
799                                 return -1;
800                             }
801                         }
802                     }else
803                         val= LIST_NOT_USED&0xFF;
804                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
805             }
806             for(list=0; list<h->list_count; list++){
807                 unsigned int val;
808                 if(IS_DIR(mb_type, 0, list)){
809                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
810                     mx += get_se_golomb(&s->gb);
811                     my += get_se_golomb(&s->gb);
812                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
813
814                     val= pack16to32(mx,my);
815                 }else
816                     val=0;
817                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
818             }
819         }
820         else if(IS_16X8(mb_type)){
821             for(list=0; list<h->list_count; list++){
822                     for(i=0; i<2; i++){
823                         unsigned int val;
824                         if(IS_DIR(mb_type, i, list)){
825                             if(h->ref_count[list] == 1){
826                                 val= 0;
827                             }else if(h->ref_count[list] == 2){
828                                 val= get_bits1(&s->gb)^1;
829                             }else{
830                                 val= get_ue_golomb_31(&s->gb);
831                                 if(val >= h->ref_count[list]){
832                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
833                                     return -1;
834                                 }
835                             }
836                         }else
837                             val= LIST_NOT_USED&0xFF;
838                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
839                     }
840             }
841             for(list=0; list<h->list_count; list++){
842                 for(i=0; i<2; i++){
843                     unsigned int val;
844                     if(IS_DIR(mb_type, i, list)){
845                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
846                         mx += get_se_golomb(&s->gb);
847                         my += get_se_golomb(&s->gb);
848                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
849
850                         val= pack16to32(mx,my);
851                     }else
852                         val=0;
853                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
854                 }
855             }
856         }else{
857             assert(IS_8X16(mb_type));
858             for(list=0; list<h->list_count; list++){
859                     for(i=0; i<2; i++){
860                         unsigned int val;
861                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
862                             if(h->ref_count[list]==1){
863                                 val= 0;
864                             }else if(h->ref_count[list]==2){
865                                 val= get_bits1(&s->gb)^1;
866                             }else{
867                                 val= get_ue_golomb_31(&s->gb);
868                                 if(val >= h->ref_count[list]){
869                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
870                                     return -1;
871                                 }
872                             }
873                         }else
874                             val= LIST_NOT_USED&0xFF;
875                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
876                     }
877             }
878             for(list=0; list<h->list_count; list++){
879                 for(i=0; i<2; i++){
880                     unsigned int val;
881                     if(IS_DIR(mb_type, i, list)){
882                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
883                         mx += get_se_golomb(&s->gb);
884                         my += get_se_golomb(&s->gb);
885                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
886
887                         val= pack16to32(mx,my);
888                     }else
889                         val=0;
890                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
891                 }
892             }
893         }
894     }
895
896     if(IS_INTER(mb_type))
897         write_back_motion(h, mb_type);
898
899     if(!IS_INTRA16x16(mb_type)){
900         cbp= get_ue_golomb(&s->gb);
901         if(cbp > 47){
902             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
903             return -1;
904         }
905
906         if(CHROMA){
907             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
908             else                     cbp= golomb_to_inter_cbp   [cbp];
909         }else{
910             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
911             else                     cbp= golomb_to_inter_cbp_gray[cbp];
912         }
913     }
914     h->cbp = cbp;
915
916     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
917         if(get_bits1(&s->gb)){
918             mb_type |= MB_TYPE_8x8DCT;
919             h->cbp_table[mb_xy]= cbp;
920         }
921     }
922     s->current_picture.mb_type[mb_xy]= mb_type;
923
924     if(cbp || IS_INTRA16x16(mb_type)){
925         int i8x8, i4x4, chroma_idx;
926         int dquant;
927         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
928         const uint8_t *scan, *scan8x8, *dc_scan;
929
930 //        fill_non_zero_count_cache(h);
931
932         if(IS_INTERLACED(mb_type)){
933             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
934             scan= s->qscale ? h->field_scan : h->field_scan_q0;
935             dc_scan= luma_dc_field_scan;
936         }else{
937             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
938             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
939             dc_scan= luma_dc_zigzag_scan;
940         }
941
942         dquant= get_se_golomb(&s->gb);
943
944         if( dquant > 25 || dquant < -26 ){
945             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
946             return -1;
947         }
948
949         s->qscale += dquant;
950         if(((unsigned)s->qscale) > 51){
951             if(s->qscale<0) s->qscale+= 52;
952             else            s->qscale-= 52;
953         }
954
955         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
956         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
957         if(IS_INTRA16x16(mb_type)){
958             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
959                 return -1; //FIXME continue if partitioned and other return -1 too
960             }
961
962             assert((cbp&15) == 0 || (cbp&15) == 15);
963
964             if(cbp&15){
965                 for(i8x8=0; i8x8<4; i8x8++){
966                     for(i4x4=0; i4x4<4; i4x4++){
967                         const int index= i4x4 + 4*i8x8;
968                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
969                             return -1;
970                         }
971                     }
972                 }
973             }else{
974                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
975             }
976         }else{
977             for(i8x8=0; i8x8<4; i8x8++){
978                 if(cbp & (1<<i8x8)){
979                     if(IS_8x8DCT(mb_type)){
980                         DCTELEM *buf = &h->mb[64*i8x8];
981                         uint8_t *nnz;
982                         for(i4x4=0; i4x4<4; i4x4++){
983                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
984                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
985                                 return -1;
986                         }
987                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
988                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
989                     }else{
990                         for(i4x4=0; i4x4<4; i4x4++){
991                             const int index= i4x4 + 4*i8x8;
992
993                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
994                                 return -1;
995                             }
996                         }
997                     }
998                 }else{
999                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
1000                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
1001                 }
1002             }
1003         }
1004
1005         if(cbp&0x30){
1006             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1007                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
1008                     return -1;
1009                 }
1010         }
1011
1012         if(cbp&0x20){
1013             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1014                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1015                 for(i4x4=0; i4x4<4; i4x4++){
1016                     const int index= 16 + 4*chroma_idx + i4x4;
1017                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1018                         return -1;
1019                     }
1020                 }
1021             }
1022         }else{
1023             uint8_t * const nnz= &h->non_zero_count_cache[0];
1024             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1025             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1026         }
1027     }else{
1028         uint8_t * const nnz= &h->non_zero_count_cache[0];
1029         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1030         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1031         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1032     }
1033     s->current_picture.qscale_table[mb_xy]= s->qscale;
1034     write_back_non_zero_count(h);
1035
1036     if(MB_MBAFF){
1037         h->ref_count[0] >>= 1;
1038         h->ref_count[1] >>= 1;
1039     }
1040
1041     return 0;
1042 }
1043