]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
Roll back 4:4:4 H.264 for now
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/*
 * 16-entry lookup tables producing a 4-bit value (0..15) from a code number.
 * NOTE(review): neither table is referenced in the part of the file visible
 * here; judging by the names they presumably translate the Exp-Golomb coded
 * coded_block_pattern number into the luma CBP for gray (luma-only) streams,
 * one table for inter and one for intra 4x4 macroblocks -- confirm against
 * the macroblock decoder.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
48
/*
 * Code lengths (in bits) and code words used by ff_h264_decode_init_vlc()
 * to build chroma_dc_coeff_token_vlc (4*5 entries).
 * decode_residual() splits the decoded value as total_coeff = value>>2 and
 * trailing_ones = value&3, so each row of four is one total_coeff (0..4) and
 * each column one trailing_ones count.
 * NOTE(review): entries with length 0 sit where trailing_ones would exceed
 * total_coeff; presumably init_vlc() treats length 0 as "no code" -- confirm.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/* Code words matching chroma_dc_coeff_token_len entry for entry. */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};
64
/*
 * Code lengths (in bits) for the four coeff_token VLCs built by
 * ff_h264_decode_init_vlc() into coeff_token_vlc[0..3] (4*17 entries each).
 * decode_residual() picks the table through coeff_token_table_index[] from
 * the predicted non-zero count and splits the decoded value as
 * total_coeff = value>>2 (0..16) and trailing_ones = value&3.
 * NOTE(review): length-0 entries sit where trailing_ones would exceed
 * total_coeff; presumably init_vlc() treats length 0 as "no code" -- confirm.
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/* Code words matching coeff_token_len entry for entry. */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
126
/*
 * Code lengths (in bits) for the total_zeros VLCs. Rows 0..14 are turned
 * into total_zeros_vlc[0..14] by ff_h264_decode_init_vlc() (16 entries per
 * row; entries not listed are implicitly 0). decode_residual() selects
 * (total_zeros_vlc-1)[total_coeff], i.e. row r serves total_coeff == r+1,
 * and uses the decoded value as zeros_left.
 * The array is declared with 16 rows but only 15 are initialised and used.
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/* Code words matching total_zeros_len entry for entry. */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};
162
/*
 * Code lengths (in bits) for the chroma DC total_zeros VLCs; row r becomes
 * chroma_dc_total_zeros_vlc[r] in ff_h264_decode_init_vlc() (4 entries each).
 * decode_residual() selects (chroma_dc_total_zeros_vlc-1)[total_coeff], so
 * row r serves total_coeff == r+1.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/* Code words matching chroma_dc_total_zeros_len entry for entry. */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};
174
/*
 * Code lengths (in bits) for the run_before VLCs.
 * Rows 0..5 become run_vlc[0..5] (first 7 entries of each row are passed to
 * init_vlc); decode_residual() selects (run_vlc-1)[zeros_left] when
 * zeros_left < 7, so row r serves zeros_left == r+1.
 * Row 6 becomes run7_vlc (16 entries passed) and is used for zeros_left >= 7.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/* Code words matching run_len entry for entry. */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
194
/*
 * Statically allocated VLC descriptors and their backing tables; all of them
 * are filled once by ff_h264_decode_init_vlc() with INIT_VLC_USE_NEW_STATIC.
 *
 * The four coeff_token tables share one packed array: table i starts at the
 * sum of the preceding coeff_token_vlc_tables_size[] entries, and the init
 * function asserts that the sizes add up to the array length.
 */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* One VLC per total_coeff value 1..15 (indexed as total_coeff-1). */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* One VLC per chroma DC total_coeff value 1..3 (indexed as total_coeff-1). */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* One VLC per zeros_left value 1..6 (indexed as zeros_left-1). */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* Single VLC used for run_before when zeros_left >= 7. */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Level lookup table indexed by [suffix_length][next LEVEL_TAB_BITS bits].
 * Element [0] is either the decoded level or, when >= 100, an escape value
 * (prefix + 100); element [1] is the number of bits to skip.
 * Filled by init_cavlc_level_tab().
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
221
222
223 /**
224  * gets the predicted number of non-zero coefficients.
225  * @param n block index
226  */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228     const int index8= scan8[n];
229     const int left= h->non_zero_count_cache[index8 - 1];
230     const int top = h->non_zero_count_cache[index8 - 8];
231     int i= left + top;
232
233     if(i<64) i= (i+1)>>1;
234
235     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
236
237     return i&31;
238 }
239
240 static av_cold void init_cavlc_level_tab(void){
241     int suffix_length, mask;
242     unsigned int i;
243
244     for(suffix_length=0; suffix_length<7; suffix_length++){
245         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
246             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
248
249             mask= -(level_code&1);
250             level_code= (((2+level_code)>>1) ^ mask) - mask;
251             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252                 cavlc_level_tab[suffix_length][i][0]= level_code;
253                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
254             }else if(prefix + 1 <= LEVEL_TAB_BITS){
255                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
256                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
257             }else{
258                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
260             }
261         }
262     }
263 }
264
265 av_cold void ff_h264_decode_init_vlc(void){
266     static int done = 0;
267
268     if (!done) {
269         int i;
270         int offset;
271         done = 1;
272
273         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276                  &chroma_dc_coeff_token_len [0], 1, 1,
277                  &chroma_dc_coeff_token_bits[0], 1, 1,
278                  INIT_VLC_USE_NEW_STATIC);
279
280         offset = 0;
281         for(i=0; i<4; i++){
282             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285                      &coeff_token_len [i][0], 1, 1,
286                      &coeff_token_bits[i][0], 1, 1,
287                      INIT_VLC_USE_NEW_STATIC);
288             offset += coeff_token_vlc_tables_size[i];
289         }
290         /*
291          * This is a one time safety check to make sure that
292          * the packed static coeff_token_vlc table sizes
293          * were initialized correctly.
294          */
295         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
296
297         for(i=0; i<3; i++){
298             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300             init_vlc(&chroma_dc_total_zeros_vlc[i],
301                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302                      &chroma_dc_total_zeros_len [i][0], 1, 1,
303                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
304                      INIT_VLC_USE_NEW_STATIC);
305         }
306         for(i=0; i<15; i++){
307             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309             init_vlc(&total_zeros_vlc[i],
310                      TOTAL_ZEROS_VLC_BITS, 16,
311                      &total_zeros_len [i][0], 1, 1,
312                      &total_zeros_bits[i][0], 1, 1,
313                      INIT_VLC_USE_NEW_STATIC);
314         }
315
316         for(i=0; i<6; i++){
317             run_vlc[i].table = run_vlc_tables[i];
318             run_vlc[i].table_allocated = run_vlc_tables_size;
319             init_vlc(&run_vlc[i],
320                      RUN_VLC_BITS, 7,
321                      &run_len [i][0], 1, 1,
322                      &run_bits[i][0], 1, 1,
323                      INIT_VLC_USE_NEW_STATIC);
324         }
325         run7_vlc.table = run7_vlc_table,
326         run7_vlc.table_allocated = run7_vlc_table_size;
327         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328                  &run_len [6][0], 1, 1,
329                  &run_bits[6][0], 1, 1,
330                  INIT_VLC_USE_NEW_STATIC);
331
332         init_cavlc_level_tab();
333     }
334 }
335
336 /**
337  *
338  */
339 static inline int get_level_prefix(GetBitContext *gb){
340     unsigned int buf;
341     int log;
342
343     OPEN_READER(re, gb);
344     UPDATE_CACHE(re, gb);
345     buf=GET_CACHE(re, gb);
346
347     log= 32 - av_log2(buf);
348 #ifdef TRACE
349     print_bin(buf>>(32-log), log);
350     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
351 #endif
352
353     LAST_SKIP_BITS(re, gb, log);
354     CLOSE_READER(re, gb);
355
356     return log-1;
357 }
358
359 /**
360  * decodes a residual block.
361  * @param n block index
362  * @param scantable scantable
363  * @param max_coeff number of coefficients in the block
364  * @return <0 if an error occurred
365  */
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367     MpegEncContext * const s = &h->s;
368     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
369     int level[16];
370     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
371
372     //FIXME put trailing_onex into the context
373
374     if(n >= CHROMA_DC_BLOCK_INDEX){
375         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376         total_coeff= coeff_token>>2;
377     }else{
378         if(n == LUMA_DC_BLOCK_INDEX){
379             total_coeff= pred_non_zero_count(h, 0);
380             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381             total_coeff= coeff_token>>2;
382         }else{
383             total_coeff= pred_non_zero_count(h, n);
384             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385             total_coeff= coeff_token>>2;
386         }
387     }
388     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
389
390     //FIXME set last_non_zero?
391
392     if(total_coeff==0)
393         return 0;
394     if(total_coeff > (unsigned)max_coeff) {
395         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
396         return -1;
397     }
398
399     trailing_ones= coeff_token&3;
400     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401     assert(total_coeff<=16);
402
403     i = show_bits(gb, 3);
404     skip_bits(gb, trailing_ones);
405     level[0] = 1-((i&4)>>1);
406     level[1] = 1-((i&2)   );
407     level[2] = 1-((i&1)<<1);
408
409     if(trailing_ones<total_coeff) {
410         int mask, prefix;
411         int suffix_length = total_coeff > 10 & trailing_ones < 3;
412         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
414
415         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
416         if(level_code >= 100){
417             prefix= level_code - 100;
418             if(prefix == LEVEL_TAB_BITS)
419                 prefix += get_level_prefix(gb);
420
421             //first coefficient has suffix_length equal to 0 or 1
422             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
423                 if(suffix_length)
424                     level_code= (prefix<<1) + get_bits1(gb); //part
425                 else
426                     level_code= prefix; //part
427             }else if(prefix==14){
428                 if(suffix_length)
429                     level_code= (prefix<<1) + get_bits1(gb); //part
430                 else
431                     level_code= prefix + get_bits(gb, 4); //part
432             }else{
433                 level_code= 30 + get_bits(gb, prefix-3); //part
434                 if(prefix>=16){
435                     if(prefix > 25+3){
436                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
437                         return -1;
438                     }
439                     level_code += (1<<(prefix-3))-4096;
440                 }
441             }
442
443             if(trailing_ones < 3) level_code += 2;
444
445             suffix_length = 2;
446             mask= -(level_code&1);
447             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
448         }else{
449             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
450
451             suffix_length = 1 + (level_code + 3U > 6U);
452             level[trailing_ones]= level_code;
453         }
454
455         //remaining coefficients have suffix_length > 0
456         for(i=trailing_ones+1;i<total_coeff;i++) {
457             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
458             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
459             level_code= cavlc_level_tab[suffix_length][bitsi][0];
460
461             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
462             if(level_code >= 100){
463                 prefix= level_code - 100;
464                 if(prefix == LEVEL_TAB_BITS){
465                     prefix += get_level_prefix(gb);
466                 }
467                 if(prefix<15){
468                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
469                 }else{
470                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
471                     if(prefix>=16)
472                         level_code += (1<<(prefix-3))-4096;
473                 }
474                 mask= -(level_code&1);
475                 level_code= (((2+level_code)>>1) ^ mask) - mask;
476             }
477             level[i]= level_code;
478             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
479         }
480     }
481
482     if(total_coeff == max_coeff)
483         zeros_left=0;
484     else{
485         if(n >= CHROMA_DC_BLOCK_INDEX)
486             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
487         else
488             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
489     }
490
491 #define STORE_BLOCK(type) \
492     scantable += zeros_left + total_coeff - 1; \
493     if(n >= LUMA_DC_BLOCK_INDEX){ \
494         ((type*)block)[*scantable] = level[0]; \
495         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
496             if(zeros_left < 7) \
497                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
498             else \
499                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
500             zeros_left -= run_before; \
501             scantable -= 1 + run_before; \
502             ((type*)block)[*scantable]= level[i]; \
503         } \
504         for(;i<total_coeff;i++) { \
505             scantable--; \
506             ((type*)block)[*scantable]= level[i]; \
507         } \
508     }else{ \
509         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
510         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
511             if(zeros_left < 7) \
512                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
513             else \
514                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
515             zeros_left -= run_before; \
516             scantable -= 1 + run_before; \
517             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
518         } \
519         for(;i<total_coeff;i++) { \
520             scantable--; \
521             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
522         } \
523     }
524
525     if (h->pixel_shift) {
526         STORE_BLOCK(int32_t)
527     } else {
528         STORE_BLOCK(int16_t)
529     }
530
531     if(zeros_left<0){
532         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
533         return -1;
534     }
535
536     return 0;
537 }
538
539 int ff_h264_decode_mb_cavlc(H264Context *h){
540     MpegEncContext * const s = &h->s;
541     int mb_xy;
542     int partition_count;
543     unsigned int mb_type, cbp;
544     int dct8x8_allowed= h->pps.transform_8x8_mode;
545     const int pixel_shift = h->pixel_shift;
546
547     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
548
549     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
550     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
551                 down the code */
552     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
553         if(s->mb_skip_run==-1)
554             s->mb_skip_run= get_ue_golomb(&s->gb);
555
556         if (s->mb_skip_run--) {
557             if(FRAME_MBAFF && (s->mb_y&1) == 0){
558                 if(s->mb_skip_run==0)
559                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
560             }
561             decode_mb_skip(h);
562             return 0;
563         }
564     }
565     if(FRAME_MBAFF){
566         if( (s->mb_y&1) == 0 )
567             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
568     }
569
570     h->prev_mb_skipped= 0;
571
572     mb_type= get_ue_golomb(&s->gb);
573     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
574         if(mb_type < 23){
575             partition_count= b_mb_type_info[mb_type].partition_count;
576             mb_type=         b_mb_type_info[mb_type].type;
577         }else{
578             mb_type -= 23;
579             goto decode_intra_mb;
580         }
581     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
582         if(mb_type < 5){
583             partition_count= p_mb_type_info[mb_type].partition_count;
584             mb_type=         p_mb_type_info[mb_type].type;
585         }else{
586             mb_type -= 5;
587             goto decode_intra_mb;
588         }
589     }else{
590        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
591         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
592             mb_type--;
593 decode_intra_mb:
594         if(mb_type > 25){
595             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
596             return -1;
597         }
598         partition_count=0;
599         cbp= i_mb_type_info[mb_type].cbp;
600         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
601         mb_type= i_mb_type_info[mb_type].type;
602     }
603
604     if(MB_FIELD)
605         mb_type |= MB_TYPE_INTERLACED;
606
607     h->slice_table[ mb_xy ]= h->slice_num;
608
609     if(IS_INTRA_PCM(mb_type)){
610         unsigned int x;
611
612         // We assume these blocks are very rare so we do not optimize it.
613         align_get_bits(&s->gb);
614
615         // The pixels are stored in the same order as levels in h->mb array.
616         for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
617             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
618         }
619
620         // In deblocking, the quantizer is 0
621         s->current_picture.qscale_table[mb_xy]= 0;
622         // All coeffs are present
623         memset(h->non_zero_count[mb_xy], 16, 32);
624
625         s->current_picture.mb_type[mb_xy]= mb_type;
626         return 0;
627     }
628
629     if(MB_MBAFF){
630         h->ref_count[0] <<= 1;
631         h->ref_count[1] <<= 1;
632     }
633
634     fill_decode_neighbors(h, mb_type);
635     fill_decode_caches(h, mb_type);
636
637     //mb_pred
638     if(IS_INTRA(mb_type)){
639         int pred_mode;
640 //            init_top_left_availability(h);
641         if(IS_INTRA4x4(mb_type)){
642             int i;
643             int di = 1;
644             if(dct8x8_allowed && get_bits1(&s->gb)){
645                 mb_type |= MB_TYPE_8x8DCT;
646                 di = 4;
647             }
648
649 //                fill_intra4x4_pred_table(h);
650             for(i=0; i<16; i+=di){
651                 int mode= pred_intra_mode(h, i);
652
653                 if(!get_bits1(&s->gb)){
654                     const int rem_mode= get_bits(&s->gb, 3);
655                     mode = rem_mode + (rem_mode >= mode);
656                 }
657
658                 if(di==4)
659                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
660                 else
661                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
662             }
663             ff_h264_write_back_intra_pred_mode(h);
664             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
665                 return -1;
666         }else{
667             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
668             if(h->intra16x16_pred_mode < 0)
669                 return -1;
670         }
671         if(CHROMA){
672             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
673             if(pred_mode < 0)
674                 return -1;
675             h->chroma_pred_mode= pred_mode;
676         } else {
677             h->chroma_pred_mode = DC_128_PRED8x8;
678         }
679     }else if(partition_count==4){
680         int i, j, sub_partition_count[4], list, ref[2][4];
681
682         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
683             for(i=0; i<4; i++){
684                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
685                 if(h->sub_mb_type[i] >=13){
686                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
687                     return -1;
688                 }
689                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
690                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
691             }
692             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
693                 ff_h264_pred_direct_motion(h, &mb_type);
694                 h->ref_cache[0][scan8[4]] =
695                 h->ref_cache[1][scan8[4]] =
696                 h->ref_cache[0][scan8[12]] =
697                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
698             }
699         }else{
700             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
701             for(i=0; i<4; i++){
702                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
703                 if(h->sub_mb_type[i] >=4){
704                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
705                     return -1;
706                 }
707                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
708                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
709             }
710         }
711
712         for(list=0; list<h->list_count; list++){
713             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
714             for(i=0; i<4; i++){
715                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
716                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
717                     unsigned int tmp;
718                     if(ref_count == 1){
719                         tmp= 0;
720                     }else if(ref_count == 2){
721                         tmp= get_bits1(&s->gb)^1;
722                     }else{
723                         tmp= get_ue_golomb_31(&s->gb);
724                         if(tmp>=ref_count){
725                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
726                             return -1;
727                         }
728                     }
729                     ref[list][i]= tmp;
730                 }else{
731                  //FIXME
732                     ref[list][i] = -1;
733                 }
734             }
735         }
736
737         if(dct8x8_allowed)
738             dct8x8_allowed = get_dct8x8_allowed(h);
739
740         for(list=0; list<h->list_count; list++){
741             for(i=0; i<4; i++){
742                 if(IS_DIRECT(h->sub_mb_type[i])) {
743                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
744                     continue;
745                 }
746                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
747                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
748
749                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
750                     const int sub_mb_type= h->sub_mb_type[i];
751                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
752                     for(j=0; j<sub_partition_count[i]; j++){
753                         int mx, my;
754                         const int index= 4*i + block_width*j;
755                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
756                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
757                         mx += get_se_golomb(&s->gb);
758                         my += get_se_golomb(&s->gb);
759                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
760
761                         if(IS_SUB_8X8(sub_mb_type)){
762                             mv_cache[ 1 ][0]=
763                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
764                             mv_cache[ 1 ][1]=
765                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
766                         }else if(IS_SUB_8X4(sub_mb_type)){
767                             mv_cache[ 1 ][0]= mx;
768                             mv_cache[ 1 ][1]= my;
769                         }else if(IS_SUB_4X8(sub_mb_type)){
770                             mv_cache[ 8 ][0]= mx;
771                             mv_cache[ 8 ][1]= my;
772                         }
773                         mv_cache[ 0 ][0]= mx;
774                         mv_cache[ 0 ][1]= my;
775                     }
776                 }else{
777                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
778                     p[0] = p[1]=
779                     p[8] = p[9]= 0;
780                 }
781             }
782         }
783     }else if(IS_DIRECT(mb_type)){
784         ff_h264_pred_direct_motion(h, &mb_type);
785         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
786     }else{
787         int list, mx, my, i;
788          //FIXME we should set ref_idx_l? to 0 if we use that later ...
789         if(IS_16X16(mb_type)){
790             for(list=0; list<h->list_count; list++){
791                     unsigned int val;
792                     if(IS_DIR(mb_type, 0, list)){
793                         if(h->ref_count[list]==1){
794                             val= 0;
795                         }else if(h->ref_count[list]==2){
796                             val= get_bits1(&s->gb)^1;
797                         }else{
798                             val= get_ue_golomb_31(&s->gb);
799                             if(val >= h->ref_count[list]){
800                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
801                                 return -1;
802                             }
803                         }
804                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
805                     }
806             }
807             for(list=0; list<h->list_count; list++){
808                 if(IS_DIR(mb_type, 0, list)){
809                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
810                     mx += get_se_golomb(&s->gb);
811                     my += get_se_golomb(&s->gb);
812                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
813
814                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
815                 }
816             }
817         }
818         else if(IS_16X8(mb_type)){
819             for(list=0; list<h->list_count; list++){
820                     for(i=0; i<2; i++){
821                         unsigned int val;
822                         if(IS_DIR(mb_type, i, list)){
823                             if(h->ref_count[list] == 1){
824                                 val= 0;
825                             }else if(h->ref_count[list] == 2){
826                                 val= get_bits1(&s->gb)^1;
827                             }else{
828                                 val= get_ue_golomb_31(&s->gb);
829                                 if(val >= h->ref_count[list]){
830                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
831                                     return -1;
832                                 }
833                             }
834                         }else
835                             val= LIST_NOT_USED&0xFF;
836                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
837                     }
838             }
839             for(list=0; list<h->list_count; list++){
840                 for(i=0; i<2; i++){
841                     unsigned int val;
842                     if(IS_DIR(mb_type, i, list)){
843                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
844                         mx += get_se_golomb(&s->gb);
845                         my += get_se_golomb(&s->gb);
846                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
847
848                         val= pack16to32(mx,my);
849                     }else
850                         val=0;
851                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
852                 }
853             }
854         }else{
855             assert(IS_8X16(mb_type));
856             for(list=0; list<h->list_count; list++){
857                     for(i=0; i<2; i++){
858                         unsigned int val;
859                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
860                             if(h->ref_count[list]==1){
861                                 val= 0;
862                             }else if(h->ref_count[list]==2){
863                                 val= get_bits1(&s->gb)^1;
864                             }else{
865                                 val= get_ue_golomb_31(&s->gb);
866                                 if(val >= h->ref_count[list]){
867                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
868                                     return -1;
869                                 }
870                             }
871                         }else
872                             val= LIST_NOT_USED&0xFF;
873                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
874                     }
875             }
876             for(list=0; list<h->list_count; list++){
877                 for(i=0; i<2; i++){
878                     unsigned int val;
879                     if(IS_DIR(mb_type, i, list)){
880                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
881                         mx += get_se_golomb(&s->gb);
882                         my += get_se_golomb(&s->gb);
883                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
884
885                         val= pack16to32(mx,my);
886                     }else
887                         val=0;
888                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
889                 }
890             }
891         }
892     }
893
894     if(IS_INTER(mb_type))
895         write_back_motion(h, mb_type);
896
897     if(!IS_INTRA16x16(mb_type)){
898         cbp= get_ue_golomb(&s->gb);
899         if(cbp > 47){
900             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
901             return -1;
902         }
903
904         if(CHROMA){
905             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
906             else                     cbp= golomb_to_inter_cbp   [cbp];
907         }else{
908             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
909             else                     cbp= golomb_to_inter_cbp_gray[cbp];
910         }
911     }
912
913     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
914         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
915     }
916     h->cbp=
917     h->cbp_table[mb_xy]= cbp;
918     s->current_picture.mb_type[mb_xy]= mb_type;
919
920     if(cbp || IS_INTRA16x16(mb_type)){
921         int i8x8, i4x4, chroma_idx;
922         int dquant;
923         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
924         const uint8_t *scan, *scan8x8;
925         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
926
927         if(IS_INTERLACED(mb_type)){
928             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
929             scan= s->qscale ? h->field_scan : h->field_scan_q0;
930         }else{
931             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
932             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
933         }
934
935         dquant= get_se_golomb(&s->gb);
936
937         s->qscale += dquant;
938
939         if(((unsigned)s->qscale) > max_qp){
940             if(s->qscale<0) s->qscale+= max_qp+1;
941             else            s->qscale-= max_qp+1;
942             if(((unsigned)s->qscale) > max_qp){
943                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
944                 return -1;
945             }
946         }
947
948         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
949         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
950         if(IS_INTRA16x16(mb_type)){
951             AV_ZERO128(h->mb_luma_dc+0);
952             AV_ZERO128(h->mb_luma_dc+8);
953             AV_ZERO128(h->mb_luma_dc+16);
954             AV_ZERO128(h->mb_luma_dc+24);
955             if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
956                 return -1; //FIXME continue if partitioned and other return -1 too
957             }
958
959             assert((cbp&15) == 0 || (cbp&15) == 15);
960
961             if(cbp&15){
962                 for(i8x8=0; i8x8<4; i8x8++){
963                     for(i4x4=0; i4x4<4; i4x4++){
964                         const int index= i4x4 + 4*i8x8;
965                         if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
966                             return -1;
967                         }
968                     }
969                 }
970             }else{
971                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
972             }
973         }else{
974             for(i8x8=0; i8x8<4; i8x8++){
975                 if(cbp & (1<<i8x8)){
976                     if(IS_8x8DCT(mb_type)){
977                         DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift];
978                         uint8_t *nnz;
979                         for(i4x4=0; i4x4<4; i4x4++){
980                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
981                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
982                                 return -1;
983                         }
984                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
985                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
986                     }else{
987                         for(i4x4=0; i4x4<4; i4x4++){
988                             const int index= i4x4 + 4*i8x8;
989
990                             if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
991                                 return -1;
992                             }
993                         }
994                     }
995                 }else{
996                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
997                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
998                 }
999             }
1000         }
1001
1002         if(cbp&0x30){
1003             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1004                 if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1005                     return -1;
1006                 }
1007         }
1008
1009         if(cbp&0x20){
1010             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1011                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1012                 for(i4x4=0; i4x4<4; i4x4++){
1013                     const int index= 16 + 4*chroma_idx + i4x4;
1014                     if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1015                         return -1;
1016                     }
1017                 }
1018             }
1019         }else{
1020             uint8_t * const nnz= &h->non_zero_count_cache[0];
1021             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1022             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1023         }
1024     }else{
1025         uint8_t * const nnz= &h->non_zero_count_cache[0];
1026         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1027         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1028         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1029     }
1030     s->current_picture.qscale_table[mb_xy]= s->qscale;
1031     write_back_non_zero_count(h);
1032
1033     if(MB_MBAFF){
1034         h->ref_count[0] >>= 1;
1035         h->ref_count[1] >>= 1;
1036     }
1037
1038     return 0;
1039 }
1040