]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264_cavlc.c
shorten: remove the flush function.
[ffmpeg] / libavcodec / h264_cavlc.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define CABAC 0
29
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37
38 //#undef NDEBUG
39 #include <assert.h>
40
/*
 * Code-number -> coded block pattern lookup tables, one for inter and one
 * for intra 4x4 macroblocks.
 * NOTE(review): the "_gray" suffix suggests these are the variants used for
 * streams without chroma; the code indexing them is outside this view, so
 * confirm against the caller.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,  8,  3,  5, 10,
    12, 15,  7, 11, 13, 14,  6,  9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11, 13, 14,  3,  5,
    10, 12,  1,  2,  4,  8,  6,  9,
};
48
/*
 * Code lengths and code words for the chroma DC coeff_token VLC.
 * The flat index is 4*total_coeff + trailing_ones: decode_residual() splits
 * the decoded token with ">>2" and "&3". Each row below is one total_coeff
 * value, each column one trailing_ones value.
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code -- confirm against init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_len[4 * 5] = {
    2, 0, 0, 0,   /* total_coeff 0 */
    6, 1, 0, 0,   /* total_coeff 1 */
    6, 6, 3, 0,   /* total_coeff 2 */
    6, 7, 7, 6,   /* total_coeff 3 */
    6, 8, 8, 7,   /* total_coeff 4 */
};

static const uint8_t chroma_dc_coeff_token_bits[4 * 5] = {
    1, 0, 0, 0,   /* total_coeff 0 */
    7, 1, 0, 0,   /* total_coeff 1 */
    4, 6, 1, 0,   /* total_coeff 2 */
    3, 3, 2, 5,   /* total_coeff 3 */
    2, 3, 2, 0,   /* total_coeff 4 */
};
64
/*
 * Code lengths for the four coeff_token VLCs. decode_residual() picks the
 * outer index from the predicted non-zero count (coeff_token_table_index).
 * The inner index is 4*total_coeff + trailing_ones, so every group of four
 * values belongs to one total_coeff (0..16).
 * NOTE(review): a length of 0 presumably marks a combination that has no
 * code -- confirm against init_vlc().
 */
static const uint8_t coeff_token_len[4][4 * 17] = {
    { /* table 0 */
         1, 0, 0, 0,                                                        /* total_coeff  0     */
         6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,        /* total_coeff  1.. 4 */
        11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,        /* total_coeff  5.. 8 */
        14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,        /* total_coeff  9..12 */
        16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,        /* total_coeff 13..16 */
    },
    { /* table 1 */
         2, 0, 0, 0,                                                        /* total_coeff  0     */
         6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,        /* total_coeff  1.. 4 */
         8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,        /* total_coeff  5.. 8 */
        12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,        /* total_coeff  9..12 */
        13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,        /* total_coeff 13..16 */
    },
    { /* table 2 */
         4, 0, 0, 0,                                                        /* total_coeff  0     */
         6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,        /* total_coeff  1.. 4 */
         7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,        /* total_coeff  5.. 8 */
         8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,        /* total_coeff  9..12 */
        10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,        /* total_coeff 13..16 */
    },
    { /* table 3: every existing code is 6 bits long */
         6, 0, 0, 0,                                                        /* total_coeff  0     */
         6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,        /* total_coeff  1.. 4 */
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,        /* total_coeff  5.. 8 */
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,        /* total_coeff  9..12 */
         6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,        /* total_coeff 13..16 */
    },
};

/*
 * Code words matching coeff_token_len, with the same indexing.
 */
static const uint8_t coeff_token_bits[4][4 * 17] = {
    { /* table 0 */
         1, 0, 0, 0,                                                        /* total_coeff  0     */
         5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,        /* total_coeff  1.. 4 */
         7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,        /* total_coeff  5.. 8 */
        15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,        /* total_coeff  9..12 */
        15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,        /* total_coeff 13..16 */
    },
    { /* table 1 */
         3, 0, 0, 0,                                                        /* total_coeff  0     */
        11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,        /* total_coeff  1.. 4 */
         4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,        /* total_coeff  5.. 8 */
        15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,        /* total_coeff  9..12 */
        11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,        /* total_coeff 13..16 */
    },
    { /* table 2 */
        15, 0, 0, 0,                                                        /* total_coeff  0     */
        15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,        /* total_coeff  1.. 4 */
        11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,        /* total_coeff  5.. 8 */
        11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,        /* total_coeff  9..12 */
        13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,        /* total_coeff 13..16 */
    },
    { /* table 3 */
         3, 0, 0, 0,                                                        /* total_coeff  0     */
         0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,        /* total_coeff  1.. 4 */
        16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,        /* total_coeff  5.. 8 */
        32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,        /* total_coeff  9..12 */
        48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,        /* total_coeff 13..16 */
    },
};
126
/*
 * Code lengths for the total_zeros VLCs. Row r is the table used when
 * total_coeff == r + 1 (decode_residual() selects it by total_coeff); the
 * column is the total_zeros value. Only 15 rows are initialised; the 16th
 * row and the unlisted tail of each row are implicitly zero.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },  /* total_coeff  1 */
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },     /* total_coeff  2 */
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },        /* total_coeff  3 */
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },           /* total_coeff  4 */
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },              /* total_coeff  5 */
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },                 /* total_coeff  6 */
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },                    /* total_coeff  7 */
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },                       /* total_coeff  8 */
    { 6, 6, 4, 2, 2, 3, 2, 5 },                          /* total_coeff  9 */
    { 5, 5, 3, 2, 2, 2, 4 },                             /* total_coeff 10 */
    { 4, 4, 3, 3, 1, 3 },                                /* total_coeff 11 */
    { 4, 4, 2, 1, 3 },                                   /* total_coeff 12 */
    { 3, 3, 1, 2 },                                      /* total_coeff 13 */
    { 2, 2, 1 },                                         /* total_coeff 14 */
    { 1, 1 },                                            /* total_coeff 15 */
};

/*
 * Code words matching total_zeros_len, with the same indexing.
 */
static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },  /* total_coeff  1 */
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },     /* total_coeff  2 */
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },        /* total_coeff  3 */
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },           /* total_coeff  4 */
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },              /* total_coeff  5 */
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                 /* total_coeff  6 */
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },                    /* total_coeff  7 */
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },                       /* total_coeff  8 */
    { 1, 0, 1, 3, 2, 1, 1, 1 },                          /* total_coeff  9 */
    { 1, 0, 1, 3, 2, 1, 1 },                             /* total_coeff 10 */
    { 0, 1, 1, 2, 1, 3 },                                /* total_coeff 11 */
    { 0, 1, 1, 1, 1 },                                   /* total_coeff 12 */
    { 0, 1, 1, 1 },                                      /* total_coeff 13 */
    { 0, 1, 1 },                                         /* total_coeff 14 */
    { 0, 1 },                                            /* total_coeff 15 */
};
162
/*
 * Code lengths and code words for the chroma DC total_zeros VLCs.
 * Row r is the table used when total_coeff == r + 1; the column is the
 * total_zeros value.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 },   /* total_coeff 1 */
    { 1, 2, 2, 0 },   /* total_coeff 2 */
    { 1, 1, 0, 0 },   /* total_coeff 3 */
};

static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 },   /* total_coeff 1 */
    { 1, 1, 0, 0 },   /* total_coeff 2 */
    { 1, 0, 0, 0 },   /* total_coeff 3 */
};
174
/*
 * Code lengths for the run_before VLCs. Rows 0..5 are the tables used while
 * zeros_left is 1..6; row 6 feeds the separate run7 VLC that
 * decode_residual() uses once zeros_left is 7 or more. The column is the
 * run_before value; unlisted entries are implicitly zero.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },                                               /* zeros_left 1  */
    { 1, 2, 2 },                                            /* zeros_left 2  */
    { 2, 2, 2, 2 },                                         /* zeros_left 3  */
    { 2, 2, 2, 3, 3 },                                      /* zeros_left 4  */
    { 2, 2, 3, 3, 3, 3 },                                   /* zeros_left 5  */
    { 2, 3, 3, 3, 3, 3, 3 },                                /* zeros_left 6  */
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },      /* zeros_left 7+ */
};

/*
 * Code words matching run_len, with the same indexing.
 */
static const uint8_t run_bits[7][16] = {
    { 1, 0 },                                               /* zeros_left 1  */
    { 1, 1, 0 },                                            /* zeros_left 2  */
    { 3, 2, 1, 0 },                                         /* zeros_left 3  */
    { 3, 2, 1, 1, 0 },                                      /* zeros_left 4  */
    { 3, 2, 3, 2, 1, 0 },                                   /* zeros_left 5  */
    { 3, 0, 1, 3, 2, 5, 4 },                                /* zeros_left 6  */
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },        /* zeros_left 7+ */
};
194
195 static VLC coeff_token_vlc[4];
196 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
198
199 static VLC chroma_dc_coeff_token_vlc;
200 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201 static const int chroma_dc_coeff_token_vlc_table_size = 256;
202
203 static VLC total_zeros_vlc[15];
204 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205 static const int total_zeros_vlc_tables_size = 512;
206
207 static VLC chroma_dc_total_zeros_vlc[3];
208 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
210
211 static VLC run_vlc[6];
212 static VLC_TYPE run_vlc_tables[6][8][2];
213 static const int run_vlc_tables_size = 8;
214
215 static VLC run7_vlc;
216 static VLC_TYPE run7_vlc_table[96][2];
217 static const int run7_vlc_table_size = 96;
218
219 #define LEVEL_TAB_BITS 8
220 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
221
222
223 /**
224  * gets the predicted number of non-zero coefficients.
225  * @param n block index
226  */
227 static inline int pred_non_zero_count(H264Context *h, int n){
228     const int index8= scan8[n];
229     const int left= h->non_zero_count_cache[index8 - 1];
230     const int top = h->non_zero_count_cache[index8 - 8];
231     int i= left + top;
232
233     if(i<64) i= (i+1)>>1;
234
235     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
236
237     return i&31;
238 }
239
240 static av_cold void init_cavlc_level_tab(void){
241     int suffix_length;
242     unsigned int i;
243
244     for(suffix_length=0; suffix_length<7; suffix_length++){
245         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
246             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247
248             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
249                 int level_code = (prefix << suffix_length) +
250                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
251                 int mask = -(level_code&1);
252                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
253                 cavlc_level_tab[suffix_length][i][0]= level_code;
254                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
255             }else if(prefix + 1 <= LEVEL_TAB_BITS){
256                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
257                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
258             }else{
259                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
260                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
261             }
262         }
263     }
264 }
265
266 av_cold void ff_h264_decode_init_vlc(void){
267     static int done = 0;
268
269     if (!done) {
270         int i;
271         int offset;
272         done = 1;
273
274         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
275         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
276         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
277                  &chroma_dc_coeff_token_len [0], 1, 1,
278                  &chroma_dc_coeff_token_bits[0], 1, 1,
279                  INIT_VLC_USE_NEW_STATIC);
280
281         offset = 0;
282         for(i=0; i<4; i++){
283             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
284             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
285             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
286                      &coeff_token_len [i][0], 1, 1,
287                      &coeff_token_bits[i][0], 1, 1,
288                      INIT_VLC_USE_NEW_STATIC);
289             offset += coeff_token_vlc_tables_size[i];
290         }
291         /*
292          * This is a one time safety check to make sure that
293          * the packed static coeff_token_vlc table sizes
294          * were initialized correctly.
295          */
296         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
297
298         for(i=0; i<3; i++){
299             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
300             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
301             init_vlc(&chroma_dc_total_zeros_vlc[i],
302                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
303                      &chroma_dc_total_zeros_len [i][0], 1, 1,
304                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
305                      INIT_VLC_USE_NEW_STATIC);
306         }
307         for(i=0; i<15; i++){
308             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
309             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
310             init_vlc(&total_zeros_vlc[i],
311                      TOTAL_ZEROS_VLC_BITS, 16,
312                      &total_zeros_len [i][0], 1, 1,
313                      &total_zeros_bits[i][0], 1, 1,
314                      INIT_VLC_USE_NEW_STATIC);
315         }
316
317         for(i=0; i<6; i++){
318             run_vlc[i].table = run_vlc_tables[i];
319             run_vlc[i].table_allocated = run_vlc_tables_size;
320             init_vlc(&run_vlc[i],
321                      RUN_VLC_BITS, 7,
322                      &run_len [i][0], 1, 1,
323                      &run_bits[i][0], 1, 1,
324                      INIT_VLC_USE_NEW_STATIC);
325         }
326         run7_vlc.table = run7_vlc_table,
327         run7_vlc.table_allocated = run7_vlc_table_size;
328         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
329                  &run_len [6][0], 1, 1,
330                  &run_bits[6][0], 1, 1,
331                  INIT_VLC_USE_NEW_STATIC);
332
333         init_cavlc_level_tab();
334     }
335 }
336
337 /**
338  *
339  */
340 static inline int get_level_prefix(GetBitContext *gb){
341     unsigned int buf;
342     int log;
343
344     OPEN_READER(re, gb);
345     UPDATE_CACHE(re, gb);
346     buf=GET_CACHE(re, gb);
347
348     log= 32 - av_log2(buf);
349 #ifdef TRACE
350     print_bin(buf>>(32-log), log);
351     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
352 #endif
353
354     LAST_SKIP_BITS(re, gb, log);
355     CLOSE_READER(re, gb);
356
357     return log-1;
358 }
359
360 /**
361  * decodes a residual block.
362  * @param n block index
363  * @param scantable scantable
364  * @param max_coeff number of coefficients in the block
365  * @return <0 if an error occurred
366  */
367 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
368     MpegEncContext * const s = &h->s;
369     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
370     int level[16];
371     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
372
373     //FIXME put trailing_onex into the context
374
375     if(max_coeff <= 8){
376         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
377         total_coeff= coeff_token>>2;
378     }else{
379         if(n >= LUMA_DC_BLOCK_INDEX){
380             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
381             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
382             total_coeff= coeff_token>>2;
383         }else{
384             total_coeff= pred_non_zero_count(h, n);
385             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
386             total_coeff= coeff_token>>2;
387         }
388     }
389     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
390
391     //FIXME set last_non_zero?
392
393     if(total_coeff==0)
394         return 0;
395     if(total_coeff > (unsigned)max_coeff) {
396         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
397         return -1;
398     }
399
400     trailing_ones= coeff_token&3;
401     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
402     assert(total_coeff<=16);
403
404     i = show_bits(gb, 3);
405     skip_bits(gb, trailing_ones);
406     level[0] = 1-((i&4)>>1);
407     level[1] = 1-((i&2)   );
408     level[2] = 1-((i&1)<<1);
409
410     if(trailing_ones<total_coeff) {
411         int mask, prefix;
412         int suffix_length = total_coeff > 10 & trailing_ones < 3;
413         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
414         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
415
416         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
417         if(level_code >= 100){
418             prefix= level_code - 100;
419             if(prefix == LEVEL_TAB_BITS)
420                 prefix += get_level_prefix(gb);
421
422             //first coefficient has suffix_length equal to 0 or 1
423             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
424                 if(suffix_length)
425                     level_code= (prefix<<1) + get_bits1(gb); //part
426                 else
427                     level_code= prefix; //part
428             }else if(prefix==14){
429                 if(suffix_length)
430                     level_code= (prefix<<1) + get_bits1(gb); //part
431                 else
432                     level_code= prefix + get_bits(gb, 4); //part
433             }else{
434                 level_code= 30 + get_bits(gb, prefix-3); //part
435                 if(prefix>=16){
436                     if(prefix > 25+3){
437                         av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
438                         return -1;
439                     }
440                     level_code += (1<<(prefix-3))-4096;
441                 }
442             }
443
444             if(trailing_ones < 3) level_code += 2;
445
446             suffix_length = 2;
447             mask= -(level_code&1);
448             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
449         }else{
450             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
451
452             suffix_length = 1 + (level_code + 3U > 6U);
453             level[trailing_ones]= level_code;
454         }
455
456         //remaining coefficients have suffix_length > 0
457         for(i=trailing_ones+1;i<total_coeff;i++) {
458             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
459             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
460             level_code= cavlc_level_tab[suffix_length][bitsi][0];
461
462             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
463             if(level_code >= 100){
464                 prefix= level_code - 100;
465                 if(prefix == LEVEL_TAB_BITS){
466                     prefix += get_level_prefix(gb);
467                 }
468                 if(prefix<15){
469                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
470                 }else{
471                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
472                     if(prefix>=16)
473                         level_code += (1<<(prefix-3))-4096;
474                 }
475                 mask= -(level_code&1);
476                 level_code= (((2+level_code)>>1) ^ mask) - mask;
477             }
478             level[i]= level_code;
479             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
480         }
481     }
482
483     if(total_coeff == max_coeff)
484         zeros_left=0;
485     else{
486         /* FIXME: we don't actually support 4:2:2 yet. */
487         if(max_coeff <= 8)
488             zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
489         else
490             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
491     }
492
493 #define STORE_BLOCK(type) \
494     scantable += zeros_left + total_coeff - 1; \
495     if(n >= LUMA_DC_BLOCK_INDEX){ \
496         ((type*)block)[*scantable] = level[0]; \
497         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
498             if(zeros_left < 7) \
499                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
500             else \
501                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
502             zeros_left -= run_before; \
503             scantable -= 1 + run_before; \
504             ((type*)block)[*scantable]= level[i]; \
505         } \
506         for(;i<total_coeff;i++) { \
507             scantable--; \
508             ((type*)block)[*scantable]= level[i]; \
509         } \
510     }else{ \
511         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
512         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
513             if(zeros_left < 7) \
514                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
515             else \
516                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
517             zeros_left -= run_before; \
518             scantable -= 1 + run_before; \
519             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
520         } \
521         for(;i<total_coeff;i++) { \
522             scantable--; \
523             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
524         } \
525     }
526
527     if (h->pixel_shift) {
528         STORE_BLOCK(int32_t)
529     } else {
530         STORE_BLOCK(int16_t)
531     }
532
533     if(zeros_left<0){
534         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
535         return -1;
536     }
537
538     return 0;
539 }
540
541 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
542     int i4x4, i8x8;
543     MpegEncContext * const s = &h->s;
544     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
545     if(IS_INTRA16x16(mb_type)){
546         AV_ZERO128(h->mb_luma_dc[p]+0);
547         AV_ZERO128(h->mb_luma_dc[p]+8);
548         AV_ZERO128(h->mb_luma_dc[p]+16);
549         AV_ZERO128(h->mb_luma_dc[p]+24);
550         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
551             return -1; //FIXME continue if partitioned and other return -1 too
552         }
553
554         assert((cbp&15) == 0 || (cbp&15) == 15);
555
556         if(cbp&15){
557             for(i8x8=0; i8x8<4; i8x8++){
558                 for(i4x4=0; i4x4<4; i4x4++){
559                     const int index= i4x4 + 4*i8x8 + p*16;
560                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
561                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
562                         return -1;
563                     }
564                 }
565             }
566             return 0xf;
567         }else{
568             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
569             return 0;
570         }
571     }else{
572         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
573         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
574         int new_cbp = 0;
575         for(i8x8=0; i8x8<4; i8x8++){
576             if(cbp & (1<<i8x8)){
577                 if(IS_8x8DCT(mb_type)){
578                     DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
579                     uint8_t *nnz;
580                     for(i4x4=0; i4x4<4; i4x4++){
581                         const int index= i4x4 + 4*i8x8 + p*16;
582                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
583                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
584                             return -1;
585                     }
586                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
587                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
588                     new_cbp |= !!nnz[0] << i8x8;
589                 }else{
590                     for(i4x4=0; i4x4<4; i4x4++){
591                         const int index= i4x4 + 4*i8x8 + p*16;
592                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
593                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
594                             return -1;
595                         }
596                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
597                     }
598                 }
599             }else{
600                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
601                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
602             }
603         }
604         return new_cbp;
605     }
606 }
607
608 int ff_h264_decode_mb_cavlc(H264Context *h){
609     MpegEncContext * const s = &h->s;
610     int mb_xy;
611     int partition_count;
612     unsigned int mb_type, cbp;
613     int dct8x8_allowed= h->pps.transform_8x8_mode;
614     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
615     const int pixel_shift = h->pixel_shift;
616
617     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
618
619     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
620     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
621                 down the code */
622     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
623         if(s->mb_skip_run==-1)
624             s->mb_skip_run= get_ue_golomb(&s->gb);
625
626         if (s->mb_skip_run--) {
627             if(FRAME_MBAFF && (s->mb_y&1) == 0){
628                 if(s->mb_skip_run==0)
629                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
630             }
631             decode_mb_skip(h);
632             return 0;
633         }
634     }
635     if(FRAME_MBAFF){
636         if( (s->mb_y&1) == 0 )
637             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
638     }
639
640     h->prev_mb_skipped= 0;
641
642     mb_type= get_ue_golomb(&s->gb);
643     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
644         if(mb_type < 23){
645             partition_count= b_mb_type_info[mb_type].partition_count;
646             mb_type=         b_mb_type_info[mb_type].type;
647         }else{
648             mb_type -= 23;
649             goto decode_intra_mb;
650         }
651     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
652         if(mb_type < 5){
653             partition_count= p_mb_type_info[mb_type].partition_count;
654             mb_type=         p_mb_type_info[mb_type].type;
655         }else{
656             mb_type -= 5;
657             goto decode_intra_mb;
658         }
659     }else{
660        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
661         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
662             mb_type--;
663 decode_intra_mb:
664         if(mb_type > 25){
665             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
666             return -1;
667         }
668         partition_count=0;
669         cbp= i_mb_type_info[mb_type].cbp;
670         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
671         mb_type= i_mb_type_info[mb_type].type;
672     }
673
674     if(MB_FIELD)
675         mb_type |= MB_TYPE_INTERLACED;
676
677     h->slice_table[ mb_xy ]= h->slice_num;
678
679     if(IS_INTRA_PCM(mb_type)){
680         unsigned int x;
681         static const uint16_t mb_sizes[4] = {256,384,512,768};
682         const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
683
684         // We assume these blocks are very rare so we do not optimize it.
685         align_get_bits(&s->gb);
686
687         // The pixels are stored in the same order as levels in h->mb array.
688         for(x=0; x < mb_size; x++){
689             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
690         }
691
692         // In deblocking, the quantizer is 0
693         s->current_picture.f.qscale_table[mb_xy] = 0;
694         // All coeffs are present
695         memset(h->non_zero_count[mb_xy], 16, 48);
696
697         s->current_picture.f.mb_type[mb_xy] = mb_type;
698         return 0;
699     }
700
701     if(MB_MBAFF){
702         h->ref_count[0] <<= 1;
703         h->ref_count[1] <<= 1;
704     }
705
706     fill_decode_neighbors(h, mb_type);
707     fill_decode_caches(h, mb_type);
708
709     //mb_pred
710     if(IS_INTRA(mb_type)){
711         int pred_mode;
712 //            init_top_left_availability(h);
713         if(IS_INTRA4x4(mb_type)){
714             int i;
715             int di = 1;
716             if(dct8x8_allowed && get_bits1(&s->gb)){
717                 mb_type |= MB_TYPE_8x8DCT;
718                 di = 4;
719             }
720
721 //                fill_intra4x4_pred_table(h);
722             for(i=0; i<16; i+=di){
723                 int mode= pred_intra_mode(h, i);
724
725                 if(!get_bits1(&s->gb)){
726                     const int rem_mode= get_bits(&s->gb, 3);
727                     mode = rem_mode + (rem_mode >= mode);
728                 }
729
730                 if(di==4)
731                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
732                 else
733                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
734             }
735             write_back_intra_pred_mode(h);
736             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
737                 return -1;
738         }else{
739             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
740             if(h->intra16x16_pred_mode < 0)
741                 return -1;
742         }
743         if(decode_chroma){
744             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
745             if(pred_mode < 0)
746                 return -1;
747             h->chroma_pred_mode= pred_mode;
748         } else {
749             h->chroma_pred_mode = DC_128_PRED8x8;
750         }
751     }else if(partition_count==4){
752         int i, j, sub_partition_count[4], list, ref[2][4];
753
754         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
755             for(i=0; i<4; i++){
756                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
757                 if(h->sub_mb_type[i] >=13){
758                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
759                     return -1;
760                 }
761                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
762                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
763             }
764             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
765                 ff_h264_pred_direct_motion(h, &mb_type);
766                 h->ref_cache[0][scan8[4]] =
767                 h->ref_cache[1][scan8[4]] =
768                 h->ref_cache[0][scan8[12]] =
769                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
770             }
771         }else{
772             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
773             for(i=0; i<4; i++){
774                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
775                 if(h->sub_mb_type[i] >=4){
776                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
777                     return -1;
778                 }
779                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
780                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
781             }
782         }
783
784         for(list=0; list<h->list_count; list++){
785             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
786             for(i=0; i<4; i++){
787                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
788                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
789                     unsigned int tmp;
790                     if(ref_count == 1){
791                         tmp= 0;
792                     }else if(ref_count == 2){
793                         tmp= get_bits1(&s->gb)^1;
794                     }else{
795                         tmp= get_ue_golomb_31(&s->gb);
796                         if(tmp>=ref_count){
797                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
798                             return -1;
799                         }
800                     }
801                     ref[list][i]= tmp;
802                 }else{
803                  //FIXME
804                     ref[list][i] = -1;
805                 }
806             }
807         }
808
809         if(dct8x8_allowed)
810             dct8x8_allowed = get_dct8x8_allowed(h);
811
812         for(list=0; list<h->list_count; list++){
813             for(i=0; i<4; i++){
814                 if(IS_DIRECT(h->sub_mb_type[i])) {
815                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
816                     continue;
817                 }
818                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
819                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
820
821                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
822                     const int sub_mb_type= h->sub_mb_type[i];
823                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
824                     for(j=0; j<sub_partition_count[i]; j++){
825                         int mx, my;
826                         const int index= 4*i + block_width*j;
827                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
828                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
829                         mx += get_se_golomb(&s->gb);
830                         my += get_se_golomb(&s->gb);
831                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
832
833                         if(IS_SUB_8X8(sub_mb_type)){
834                             mv_cache[ 1 ][0]=
835                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
836                             mv_cache[ 1 ][1]=
837                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
838                         }else if(IS_SUB_8X4(sub_mb_type)){
839                             mv_cache[ 1 ][0]= mx;
840                             mv_cache[ 1 ][1]= my;
841                         }else if(IS_SUB_4X8(sub_mb_type)){
842                             mv_cache[ 8 ][0]= mx;
843                             mv_cache[ 8 ][1]= my;
844                         }
845                         mv_cache[ 0 ][0]= mx;
846                         mv_cache[ 0 ][1]= my;
847                     }
848                 }else{
849                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
850                     p[0] = p[1]=
851                     p[8] = p[9]= 0;
852                 }
853             }
854         }
855     }else if(IS_DIRECT(mb_type)){
856         ff_h264_pred_direct_motion(h, &mb_type);
857         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
858     }else{
859         int list, mx, my, i;
860          //FIXME we should set ref_idx_l? to 0 if we use that later ...
861         if(IS_16X16(mb_type)){
862             for(list=0; list<h->list_count; list++){
863                     unsigned int val;
864                     if(IS_DIR(mb_type, 0, list)){
865                         if(h->ref_count[list]==1){
866                             val= 0;
867                         }else if(h->ref_count[list]==2){
868                             val= get_bits1(&s->gb)^1;
869                         }else{
870                             val= get_ue_golomb_31(&s->gb);
871                             if(val >= h->ref_count[list]){
872                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
873                                 return -1;
874                             }
875                         }
876                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
877                     }
878             }
879             for(list=0; list<h->list_count; list++){
880                 if(IS_DIR(mb_type, 0, list)){
881                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
882                     mx += get_se_golomb(&s->gb);
883                     my += get_se_golomb(&s->gb);
884                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
885
886                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
887                 }
888             }
889         }
890         else if(IS_16X8(mb_type)){
891             for(list=0; list<h->list_count; list++){
892                     for(i=0; i<2; i++){
893                         unsigned int val;
894                         if(IS_DIR(mb_type, i, list)){
895                             if(h->ref_count[list] == 1){
896                                 val= 0;
897                             }else if(h->ref_count[list] == 2){
898                                 val= get_bits1(&s->gb)^1;
899                             }else{
900                                 val= get_ue_golomb_31(&s->gb);
901                                 if(val >= h->ref_count[list]){
902                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
903                                     return -1;
904                                 }
905                             }
906                         }else
907                             val= LIST_NOT_USED&0xFF;
908                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
909                     }
910             }
911             for(list=0; list<h->list_count; list++){
912                 for(i=0; i<2; i++){
913                     unsigned int val;
914                     if(IS_DIR(mb_type, i, list)){
915                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
916                         mx += get_se_golomb(&s->gb);
917                         my += get_se_golomb(&s->gb);
918                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
919
920                         val= pack16to32(mx,my);
921                     }else
922                         val=0;
923                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
924                 }
925             }
926         }else{
927             assert(IS_8X16(mb_type));
928             for(list=0; list<h->list_count; list++){
929                     for(i=0; i<2; i++){
930                         unsigned int val;
931                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
932                             if(h->ref_count[list]==1){
933                                 val= 0;
934                             }else if(h->ref_count[list]==2){
935                                 val= get_bits1(&s->gb)^1;
936                             }else{
937                                 val= get_ue_golomb_31(&s->gb);
938                                 if(val >= h->ref_count[list]){
939                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
940                                     return -1;
941                                 }
942                             }
943                         }else
944                             val= LIST_NOT_USED&0xFF;
945                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
946                     }
947             }
948             for(list=0; list<h->list_count; list++){
949                 for(i=0; i<2; i++){
950                     unsigned int val;
951                     if(IS_DIR(mb_type, i, list)){
952                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
953                         mx += get_se_golomb(&s->gb);
954                         my += get_se_golomb(&s->gb);
955                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
956
957                         val= pack16to32(mx,my);
958                     }else
959                         val=0;
960                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
961                 }
962             }
963         }
964     }
965
966     if(IS_INTER(mb_type))
967         write_back_motion(h, mb_type);
968
969     if(!IS_INTRA16x16(mb_type)){
970         cbp= get_ue_golomb(&s->gb);
971
972         if(decode_chroma){
973             if(cbp > 47){
974                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
975                 return -1;
976             }
977             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
978             else                     cbp= golomb_to_inter_cbp   [cbp];
979         }else{
980             if(cbp > 15){
981                 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
982                 return -1;
983             }
984             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
985             else                     cbp= golomb_to_inter_cbp_gray[cbp];
986         }
987     }
988
989     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
990         mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
991     }
992     h->cbp=
993     h->cbp_table[mb_xy]= cbp;
994     s->current_picture.f.mb_type[mb_xy] = mb_type;
995
996     if(cbp || IS_INTRA16x16(mb_type)){
997         int i4x4, chroma_idx;
998         int dquant;
999         int ret;
1000         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1001         const uint8_t *scan, *scan8x8;
1002         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1003
1004         if(IS_INTERLACED(mb_type)){
1005             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1006             scan= s->qscale ? h->field_scan : h->field_scan_q0;
1007         }else{
1008             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1009             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1010         }
1011
1012         dquant= get_se_golomb(&s->gb);
1013
1014         s->qscale += dquant;
1015
1016         if(((unsigned)s->qscale) > max_qp){
1017             if(s->qscale<0) s->qscale+= max_qp+1;
1018             else            s->qscale-= max_qp+1;
1019             if(((unsigned)s->qscale) > max_qp){
1020                 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1021                 return -1;
1022             }
1023         }
1024
1025         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1026         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1027
1028         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1029             return -1;
1030         }
1031         h->cbp_table[mb_xy] |= ret << 12;
1032         if(CHROMA444){
1033             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1034                 return -1;
1035             }
1036             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1037                 return -1;
1038             }
1039         } else {
1040             if(cbp&0x30){
1041                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1042                     if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1043                         return -1;
1044                     }
1045             }
1046
1047             if(cbp&0x20){
1048                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1049                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1050                     for(i4x4=0; i4x4<4; i4x4++){
1051                         const int index= 16 + 16*chroma_idx + i4x4;
1052                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1053                             return -1;
1054                         }
1055                     }
1056                 }
1057             }else{
1058                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1059                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1060             }
1061         }
1062     }else{
1063         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1064         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1065         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1066     }
1067     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
1068     write_back_non_zero_count(h);
1069
1070     if(MB_MBAFF){
1071         h->ref_count[0] >>= 1;
1072         h->ref_count[1] >>= 1;
1073     }
1074
1075     return 0;
1076 }
1077