2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
// CAVLC coeff_token VLCs: one per NC context class. Table sizes below must
// match the per-class entries of coeff_token_vlc_tables_size.
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
// coeff_token VLC for the chroma DC block (separate, smaller code table).
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
// total_zeros VLCs, one per possible total_coeff value (1..15).
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
// total_zeros VLCs for chroma DC (total_coeff 1..3).
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
// run_before VLCs for zerosLeft 1..6; runs >6 use the separate run7 table.
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
// Forward declarations for SVQ3 IDCT variants and the deblocking filters,
// defined later in this file (bodies not visible in this excerpt).
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
// Packs two 16-bit values into one uint32_t, ordered so that a later
// byte-wise store matches the machine's endianness.
// NOTE(review): this listing is a sampled excerpt — the #else/#endif and
// closing brace lines are not visible here.
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
// qp % 6 lookup for qp in [0,51]; avoids a runtime modulo in dequant code.
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
// qp / 6 lookup for qp in [0,51]; companion of rem6 above.
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
// Left-neighbour 4x4-block index remapping tables, selected in fill_caches()
// depending on the MBAFF frame/field pairing of current vs left macroblock.
// (Table body lines are not visible in this excerpt.)
97 static const int left_block_options[4][8]={
/**
 * Fills the per-macroblock neighbour caches (intra pred modes, non-zero
 * counts, cbp, motion vectors, reference indices, mvd, direct flags) from
 * the already-decoded neighbouring macroblocks, handling MBAFF field/frame
 * neighbour remapping.
 * @param mb_type    type flags of the current macroblock
 * @param for_deblock nonzero when called for the loop filter (uses relaxed
 *                    slice-boundary availability; skips some intra parts)
 * NOTE(review): this listing is a sampled excerpt — interleaved source lines
 * are missing (see the embedded original line numbers). Comments below only
 * describe the visible code.
 */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
110 int topleft_partition= -1;
// In field pictures the row above is 2*mb_stride away, hence the shift.
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
// Default (non-MBAFF) neighbour addresses: simple 2D grid arithmetic.
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
// MBAFF path: neighbours are addressed via the frame/field MB *pair*, and
// each neighbour address may need a +-mb_stride correction depending on
// whether current and neighbour pairs are frame- or field-coded.
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
// When left and current differ in frame/field coding, the left 4x4-block
// indices must be remapped via one of the left_block_options tables.
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
// Publish resolved neighbour addresses for later users (e.g. deblocking).
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
// Deblock path: any decoded MB (slice_table < 0xFFFF) counts as available.
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
// Broadcast each 8x8 block's ref index to 4 cache bytes at once.
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// Decode path: neighbours only count when in the same slice.
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra prediction sample-availability bitmasks. With constrained intra
// prediction only intra neighbours count (type_mask keeps the intra bits).
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
// Intra-4x4 prediction-mode cache: copy the bottom row of the top MB and
// the right column of the left MB(s), or a fallback when unavailable.
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Non-zero-count cache from top / left neighbours; unavailable neighbours
// read as 0 for CABAC inter, else 64 (the "assume coded" sentinel).
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CBP of neighbours (used by CABAC context derivation).
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Motion-vector / reference-index caches for inter and direct MBs.
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
// Top row: four 4x4 MVs plus two 8x8 ref indices from the top MB.
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left column, remapped through left_block for MBAFF.
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
// Topleft / topright corners (single MV + ref each).
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Mark padding cache cells outside the MB as not available.
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// MVD cache (CABAC motion-vector-difference contexts).
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// Direct-mode flag cache (B slices only).
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF frame<->field neighbour scaling: MAP_XY2XY8 applies MAP_F2F to each
// neighbour cache cell; the two MAP_F2F definitions below scale refs/MVs
// when neighbour and current MB differ in interlacing.
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
// Count how many neighbours use the 8x8 transform (for CABAC context).
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copies the bottom-row and right-column intra4x4 prediction modes from the
 * cache back into the per-MB intra4x4_pred_mode array, where they serve as
 * the top/left neighbour modes for subsequent macroblocks.
 */
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
// Entries 0-3: right column (top to bottom); 4-6: bottom row.
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
// Remap tables: index is the requested mode; -1 entries are invalid modes
// for the missing neighbour and trigger the error path below.
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Each of the four mask bits corresponds to one left-column 4x4 row.
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
// Remap DC-style modes to their partial-DC variants when a neighbour is
// missing; -1 entries indicate an illegal mode for that situation.
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
// 0x8080 tests both left halves; a partial match means only one field-pair
// half of the left edge is available (MBAFF + constrained intra pred).
627 if((h->left_samples_available&0x8080) != 0x8080){
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
// Prediction is min(left, top); negative means a neighbour was unavailable,
// in which case the spec mandates falling back to DC_PRED.
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
/**
 * Copies the edge non-zero-coefficient counts from the cache back into the
 * per-MB non_zero_count array for use as neighbour data by later MBs.
 */
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
// Luma edges: entries 0-3 right column, 4-6 bottom row.
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// Chroma Cb edges (7-9) and Cr edges (10-12).
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
// i (computed on a line not visible in this excerpt) is presumably the sum
// of left and top; values >= 64 come from the "unavailable" sentinel and
// skip the rounding average below.
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the diagonal (top-right, falling back to top-left) motion vector
 * neighbour used by median MV prediction, including the MBAFF special cases
 * where the neighbour belongs to a differently-coded (frame/field) MB and
 * its MV/ref must be scaled on the fly.
 * @return the reference index of the chosen neighbour; *C is set to its MV.
 */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Scratch cache cell scan8[0]-2 holds the synthesized MV for MBAFF cases.
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV reads the MV at 4x4 coords (X4,Y4) and rescales the vertical
// component (MV_OP) and the ref index (REF_OP) across the frame/field gap.
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Top-right unavailable: try the left neighbour column instead (MBAFF).
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF fallback: top-right if available, else top-left.
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
// A = left neighbour, B = top neighbour, C = diagonal (via fetch_diagonal_mv).
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
// Standard median MV prediction: count neighbours sharing the target ref.
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
// No match or all unavailable except left: special-case per the spec,
// otherwise fall back to the component-wise median.
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Upper 16x8 partition: prefer the top neighbour when it shares our ref.
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// Lower 16x8 partition: prefer the left neighbour when it shares our ref.
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Directional shortcut did not apply: use the generic median predictor.
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Left 8x16 partition: prefer the left neighbour when it shares our ref.
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Right 8x16 partition: prefer the diagonal (top-right) neighbour.
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
// Directional shortcut did not apply: use the generic median predictor.
875 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV for a P-skip macroblock: the zero MV when either spatial
 * neighbour is unavailable or has a zero MV with ref 0, else the regular
 * median prediction.
 */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
// Zero-MV conditions per the P_Skip derivation in the spec.
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factor for reference i,
 * following the tb/td/tx derivation of the H.264 spec (8.4.1.2.3).
 * Returns a neutral factor (value on a line not visible here, conventionally
 * 256) when td is 0 or the reference is long-term.
 */
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Precomputes dist_scale_factor for every list-0 reference (and, in the
 * field/MBAFF branch, the per-field variants) for temporal direct mode.
 */
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
// Field variants: offset +16 into the extended (field) part of ref_list.
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
// Frame (or single-field) factors.
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the co-located-reference -> list0-index map used by temporal
 * direct mode: for each reference of the co-located picture (ref_list[1][0]),
 * finds the entry in our own ref_list with the same POC.
 * @param field    current field parity (when mbafi)
 * @param colfield field parity of the co-located picture's ref lists
 * @param mbafi    nonzero for the MBAFF field-pair variant (entries 16..)
 */
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
// POCs are encoded as 4*frame_num + reference flags; adjust field parity.
939 int poc = ref1->ref_poc[colfield][list][old_ref];
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference counts and packed ref POCs
 * (4*frame_num + reference parity) per field side, then — for B slices
 * using temporal direct — builds the colocated-ref maps via fill_colmap().
 * NOTE(review): elided lines hide declarations of list/j/field and the
 * early return for non-temporal-direct slices.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960     MpegEncContext * const s = &h->s;
961     Picture * const ref1 = &h->ref_list[1][0];
962     Picture * const cur = s->current_picture_ptr;
    /* sidx selects the field side being decoded; ref1sidx the colocated picture's side */
964     int sidx= (s->picture_structure&1)^1;
965     int ref1sidx= (ref1->reference&1)^1;
967     for(list=0; list<2; list++){
968         cur->ref_count[sidx][list] = h->ref_count[list];
969         for(j=0; j<h->ref_count[list]; j++)
970             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
973     if(s->picture_structure == PICT_FRAME){
        /* frame coding: both field sides share one ref list — mirror side 0 into side 1 */
974         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975         memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978     cur->mbaff= FRAME_MBAFF;
980     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983     for(list=0; list<2; list++){
984         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985         for(field=0; field<2; field++)
986             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B-direct macroblock
 * (or its direct 8x8 sub-blocks when is_b8x8), writing the results into
 * h->mv_cache / h->ref_cache and updating *mb_type / h->sub_mb_type.
 * Handles both spatial direct (h->direct_spatial_mv_pred) and temporal
 * direct prediction, including the frame/field (PAFF/MBAFF) colocated-MB
 * remapping cases.
 * NOTE(review): this excerpt elides many lines of the original (branch
 * bodies, loop closers, several declarations); comments below describe
 * only what the visible lines show.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991     MpegEncContext * const s = &h->s;
992     int b8_stride = h->b8_stride;
993     int b4_stride = h->b_stride;
994     int mb_xy = h->mb_xy;
996     const int16_t (*l1mv0)[2], (*l1mv1)[2];
997     const int8_t *l1ref0, *l1ref1;
998     const int is_b8x8 = IS_8X8(*mb_type);
999     unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
    /* --- locate the colocated macroblock(s), remapping mb_xy across frame/field coding --- */
1004     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
            /* pick the colocated field whose POC is closest to the current picture */
1006             int cur_poc = s->current_picture_ptr->poc;
1007             int *col_poc = h->ref_list[1]->field_poc;
1008             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013             mb_xy += s->mb_stride*fieldoff;
1016     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
1017         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
            /* one field MB maps onto a vertical pair of frame MBs */
1018             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033         }else{                                           //     AFR/FR    -> AFR/FR
1036             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1037             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038                 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039                 * so we know exactly what block size to use */
1040                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044                 *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
    /* --- pointers into the colocated picture's motion and ref-index planes --- */
1052     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
        /* advance to the bottom half of a colocated MB pair (condition elided) */
1058             l1ref0 += h->b8_stride;
1059             l1ref1 += h->b8_stride;
1060             l1mv0  +=  2*b4_stride;
1061             l1mv1  +=  2*b4_stride;
1065     if(h->direct_spatial_mv_pred){
1070         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072         /* ref = min(neighbors) */
1073         for(list=0; list<2; list++){
1074             int refa = h->ref_cache[list][scan8[0] - 1];
1075             int refb = h->ref_cache[list][scan8[0] - 8];
1076             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077             if(refc == PART_NOT_AVAILABLE)
1078                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
            /* unsigned min so PART_NOT_AVAILABLE (negative) loses to valid refs */
1079             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1084         if(ref[0] < 0 && ref[1] < 0){
            /* no neighbour has a reference in either list: zero refs and MVs */
1085             ref[0] = ref[1] = 0;
1086             mv[0][0] = mv[0][1] =
1087             mv[1][0] = mv[1][1] = 0;
1089             for(list=0; list<2; list++){
1091                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093                     mv[list][0] = mv[list][1] = 0;
        /* drop the unused list from the MB/sub-MB type when only one list predicts */
1099             *mb_type &= ~MB_TYPE_L1;
1100             sub_mb_type &= ~MB_TYPE_L1;
1101         }else if(ref[0] < 0){
1103             *mb_type &= ~MB_TYPE_L0;
1104             sub_mb_type &= ~MB_TYPE_L0;
        /* spatial direct, frame/field mismatch with the colocated MB: per-8x8 handling */
1107         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108             for(i8=0; i8<4; i8++){
1111                 int xy8 = x8+y8*b8_stride;
1112                 int xy4 = 3*x8+y8*b4_stride;
1115                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117                 h->sub_mb_type[i8] = sub_mb_type;
1119                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* colocated block (near-)static: spec says keep zero/predicted MVs */
1121                 if(!IS_INTRA(mb_type_col[y8])
1122                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125                         a= pack16to32(mv[0][0],mv[0][1]);
1127                         b= pack16to32(mv[1][0],mv[1][1]);
1129                     a= pack16to32(mv[0][0],mv[0][1]);
1130                     b= pack16to32(mv[1][0],mv[1][1]);
1132                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1135         }else if(IS_16X16(*mb_type)){
1138             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1140             if(!IS_INTRA(mb_type_col[0])
1141                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       /* workaround for old x264 builds (<=33) that mishandled this case */
1143                        && (h->x264_build>33 || !h->x264_build)))){
1145                     a= pack16to32(mv[0][0],mv[0][1]);
1147                     b= pack16to32(mv[1][0],mv[1][1]);
1149                 a= pack16to32(mv[0][0],mv[0][1]);
1150                 b= pack16to32(mv[1][0],mv[1][1]);
1152             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1155             for(i8=0; i8<4; i8++){
1156                 const int x8 = i8&1;
1157                 const int y8 = i8>>1;
1159                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161                 h->sub_mb_type[i8] = sub_mb_type;
1163                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1169                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1170                                                 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171                                                     && (h->x264_build>33 || !h->x264_build)))){
1172                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173                     if(IS_SUB_8X8(sub_mb_type)){
1174                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182                     for(i4=0; i4<4; i4++){
1183                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1194     }else{ /* direct temporal mv pred */
1195         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196         const int *dist_scale_factor = h->dist_scale_factor;
1199         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            /* field MB in an MBAFF frame: use the per-field maps/scale factors */
1200             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1204         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1207         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208             /* FIXME assumes direct_8x8_inference == 1 */
            /* y_shift compensates the 2:1 vertical scale between frame and field MVs */
1209             int y_shift  = 2*!IS_INTERLACED(*mb_type);
1211             for(i8=0; i8<4; i8++){
1212                 const int x8 = i8&1;
1213                 const int y8 = i8>>1;
1215                 const int16_t (*l1mv)[2]= l1mv0;
1217                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219                 h->sub_mb_type[i8] = sub_mb_type;
1221                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222                 if(IS_INTRA(mb_type_col[y8])){
                    /* intra colocated block: zero refs and MVs for this 8x8 */
1223                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229                 ref0 = l1ref0[x8 + y8*b8_stride];
1231                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236                 scale = dist_scale_factor[ref0];
1237                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241                     int my_col = (mv_col[1]<<y_shift)/2;
                    /* scaled list0 MV; list1 MV is the difference back to the colocated MV */
1242                     int mx = (scale * mv_col[0] + 128) >> 8;
1243                     int my = (scale * my_col + 128) >> 8;
1244                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251         /* one-to-one mv scaling */
1253         if(IS_16X16(*mb_type)){
1256             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257             if(IS_INTRA(mb_type_col[0])){
1260                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262                 const int scale = dist_scale_factor[ref0];
1263                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275             for(i8=0; i8<4; i8++){
1276                 const int x8 = i8&1;
1277                 const int y8 = i8>>1;
1279                 const int16_t (*l1mv)[2]= l1mv0;
1281                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283                 h->sub_mb_type[i8] = sub_mb_type;
1284                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285                 if(IS_INTRA(mb_type_col[0])){
1286                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294                     ref0 = map_col_to_list0[0][ref0];
1296                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299                 scale = dist_scale_factor[ref0];
1301                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302                 if(IS_SUB_8X8(sub_mb_type)){
1303                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304                     int mx = (scale * mv_col[0] + 128) >> 8;
1305                     int my = (scale * mv_col[1] + 128) >> 8;
1306                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309                 for(i4=0; i4<4; i4++){
1310                     const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312                     mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313                     mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314                     *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315                         pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (mv_cache / ref_cache / mvd_cache /
 * sub_mb_type direct flags) back into the frame-wide tables of the
 * current picture, for each list the MB actually uses.
 * NOTE(review): elided lines hide the y-loop headers and some closers;
 * the 64-bit copies move two 4-byte MVs (half a cache row) at a time.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323     MpegEncContext * const s = &h->s;
    /* b_xy: 4x4-block index; b8_xy: 8x8-block index of this MB */
1324     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1328         if(!USES_LIST(mb_type, 0))
1329             fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331     for(list=0; list<h->list_count; list++){
1333         if(!USES_LIST(mb_type, list))
1337             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340         if( h->pps.cabac ) {
            /* CABAC needs stored MV differences; a skipped MB has all-zero mvd */
1341             if(IS_SKIP(mb_type))
1342                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1351             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1359     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        /* record which 8x8 sub-blocks were direct-coded (B slices, CABAC context) */
1360         if(IS_8X8(mb_type)){
1361             uint8_t *direct_table = &h->direct_table[b8_xy];
1362             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370  * Decodes a network abstraction layer unit.
1371  * @param consumed is the number of bytes used as input
1372  * @param length is the length of the array
1373  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374  * @returns decoded bytes, might be src+1 if no escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
      /* parse the 1-byte NAL header: ref idc (2 bits) and unit type (5 bits) */
1381 //    src[0]&0x80;                //forbidden bit
1382     h->nal_ref_idc= src[0]>>5;
1383     h->nal_unit_type= src[0]&0x1F;
1387     for(i=0; i<length; i++)
1388         printf("%2X ", src[i]);
      /* fast scan for the first 00 00 (01|02|03) pattern; stepping by 2 is
         sufficient because any such pattern contains a zero at an even or
         odd offset that the backtrack (i--) below catches */
1390     for(i=0; i+1<length; i+=2){
1391         if(src[i]) continue;
1392         if(i>0 && src[i-1]==0) i--;
1393         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395                 /* startcode, so we must be past the end */
1402     if(i>=length-1){ //no escaped 0
          /* no emulation-prevention bytes: return the input directly, skipping the header */
1403         *dst_length= length;
1404         *consumed= length+1; //+1 for the header
1408     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410     dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
      /* copy loop (header/loop lines elided here): drop each 03 in 00 00 03 */
1419         //remove escapes (very rare 1:2^22)
1420         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421             if(src[si+2]==3){ //escape
1426             }else //next start code
1430         dst[di++]= src[si++];
1434     *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440  * identifies the exact end of the bitstream
1441  * @return the length of the trailing, or 0 if damaged
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
      /* body largely elided in this excerpt; v presumably holds the last byte
         whose lowest set bit marks the rbsp_stop_one_bit — TODO confirm */
1447     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457  * IDCT transforms the 16 dc values and dequantizes them.
1458  * @param qp quantization parameter
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
      /* 4x4 Hadamard inverse transform of the luma DC coefficients, done as
         a column pass into temp[] followed by a row pass writing back with
         dequantization by qmul (rounded >>8).  Loop headers and the butterfly
         writes into temp[] are elided in this excerpt. */
1463     int temp[16]; //FIXME check if this is a good idea
      /* offsets scatter the 16 DC values to their positions in the 16x16 block layout */
1464     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
1470         const int offset= y_offset[i];
1471         const int z0= block[offset+stride*0] + block[offset+stride*4];
1472         const int z1= block[offset+stride*0] - block[offset+stride*4];
1473         const int z2= block[offset+stride*1] - block[offset+stride*5];
1474         const int z3= block[offset+stride*1] + block[offset+stride*5];
1483         const int offset= x_offset[i];
1484         const int z0= temp[4*0+i] + temp[4*2+i];
1485         const int z1= temp[4*0+i] - temp[4*2+i];
1486         const int z2= temp[4*1+i] - temp[4*3+i];
1487         const int z3= temp[4*1+i] + temp[4*3+i];
1489         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498  * DCT transforms the 16 dc values.
1499  * @param qp quantization parameter ??? FIXME
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 //    const int qmul= dequant_coeff[qp][0];
      /* forward counterpart of h264_luma_dc_dequant_idct_c: same two-pass
         butterfly structure, output scaled by >>1 instead of dequantized.
         Loop headers and the temp[] writes are elided in this excerpt. */
1504     int temp[16]; //FIXME check if this is a good idea
1505     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1509         const int offset= y_offset[i];
1510         const int z0= block[offset+stride*0] + block[offset+stride*4];
1511         const int z1= block[offset+stride*0] - block[offset+stride*4];
1512         const int z2= block[offset+stride*1] - block[offset+stride*5];
1513         const int z3= block[offset+stride*1] + block[offset+stride*5];
1522         const int offset= x_offset[i];
1523         const int z0= temp[4*0+i] + temp[4*2+i];
1524         const int z1= temp[4*0+i] - temp[4*2+i];
1525         const int z2= temp[4*1+i] - temp[4*3+i];
1526         const int z3= temp[4*1+i] + temp[4*3+i];
1528         block[stride*0 +offset]= (z0 + z3)>>1;
1529         block[stride*2 +offset]= (z1 + z2)>>1;
1530         block[stride*8 +offset]= (z1 - z2)>>1;
1531         block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, in place.  The four DCs sit at a 16/32 stride in block.
 * NOTE(review): the lines computing e (and a re-derivation of a) between
 * the loads and the stores are elided in this excerpt.
 */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540     const int stride= 16*2;
1541     const int xStride= 16;
1544     a= block[stride*0 + xStride*0];
1545     b= block[stride*0 + xStride*1];
1546     c= block[stride*1 + xStride*0];
1547     d= block[stride*1 + xStride*1];
    /* butterfly outputs, dequantized by qmul with a >>7 scale */
1554     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients, in place;
 * mirrors chroma_dc_dequant_idct_c without the dequantization scale.
 * NOTE(review): the intermediate computation of e is elided in this excerpt.
 */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562     const int stride= 16*2;
1563     const int xStride= 16;
1566     a= block[stride*0 + xStride*0];
1567     b= block[stride*0 + xStride*1];
1568     c= block[stride*1 + xStride*0];
1569     d= block[stride*1 + xStride*1];
1576     block[stride*0 + xStride*0]= (a+c);
1577     block[stride*0 + xStride*1]= (e+b);
1578     block[stride*1 + xStride*0]= (a-c);
1579     block[stride*1 + xStride*1]= (e-b);
1584  * gets the chroma qp.
      /* simple table lookup: t selects the cb/cr qp-offset table from the PPS */
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587     return h->pps.chroma_qp_table[t][qscale];
1590 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1591 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block in scan order, with intra/inter bias and optional
 * special DC handling (separate_dc: luma DC uses QUANT_SHIFT-2, otherwise
 * DC uses QUANT_SHIFT+1).  Returns the index of the last non-zero coeff.
 * The (unsigned)(level+threshold1) > threshold2 trick tests |level| > bias
 * range for both signs with one compare.
 * NOTE(review): loop headers, the else/zeroing branches and last_non_zero
 * bookkeeping are elided in this excerpt.
 */
1592 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1594     const int * const quant_table= quant_coeff[qscale];
1595     const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1596     const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597     const unsigned int threshold2= (threshold1<<1);
            /* luma DC path: coarser shift (QUANT_SHIFT-2), dedicated table row qscale+18 */
1603             const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605             const unsigned int dc_threshold2= (dc_threshold1<<1);
1607             int level= block[0]*quant_coeff[qscale+18][0];
1608             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610                     level= (dc_bias + level)>>(QUANT_SHIFT-2);
1613                     level= (dc_bias - level)>>(QUANT_SHIFT-2);
1616 //                last_non_zero = i;
            /* chroma DC path: finer shift (QUANT_SHIFT+1), regular table */
1621             const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623             const unsigned int dc_threshold2= (dc_threshold1<<1);
1625             int level= block[0]*quant_table[0];
1626             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1628                     level= (dc_bias + level)>>(QUANT_SHIFT+1);
1631                     level= (dc_bias - level)>>(QUANT_SHIFT+1);
1634 //                last_non_zero = i;
        /* AC coefficients, visited in scan order */
1647         const int j= scantable[i];
1648         int level= block[j]*quant_table[j];
1650 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
1651 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652         if(((unsigned)(level+threshold1))>threshold2){
1654                 level= (bias + level)>>QUANT_SHIFT;
1657                 level= (bias - level)>>QUANT_SHIFT;
1666     return last_non_zero;
/**
 * Motion-compensates one partition in one direction (one list): fetches the
 * luma and chroma prediction from pic at the cached MV for block n, applying
 * edge emulation when the reference area crosses the picture border.
 * @param square  non-square partitions apply qpix_op twice, offset by delta
 * @param list    0 or 1; selects the mv_cache row
 * NOTE(review): some lines (emu flag setup, the non-square luma condition,
 * the field-MB chroma condition, the chroma emu checks) are elided here.
 */
1669 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671                            int src_x_offset, int src_y_offset,
1672                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673     MpegEncContext * const s = &h->s;
    /* MVs are quarter-pel; offsets are in full pels, hence *8 */
1674     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1675     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1676     const int luma_xy= (mx&3) + ((my&3)<<2);
1677     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678     uint8_t * src_cb, * src_cr;
1679     int extra_width= h->emu_edge_width;
1680     int extra_height= h->emu_edge_height;
1682     const int full_mx= mx>>2;
1683     const int full_my= my>>2;
1684     const int pic_width  = 16*s->mb_width;
1685     const int pic_height = 16*s->mb_height >> MB_FIELD;
1687     if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
    /* sub-pel interpolation reads 3 extra pels on each side; shrink the safe margin */
1690     if(mx&7) extra_width -= 3;
1691     if(my&7) extra_height -= 3;
1693     if(   full_mx < 0-extra_width
1694        || full_my < 0-extra_height
1695        || full_mx + 16/*FIXME*/ > pic_width + extra_width
1696        || full_my + 16/*FIXME*/ > pic_height + extra_height){
        /* out of the padded picture: build a padded copy in edge_emu_buffer */
1697         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1702     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1704         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1707     if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1710         // chroma offset when predicting from a field of opposite parity
1711         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1712         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    /* chroma MVs are eighth-pel relative to the half-resolution planes */
1714     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1718         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1719             src_cb= s->edge_emu_buffer;
1721     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1724         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1725             src_cr= s->edge_emu_buffer;
1727     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition: predicts from list0
 * with the "put" functions and, if bidirectional, averages in list1 with
 * the "avg" functions (the switch to avg happens after the list0 call;
 * that line is elided here).
 */
1730 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1731                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732                            int x_offset, int y_offset,
1733                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735                            int list0, int list1){
1736     MpegEncContext * const s = &h->s;
1737     qpel_mc_func *qpix_op=  qpix_put;
1738     h264_chroma_mc_func chroma_op= chroma_put;
    /* advance the destinations to this partition and convert to picture coordinates */
1740     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1741     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1742     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1743     x_offset += 8*s->mb_x;
1744     y_offset += 8*(s->mb_y >> MB_FIELD);
1747         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1748         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750                    qpix_op, chroma_op);
        /* second direction averages on top of the first prediction */
1753         chroma_op= chroma_avg;
1757         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1758         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760                    qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition.  Bidirectional blocks
 * predict each list into separate buffers (dest vs the obmc scratchpad)
 * and blend with implicit (use_weight==2) or explicit biweights;
 * unidirectional blocks predict normally then apply explicit weights.
 * NOTE(review): the list0&&list1 condition and several closers are elided
 * in this excerpt.
 */
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766                            int x_offset, int y_offset,
1767                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770                            int list0, int list1){
1771     MpegEncContext * const s = &h->s;
1773     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1774     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1775     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1776     x_offset += 8*s->mb_x;
1777     y_offset += 8*(s->mb_y >> MB_FIELD);
1780         /* don't optimize for luma-only case, since B-frames usually
1781         * use implicit weights => chroma too. */
1782         uint8_t *tmp_cb = s->obmc_scratchpad;
1783         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785         int refn0 = h->ref_cache[0][ scan8[n] ];
1786         int refn1 = h->ref_cache[1][ scan8[n] ];
1788         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789                     dest_y, dest_cb, dest_cr,
1790                     x_offset, y_offset, qpix_put, chroma_put);
1791         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792                     tmp_y, tmp_cb, tmp_cr,
1793                     x_offset, y_offset, qpix_put, chroma_put);
1795         if(h->use_weight == 2){
            /* implicit weighting: per-ref-pair weights summing to 64, no offset */
1796             int weight0 = h->implicit_weight[refn0][refn1];
1797             int weight1 = 64 - weight0;
1798             luma_weight_avg(  dest_y, tmp_y, h->  mb_linesize, 5, weight0, weight1, 0);
1799             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1802             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        /* unidirectional: predict, then weight in place */
1813         int list = list1 ? 1 : 0;
1814         int refn = h->ref_cache[list][ scan8[n] ];
1815         Picture *ref= &h->ref_list[list][refn];
1816         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818                     qpix_put, chroma_put);
1820         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822         if(h->use_weight_chroma){
1823             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation:
 * weighted when explicit weights are active (use_weight==1), or when
 * implicit weighting (use_weight==2) applies to a bidirectional block
 * whose implicit weight differs from the trivial 32/32 split.
 */
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833                            int x_offset, int y_offset,
1834                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837                            int list0, int list1){
1838     if((h->use_weight==2 && list0 && list1
1839         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840        || h->use_weight==1)
1841         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842                          x_offset, y_offset, qpix_put, chroma_put,
1843                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1845         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1849 static inline void prefetch_motion(H264Context *h, int list){
1850     /* fetch pixels for estimated mv 4 macroblocks ahead
1851     * optimized for 64byte cache lines */
1852     MpegEncContext * const s = &h->s;
1853     const int refn = h->ref_cache[list][scan8[0]];
        /* +16*mb_x+8 / 16*mb_y: approximate position 4 MBs ahead (guard elided) */
1855         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857         uint8_t **src= h->ref_list[list][refn].data;
1858         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859         s->dsp.prefetch(src[0]+off, s->linesize, 4);
        /* chroma planes are contiguous, so one prefetch with the cb→cr gap covers both */
1860         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs full inter prediction for one macroblock: dispatches each
 * partition (16x16 / 16x8 / 8x16 / 8x8 with its sub-partitions) to
 * mc_part() with the matching qpel/chroma function and weight-table sizes,
 * prefetching the next MB's reference pixels for both lists.
 * NOTE(review): the 8x8 loop header and some declarations are elided in
 * this excerpt.
 */
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869     MpegEncContext * const s = &h->s;
1870     const int mb_xy= h->mb_xy;
1871     const int mb_type= s->current_picture.mb_type[mb_xy];
1873     assert(IS_INTER(mb_type));
1875     prefetch_motion(h, 0);
1877     if(IS_16X16(mb_type)){
1878         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880                 &weight_op[0], &weight_avg[0],
1881                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1882     }else if(IS_16X8(mb_type)){
        /* two 16x8 halves: block 8 is the bottom half (y_offset 4 in 8-pel units) */
1883         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885                 &weight_op[1], &weight_avg[1],
1886                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889                 &weight_op[1], &weight_avg[1],
1890                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1891     }else if(IS_8X16(mb_type)){
1892         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894                 &weight_op[2], &weight_avg[2],
1895                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898                 &weight_op[2], &weight_avg[2],
1899                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1903         assert(IS_8X8(mb_type));
            /* per-8x8 sub-partition dispatch */
1906             const int sub_mb_type= h->sub_mb_type[i];
1908             int x_offset= (i&1)<<2;
1909             int y_offset= (i&2)<<1;
1911             if(IS_SUB_8X8(sub_mb_type)){
1912                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914                     &weight_op[3], &weight_avg[3],
1915                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916             }else if(IS_SUB_8X4(sub_mb_type)){
1917                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919                     &weight_op[4], &weight_avg[4],
1920                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923                     &weight_op[4], &weight_avg[4],
1924                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925             }else if(IS_SUB_4X8(sub_mb_type)){
1926                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928                     &weight_op[5], &weight_avg[5],
1929                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932                     &weight_op[5], &weight_avg[5],
1933                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936                 assert(IS_SUB_4X4(sub_mb_type));
1938                     int sub_x_offset= x_offset + 2*(j&1);
1939                     int sub_y_offset= y_offset +   (j&2);
1940                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942                         &weight_op[6], &weight_avg[6],
1943                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949     prefetch_motion(h, 1);
/**
 * One-time construction of the static CAVLC VLC decode tables:
 * coeff_token (4 packed tables), chroma DC coeff_token, total_zeros (15),
 * chroma DC total_zeros (3), run (6) and run7.  Every VLC points into
 * preallocated static storage, hence INIT_VLC_USE_NEW_STATIC.
 * NOTE(review): this listing elides lines (the "done" guard body and the
 * for-loop headers); comments cover only the visible code.
 */
1952 static av_cold void decode_init_vlc(void){
1953     static int done = 0;
/* chroma DC coeff_token: 4*5 symbols, storage supplied statically */
1960 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1961 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1962 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1963 &chroma_dc_coeff_token_len [0], 1, 1,
1964 &chroma_dc_coeff_token_bits[0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token tables are packed back-to-back in one static
 * array; "offset" tracks the running position (loop header elided) */
1969 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1970 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1971 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1972 &coeff_token_len [i][0], 1, 1,
1973 &coeff_token_bits[i][0], 1, 1,
1974 INIT_VLC_USE_NEW_STATIC);
1975 offset += coeff_token_vlc_tables_size[i];
1978 * This is a one time safety check to make sure that
1979 * the packed static coeff_token_vlc table sizes
1980 * were initialized correctly.
1982 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* per-table chroma DC total_zeros VLCs (loop header elided) */
1985 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1986 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1987 init_vlc(&chroma_dc_total_zeros_vlc[i],
1988 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1989 &chroma_dc_total_zeros_len [i][0], 1, 1,
1990 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1991 INIT_VLC_USE_NEW_STATIC);
/* one total_zeros VLC per possible total_coeff value (1..15) */
1993 for(i=0; i<15; i++){
1994 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1995 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1996 init_vlc(&total_zeros_vlc[i],
1997 TOTAL_ZEROS_VLC_BITS, 16,
1998 &total_zeros_len [i][0], 1, 1,
1999 &total_zeros_bits[i][0], 1, 1,
2000 INIT_VLC_USE_NEW_STATIC);
/* run_before VLCs for zeros_left 1..6 (loop header elided) */
2004 run_vlc[i].table = run_vlc_tables[i];
2005 run_vlc[i].table_allocated = run_vlc_tables_size;
2006 init_vlc(&run_vlc[i],
2008 &run_len [i][0], 1, 1,
2009 &run_bits[i][0], 1, 1,
2010 INIT_VLC_USE_NEW_STATIC);
/* dedicated run VLC for zeros_left > 6 */
2012 run7_vlc.table = run7_vlc_table,
2013 run7_vlc.table_allocated = run7_vlc_table_size;
2014 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2015 &run_len [6][0], 1, 1,
2016 &run_bits[6][0], 1, 1,
2017 INIT_VLC_USE_NEW_STATIC);
/**
 * Free all per-context decoding tables allocated by alloc_tables() /
 * context_init().  av_freep() also NULLs each pointer, so a later
 * double call is safe.  The trailing loop releases the buffers that are
 * private to each slice-thread context (top_borders, obmc scratchpad).
 * NOTE(review): local declarations (i, hx) and closing braces are
 * elided in this listing.
 */
2021 static void free_tables(H264Context *h){
2024 av_freep(&h->intra4x4_pred_mode);
2025 av_freep(&h->chroma_pred_mode_table);
2026 av_freep(&h->cbp_table);
2027 av_freep(&h->mvd_table[0]);
2028 av_freep(&h->mvd_table[1]);
2029 av_freep(&h->direct_table);
2030 av_freep(&h->non_zero_count);
2031 av_freep(&h->slice_table_base);
/* slice_table is an interior pointer into slice_table_base; clear it
 * so it cannot dangle after the base is freed */
2032 h->slice_table= NULL;
2034 av_freep(&h->mb2b_xy);
2035 av_freep(&h->mb2b8_xy);
2037 for(i = 0; i < h->s.avctx->thread_count; i++) {
2038 hx = h->thread_context[i];
2040 av_freep(&hx->top_borders[1]);
2041 av_freep(&hx->top_borders[0]);
2042 av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization tables (one per qp 0..51) for intra (0)
 * and inter (1) from the PPS scaling matrices.  If both matrices are
 * identical, the inter table aliases the intra buffer instead of being
 * recomputed.  When a non-C IDCT is in use the coefficients are stored
 * transposed ((x>>3)|((x&7)<<3)) to match that IDCT's input layout.
 * NOTE(review): inner loop headers and the early "break"/"continue"
 * control lines are elided in this listing.
 */
2046 static void init_dequant8_coeff_table(H264Context *h){
2048 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2049 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2050 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2052 for(i=0; i<2; i++ ){
/* inter matrix equals intra matrix -> share the table */
2053 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2054 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2058 for(q=0; q<52; q++){
/* div6[q]: presumably q/6 lookup -> per-qp left shift; TODO confirm */
2059 int shift = div6[q];
2062 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2063 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2064 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables (one per qp 0..51) for the six
 * scaling-matrix slots (intra/inter x Y/Cb/Cr).  A slot whose PPS
 * scaling matrix matches an earlier slot j aliases that slot's buffer.
 * With a non-C IDCT, coefficients are stored transposed
 * ((x>>2)|((x<<2)&0xF)).  Note the "+ 2" on the shift relative to the
 * 8x8 variant.
 * NOTE(review): inner loop headers and control lines are elided in
 * this listing.
 */
2069 static void init_dequant4_coeff_table(H264Context *h){
2071 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2072 for(i=0; i<6; i++ ){
2073 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* duplicate matrix found earlier -> reuse its table */
2075 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2076 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2083 for(q=0; q<52; q++){
2084 int shift = div6[q] + 2;
2087 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2088 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2089 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables after the active PPS/SPS change: always
 * the 4x4 tables, the 8x8 tables only when the PPS enables 8x8
 * transforms.  With SPS lossless transform bypass, the qp==0 entries
 * are forced to the neutral value 1<<6 so dequantization is a no-op.
 * NOTE(review): the loop headers over i/x are elided in this listing.
 */
2094 static void init_dequant_tables(H264Context *h){
2096 init_dequant4_coeff_table(h);
2097 if(h->pps.transform_8x8_mode)
2098 init_dequant8_coeff_table(h);
2099 if(h->sps.transform_bypass){
2102 h->dequant4_coeff[i][0][x] = 1<<6;
2103 if(h->pps.transform_8x8_mode)
2106 h->dequant8_coeff[i][0][x] = 1<<6;
/**
 * Allocate the per-stream decoding tables; requires valid
 * width/height (mb_stride/mb_height) in the MpegEncContext.
 * big_mb_num includes one extra row of padding macroblocks.
 * CHECKED_ALLOCZ presumably jumps to a "fail" label on OOM — the error
 * path is elided in this listing (TODO confirm).
 */
2113 * needs width/height
2115 static int alloc_tables(H264Context *h){
2116 MpegEncContext * const s = &h->s;
2117 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2120 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2123 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2124 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2126 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2127 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2129 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* slice_table entries start at -1 ("no slice"); slice_table points
 * past the padding so neighbors of row 0 are addressable */
2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock index -> 4x4 (b) and 8x8 (b8) motion-grid index maps */
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
2138 const int mb_xy= x + y*s->mb_stride;
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is sized from linesize, so it is allocated lazily in
 * frame_start() instead of here */
2147 s->obmc_scratchpad = NULL;
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
/**
 * Mimic alloc_tables(), but for every context thread: the big tables
 * are shared (pointer copies) between slice-thread contexts, not
 * reallocated.  Per-thread buffers (obmc_scratchpad) stay private and
 * are reset to NULL for lazy allocation in frame_start().
 */
2159 * Mimic alloc_tables(), but for every context thread.
2161 static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
2173 dst->s.obmc_scratchpad = NULL;
/* prediction function pointers depend only on codec_id, so each
 * thread context initializes its own copy */
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
/**
 * Allocate buffers which are not shared amongst multiple threads:
 * one row of top-border samples (16 luma + 8 + 8 chroma per MB) for
 * each of the two fields.  On allocation failure the elided "fail"
 * path returns -1; free_tables() cleans up.
 */
2179 * Allocate buffers which are not shared amongst multiple threads.
2181 static int context_init(H264Context *h){
2182 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2183 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2187 return -1; // free_tables will clean up for us
/**
 * Initialization shared by the decoder (and, historically, encoder)
 * entry points: copy dimensions/codec id from the AVCodecContext, set
 * up intra prediction, and default both PPS scaling matrices to the
 * flat value 16 (no scaling) until a real PPS overrides them.
 */
2190 static av_cold void common_init(H264Context *h){
2191 MpegEncContext * const s = &h->s;
2193 s->width = s->avctx->width;
2194 s->height = s->avctx->height;
2195 s->codec_id= s->avctx->codec->id;
2197 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 marks "no PPS applied yet" so dequant tables get rebuilt */
2199 h->dequant_coeff_pps= -1;
2200 s->unrestricted_mv=1;
2201 s->decode=1; //FIXME
2203 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2204 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up MpegEncContext defaults, pick the
 * output pixel format (full-range YUVJ420P for SVQ3, YUV420P
 * otherwise), detect AVCC-style extradata (first byte == 1 -> elided
 * handling, presumably sets h->is_avc; TODO confirm), and initialize
 * POC bookkeeping for the first frame.
 * NOTE(review): several lines (common_init call, thread setup) are
 * elided in this listing.
 */
2207 static av_cold int decode_init(AVCodecContext *avctx){
2208 H264Context *h= avctx->priv_data;
2209 MpegEncContext * const s = &h->s;
2211 MPV_decode_defaults(s);
2216 s->out_format = FMT_H264;
2217 s->workaround_bugs= avctx->workaround_bugs;
2220 // s->decode_mb= ff_h263_decode_mb;
2221 s->quarter_sample = 1;
2224 if(avctx->codec_id == CODEC_ID_SVQ3)
2225 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2227 avctx->pix_fmt= PIX_FMT_YUV420P;
/* AVCC (MP4-style) extradata starts with configurationVersion == 1 */
2231 if(avctx->extradata_size > 0 && avctx->extradata &&
2232 *(char *)avctx->extradata == 1){
2239 h->thread_context[0] = h;
/* no frame output yet; 1<<16 forces the first poc computation to not
 * match any previous msb */
2240 h->outputed_poc = INT_MIN;
2241 h->prev_poc_msb= 1<<16;
/**
 * Per-frame setup: start the MPV frame and error resilience, clear the
 * key_frame flag (IDR marking is ORed in later), precompute the
 * per-block destination offsets for frame (indices 0..23) and MBAFF
 * field (24..47) layouts, lazily allocate the bipred scratchpad per
 * thread, and pre-mark the slice table when MBs may be read before
 * they are decoded (MBAFF or slice threading).
 * NOTE(review): error-return lines and some loop headers are elided.
 */
2245 static int frame_start(H264Context *h){
2246 MpegEncContext * const s = &h->s;
2249 if(MPV_frame_start(s, s->avctx) < 0)
2251 ff_er_frame_start(s);
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
2255 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2256 * See decode_nal_units().
2258 s->current_picture_ptr->key_frame= 0;
2260 assert(s->linesize && s->uvlinesize);
/* luma block offsets: frame layout (stride) and field layout (2x
 * stride), derived from the scan8 ordering */
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma block offsets, shared by Cb (16+) and Cr (20+) */
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279 /* some macroblocks will be accessed before they're available */
2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2283 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2285 // We mark the current picture as non-reference after allocating it, so
2286 // that if we break out due to an error it can be released automatically
2287 // in the next MPV_frame_start().
2288 // SVQ3 as well as most other codecs have only last/next/current and thus
2289 // get released even with set reference, besides SVQ3 and others do not
2290 // mark frames as reference later "naturally".
2291 if(s->codec_id != CODEC_ID_SVQ3)
2292 s->current_picture_ptr->reference= 0;
/* POCs are filled per slice later; INT_MAX marks "not yet set" */
2294 s->current_picture_ptr->field_poc[0]=
2295 s->current_picture_ptr->field_poc[1]= INT_MAX;
2296 assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom line and right-edge samples of the just-decoded
 * macroblock into top_borders[]/left_border[] so the next MB row can
 * use them for intra prediction and deblocking after the originals are
 * overwritten.  The MBAFF path saves both the line above the top MB of
 * a pair and the line above the bottom MB.  Chroma is skipped in
 * gray-only decoding.
 * NOTE(review): several control lines (offset/step defaults, the MBAFF
 * branch structure) are elided in this listing.
 */
2301 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2302 MpegEncContext * const s = &h->s;
2311 src_cb -= uvlinesize;
2312 src_cr -= uvlinesize;
2314 if(!simple && FRAME_MBAFF){
2316 offset = MB_MBAFF ? 1 : 17;
2317 uvoffset= MB_MBAFF ? 1 : 9;
/* copy the MB's last luma line (two 8-byte halves) into the top
 * border buffer for field 0 */
2319 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2320 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2321 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2322 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2323 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2328 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2329 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2331 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2337 top_idx = MB_MBAFF ? 0 : 1;
/* step 2 interleaves the two fields' left-border samples */
2339 step= MB_MBAFF ? 2 : 1;
2342 // There are two lines saved, the line above the top macroblock of a pair,
2343 // and the line above the bottom macroblock
2344 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2345 for(i=1; i<17 - skiplast; i++){
2346 h->left_border[offset+i*step]= src_y[15+i* linesize];
2349 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2350 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2352 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2353 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2354 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2355 for(i=1; i<9 - skiplast; i++){
2356 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2357 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2359 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2360 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchange (xchg!=0) or restore the saved top/left border samples with
 * the current macroblock's neighborhood, so intra prediction sees the
 * pre-deblocking samples.  deblock_left/top gate the swap depending on
 * slice boundaries (deblocking_filter==2 compares slice ids) or frame
 * edges.  The XCHG macro (definition elided) presumably swaps or copies
 * depending on its last argument — TODO confirm.
 * NOTE(review): the macro body, defaults for offset/step and several
 * braces are elided in this listing.
 */
2364 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2365 MpegEncContext * const s = &h->s;
2376 if(!simple && FRAME_MBAFF){
2378 offset = MB_MBAFF ? 1 : 17;
2379 uvoffset= MB_MBAFF ? 1 : 9;
2383 top_idx = MB_MBAFF ? 0 : 1;
2385 step= MB_MBAFF ? 2 : 1;
2388 if(h->deblocking_filter == 2) {
/* filter only within the slice: neighbor must share our slice id */
2390 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2391 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2393 deblock_left = (s->mb_x > 0);
2394 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back one row/column so [0] addresses the border samples */
2397 src_y -= linesize + 1;
2398 src_cb -= uvlinesize + 1;
2399 src_cr -= uvlinesize + 1;
2401 #define XCHG(a,b,t,xchg)\
2408 for(i = !deblock_top; i<16; i++){
2409 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2411 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2415 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2416 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2417 if(s->mb_x+1 < s->mb_width){
2418 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2422 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2424 for(i = !deblock_top; i<8; i++){
2425 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2426 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2428 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2429 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2432 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2433 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT-add for luma and chroma, then border backup and
 * in-loop deblocking.  "simple" is a compile-time constant (the
 * function is av_always_inline) that removes the MBAFF/PCM/gray/SVQ3
 * paths in hl_decode_mb_simple().
 * NOTE(review): numerous lines (local declarations, else/brace lines)
 * are elided in this listing; comments cover only the visible code.
 */
2438 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2439 MpegEncContext * const s = &h->s;
2440 const int mb_x= s->mb_x;
2441 const int mb_y= s->mb_y;
2442 const int mb_xy= h->mb_xy;
2443 const int mb_type= s->current_picture.mb_type[mb_xy];
2444 uint8_t *dest_y, *dest_cb, *dest_cr;
2445 int linesize, uvlinesize /*dct_offset*/;
2447 int *block_offset = &h->block_offset[0];
2448 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2449 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2450 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers for this MB in the current picture */
2452 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2453 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2454 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2456 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2457 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the strides and use the field block
 * offsets; odd rows start one line up within the field pair */
2459 if (!simple && MB_FIELD) {
2460 linesize = h->mb_linesize = s->linesize * 2;
2461 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2462 block_offset = &h->block_offset[24];
2463 if(mb_y&1){ //FIXME move out of this function?
2464 dest_y -= s->linesize*15;
2465 dest_cb-= s->uvlinesize*7;
2466 dest_cr-= s->uvlinesize*7;
/* rewrite ref_cache so field references encode the parity in the
 * index ((16+ref)^(mb_y&1)) for the MC code */
2470 for(list=0; list<h->list_count; list++){
2471 if(!USES_LIST(mb_type, list))
2473 if(IS_16X16(mb_type)){
2474 int8_t *ref = &h->ref_cache[list][scan8[0]];
2475 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2477 for(i=0; i<16; i+=4){
2478 int ref = h->ref_cache[list][scan8[i]];
2480 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2486 linesize = h->mb_linesize = s->linesize;
2487 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2488 // dct_offset = s->linesize * 16;
/* select the IDCT-add pair: bypass -> plain pixel add; otherwise
 * 8x8 or 4x4 transforms as signaled by the mb_type */
2491 if(transform_bypass){
2493 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2494 }else if(IS_8x8DCT(mb_type)){
2495 idct_dc_add = s->dsp.h264_idct8_dc_add;
2496 idct_add = s->dsp.h264_idct8_add;
2498 idct_dc_add = s->dsp.h264_idct_dc_add;
2499 idct_add = s->dsp.h264_idct_add;
/* I_PCM: raw samples were stored in h->mb; copy them out verbatim */
2502 if (!simple && IS_INTRA_PCM(mb_type)) {
2503 for (i=0; i<16; i++) {
2504 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2506 for (i=0; i<8; i++) {
2507 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2508 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
/* intra MB: borders are swapped in so prediction reads unfiltered
 * neighbor samples, then swapped back before deblocking */
2511 if(IS_INTRA(mb_type)){
2512 if(h->deblocking_filter)
2513 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2515 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2516 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2517 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2520 if(IS_INTRA4x4(mb_type)){
2521 if(simple || !s->encoding){
2522 if(IS_8x8DCT(mb_type)){
2523 for(i=0; i<16; i+=4){
2524 uint8_t * const ptr= dest_y + block_offset[i];
2525 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2526 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2527 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2528 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set: cheaper DC-only add */
2530 if(nnz == 1 && h->mb[i*16])
2531 idct_dc_add(ptr, h->mb + i*16, linesize);
2533 idct_add(ptr, h->mb + i*16, linesize);
2537 for(i=0; i<16; i++){
2538 uint8_t * const ptr= dest_y + block_offset[i];
2540 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* diagonal modes need top-right samples; if unavailable,
 * replicate the last available top sample across tr */
2543 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2544 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2545 assert(mb_y || linesize <= block_offset[i]);
2546 if(!topright_avail){
2547 tr= ptr[3 - linesize]*0x01010101;
2548 topright= (uint8_t*) &tr;
2550 topright= ptr + 4 - linesize;
2554 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2555 nnz = h->non_zero_count_cache[ scan8[i] ];
2558 if(nnz == 1 && h->mb[i*16])
2559 idct_dc_add(ptr, h->mb + i*16, linesize);
2561 idct_add(ptr, h->mb + i*16, linesize);
2563 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra16x16: predict whole luma plane, then dequant+IDCT the DC
 * plane (Hadamard) separately */
2568 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2570 if(!transform_bypass)
2571 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2573 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2575 if(h->deblocking_filter)
2576 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter MB: motion compensation (elided else branch) */
2578 hl_motion(h, dest_y, dest_cb, dest_cr,
2579 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2580 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2581 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual add for non-intra4x4 MBs */
2585 if(!IS_INTRA4x4(mb_type)){
2587 if(IS_INTRA16x16(mb_type)){
2588 for(i=0; i<16; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ])
2590 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2591 else if(h->mb[i*16])
2592 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2595 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2596 for(i=0; i<16; i+=di){
2597 int nnz = h->non_zero_count_cache[ scan8[i] ];
2599 if(nnz==1 && h->mb[i*16])
2600 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2602 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2607 for(i=0; i<16; i++){
2608 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2609 uint8_t * const ptr= dest_y + block_offset[i];
2610 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (skipped in gray-only decoding) */
2616 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2617 uint8_t *dest[2] = {dest_cb, dest_cr};
2618 if(transform_bypass){
2619 idct_add = idct_dc_add = s->dsp.add_pixels4;
2621 idct_add = s->dsp.h264_idct_add;
2622 idct_dc_add = s->dsp.h264_idct_dc_add;
/* chroma DC uses its own 2x2 transform and per-plane chroma qp */
2623 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2624 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2627 for(i=16; i<16+8; i++){
2628 if(h->non_zero_count_cache[ scan8[i] ])
2629 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2630 else if(h->mb[i*16])
2631 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2634 for(i=16; i<16+8; i++){
2635 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2636 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2637 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* in-loop deblocking: save borders first, refill neighbor caches,
 * recompute chroma qps from the stored qscale, then filter (full
 * filter for MBAFF, fast path otherwise) */
2643 if(h->deblocking_filter) {
2644 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2645 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2646 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2647 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2648 if (!simple && FRAME_MBAFF) {
2649 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2651 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/**
 * Process a macroblock; this case avoids checks for expensive uncommon
 * cases (MBAFF, field MBs, PCM, gray, SVQ3) — simple==1 makes the
 * always-inlined worker drop those paths at compile time.
 */
2657 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2659 static void hl_decode_mb_simple(H264Context *h){
2660 hl_decode_mb_internal(h, 1);
/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 * av_noinline keeps the rarely-taken complex path out of the hot
 * simple path's code.
 */
2664 * Process a macroblock; this handles edge cases, such as interlacing.
2666 static void av_noinline hl_decode_mb_complex(H264Context *h){
2667 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex variant
 * depending on whether any "expensive" feature is active for this MB
 * (MBAFF/field coding, PCM, non-H264 codec, gray decode, encoding,
 * or a size-optimized build).
 */
2670 static void hl_decode_mb(H264Context *h){
2671 MpegEncContext * const s = &h->s;
2672 const int mb_xy= h->mb_xy;
2673 const int mb_type= s->current_picture.mb_type[mb_xy];
2674 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2675 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
/* encoder-only mode with decoding disabled: nothing to reconstruct */
2677 if(ENABLE_H264_ENCODER && !s->decode)
2681 hl_decode_mb_complex(h);
2682 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in place into a single-field view: offset the
 * data pointers by one line for the bottom field, double the linesizes
 * so rows step over the other field, restrict the reference marking to
 * the given parity, and pick the matching field POC.
 */
2685 static void pic_as_field(Picture *pic, const int parity){
2687 for (i = 0; i < 4; ++i) {
2688 if (parity == PICT_BOTTOM_FIELD)
2689 pic->data[i] += pic->linesize[i];
2690 pic->reference = parity;
2691 pic->linesize[i] *= 2;
2693 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest as the requested field (or whole frame) if src is
 * referenced with the matching parity.  id_add is added to the copied
 * pic_id.  Returns non-zero when the copy happened.
 * NOTE(review): the "*dest = *src" copy and return lines are elided in
 * this listing.
 */
2696 static int split_field_copy(Picture *dest, Picture *src,
2697 int parity, int id_add){
2698 int match = !!(src->reference & parity);
2702 if(parity != PICT_FRAME){
2703 pic_as_field(dest, parity);
2705 dest->pic_id += id_add;
/**
 * Build a default reference list from the candidate array "in",
 * alternating between pictures referenced with parity "sel" and with
 * the opposite parity (sel^3), per the H.264 field-list construction
 * rule.  pic_id becomes the long-term index or the frame_num.  Returns
 * the number of entries written (return line elided).
 * NOTE(review): i[]/index declarations and some control lines are
 * elided in this listing.
 */
2712 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2716 while(i[0]<len || i[1]<len){
/* advance each cursor to the next candidate of its parity */
2717 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2719 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2722 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2726 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Selection-sort the pictures in src whose POC is beyond "limit" into
 * "sorted", ascending when dir==0, descending when dir==1 (used to
 * split B-frame reference candidates into the before/after-current-POC
 * halves).  Returns the number of entries emitted (return elided).
 */
2734 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2739 best_poc= dir ? INT_MIN : INT_MAX;
2741 for(i=0; i<len; i++){
2742 const int poc= src[i]->poc;
2743 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2745 sorted[out_i]= src[i];
/* sentinel unchanged -> no candidate found this pass, stop */
2748 if(best_poc == (dir ? INT_MIN : INT_MAX))
2750 limit= sorted[out_i++]->poc - dir;
/**
 * Fill h->default_ref_list per H.264 8.2.4.2: for B slices, short-term
 * refs sorted by POC distance around the current POC (opposite order
 * for list 1) followed by long-term refs; for P slices, short-term by
 * frame_num order then long-term.  Unused tail entries are zeroed.
 */
2756 * fills the default_ref_list.
2758 static int fill_default_ref_list(H264Context *h){
2759 MpegEncContext * const s = &h->s;
2762 if(h->slice_type_nos==FF_B_TYPE){
2763 Picture *sorted[32];
/* field decoding compares against the current field's POC */
2768 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2770 cur_poc= s->current_picture_ptr->poc;
2772 for(list= 0; list<2; list++){
2773 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2774 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2776 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2777 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2780 if(len < h->ref_count[list])
2781 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if list1 equals list0 and has >1 entry, swap its first two.
 * NOTE(review): this loop tests data[0] before checking i<lens[0];
 * later FFmpeg reorders the condition — verify upstream fix */
2785 if(lens[0] == lens[1] && lens[1] > 1){
2786 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2788 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2791 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2792 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2794 if(len < h->ref_count[0])
2795 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* debug dump of the constructed lists */
2798 for (i=0; i<h->ref_count[0]; i++) {
2799 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2801 if(h->slice_type_nos==FF_B_TYPE){
2802 for (i=0; i<h->ref_count[1]; i++) {
2803 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2810 static void print_short_term(H264Context *h);
2811 static void print_long_term(H264Context *h);
/**
 * Extract structure information about the picture described by pic_num
 * in the current decoding context (frame or field).  Note that pic_num
 * is the picture number without wrapping (so 0 <= pic_num < max_pic_num).
 * In field decoding, an even/odd pic_num distinction selects the same
 * or opposite field (the parity test line is elided in this listing).
 * @param pic_num picture number for which to extract structure information
 * @param structure one of PICT_XXX describing the structure of the picture
 * @return frame number (short term) or long term index of the picture
 *         described by pic_num
 */
2814 * Extract structure information about the picture described by pic_num in
2815 * the current decoding context (frame or field). Note that pic_num is
2816 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2817 * @param pic_num picture number for which to extract structure information
2818 * @param structure one of PICT_XXX describing structure of picture
2820 * @return frame number (short term) or long term index of picture
2821 * described by pic_num
2823 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2824 MpegEncContext * const s = &h->s;
2826 *structure = s->picture_structure;
2829 /* opposite field */
2830 *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering (H.264 7.3.3.1 / 8.2.4.3): start each
 * list from the default list, then, if the reordering flag is set,
 * apply each reordering_of_pic_nums_idc operation by locating the
 * referenced short-term (by frame_num) or long-term (by index) picture
 * and inserting it at the current position, shifting the rest down.
 * Missing references are zeroed/substituted with an error message.
 * Returns 0 on success, -1 on bitstream errors (return lines elided).
 */
2837 static int decode_ref_pic_list_reordering(H264Context *h){
2838 MpegEncContext * const s = &h->s;
2839 int list, index, pic_structure;
2841 print_short_term(h);
2844 for(list=0; list<h->list_count; list++){
2845 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2847 if(get_bits1(&s->gb)){
/* "pred" is picNumLXPred, the running predicted picture number */
2848 int pred= h->curr_pic_num;
2850 for(index=0; ; index++){
2851 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2852 unsigned int pic_id;
2854 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
2856 if(reordering_of_pic_nums_idc==3)
2859 if(index >= h->ref_count[list]){
2860 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2864 if(reordering_of_pic_nums_idc<3){
2865 if(reordering_of_pic_nums_idc<2){
/* short-term: idc 0 subtracts, idc 1 adds abs_diff_pic_num */
2866 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2869 if(abs_diff_pic_num > h->max_pic_num){
2870 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2874 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2875 else pred+= abs_diff_pic_num;
/* modular wrap; max_pic_num is a power of two */
2876 pred &= h->max_pic_num - 1;
2878 frame_num = pic_num_extract(h, pred, &pic_structure);
2880 for(i= h->short_ref_count-1; i>=0; i--){
2881 ref = h->short_ref[i];
2882 assert(ref->reference);
2883 assert(!ref->long_ref);
2885 ref->frame_num == frame_num &&
2886 (ref->reference & pic_structure)
/* long-term: pic_id is the long_term_pic_idx */
2894 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2896 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2899 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2902 ref = h->long_ref[long_idx];
2903 assert(!(ref && !ref->reference));
2904 if(ref && (ref->reference & pic_structure)){
2905 ref->pic_id= pic_id;
2906 assert(ref->long_ref);
2914 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2915 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* insert ref at "index": find its old slot (or list end), shift
 * the intermediate entries down by one, then write it in */
2917 for(i=index; i+1<h->ref_count[list]; i++){
2918 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2921 for(; i > index; i--){
2922 h->ref_list[list][i]= h->ref_list[list][i-1];
2924 h->ref_list[list][index]= *ref;
2926 pic_as_field(&h->ref_list[list][index], pic_structure);
2930 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* final sanity pass: any empty slot gets the current picture as a
 * placeholder (acknowledged FIXME) */
2936 for(list=0; list<h->list_count; list++){
2937 for(index= 0; index < h->ref_count[list]; index++){
2938 if(!h->ref_list[list][index].data[0]){
2939 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2940 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF decoding, derive the per-field reference list entries:
 * each frame reference i is split into a top field at ref_list[16+2*i]
 * and a bottom field at [16+2*i+1], and the explicit/implicit weight
 * tables are duplicated accordingly.
 * NOTE(review): the field[0]=*frame copy and inner j-loop headers are
 * elided in this listing.
 */
2948 static void fill_mbaff_ref_list(H264Context *h){
2950 for(list=0; list<2; list++){ //FIXME try list_count
2951 for(i=0; i<h->ref_count[list]; i++){
2952 Picture *frame = &h->ref_list[list][i];
2953 Picture *field = &h->ref_list[list][16+2*i];
2956 field[0].linesize[j] <<= 1;
2957 field[0].reference = PICT_TOP_FIELD;
2958 field[0].poc= field[0].field_poc[0];
/* bottom field: same as top but offset one line and bottom POC */
2959 field[1] = field[0];
2961 field[1].data[j] += frame->linesize[j];
2962 field[1].reference = PICT_BOTTOM_FIELD;
2963 field[1].poc= field[1].field_poc[1];
/* both fields inherit the frame's explicit weights/offsets */
2965 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2966 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2968 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2969 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2973 for(j=0; j<h->ref_count[1]; j++){
2974 for(i=0; i<h->ref_count[0]; i++)
2975 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2976 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2977 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse pred_weight_table (H.264 7.3.3.2): read the luma/chroma
 * log2 weight denominators, then per-list, per-reference explicit
 * weights and offsets.  Absent flags fall back to the neutral default
 * weight (1 << denom) and zero offset.  use_weight/use_weight_chroma
 * are set as soon as any non-default value is seen.
 * NOTE(review): use_weight init, inner j-loop headers, else branches
 * and the return are elided in this listing.
 */
2981 static int pred_weight_table(H264Context *h){
2982 MpegEncContext * const s = &h->s;
2984 int luma_def, chroma_def;
2987 h->use_weight_chroma= 0;
2988 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2989 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2990 luma_def = 1<<h->luma_log2_weight_denom;
2991 chroma_def = 1<<h->chroma_log2_weight_denom;
2993 for(list=0; list<2; list++){
2994 for(i=0; i<h->ref_count[list]; i++){
2995 int luma_weight_flag, chroma_weight_flag;
2997 luma_weight_flag= get_bits1(&s->gb);
2998 if(luma_weight_flag){
2999 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3000 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3001 if( h->luma_weight[list][i] != luma_def
3002 || h->luma_offset[list][i] != 0)
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
3017 || h->chroma_offset[list][i][j] != 0)
3018 h->use_weight_chroma= 1;
3023 h->chroma_weight[list][i][j]= chroma_def;
3024 h->chroma_offset[list][i][j]= 0;
/* P slices only carry list 0 weights */
3029 if(h->slice_type_nos != FF_B_TYPE) break;
3031 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit weight table for B slices (weighted_bipred_idc == 2):
 * weights are derived from POC distances between the current picture and each
 * (list0, list1) reference pair, clipped per the spec.
 * NOTE(review): excerpt appears decimated — some lines (declarations, else
 * branches, closing braces) are missing.
 */
3035 static void implicit_weight_table(H264Context *h){
3036 MpegEncContext * const s = &h->s;
3038 int cur_poc = s->current_picture_ptr->poc;
/* Special case: single symmetric reference pair — implicit weighting
 * degenerates to the unweighted average, so disable it entirely. */
3040 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3041 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3043 h->use_weight_chroma= 0;
/* use_weight==2 marks the "implicit" mode for the motion compensation code. */
3048 h->use_weight_chroma= 2;
3049 h->luma_log2_weight_denom= 5;
3050 h->chroma_log2_weight_denom= 5;
3052 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3053 int poc0 = h->ref_list[0][ref0].poc;
3054 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3055 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the temporal scaling of the spec
 * (cf. 8.4.2.3.2); clipping bounds match the standard. */
3056 int td = av_clip(poc1 - poc0, -128, 127);
3058 int tb = av_clip(cur_poc - poc0, -128, 127);
3059 int tx = (16384 + (FFABS(td) >> 1)) / td;
3060 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range scale factors fall back to the neutral weight 32. */
3061 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3062 h->implicit_weight[ref0][ref1] = 32;
3064 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3066 h->implicit_weight[ref0][ref1] = 32;
3072 * Mark a picture as no longer needed for reference. The refmask
3073 * argument allows unreferencing of individual fields or the whole frame.
3074 * If the picture becomes entirely unreferenced, but is being held for
3075 * display purposes, it is marked as such.
3076 * @param refmask mask of fields to unreference; the mask is bitwise
3077 * anded with the reference marking of pic
3078 * @return non-zero if pic becomes entirely unreferenced (except possibly
3079 * for display purposes), zero if one of the fields remains in
/*
 * Clears reference-marking bits of pic according to refmask; if the picture is
 * still queued in delayed_pic it is retained with DELAYED_PIC_REF so it can be
 * output later. NOTE(review): excerpt is decimated — the declarations and the
 * return paths of this function are missing here.
 */
3082 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* reference &= refmask: non-zero result means a field is still referenced. */
3084 if (pic->reference &= refmask) {
3087 for(i = 0; h->delayed_pic[i]; i++)
3088 if(pic == h->delayed_pic[i]){
/* Keep the picture alive purely for display (delayed output). */
3089 pic->reference=DELAYED_PIC_REF;
3097 * instantaneous decoder refresh.
/*
 * Handles an IDR (instantaneous decoder refresh): drops all long- and
 * short-term references and resets frame-number tracking state.
 */
3099 static void idr(H264Context *h){
/* Release all 16 possible long-term reference slots. */
3102 for(i=0; i<16; i++){
3103 remove_long(h, i, 0);
3105 assert(h->long_ref_count==0);
/* Fully unreference every short-term picture and empty the list. */
3107 for(i=0; i<h->short_ref_count; i++){
3108 unreference_pic(h, h->short_ref[i], 0);
3109 h->short_ref[i]= NULL;
3111 h->short_ref_count=0;
3112 h->prev_frame_num= 0;
3113 h->prev_frame_num_offset= 0;
3118 /* forget old pics after a seek */
/* forget old pics after a seek */
/*
 * Codec flush callback: clears the delayed-output queue, unreferences the
 * current picture and resets field state, then flushes the MPEG core.
 */
3119 static void flush_dpb(AVCodecContext *avctx){
3120 H264Context *h= avctx->priv_data;
3122 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3123 if(h->delayed_pic[i])
3124 h->delayed_pic[i]->reference= 0;
3125 h->delayed_pic[i]= NULL;
/* INT_MIN: no picture has been output yet after the flush. */
3127 h->outputed_poc= INT_MIN;
3129 if(h->s.current_picture_ptr)
3130 h->s.current_picture_ptr->reference= 0;
3131 h->s.first_field= 0;
3132 ff_mpeg_flush(avctx);
3136 * Find a Picture in the short term reference list by frame number.
3137 * @param frame_num frame number to search for
3138 * @param idx the index into h->short_ref where returned picture is found
3139 * undefined if no picture found.
3140 * @return pointer to the found picture, or NULL if no pic with the provided
3141 * frame number is found
/*
 * Linear search of the short-term reference list for frame_num; on a hit the
 * list index is reported through *idx. NOTE(review): excerpt is decimated —
 * the return statements are missing from this view.
 */
3143 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3144 MpegEncContext * const s = &h->s;
3147 for(i=0; i<h->short_ref_count; i++){
3148 Picture *pic= h->short_ref[i];
/* Optional MMCO trace output for debugging reference management. */
3149 if(s->avctx->debug&FF_DEBUG_MMCO)
3150 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3151 if(pic->frame_num == frame_num) {
3160 * Remove a picture from the short term reference list by its index in
3161 * that list. This does no checking on the provided index; it is assumed
3162 * to be valid. Other list entries are shifted down.
3163 * @param i index into h->short_ref of picture to remove.
3165 static void remove_short_at_index(H264Context *h, int i){
3166 assert(i >= 0 && i < h->short_ref_count);
3167 h->short_ref[i]= NULL;
3168 if (--h->short_ref_count)
3169 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3174 * @return the removed picture or NULL if an error occurs
/*
 * Looks up a short-term reference by frame_num and unreferences the fields
 * selected by ref_mask; if the picture becomes fully unreferenced it is also
 * removed from the short-term list. NOTE(review): excerpt is decimated — the
 * final return is missing from this view.
 */
3176 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3177 MpegEncContext * const s = &h->s;
3181 if(s->avctx->debug&FF_DEBUG_MMCO)
3182 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3184 pic = find_short(h, frame_num, &i);
/* Only drop the list entry if no field of the picture remains referenced. */
3186 if(unreference_pic(h, pic, ref_mask))
3187 remove_short_at_index(h, i);
3194 * Remove a picture from the long term reference list by its index in
3196 * @return the removed picture or NULL if an error occurs
/*
 * Unreferences the fields (per ref_mask) of the long-term reference at slot i;
 * if it becomes fully unreferenced, the slot is cleared and the long-term
 * count decremented. NOTE(review): excerpt is decimated — the final return is
 * missing from this view.
 */
3198 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3201 pic= h->long_ref[i];
3203 if(unreference_pic(h, pic, ref_mask)){
3204 assert(h->long_ref[i]->long_ref == 1);
3205 h->long_ref[i]->long_ref= 0;
3206 h->long_ref[i]= NULL;
3207 h->long_ref_count--;
3215 * print short term list
/* Debug helper: dumps the short-term reference list when MMCO tracing is on. */
3217 static void print_short_term(H264Context *h) {
3219 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3220 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3221 for(i=0; i<h->short_ref_count; i++){
3222 Picture *pic= h->short_ref[i];
3223 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3229 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when MMCO tracing is on. */
3231 static void print_long_term(H264Context *h) {
3233 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3234 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3235 for(i = 0; i < 16; i++){
3236 Picture *pic= h->long_ref[i];
3238 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3245 * Executes the reference picture marking (memory management control operations).
/*
 * Applies the decoded memory management control operations (MMCOs, spec
 * 8.2.5) to the short-/long-term reference lists, then handles the default
 * sliding-window marking and overflow recovery for the current picture.
 * NOTE(review): excerpt is decimated — several case labels, else branches and
 * closing braces are missing from this view.
 */
3247 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3248 MpegEncContext * const s = &h->s;
3250 int current_ref_assigned=0;
3253 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3256 for(i=0; i<mmco_count; i++){
3257 int structure, frame_num;
3258 if(s->avctx->debug&FF_DEBUG_MMCO)
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* SHORT2UNUSED/SHORT2LONG both need the target short-term pic resolved first. */
3261 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3262 || mmco[i].opcode == MMCO_SHORT2LONG){
3263 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3264 pic = find_short(h, frame_num, &j);
3266 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3267 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3268 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3273 switch(mmco[i].opcode){
3274 case MMCO_SHORT2UNUSED:
3275 if(s->avctx->debug&FF_DEBUG_MMCO)
3276 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME keeps the *other* field referenced when only
 * one field is being unmarked. */
3277 remove_short(h, frame_num, structure ^ PICT_FRAME);
3279 case MMCO_SHORT2LONG:
/* Evict any different picture currently occupying the long-term slot. */
3280 if (h->long_ref[mmco[i].long_arg] != pic)
3281 remove_long(h, mmco[i].long_arg, 0);
3283 remove_short_at_index(h, j);
3284 h->long_ref[ mmco[i].long_arg ]= pic;
3285 if (h->long_ref[ mmco[i].long_arg ]){
3286 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3287 h->long_ref_count++;
3290 case MMCO_LONG2UNUSED:
3291 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3292 pic = h->long_ref[j];
3294 remove_long(h, j, structure ^ PICT_FRAME);
3295 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3296 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3299 // Comment below left from previous code as it is an interesting note.
3300 /* First field in pair is in short term list or
3301 * at a different long term index.
3302 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3303 * Report the problem and keep the pair where it is,
3304 * and mark this field valid.
/* MMCO_LONG: assign the current picture to the requested long-term slot. */
3307 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3308 remove_long(h, mmco[i].long_arg, 0);
3310 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3311 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3312 h->long_ref_count++;
3315 s->current_picture_ptr->reference |= s->picture_structure;
3316 current_ref_assigned=1;
3318 case MMCO_SET_MAX_LONG:
3319 assert(mmco[i].long_arg <= 16);
3320 // just remove the long term which index is greater than new max
3321 for(j = mmco[i].long_arg; j<16; j++){
3322 remove_long(h, j, 0);
/* MMCO_RESET path: clear every reference and reset POC/frame_num state. */
3326 while(h->short_ref_count){
3327 remove_short(h, h->short_ref[0]->frame_num, 0);
3329 for(j = 0; j < 16; j++) {
3330 remove_long(h, j, 0);
3332 s->current_picture_ptr->poc=
3333 s->current_picture_ptr->field_poc[0]=
3334 s->current_picture_ptr->field_poc[1]=
3338 s->current_picture_ptr->frame_num= 0;
3344 if (!current_ref_assigned) {
3345 /* Second field of complementary field pair; the first field of
3346 * which is already referenced. If short referenced, it
3347 * should be first entry in short_ref. If not, it must exist
3348 * in long_ref; trying to put it on the short list here is an
3349 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3351 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3352 /* Just mark the second field valid */
3353 s->current_picture_ptr->reference = PICT_FRAME;
3354 } else if (s->current_picture_ptr->long_ref) {
3355 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3356 "assignment for second field "
3357 "in complementary field pair "
3358 "(first field is long term)\n");
/* Default sliding-window marking: push the current picture onto the
 * head of the short-term list. */
3360 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3362 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3365 if(h->short_ref_count)
3366 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3368 h->short_ref[0]= s->current_picture_ptr;
3369 h->short_ref_count++;
3370 s->current_picture_ptr->reference |= s->picture_structure;
3374 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3376 /* We have too many reference frames, probably due to corrupted
3377 * stream. Need to discard one frame. Prevents overrun of the
3378 * short_ref and long_ref buffers.
3380 av_log(h->s.avctx, AV_LOG_ERROR,
3381 "number of reference frames exceeds max (probably "
3382 "corrupt input), discarding one\n");
/* Prefer discarding a long-term ref only when no short-term refs exist. */
3384 if (h->long_ref_count && !h->short_ref_count) {
3385 for (i = 0; i < 16; ++i)
3390 remove_long(h, i, 0);
3392 pic = h->short_ref[h->short_ref_count - 1];
3393 remove_short(h, pic->frame_num, 0);
3397 print_short_term(h);
/*
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[]: IDR
 * handling, the adaptive MMCO list, or the implicit sliding-window operation.
 * NOTE(review): excerpt is decimated — some branches and the final return are
 * missing from this view.
 */
3402 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3403 MpegEncContext * const s = &h->s;
3407 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* broken_link = no_output_of_prior_pics semantics; get_bits1()-1 maps 1->0, 0->-1. */
3408 s->broken_link= get_bits1(gb) -1;
3410 h->mmco[0].opcode= MMCO_LONG;
3411 h->mmco[0].long_arg= 0;
3415 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3416 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3417 MMCOOpcode opcode= get_ue_golomb(gb);
3419 h->mmco[i].opcode= opcode;
3420 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 → absolute pic num, modulo max_pic_num. */
3421 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3422 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3423 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3427 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3428 unsigned int long_arg= get_ue_golomb(gb);
/* Field pictures allow long-term indices up to 31 for LONG2UNUSED only. */
3429 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3430 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3433 h->mmco[i].long_arg= long_arg;
3436 if(opcode > (unsigned)MMCO_LONG){
3437 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3440 if(opcode == MMCO_END)
3445 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* Sliding window: when the DPB is full, synthesize a SHORT2UNUSED op for
 * the oldest short-term reference. */
3447 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3448 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3449 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3450 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3452 if (FIELD_PICTURE) {
/* In field mode each frame occupies two pic nums; drop both fields. */
3453 h->mmco[0].short_pic_num *= 2;
3454 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3455 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/*
 * Computes the picture order count (POC) of the current picture for all three
 * poc_type modes of the SPS (spec 8.2.1) and stores the per-field and frame
 * POCs on the current Picture. NOTE(review): excerpt is decimated — several
 * lines (field_poc[0] assignment for type 0, parts of type 2, return) are
 * missing from this view.
 */
3465 static int init_poc(H264Context *h){
3466 MpegEncContext * const s = &h->s;
3467 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3469 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture → advance the offset. */
3471 h->frame_num_offset= h->prev_frame_num_offset;
3472 if(h->frame_num < h->prev_frame_num)
3473 h->frame_num_offset += max_frame_num;
3475 if(h->sps.poc_type==0){
3476 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* Detect lsb wrap-around in either direction to derive poc_msb. */
3478 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3479 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3480 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3481 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3483 h->poc_msb = h->prev_poc_msb;
3484 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3486 field_poc[1] = h->poc_msb + h->poc_lsb;
3487 if(s->picture_structure == PICT_FRAME)
3488 field_poc[1] += h->delta_poc_bottom;
3489 }else if(h->sps.poc_type==1){
3490 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3493 if(h->sps.poc_cycle_length != 0)
3494 abs_frame_num = h->frame_num_offset + h->frame_num;
/* Non-reference pictures step back one position in the POC cycle. */
3498 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3501 expected_delta_per_poc_cycle = 0;
3502 for(i=0; i < h->sps.poc_cycle_length; i++)
3503 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3505 if(abs_frame_num > 0){
3506 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3507 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3509 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3510 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3511 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3515 if(h->nal_ref_idc == 0)
3516 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3518 field_poc[0] = expectedpoc + h->delta_poc[0];
3519 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3521 if(s->picture_structure == PICT_FRAME)
3522 field_poc[1] += h->delta_poc[1];
/* poc_type 2: POC is derived directly from frame_num. */
3524 int poc= 2*(h->frame_num_offset + h->frame_num);
/* Store only the field POC(s) actually decoded by this picture structure. */
3533 if(s->picture_structure != PICT_BOTTOM_FIELD)
3534 s->current_picture_ptr->field_poc[0]= field_poc[0];
3535 if(s->picture_structure != PICT_TOP_FIELD)
3536 s->current_picture_ptr->field_poc[1]= field_poc[1];
3537 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3544 * initialize scan tables
/**
 * initialize scan tables
 * Copies the canonical zigzag/field scan orders into the context, permuting
 * them when a non-reference (optimized) IDCT with a different coefficient
 * layout is in use; also sets up the *_q0 aliases used when transform bypass
 * (lossless) is enabled.
 */
3546 static void init_scan_tables(H264Context *h){
3547 MpegEncContext * const s = &h->s;
/* Reference C IDCT → coefficients are in natural order, copy as-is. */
3549 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3550 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3551 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3553 for(i=0; i<16; i++){
/* Permutation matching the optimized 4x4 IDCT's coefficient layout. */
3554 #define T(x) (x>>2) | ((x<<2) & 0xF)
3555 h->zigzag_scan[i] = T(zigzag_scan[i]);
3556 h-> field_scan[i] = T( field_scan[i]);
3560 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3561 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3562 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3563 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3564 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3566 for(i=0; i<64; i++){
/* Equivalent permutation for the 8x8 transform. */
3567 #define T(x) (x>>3) | ((x&7)<<3)
3568 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3569 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3570 h->field_scan8x8[i] = T(field_scan8x8[i]);
3571 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* Transform bypass blocks always use the unpermuted scan orders. */
3575 if(h->sps.transform_bypass){ //FIXME same ugly
3576 h->zigzag_scan_q0 = zigzag_scan;
3577 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3578 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3579 h->field_scan_q0 = field_scan;
3580 h->field_scan8x8_q0 = field_scan8x8;
3581 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3583 h->zigzag_scan_q0 = h->zigzag_scan;
3584 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3585 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3586 h->field_scan_q0 = h->field_scan;
3587 h->field_scan8x8_q0 = h->field_scan8x8;
3588 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3593 * Replicates H264 "master" context to thread contexts.
/**
 * Replicates H264 "master" context to thread contexts.
 * Shallow-copies per-frame state (current picture, line sizes, POC/frame_num
 * tracking, reference lists, dequant tables) so slice threads can decode
 * independently.
 */
3595 static void clone_slice(H264Context *dst, H264Context *src)
3597 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3598 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3599 dst->s.current_picture = src->s.current_picture;
3600 dst->s.linesize = src->s.linesize;
3601 dst->s.uvlinesize = src->s.uvlinesize;
3602 dst->s.first_field = src->s.first_field;
3604 dst->prev_poc_msb = src->prev_poc_msb;
3605 dst->prev_poc_lsb = src->prev_poc_lsb;
3606 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3607 dst->prev_frame_num = src->prev_frame_num;
3608 dst->short_ref_count = src->short_ref_count;
/* Reference list arrays are copied wholesale; entries are pointers into the
 * shared picture pool, so this is intentionally shallow. */
3610 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3611 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3612 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3613 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3615 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3616 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3620 * decodes a slice header.
3621 * This will also call MPV_common_init() and frame_start() as needed.
3623 * @param h h264context
3624 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3626 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/*
 * Parses a slice header and performs the per-slice/per-frame setup that
 * depends on it: PPS/SPS activation, context (re)initialization, field-pair
 * matching, POC computation, reference list construction, weighted
 * prediction, ref pic marking and deblocking configuration.
 * NOTE(review): excerpt is decimated — many original lines (error returns,
 * else branches, closing braces) are missing throughout this function.
 */
3628 static int decode_slice_header(H264Context *h, H264Context *h0){
3629 MpegEncContext * const s = &h->s;
3630 MpegEncContext * const s0 = &h0->s;
3631 unsigned int first_mb_in_slice;
3632 unsigned int pps_id;
3633 int num_ref_idx_active_override_flag;
3634 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3635 unsigned int slice_type, tmp, i, j;
3636 int default_ref_list_done = 0;
3637 int last_pic_structure;
3639 s->dropable= h->nal_ref_idc == 0;
/* FAST mode: use cheaper 2-tap qpel filters for non-reference frames. */
3641 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3642 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3643 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3645 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3646 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3649 first_mb_in_slice= get_ue_golomb(&s->gb);
/* CHUNKS mode: a slice starting at MB 0 begins a new picture. */
3651 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3652 h0->current_slice = 0;
3653 if (!s0->first_field)
3654 s->current_picture_ptr= NULL;
3657 slice_type= get_ue_golomb(&s->gb);
3659 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed": all slices of the picture share the type. */
3664 h->slice_type_fixed=1;
3666 h->slice_type_fixed=0;
3668 slice_type= slice_type_map[ slice_type ];
/* Skip rebuilding the default ref list when it cannot have changed. */
3669 if (slice_type == FF_I_TYPE
3670 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3671 default_ref_list_done = 1;
3673 h->slice_type= slice_type;
3674 h->slice_type_nos= slice_type & 3;
3676 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3677 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3678 av_log(h->s.avctx, AV_LOG_ERROR,
3679 "B picture before any references, skipping\n");
/* --- PPS/SPS activation --- */
3683 pps_id= get_ue_golomb(&s->gb);
3684 if(pps_id>=MAX_PPS_COUNT){
3685 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3688 if(!h0->pps_buffers[pps_id]) {
3689 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3692 h->pps= *h0->pps_buffers[pps_id];
3694 if(!h0->sps_buffers[h->pps.sps_id]) {
3695 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3698 h->sps = *h0->sps_buffers[h->pps.sps_id];
3700 if(h == h0 && h->dequant_coeff_pps != pps_id){
3701 h->dequant_coeff_pps = pps_id;
3702 init_dequant_tables(h);
/* --- derive picture geometry from the SPS --- */
3705 s->mb_width= h->sps.mb_width;
3706 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3708 h->b_stride= s->mb_width*4;
3709 h->b8_stride= s->mb_width*2;
3711 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3712 if(h->sps.frame_mbs_only_flag)
3713 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3715 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3717 if (s->context_initialized
3718 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3720 return -1; // width / height changed during parallelized decoding
/* --- (re)initialize MPEG core and per-thread contexts --- */
3724 if (!s->context_initialized) {
3726 return -1; // we cant (re-)initialize context during parallel decoding
3727 if (MPV_common_init(s) < 0)
3731 init_scan_tables(h);
3734 for(i = 1; i < s->avctx->thread_count; i++) {
3736 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
/* Copy the MpegEncContext part; zero the H264-specific remainder. */
3737 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3738 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3741 init_scan_tables(c);
3745 for(i = 0; i < s->avctx->thread_count; i++)
3746 if(context_init(h->thread_context[i]) < 0)
3749 s->avctx->width = s->width;
3750 s->avctx->height = s->height;
3751 s->avctx->sample_aspect_ratio= h->sps.sar;
3752 if(!s->avctx->sample_aspect_ratio.den)
3753 s->avctx->sample_aspect_ratio.den = 1;
3755 if(h->sps.timing_info_present_flag){
3756 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Workaround for old x264 builds that wrote half the real time scale. */
3757 if(h->x264_build > 0 && h->x264_build < 44)
3758 s->avctx->time_base.den *= 2;
3759 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3760 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3764 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* --- picture structure (frame / field / MBAFF) --- */
3767 h->mb_aff_frame = 0;
3768 last_pic_structure = s0->picture_structure;
3769 if(h->sps.frame_mbs_only_flag){
3770 s->picture_structure= PICT_FRAME;
3772 if(get_bits1(&s->gb)) { //field_pic_flag
3773 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3775 s->picture_structure= PICT_FRAME;
3776 h->mb_aff_frame = h->sps.mb_aff;
3779 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3781 if(h0->current_slice == 0){
/* Conceal frame_num gaps by synthesizing intermediate reference frames. */
3782 while(h->frame_num != h->prev_frame_num &&
3783 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3784 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3786 h->prev_frame_num++;
3787 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3788 s->current_picture_ptr->frame_num= h->prev_frame_num;
3789 execute_ref_pic_marking(h, NULL, 0);
3792 /* See if we have a decoded first field looking for a pair... */
3793 if (s0->first_field) {
3794 assert(s0->current_picture_ptr);
3795 assert(s0->current_picture_ptr->data[0]);
3796 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3798 /* figure out if we have a complementary field pair */
3799 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3801 * Previous field is unmatched. Don't display it, but let it
3802 * remain for reference if marked as such.
3804 s0->current_picture_ptr = NULL;
3805 s0->first_field = FIELD_PICTURE;
3808 if (h->nal_ref_idc &&
3809 s0->current_picture_ptr->reference &&
3810 s0->current_picture_ptr->frame_num != h->frame_num) {
3812 * This and previous field were reference, but had
3813 * different frame_nums. Consider this field first in
3814 * pair. Throw away previous field except for reference
3817 s0->first_field = 1;
3818 s0->current_picture_ptr = NULL;
3821 /* Second field in complementary pair */
3822 s0->first_field = 0;
3827 /* Frame or first field in a potentially complementary pair */
3828 assert(!s0->current_picture_ptr);
3829 s0->first_field = FIELD_PICTURE;
3832 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3833 s0->first_field = 0;
3840 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3842 assert(s->mb_num == s->mb_width * s->mb_height);
3843 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3844 first_mb_in_slice >= s->mb_num){
3845 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3848 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3849 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
/* Bottom field starts one MB row below the matching top-field row. */
3850 if (s->picture_structure == PICT_BOTTOM_FIELD)
3851 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3852 assert(s->mb_y < s->mb_height);
3854 if(s->picture_structure==PICT_FRAME){
3855 h->curr_pic_num= h->frame_num;
3856 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3858 h->curr_pic_num= 2*h->frame_num + 1;
3859 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3862 if(h->nal_unit_type == NAL_IDR_SLICE){
3863 get_ue_golomb(&s->gb); /* idr_pic_id */
/* --- POC syntax elements --- */
3866 if(h->sps.poc_type==0){
3867 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3869 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3870 h->delta_poc_bottom= get_se_golomb(&s->gb);
3874 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3875 h->delta_poc[0]= get_se_golomb(&s->gb);
3877 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3878 h->delta_poc[1]= get_se_golomb(&s->gb);
3883 if(h->pps.redundant_pic_cnt_present){
3884 h->redundant_pic_count= get_ue_golomb(&s->gb);
3887 //set defaults, might be overridden a few lines later
3888 h->ref_count[0]= h->pps.ref_count[0];
3889 h->ref_count[1]= h->pps.ref_count[1];
3891 if(h->slice_type_nos != FF_I_TYPE){
3892 if(h->slice_type_nos == FF_B_TYPE){
3893 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3895 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3897 if(num_ref_idx_active_override_flag){
3898 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3899 if(h->slice_type_nos==FF_B_TYPE)
3900 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3902 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3903 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3904 h->ref_count[0]= h->ref_count[1]= 1;
3908 if(h->slice_type_nos == FF_B_TYPE)
/* --- reference list construction and reordering --- */
3915 if(!default_ref_list_done){
3916 fill_default_ref_list(h);
3919 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3922 if(h->slice_type_nos!=FF_I_TYPE){
3923 s->last_picture_ptr= &h->ref_list[0][0];
3924 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3926 if(h->slice_type_nos==FF_B_TYPE){
3927 s->next_picture_ptr= &h->ref_list[1][0];
3928 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* --- weighted prediction tables --- */
3931 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3932 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3933 pred_weight_table(h);
3934 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3935 implicit_weight_table(h);
3940 decode_ref_pic_marking(h0, &s->gb);
3943 fill_mbaff_ref_list(h);
3945 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3946 direct_dist_scale_factor(h);
3947 direct_ref_list_init(h);
3949 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3950 tmp = get_ue_golomb(&s->gb);
3952 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3955 h->cabac_init_idc= tmp;
/* --- QP and deblocking parameters --- */
3958 h->last_qscale_diff = 0;
3959 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3961 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3965 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3966 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3967 //FIXME qscale / qp ... stuff
3968 if(h->slice_type == FF_SP_TYPE){
3969 get_bits1(&s->gb); /* sp_for_switch_flag */
3971 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3972 get_se_golomb(&s->gb); /* slice_qs_delta */
3975 h->deblocking_filter = 1;
3976 h->slice_alpha_c0_offset = 0;
3977 h->slice_beta_offset = 0;
3978 if( h->pps.deblocking_filter_parameters_present ) {
3979 tmp= get_ue_golomb(&s->gb);
3981 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3984 h->deblocking_filter= tmp;
/* Bitstream uses 0=on, 1=off; internal convention is the opposite. */
3985 if(h->deblocking_filter < 2)
3986 h->deblocking_filter^= 1; // 1<->0
3988 if( h->deblocking_filter ) {
3989 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3990 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* Honor the user's skip_loop_filter discard level. */
3994 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3995 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3996 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3997 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3998 h->deblocking_filter= 0;
/* Type-1 deblocking crosses slice boundaries, so it cannot be sliced-threaded. */
4000 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4001 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4002 /* Cheat slightly for speed:
4003 Do not bother to deblock across slices. */
4004 h->deblocking_filter = 2;
4006 h0->max_contexts = 1;
4007 if(!h0->single_decode_warning) {
4008 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4009 h0->single_decode_warning = 1;
4012 return 1; // deblocking switched inside frame
4017 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4018 slice_group_change_cycle= get_bits(&s->gb, ?);
4021 h0->last_slice_type = slice_type;
4022 h->slice_num = ++h0->current_slice;
4023 if(h->slice_num >= MAX_SLICES){
4024 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* Precompute ref index → frame id mapping used by the loop filter. */
4028 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4032 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4033 +(h->ref_list[j][i].reference&3);
4036 for(i=16; i<48; i++)
4037 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4038 +(h->ref_list[j][i].reference&3);
4041 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4042 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4044 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4045 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4047 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4049 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4050 pps_id, h->frame_num,
4051 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4052 h->ref_count[0], h->ref_count[1],
4054 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4056 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4057 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/*
 * Reads a CAVLC level_prefix: counts leading zero bits before the first 1 bit
 * using the bitstream reader cache. NOTE(review): excerpt is decimated — the
 * local declarations and the return are missing from this view.
 */
4067 static inline int get_level_prefix(GetBitContext *gb){
4071 OPEN_READER(re, gb);
4072 UPDATE_CACHE(re, gb);
4073 buf=GET_CACHE(re, gb);
/* Number of bits up to and including the terminating 1. */
4075 log= 32 - av_log2(buf);
4077 print_bin(buf>>(32-log), log);
4078 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4081 LAST_SKIP_BITS(re, gb, log);
4082 CLOSE_READER(re, gb);
/*
 * Checks whether the 8x8 DCT may be used for the current macroblock: every
 * sub-partition must be 8x8, and direct sub-blocks additionally require
 * direct_8x8_inference_flag. NOTE(review): excerpt is decimated — the loop
 * header and return statements are missing from this view.
 */
4087 static inline int get_dct8x8_allowed(H264Context *h){
4090 if(!IS_SUB_8X8(h->sub_mb_type[i])
4091 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4098 * decodes a residual block.
4099 * @param n block index
4100 * @param scantable scantable
4101 * @param max_coeff number of coefficients in the block
4102 * @return <0 if an error occurred
4104 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4105 MpegEncContext * const s = &h->s;
4106 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4108 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4110 //FIXME put trailing_onex into the context
4112 if(n == CHROMA_DC_BLOCK_INDEX){
4113 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4114 total_coeff= coeff_token>>2;
4116 if(n == LUMA_DC_BLOCK_INDEX){
4117 total_coeff= pred_non_zero_count(h, 0);
4118 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4119 total_coeff= coeff_token>>2;
4121 total_coeff= pred_non_zero_count(h, n);
4122 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4123 total_coeff= coeff_token>>2;
4124 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4128 //FIXME set last_non_zero?
4132 if(total_coeff > (unsigned)max_coeff) {
4133 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4137 trailing_ones= coeff_token&3;
4138 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4139 assert(total_coeff<=16);
4141 for(i=0; i<trailing_ones; i++){
4142 level[i]= 1 - 2*get_bits1(gb);
4146 int level_code, mask;
4147 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4148 int prefix= get_level_prefix(gb);
4150 //first coefficient has suffix_length equal to 0 or 1
4151 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4153 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4155 level_code= (prefix<<suffix_length); //part
4156 }else if(prefix==14){
4158 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4160 level_code= prefix + get_bits(gb, 4); //part
4162 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4163 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4165 level_code += (1<<(prefix-3))-4096;
4168 if(trailing_ones < 3) level_code += 2;
4173 mask= -(level_code&1);
4174 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4177 //remaining coefficients have suffix_length > 0
4178 for(;i<total_coeff;i++) {
4179 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4180 prefix = get_level_prefix(gb);
4182 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4184 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4186 level_code += (1<<(prefix-3))-4096;
4188 mask= -(level_code&1);
4189 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4190 if(level_code > suffix_limit[suffix_length])
4195 if(total_coeff == max_coeff)
4198 if(n == CHROMA_DC_BLOCK_INDEX)
4199 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4201 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4204 coeff_num = zeros_left + total_coeff - 1;
4205 j = scantable[coeff_num];
4207 block[j] = level[0];
4208 for(i=1;i<total_coeff;i++) {
4211 else if(zeros_left < 7){
4212 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4214 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4216 zeros_left -= run_before;
4217 coeff_num -= 1 + run_before;
4218 j= scantable[ coeff_num ];
4223 block[j] = (level[0] * qmul[j] + 32)>>6;
4224 for(i=1;i<total_coeff;i++) {
4227 else if(zeros_left < 7){
4228 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4230 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4232 zeros_left -= run_before;
4233 coeff_num -= 1 + run_before;
4234 j= scantable[ coeff_num ];
4236 block[j]= (level[i] * qmul[j] + 32)>>6;
4241 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4248 static void predict_field_decoding_flag(H264Context *h){
4249 MpegEncContext * const s = &h->s;
4250 const int mb_xy= h->mb_xy;
4251 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4252 ? s->current_picture.mb_type[mb_xy-1]
4253 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4254 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4256 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4260 * decodes a P_SKIP or B_SKIP macroblock
4262 static void decode_mb_skip(H264Context *h){
4263 MpegEncContext * const s = &h->s;
4264 const int mb_xy= h->mb_xy;
4267 memset(h->non_zero_count[mb_xy], 0, 16);
4268 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4271 mb_type|= MB_TYPE_INTERLACED;
4273 if( h->slice_type_nos == FF_B_TYPE )
4275 // just for fill_caches. pred_direct_motion will set the real mb_type
4276 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4278 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4279 pred_direct_motion(h, &mb_type);
4280 mb_type|= MB_TYPE_SKIP;
4285 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4287 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4288 pred_pskip_motion(h, &mx, &my);
4289 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4290 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4293 write_back_motion(h, mb_type);
4294 s->current_picture.mb_type[mb_xy]= mb_type;
4295 s->current_picture.qscale_table[mb_xy]= s->qscale;
4296 h->slice_table[ mb_xy ]= h->slice_num;
4297 h->prev_mb_skipped= 1;
4301 * decodes a macroblock
4302 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4304 static int decode_mb_cavlc(H264Context *h){
4305 MpegEncContext * const s = &h->s;
4307 int partition_count;
4308 unsigned int mb_type, cbp;
4309 int dct8x8_allowed= h->pps.transform_8x8_mode;
4311 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4313 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4315 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4316 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4318 if(h->slice_type_nos != FF_I_TYPE){
4319 if(s->mb_skip_run==-1)
4320 s->mb_skip_run= get_ue_golomb(&s->gb);
4322 if (s->mb_skip_run--) {
4323 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4324 if(s->mb_skip_run==0)
4325 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4327 predict_field_decoding_flag(h);
4334 if( (s->mb_y&1) == 0 )
4335 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4338 h->prev_mb_skipped= 0;
4340 mb_type= get_ue_golomb(&s->gb);
4341 if(h->slice_type_nos == FF_B_TYPE){
4343 partition_count= b_mb_type_info[mb_type].partition_count;
4344 mb_type= b_mb_type_info[mb_type].type;
4347 goto decode_intra_mb;
4349 }else if(h->slice_type_nos == FF_P_TYPE){
4351 partition_count= p_mb_type_info[mb_type].partition_count;
4352 mb_type= p_mb_type_info[mb_type].type;
4355 goto decode_intra_mb;
4358 assert(h->slice_type_nos == FF_I_TYPE);
4359 if(h->slice_type == FF_SI_TYPE && mb_type)
4363 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4367 cbp= i_mb_type_info[mb_type].cbp;
4368 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4369 mb_type= i_mb_type_info[mb_type].type;
4373 mb_type |= MB_TYPE_INTERLACED;
4375 h->slice_table[ mb_xy ]= h->slice_num;
4377 if(IS_INTRA_PCM(mb_type)){
4380 // We assume these blocks are very rare so we do not optimize it.
4381 align_get_bits(&s->gb);
4383 // The pixels are stored in the same order as levels in h->mb array.
4384 for(x=0; x < (CHROMA ? 384 : 256); x++){
4385 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4388 // In deblocking, the quantizer is 0
4389 s->current_picture.qscale_table[mb_xy]= 0;
4390 // All coeffs are present
4391 memset(h->non_zero_count[mb_xy], 16, 16);
4393 s->current_picture.mb_type[mb_xy]= mb_type;
4398 h->ref_count[0] <<= 1;
4399 h->ref_count[1] <<= 1;
4402 fill_caches(h, mb_type, 0);
4405 if(IS_INTRA(mb_type)){
4407 // init_top_left_availability(h);
4408 if(IS_INTRA4x4(mb_type)){
4411 if(dct8x8_allowed && get_bits1(&s->gb)){
4412 mb_type |= MB_TYPE_8x8DCT;
4416 // fill_intra4x4_pred_table(h);
4417 for(i=0; i<16; i+=di){
4418 int mode= pred_intra_mode(h, i);
4420 if(!get_bits1(&s->gb)){
4421 const int rem_mode= get_bits(&s->gb, 3);
4422 mode = rem_mode + (rem_mode >= mode);
4426 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4428 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4430 write_back_intra_pred_mode(h);
4431 if( check_intra4x4_pred_mode(h) < 0)
4434 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4435 if(h->intra16x16_pred_mode < 0)
4439 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4442 h->chroma_pred_mode= pred_mode;
4444 }else if(partition_count==4){
4445 int i, j, sub_partition_count[4], list, ref[2][4];
4447 if(h->slice_type_nos == FF_B_TYPE){
4449 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4450 if(h->sub_mb_type[i] >=13){
4451 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4454 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4455 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4457 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4458 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4459 pred_direct_motion(h, &mb_type);
4460 h->ref_cache[0][scan8[4]] =
4461 h->ref_cache[1][scan8[4]] =
4462 h->ref_cache[0][scan8[12]] =
4463 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4466 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4468 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4469 if(h->sub_mb_type[i] >=4){
4470 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4473 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4474 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4478 for(list=0; list<h->list_count; list++){
4479 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4481 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4482 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4483 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4485 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4497 dct8x8_allowed = get_dct8x8_allowed(h);
4499 for(list=0; list<h->list_count; list++){
4501 if(IS_DIRECT(h->sub_mb_type[i])) {
4502 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4505 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4506 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4508 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4509 const int sub_mb_type= h->sub_mb_type[i];
4510 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4511 for(j=0; j<sub_partition_count[i]; j++){
4513 const int index= 4*i + block_width*j;
4514 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4515 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4516 mx += get_se_golomb(&s->gb);
4517 my += get_se_golomb(&s->gb);
4518 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4520 if(IS_SUB_8X8(sub_mb_type)){
4522 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4524 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4525 }else if(IS_SUB_8X4(sub_mb_type)){
4526 mv_cache[ 1 ][0]= mx;
4527 mv_cache[ 1 ][1]= my;
4528 }else if(IS_SUB_4X8(sub_mb_type)){
4529 mv_cache[ 8 ][0]= mx;
4530 mv_cache[ 8 ][1]= my;
4532 mv_cache[ 0 ][0]= mx;
4533 mv_cache[ 0 ][1]= my;
4536 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4542 }else if(IS_DIRECT(mb_type)){
4543 pred_direct_motion(h, &mb_type);
4544 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4546 int list, mx, my, i;
4547 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4548 if(IS_16X16(mb_type)){
4549 for(list=0; list<h->list_count; list++){
4551 if(IS_DIR(mb_type, 0, list)){
4552 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4553 if(val >= h->ref_count[list]){
4554 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4558 val= LIST_NOT_USED&0xFF;
4559 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4561 for(list=0; list<h->list_count; list++){
4563 if(IS_DIR(mb_type, 0, list)){
4564 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4565 mx += get_se_golomb(&s->gb);
4566 my += get_se_golomb(&s->gb);
4567 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4569 val= pack16to32(mx,my);
4572 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4575 else if(IS_16X8(mb_type)){
4576 for(list=0; list<h->list_count; list++){
4579 if(IS_DIR(mb_type, i, list)){
4580 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4581 if(val >= h->ref_count[list]){
4582 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4586 val= LIST_NOT_USED&0xFF;
4587 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4590 for(list=0; list<h->list_count; list++){
4593 if(IS_DIR(mb_type, i, list)){
4594 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4595 mx += get_se_golomb(&s->gb);
4596 my += get_se_golomb(&s->gb);
4597 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4599 val= pack16to32(mx,my);
4602 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4606 assert(IS_8X16(mb_type));
4607 for(list=0; list<h->list_count; list++){
4610 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4611 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4612 if(val >= h->ref_count[list]){
4613 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4617 val= LIST_NOT_USED&0xFF;
4618 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4621 for(list=0; list<h->list_count; list++){
4624 if(IS_DIR(mb_type, i, list)){
4625 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4626 mx += get_se_golomb(&s->gb);
4627 my += get_se_golomb(&s->gb);
4628 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4630 val= pack16to32(mx,my);
4633 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4639 if(IS_INTER(mb_type))
4640 write_back_motion(h, mb_type);
4642 if(!IS_INTRA16x16(mb_type)){
4643 cbp= get_ue_golomb(&s->gb);
4645 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4650 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4651 else cbp= golomb_to_inter_cbp [cbp];
4653 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4654 else cbp= golomb_to_inter_cbp_gray[cbp];
4659 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4660 if(get_bits1(&s->gb)){
4661 mb_type |= MB_TYPE_8x8DCT;
4662 h->cbp_table[mb_xy]= cbp;
4665 s->current_picture.mb_type[mb_xy]= mb_type;
4667 if(cbp || IS_INTRA16x16(mb_type)){
4668 int i8x8, i4x4, chroma_idx;
4670 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4671 const uint8_t *scan, *scan8x8, *dc_scan;
4673 // fill_non_zero_count_cache(h);
4675 if(IS_INTERLACED(mb_type)){
4676 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4677 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4678 dc_scan= luma_dc_field_scan;
4680 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4681 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4682 dc_scan= luma_dc_zigzag_scan;
4685 dquant= get_se_golomb(&s->gb);
4687 if( dquant > 25 || dquant < -26 ){
4688 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4692 s->qscale += dquant;
4693 if(((unsigned)s->qscale) > 51){
4694 if(s->qscale<0) s->qscale+= 52;
4695 else s->qscale-= 52;
4698 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4699 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4700 if(IS_INTRA16x16(mb_type)){
4701 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4702 return -1; //FIXME continue if partitioned and other return -1 too
4705 assert((cbp&15) == 0 || (cbp&15) == 15);
4708 for(i8x8=0; i8x8<4; i8x8++){
4709 for(i4x4=0; i4x4<4; i4x4++){
4710 const int index= i4x4 + 4*i8x8;
4711 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4717 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4720 for(i8x8=0; i8x8<4; i8x8++){
4721 if(cbp & (1<<i8x8)){
4722 if(IS_8x8DCT(mb_type)){
4723 DCTELEM *buf = &h->mb[64*i8x8];
4725 for(i4x4=0; i4x4<4; i4x4++){
4726 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4727 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4730 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4731 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4733 for(i4x4=0; i4x4<4; i4x4++){
4734 const int index= i4x4 + 4*i8x8;
4736 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4742 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4743 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4749 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4750 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4756 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4757 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4758 for(i4x4=0; i4x4<4; i4x4++){
4759 const int index= 16 + 4*chroma_idx + i4x4;
4760 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4766 uint8_t * const nnz= &h->non_zero_count_cache[0];
4767 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4768 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4771 uint8_t * const nnz= &h->non_zero_count_cache[0];
4772 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4773 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4774 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4776 s->current_picture.qscale_table[mb_xy]= s->qscale;
4777 write_back_non_zero_count(h);
4780 h->ref_count[0] >>= 1;
4781 h->ref_count[1] >>= 1;
4787 static int decode_cabac_field_decoding_flag(H264Context *h) {
4788 MpegEncContext * const s = &h->s;
4789 const int mb_x = s->mb_x;
4790 const int mb_y = s->mb_y & ~1;
4791 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4792 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4794 unsigned int ctx = 0;
4796 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4799 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4803 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4806 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4807 uint8_t *state= &h->cabac_state[ctx_base];
4811 MpegEncContext * const s = &h->s;
4812 const int mba_xy = h->left_mb_xy[0];
4813 const int mbb_xy = h->top_mb_xy;
4815 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4817 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4819 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4820 return 0; /* I4x4 */
4823 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4824 return 0; /* I4x4 */
4827 if( get_cabac_terminate( &h->cabac ) )
4828 return 25; /* PCM */
4830 mb_type = 1; /* I16x16 */
4831 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4832 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4833 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4834 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4835 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4839 static int decode_cabac_mb_type( H264Context *h ) {
4840 MpegEncContext * const s = &h->s;
4842 if( h->slice_type_nos == FF_I_TYPE ) {
4843 return decode_cabac_intra_mb_type(h, 3, 1);
4844 } else if( h->slice_type_nos == FF_P_TYPE ) {
4845 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4847 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4848 /* P_L0_D16x16, P_8x8 */
4849 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4851 /* P_L0_D8x16, P_L0_D16x8 */
4852 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4855 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4857 } else if( h->slice_type_nos == FF_B_TYPE ) {
4858 const int mba_xy = h->left_mb_xy[0];
4859 const int mbb_xy = h->top_mb_xy;
4863 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4865 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4868 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4869 return 0; /* B_Direct_16x16 */
4871 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4872 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4875 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4876 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4877 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4878 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4880 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4881 else if( bits == 13 ) {
4882 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4883 } else if( bits == 14 )
4884 return 11; /* B_L1_L0_8x16 */
4885 else if( bits == 15 )
4886 return 22; /* B_8x8 */
4888 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4889 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4891 /* TODO SI/SP frames? */
4896 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4897 MpegEncContext * const s = &h->s;
4901 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4902 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4905 && h->slice_table[mba_xy] == h->slice_num
4906 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4907 mba_xy += s->mb_stride;
4909 mbb_xy = mb_xy - s->mb_stride;
4911 && h->slice_table[mbb_xy] == h->slice_num
4912 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4913 mbb_xy -= s->mb_stride;
4915 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4917 int mb_xy = h->mb_xy;
4919 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4922 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4924 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4927 if( h->slice_type_nos == FF_B_TYPE )
4929 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4932 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4935 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4938 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4939 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4940 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4942 if( mode >= pred_mode )
4948 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4949 const int mba_xy = h->left_mb_xy[0];
4950 const int mbb_xy = h->top_mb_xy;
4954 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4955 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4958 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4961 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4964 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4966 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4972 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4973 int cbp_b, cbp_a, ctx, cbp = 0;
4975 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4976 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4978 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4979 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4980 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4981 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4982 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4983 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4984 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4985 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4988 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4992 cbp_a = (h->left_cbp>>4)&0x03;
4993 cbp_b = (h-> top_cbp>>4)&0x03;
4996 if( cbp_a > 0 ) ctx++;
4997 if( cbp_b > 0 ) ctx += 2;
4998 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5002 if( cbp_a == 2 ) ctx++;
5003 if( cbp_b == 2 ) ctx += 2;
5004 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5006 static int decode_cabac_mb_dqp( H264Context *h) {
5010 if( h->last_qscale_diff != 0 )
5013 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5019 if(val > 102) //prevent infinite loop
5026 return -(val + 1)/2;
5028 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5029 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5031 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5033 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5037 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5039 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5040 return 0; /* B_Direct_8x8 */
5041 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5042 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5044 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5045 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5046 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5049 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5050 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5054 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5055 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5058 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5059 int refa = h->ref_cache[list][scan8[n] - 1];
5060 int refb = h->ref_cache[list][scan8[n] - 8];
5064 if( h->slice_type_nos == FF_B_TYPE) {
5065 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5067 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5076 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5082 if(ref >= 32 /*h->ref_list[list]*/){
5083 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5084 return 0; //FIXME we should return -1 and check the return everywhere
5090 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5091 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5092 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5093 int ctxbase = (l == 0) ? 40 : 47;
5098 else if( amvd > 32 )
5103 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5108 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5116 while( get_cabac_bypass( &h->cabac ) ) {
5120 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5125 if( get_cabac_bypass( &h->cabac ) )
5129 return get_cabac_bypass_sign( &h->cabac, -mvd );
5132 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5138 nza = h->left_cbp&0x100;
5139 nzb = h-> top_cbp&0x100;
5141 nza = (h->left_cbp>>(6+idx))&0x01;
5142 nzb = (h-> top_cbp>>(6+idx))&0x01;
5146 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5147 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5149 assert(cat == 1 || cat == 2);
5150 nza = h->non_zero_count_cache[scan8[idx] - 1];
5151 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5161 return ctx + 4 * cat;
5164 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5165 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5167 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5168 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5171 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5172 static const int significant_coeff_flag_offset[2][6] = {
5173 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5174 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5176 static const int last_coeff_flag_offset[2][6] = {
5177 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5178 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5180 static const int coeff_abs_level_m1_offset[6] = {
5181 227+0, 227+10, 227+20, 227+30, 227+39, 426
5183 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5184 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5185 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5186 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5187 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5188 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5189 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5190 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5191 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5193 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5194 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5195 * map node ctx => cabac ctx for level=1 */
5196 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5197 /* map node ctx => cabac ctx for level>1 */
5198 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5199 static const uint8_t coeff_abs_level_transition[2][8] = {
5200 /* update node ctx after decoding a level=1 */
5201 { 1, 2, 3, 3, 4, 5, 6, 7 },
5202 /* update node ctx after decoding a level>1 */
5203 { 4, 4, 4, 4, 5, 6, 7, 7 }
5209 int coeff_count = 0;
5212 uint8_t *significant_coeff_ctx_base;
5213 uint8_t *last_coeff_ctx_base;
5214 uint8_t *abs_level_m1_ctx_base;
5217 #define CABAC_ON_STACK
5219 #ifdef CABAC_ON_STACK
5222 cc.range = h->cabac.range;
5223 cc.low = h->cabac.low;
5224 cc.bytestream= h->cabac.bytestream;
5226 #define CC &h->cabac
5230 /* cat: 0-> DC 16x16 n = 0
5231 * 1-> AC 16x16 n = luma4x4idx
5232 * 2-> Luma4x4 n = luma4x4idx
5233 * 3-> DC Chroma n = iCbCr
5234 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5235 * 5-> Luma8x8 n = 4 * luma8x8idx
5238 /* read coded block flag */
5239 if( is_dc || cat != 5 ) {
5240 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5243 h->non_zero_count_cache[scan8[16+n]] = 0;
5245 h->non_zero_count_cache[scan8[n]] = 0;
5248 #ifdef CABAC_ON_STACK
5249 h->cabac.range = cc.range ;
5250 h->cabac.low = cc.low ;
5251 h->cabac.bytestream= cc.bytestream;
5257 significant_coeff_ctx_base = h->cabac_state
5258 + significant_coeff_flag_offset[MB_FIELD][cat];
5259 last_coeff_ctx_base = h->cabac_state
5260 + last_coeff_flag_offset[MB_FIELD][cat];
5261 abs_level_m1_ctx_base = h->cabac_state
5262 + coeff_abs_level_m1_offset[cat];
5264 if( !is_dc && cat == 5 ) {
5265 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5266 for(last= 0; last < coefs; last++) { \
5267 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5268 if( get_cabac( CC, sig_ctx )) { \
5269 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5270 index[coeff_count++] = last; \
5271 if( get_cabac( CC, last_ctx ) ) { \
5277 if( last == max_coeff -1 ) {\
5278 index[coeff_count++] = last;\
5280 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5281 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5282 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5284 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5286 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5288 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5291 assert(coeff_count > 0);
5295 h->cbp_table[h->mb_xy] |= 0x100;
5297 h->cbp_table[h->mb_xy] |= 0x40 << n;
5300 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5302 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5304 assert( cat == 1 || cat == 2 );
5305 h->non_zero_count_cache[scan8[n]] = coeff_count;
5310 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5312 int j= scantable[index[--coeff_count]];
5314 if( get_cabac( CC, ctx ) == 0 ) {
5315 node_ctx = coeff_abs_level_transition[0][node_ctx];
5317 block[j] = get_cabac_bypass_sign( CC, -1);
5319 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5323 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5324 node_ctx = coeff_abs_level_transition[1][node_ctx];
5326 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5330 if( coeff_abs >= 15 ) {
5332 while( get_cabac_bypass( CC ) ) {
5338 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5344 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5346 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5349 } while( coeff_count );
5350 #ifdef CABAC_ON_STACK
5351 h->cabac.range = cc.range ;
5352 h->cabac.low = cc.low ;
5353 h->cabac.bytestream= cc.bytestream;
5358 #ifndef CONFIG_SMALL
/* DC specialization of decode_cabac_residual_internal (is_dc = 1).
 * Only compiled when CONFIG_SMALL is not set — presumably so the compiler
 * can fold away the non-DC paths of the shared decoder (TODO confirm the
 * internal function is force-inlined in the elided attribute lines). */
5359 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5360 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-DC specialization of decode_cabac_residual_internal (is_dc = 0);
 * counterpart of decode_cabac_residual_dc in the non-CONFIG_SMALL build. */
5363 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5364 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatcher for CABAC residual decoding.
 * cat 0 (luma DC) and cat 3 (chroma DC) take the DC path, everything else
 * the non-DC path.
 * NOTE(review): this listing shows both the CONFIG_SMALL single-call variant
 * (line 5370) and the non-SMALL dispatch (lines 5372-5373); the surrounding
 * #ifdef CONFIG_SMALL / #else / #endif lines appear to be elided. */
5368 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5370 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5372 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5373 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * Starts from the plain frame-coded defaults (above = -mb_stride,
 * left = -1) and then corrects them for MBAFF macroblock pairs and for
 * field pictures.
 * NOTE(review): the FRAME_MBAFF guard and several brace lines are elided
 * in this listing; the indented body below is the MBAFF branch. */
5377 static inline void compute_mb_neighbors(H264Context *h)
5379 MpegEncContext * const s = &h->s;
5380 const int mb_xy = h->mb_xy;
/* defaults for a non-MBAFF frame macroblock */
5381 h->top_mb_xy = mb_xy - s->mb_stride;
5382 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: work on the top MB of the current pair and compare the
 * frame/field coding of this pair against its neighbours */
5384 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5385 const int top_pair_xy = pair_xy - s->mb_stride;
5386 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5387 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5388 const int curr_mb_frame_flag = !MB_FIELD;
5389 const int bottom = (s->mb_y & 1);
5391 ? !curr_mb_frame_flag // bottom macroblock
5392 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5394 h->top_mb_xy -= s->mb_stride;
/* left neighbour is the pair's top MB when coding modes differ */
5396 if (left_mb_frame_flag != curr_mb_frame_flag) {
5397 h->left_mb_xy[0] = pair_xy - 1;
/* field picture: rows are interleaved, so "above" is two rows up */
5399 } else if (FIELD_PICTURE) {
5400 h->top_mb_xy -= s->mb_stride;
5406 * decodes a macroblock
5407 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): this numbered listing elides many brace/#if/else lines of
 * the function; the comments added below describe only what is visible. */
5409 static int decode_mb_cabac(H264Context *h) {
5410 MpegEncContext * const s = &h->s;
5412 int mb_type, partition_count, cbp = 0;
/* 8x8 transform allowed only if the PPS enables it (may be further
 * restricted below depending on the decoded mb/sub-mb types) */
5413 int dct8x8_allowed= h->pps.transform_8x8_mode;
5415 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5417 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5419 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip flag handling: only P/B slices have mb_skip_flag --- */
5420 if( h->slice_type_nos != FF_I_TYPE ) {
5422 /* a skipped mb needs the aff flag from the following mb */
5423 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5424 predict_field_decoding_flag(h);
5425 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5426 skip = h->next_mb_skipped;
5428 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5429 /* read skip flags */
5431 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5432 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* peek at the skip flag of the bottom MB of the pair */
5433 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5434 if(h->next_mb_skipped)
5435 predict_field_decoding_flag(h);
5437 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* reset per-MB state for the skipped macroblock */
5442 h->cbp_table[mb_xy] = 0;
5443 h->chroma_pred_mode_table[mb_xy] = 0;
5444 h->last_qscale_diff = 0;
5451 if( (s->mb_y&1) == 0 )
5453 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5456 h->prev_mb_skipped = 0;
/* --- macroblock type --- */
5458 compute_mb_neighbors(h);
5459 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5460 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* map the raw cabac mb_type index through the per-slice-type tables */
5464 if( h->slice_type_nos == FF_B_TYPE ) {
5466 partition_count= b_mb_type_info[mb_type].partition_count;
5467 mb_type= b_mb_type_info[mb_type].type;
5470 goto decode_intra_mb;
5472 } else if( h->slice_type_nos == FF_P_TYPE ) {
5474 partition_count= p_mb_type_info[mb_type].partition_count;
5475 mb_type= p_mb_type_info[mb_type].type;
5478 goto decode_intra_mb;
5481 if(h->slice_type == FF_SI_TYPE && mb_type)
5483 assert(h->slice_type_nos == FF_I_TYPE);
/* intra macroblock: partition_count 0, cbp/pred mode come from table */
5485 partition_count = 0;
5486 cbp= i_mb_type_info[mb_type].cbp;
5487 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5488 mb_type= i_mb_type_info[mb_type].type;
5491 mb_type |= MB_TYPE_INTERLACED;
5493 h->slice_table[ mb_xy ]= h->slice_num;
/* --- PCM macroblock: raw samples follow, bypassing CABAC --- */
5495 if(IS_INTRA_PCM(mb_type)) {
5498 // We assume these blocks are very rare so we do not optimize it.
5499 // FIXME The two following lines get the bitstream position in the cabac
5500 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5501 ptr= h->cabac.bytestream;
5502 if(h->cabac.low&0x1) ptr--;
5504 if(h->cabac.low&0x1FF) ptr--;
5507 // The pixels are stored in the same order as levels in h->mb array.
5508 memcpy(h->mb, ptr, 256); ptr+=256;
5510 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* restart the CABAC decoder after the raw PCM bytes */
5513 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5515 // All blocks are present
5516 h->cbp_table[mb_xy] = 0x1ef;
5517 h->chroma_pred_mode_table[mb_xy] = 0;
5518 // In deblocking, the quantizer is 0
5519 s->current_picture.qscale_table[mb_xy]= 0;
5520 // All coeffs are present
5521 memset(h->non_zero_count[mb_xy], 16, 16);
5522 s->current_picture.mb_type[mb_xy]= mb_type;
5523 h->last_qscale_diff = 0;
/* MBAFF field MBs use doubled reference counts (one per field) */
5528 h->ref_count[0] <<= 1;
5529 h->ref_count[1] <<= 1;
5532 fill_caches(h, mb_type, 0);
/* --- prediction modes / motion information --- */
5534 if( IS_INTRA( mb_type ) ) {
5536 if( IS_INTRA4x4( mb_type ) ) {
5537 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5538 mb_type |= MB_TYPE_8x8DCT;
/* 8x8 transform: one pred mode per 8x8 block, replicated to 4x4 cache */
5539 for( i = 0; i < 16; i+=4 ) {
5540 int pred = pred_intra_mode( h, i );
5541 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5542 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5545 for( i = 0; i < 16; i++ ) {
5546 int pred = pred_intra_mode( h, i );
5547 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5549 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5552 write_back_intra_pred_mode(h);
5553 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5555 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5556 if( h->intra16x16_pred_mode < 0 ) return -1;
5559 h->chroma_pred_mode_table[mb_xy] =
5560 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5562 pred_mode= check_intra_pred_mode( h, pred_mode );
5563 if( pred_mode < 0 ) return -1;
5564 h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitions: decode sub_mb_types, refs and sub-partition MVs --- */
5566 } else if( partition_count == 4 ) {
5567 int i, j, sub_partition_count[4], list, ref[2][4];
5569 if( h->slice_type_nos == FF_B_TYPE ) {
5570 for( i = 0; i < 4; i++ ) {
5571 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5572 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5573 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5575 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5576 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5577 pred_direct_motion(h, &mb_type);
5578 h->ref_cache[0][scan8[4]] =
5579 h->ref_cache[1][scan8[4]] =
5580 h->ref_cache[0][scan8[12]] =
5581 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5582 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5583 for( i = 0; i < 4; i++ )
5584 if( IS_DIRECT(h->sub_mb_type[i]) )
5585 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5589 for( i = 0; i < 4; i++ ) {
5590 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5591 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5592 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 (only coded when more than one ref exists) */
5596 for( list = 0; list < h->list_count; list++ ) {
5597 for( i = 0; i < 4; i++ ) {
5598 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5599 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5600 if( h->ref_count[list] > 1 )
5601 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5607 h->ref_cache[list][ scan8[4*i]+1 ]=
5608 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5613 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences per sub-partition */
5615 for(list=0; list<h->list_count; list++){
5617 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5618 if(IS_DIRECT(h->sub_mb_type[i])){
5619 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5623 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5624 const int sub_mb_type= h->sub_mb_type[i];
5625 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5626 for(j=0; j<sub_partition_count[i]; j++){
5629 const int index= 4*i + block_width*j;
5630 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5631 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5632 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5634 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5635 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5636 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate mv/mvd into the cache according to sub-partition shape */
5638 if(IS_SUB_8X8(sub_mb_type)){
5640 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5642 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5645 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5647 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5648 }else if(IS_SUB_8X4(sub_mb_type)){
5649 mv_cache[ 1 ][0]= mx;
5650 mv_cache[ 1 ][1]= my;
5652 mvd_cache[ 1 ][0]= mx - mpx;
5653 mvd_cache[ 1 ][1]= my - mpy;
5654 }else if(IS_SUB_4X8(sub_mb_type)){
5655 mv_cache[ 8 ][0]= mx;
5656 mv_cache[ 8 ][1]= my;
5658 mvd_cache[ 8 ][0]= mx - mpx;
5659 mvd_cache[ 8 ][1]= my - mpy;
5661 mv_cache[ 0 ][0]= mx;
5662 mv_cache[ 0 ][1]= my;
5664 mvd_cache[ 0 ][0]= mx - mpx;
5665 mvd_cache[ 0 ][1]= my - mpy;
/* list unused for this 8x8: clear the mv/mvd cache entries */
5668 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5669 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5670 p[0] = p[1] = p[8] = p[9] = 0;
5671 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- whole-MB direct mode --- */
5675 } else if( IS_DIRECT(mb_type) ) {
5676 pred_direct_motion(h, &mb_type);
5677 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5678 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5679 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5681 int list, mx, my, i, mpx, mpy;
5682 if(IS_16X16(mb_type)){
5683 for(list=0; list<h->list_count; list++){
5684 if(IS_DIR(mb_type, 0, list)){
5685 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5686 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5688 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5690 for(list=0; list<h->list_count; list++){
5691 if(IS_DIR(mb_type, 0, list)){
5692 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5694 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5695 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5696 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5698 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5699 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5701 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5704 else if(IS_16X8(mb_type)){
5705 for(list=0; list<h->list_count; list++){
5707 if(IS_DIR(mb_type, i, list)){
5708 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5709 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5711 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5714 for(list=0; list<h->list_count; list++){
5716 if(IS_DIR(mb_type, i, list)){
5717 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5718 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5719 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5720 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5722 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5723 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5725 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5726 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5731 assert(IS_8X16(mb_type));
5732 for(list=0; list<h->list_count; list++){
5734 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5735 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5736 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5738 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5741 for(list=0; list<h->list_count; list++){
5743 if(IS_DIR(mb_type, i, list)){
5744 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5745 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5746 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5748 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5749 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5750 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5752 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5753 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5760 if( IS_INTER( mb_type ) ) {
5761 h->chroma_pred_mode_table[mb_xy] = 0;
5762 write_back_motion( h, mb_type );
/* --- coded block pattern (not coded for intra16x16, which carries it
 * in its mb_type) --- */
5765 if( !IS_INTRA16x16( mb_type ) ) {
5766 cbp = decode_cabac_mb_cbp_luma( h );
5768 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5771 h->cbp_table[mb_xy] = h->cbp = cbp;
5773 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5774 if( decode_cabac_mb_transform_size( h ) )
5775 mb_type |= MB_TYPE_8x8DCT;
5777 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: dqp, then luma and chroma coefficients --- */
5779 if( cbp || IS_INTRA16x16( mb_type ) ) {
5780 const uint8_t *scan, *scan8x8, *dc_scan;
5781 const uint32_t *qmul;
5784 if(IS_INTERLACED(mb_type)){
5785 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5786 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5787 dc_scan= luma_dc_field_scan;
5789 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5790 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5791 dc_scan= luma_dc_zigzag_scan;
5794 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5795 if( dqp == INT_MIN ){
5796 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into the legal 0..51 range */
5800 if(((unsigned)s->qscale) > 51){
5801 if(s->qscale<0) s->qscale+= 52;
5802 else s->qscale-= 52;
5804 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5805 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5807 if( IS_INTRA16x16( mb_type ) ) {
5809 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5810 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5813 qmul = h->dequant4_coeff[0][s->qscale];
5814 for( i = 0; i < 16; i++ ) {
5815 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5816 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5819 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5823 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5824 if( cbp & (1<<i8x8) ) {
5825 if( IS_8x8DCT(mb_type) ) {
5826 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5827 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5829 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5830 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5831 const int index = 4*i8x8 + i4x4;
5832 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5834 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5835 //STOP_TIMER("decode_residual")
5839 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5840 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC then chroma AC, per component */
5847 for( c = 0; c < 2; c++ ) {
5848 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5849 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5855 for( c = 0; c < 2; c++ ) {
5856 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5857 for( i = 0; i < 4; i++ ) {
5858 const int index = 16 + 4 * c + i;
5859 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5860 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5864 uint8_t * const nnz= &h->non_zero_count_cache[0];
5865 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5866 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual at all: clear the whole non-zero-count cache */
5869 uint8_t * const nnz= &h->non_zero_count_cache[0];
5870 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5871 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5872 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5873 h->last_qscale_diff = 0;
5876 s->current_picture.qscale_table[mb_xy]= s->qscale;
5877 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling from above */
5880 h->ref_count[0] >>= 1;
5881 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (16 pixels tall).
 * bS < 4: delegate to the dsp tc0-based normal filter.
 * bS == 4 (intra edge): strong filter written out inline below.
 * NOTE(review): the bS<4 guard and some brace lines are elided in this
 * listing. */
5888 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5890 const int index_a = qp + h->slice_alpha_c0_offset;
/* alpha/beta thresholds; tables are offset by 52 to allow negative indices */
5891 const int alpha = (alpha_table+52)[index_a];
5892 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5897 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5898 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5900 /* 16px edge length, because bS=4 is triggered by being at
5901 * the edge of an intra MB, so all 4 bS are the same */
5902 for( d = 0; d < 16; d++ ) {
5903 const int p0 = pix[-1];
5904 const int p1 = pix[-2];
5905 const int p2 = pix[-3];
5907 const int q0 = pix[0];
5908 const int q1 = pix[1];
5909 const int q2 = pix[2];
5911 if( FFABS( p0 - q0 ) < alpha &&
5912 FFABS( p1 - p0 ) < beta &&
5913 FFABS( q1 - q0 ) < beta ) {
/* strong filtering only for small p0/q0 steps; otherwise 3-tap fallback */
5915 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5916 if( FFABS( p2 - p0 ) < beta)
5918 const int p3 = pix[-4];
5920 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5921 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5922 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5925 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5927 if( FFABS( q2 - q0 ) < beta)
5929 const int q3 = pix[3];
5931 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5932 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5933 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5936 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5940 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5941 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5943 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge; normal (tc-based) and intra-strong
 * cases both go through the dsp chroma filter entry points. */
5949 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5951 const int index_a = qp + h->slice_alpha_c0_offset;
5952 const int alpha = (alpha_table+52)[index_a];
5953 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 (and 0 as the "skip" sentinel instead of -1) */
5958 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5959 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5961 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks the vertical luma edge of an MBAFF macroblock, one row at a
 * time, because per-row bS and qp can differ between the two MBs of the
 * pair (bS has 8 entries, qp has 2). */
5965 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5967 for( i = 0; i < 16; i++, pix += stride) {
5973 int bS_index = (i >> 1);
5976 bS_index |= (i & 1);
5979 if( bS[bS_index] == 0 ) {
/* select the qp of the MB this row belongs to */
5983 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5984 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5985 alpha = (alpha_table+52)[index_a];
5986 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (tc0-clipped) filtering for bS 1..3 */
5988 if( bS[bS_index] < 4 ) {
5989 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5990 const int p0 = pix[-1];
5991 const int p1 = pix[-2];
5992 const int p2 = pix[-3];
5993 const int q0 = pix[0];
5994 const int q1 = pix[1];
5995 const int q2 = pix[2];
5997 if( FFABS( p0 - q0 ) < alpha &&
5998 FFABS( p1 - p0 ) < beta &&
5999 FFABS( q1 - q0 ) < beta ) {
6003 if( FFABS( p2 - p0 ) < beta ) {
6004 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6007 if( FFABS( q2 - q0 ) < beta ) {
6008 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6012 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6013 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6014 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6015 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter for bS == 4 (intra edge) */
6018 const int p0 = pix[-1];
6019 const int p1 = pix[-2];
6020 const int p2 = pix[-3];
6022 const int q0 = pix[0];
6023 const int q1 = pix[1];
6024 const int q2 = pix[2];
6026 if( FFABS( p0 - q0 ) < alpha &&
6027 FFABS( p1 - p0 ) < beta &&
6028 FFABS( q1 - q0 ) < beta ) {
6030 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6031 if( FFABS( p2 - p0 ) < beta)
6033 const int p3 = pix[-4];
6035 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6036 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6037 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6040 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6042 if( FFABS( q2 - q0 ) < beta)
6044 const int q3 = pix[3];
6046 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6047 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6048 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6051 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6055 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6056 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6058 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: per-row vertical edge
 * filtering for an MBAFF pair (8 chroma rows, 2 possible qp values). */
6063 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6065 for( i = 0; i < 8; i++, pix += stride) {
6073 if( bS[bS_index] == 0 ) {
6077 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6078 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6079 alpha = (alpha_table+52)[index_a];
6080 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal chroma filter: tc = tc0 + 1, only p0/q0 are modified */
6082 if( bS[bS_index] < 4 ) {
6083 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6084 const int p0 = pix[-1];
6085 const int p1 = pix[-2];
6086 const int q0 = pix[0];
6087 const int q1 = pix[1];
6089 if( FFABS( p0 - q0 ) < alpha &&
6090 FFABS( p1 - p0 ) < beta &&
6091 FFABS( q1 - q0 ) < beta ) {
6092 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6094 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6095 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6096 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong chroma filter for bS == 4 */
6099 const int p0 = pix[-1];
6100 const int p1 = pix[-2];
6101 const int q0 = pix[0];
6102 const int q1 = pix[1];
6104 if( FFABS( p0 - q0 ) < alpha &&
6105 FFABS( p1 - p0 ) < beta &&
6106 FFABS( q1 - q0 ) < beta ) {
6108 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6109 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6110 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; mirror of filter_mb_edgev with
 * pixel offsets measured in strides (pix_next) instead of bytes. */
6116 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6118 const int index_a = qp + h->slice_alpha_c0_offset;
6119 const int alpha = (alpha_table+52)[index_a];
6120 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6121 const int pix_next = stride;
6126 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6127 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6129 /* 16px edge length, see filter_mb_edgev */
6130 for( d = 0; d < 16; d++ ) {
6131 const int p0 = pix[-1*pix_next];
6132 const int p1 = pix[-2*pix_next];
6133 const int p2 = pix[-3*pix_next];
6134 const int q0 = pix[0];
6135 const int q1 = pix[1*pix_next];
6136 const int q2 = pix[2*pix_next];
6138 if( FFABS( p0 - q0 ) < alpha &&
6139 FFABS( p1 - p0 ) < beta &&
6140 FFABS( q1 - q0 ) < beta ) {
6142 const int p3 = pix[-4*pix_next];
6143 const int q3 = pix[ 3*pix_next];
/* strong filter only for small steps, otherwise 3-tap fallback */
6145 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6146 if( FFABS( p2 - p0 ) < beta) {
6148 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6149 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6150 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6153 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6155 if( FFABS( q2 - q0 ) < beta) {
6157 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6158 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6159 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6162 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6166 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6167 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6169 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblocks one horizontal chroma edge via the dsp chroma filter entries;
 * mirror of filter_mb_edgecv for the vertical-filter (_v_) direction. */
6176 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6178 const int index_a = qp + h->slice_alpha_c0_offset;
6179 const int alpha = (alpha_table+52)[index_a];
6180 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 (0 as the "skip" sentinel) */
6185 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6186 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6188 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path whole-macroblock deblocking. Falls back to the generic
 * filter_mb() for picture-border MBs, chroma qp offsets, missing dsp
 * strength helper, or cross-slice edges with deblocking_filter==2;
 * otherwise computes boundary strengths per MB and calls the edge
 * filters directly. Not used with MBAFF (asserted below).
 * NOTE(review): several brace/FILTER-invocation lines are elided in
 * this listing. */
6192 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6193 MpegEncContext * const s = &h->s;
6194 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6196 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* fall back to the slow path when any precondition fails */
6200 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6202 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6203 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6204 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6207 assert(!FRAME_MBAFF);
6209 mb_type = s->current_picture.mb_type[mb_xy];
/* average qp with the left/top neighbour for the shared edges */
6210 qp = s->current_picture.qscale_table[mb_xy];
6211 qp0 = s->current_picture.qscale_table[mb_xy-1];
6212 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6213 qpc = get_chroma_qp( h, 0, qp );
6214 qpc0 = get_chroma_qp( h, 0, qp0 );
6215 qpc1 = get_chroma_qp( h, 0, qp1 );
6216 qp0 = (qp + qp0 + 1) >> 1;
6217 qp1 = (qp + qp1 + 1) >> 1;
6218 qpc0 = (qpc + qpc0 + 1) >> 1;
6219 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this qp the filter provably changes nothing: skip the MB */
6220 qp_thresh = 15 - h->slice_alpha_c0_offset;
6221 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6222 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed strengths (4 on MB edges, 3 inside / on field edges) */
6225 if( IS_INTRA(mb_type) ) {
6226 int16_t bS4[4] = {4,4,4,4};
6227 int16_t bS3[4] = {3,3,3,3};
6228 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6229 if( IS_8x8DCT(mb_type) ) {
6230 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6231 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6232 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6233 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6235 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6236 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6237 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6238 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6239 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6240 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6241 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6242 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6244 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6245 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6246 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6247 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6248 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6249 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6250 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6251 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: derive per-edge strengths (dsp helper or the 8x8DCT
 * shortcut), then apply them through the FILTER macro below */
6254 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6255 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6257 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6259 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6261 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6262 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6263 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6264 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6266 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6267 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6268 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6269 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* intra neighbours force bS=4 (3 vertically in field pictures) */
6271 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6272 bSv[0][0] = 0x0004000400040004ULL;
6273 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6274 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6276 #define FILTER(hv,dir,edge)\
6277 if(bSv[dir][edge]) {\
6278 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6280 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6281 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6287 } else if( IS_8x8DCT(mb_type) ) {
/* Deblocking (loop) filter for one macroblock — full (non-fast) path.
 * Computes boundary strengths bS for each 4-sample edge and applies the
 * edge filters to the luma plane and both chroma planes.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
6306 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6307 MpegEncContext * const s = &h->s;
6308 const int mb_xy= mb_x + mb_y*s->mb_stride;
6309 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a tighter vertical-MV difference threshold (2 vs 4). */
6310 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6311 int first_vertical_edge_done = 0;
6314 //for sufficiently low qp, filtering wouldn't do anything
6315 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6317 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6318 int qp = s->current_picture.qscale_table[mb_xy];
6320 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6321 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6326 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6327 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6328 int top_type, left_type[2];
6329 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6330 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6331 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild neighbour non-zero-count cache entries from CBP bits when the
 * neighbour used the 8x8 transform. */
6333 if(IS_8x8DCT(top_type)){
6334 h->non_zero_count_cache[4+8*0]=
6335 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6336 h->non_zero_count_cache[6+8*0]=
6337 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6339 if(IS_8x8DCT(left_type[0])){
6340 h->non_zero_count_cache[3+8*1]=
6341 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6343 if(IS_8x8DCT(left_type[1])){
6344 h->non_zero_count_cache[3+8*3]=
6345 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6348 if(IS_8x8DCT(mb_type)){
/* For 8x8 DCT, each 8x8 block's four cache slots share one CBP-derived flag. */
6349 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6350 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6352 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6353 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6355 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6356 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6358 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6359 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6364 // left mb is in picture
6365 && h->slice_table[mb_xy-1] != 0xFFFF
6366 // and current and left pair do not have the same interlaced type
6367 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6368 // and left mb is in the same slice if deblocking_filter == 2
6369 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6370 /* First vertical edge is different in MBAFF frames
6371 * There are 8 different bS to compute and 2 different Qp
6373 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6374 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6379 int mb_qp, mbn0_qp, mbn1_qp;
6381 first_vertical_edge_done = 1;
/* Intra MBs force maximal boundary strength (4) on the first vertical edge. */
6383 if( IS_INTRA(mb_type) )
6384 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6386 for( i = 0; i < 8; i++ ) {
6387 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6389 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6391 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6392 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6393 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6395 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6402 mb_qp = s->current_picture.qscale_table[mb_xy];
6403 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6404 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
/* Edge QP = rounded average of the two adjacent MBs' QPs, computed
 * separately for luma (qp) and the two chroma planes (bqp/rqp). */
6405 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6406 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6407 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6408 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6409 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6410 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6411 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6412 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6413 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6414 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6417 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6418 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6419 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6420 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6421 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6423 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6424 for( dir = 0; dir < 2; dir++ )
6427 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6428 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6429 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6430 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* start==1 skips edge 0 when the neighbour MB is outside any slice. */
6431 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6433 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6434 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6435 // how often to recheck mv-based bS when iterating between edges
6436 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6437 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6438 // how often to recheck mv-based bS when iterating along each edge
6439 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6441 if (first_vertical_edge_done) {
6443 first_vertical_edge_done = 0;
6446 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6449 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6450 && !IS_INTERLACED(mb_type)
6451 && IS_INTERLACED(mbm_type)
6453 // This is a special case in the norm where the filtering must
6454 // be done twice (one each of the field) even if we are in a
6455 // frame macroblock.
6457 static const int nnz_idx[4] = {4,5,6,3};
6458 unsigned int tmp_linesize = 2 * linesize;
6459 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6460 int mbn_xy = mb_xy - 2 * s->mb_stride;
6465 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6466 if( IS_INTRA(mb_type) ||
6467 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6468 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6470 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6471 for( i = 0; i < 4; i++ ) {
6472 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6473 mbn_nnz[nnz_idx[i]] != 0 )
6479 // Do not use s->qscale as luma quantizer because it has not the same
6480 // value in IPCM macroblocks.
6481 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6482 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6483 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6484 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6485 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6486 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6487 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6488 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main edge loop: edge 0 borders the neighbour MB (mbm_xy), edges 1-3 are
 * internal to the current MB. */
6495 for( edge = start; edge < edges; edge++ ) {
6496 /* mbn_xy: neighbor macroblock */
6497 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6498 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6499 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* With 8x8 DCT, odd internal edges are not filtered. */
6503 if( (edge&1) && IS_8x8DCT(mb_type) )
6506 if( IS_INTRA(mb_type) ||
6507 IS_INTRA(mbn_type) ) {
6510 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6511 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6520 bS[0] = bS[1] = bS[2] = bS[3] = value;
6525 if( edge & mask_edge ) {
6526 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6529 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6530 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6533 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6534 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6535 int bn_idx= b_idx - (dir ? 8:1);
/* bS=1 if refs differ or any MV component differs by >= 1 full pel
 * (4 quarter-pels horizontally, mvy_limit vertically). */
6538 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6539 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6540 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6541 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6544 if(h->slice_type_nos == FF_B_TYPE && v){
/* B slices: also compare each list against the opposite list (ln). */
6546 for( l = 0; !v && l < 2; l++ ) {
6548 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6549 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6550 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6554 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Per-4x4 path: recompute bS for each of the 4 sub-edges. */
6560 for( i = 0; i < 4; i++ ) {
6561 int x = dir == 0 ? edge : i;
6562 int y = dir == 0 ? i : edge;
6563 int b_idx= 8 + 4 + x + 8*y;
6564 int bn_idx= b_idx - (dir ? 8:1);
6566 if( h->non_zero_count_cache[b_idx] != 0 ||
6567 h->non_zero_count_cache[bn_idx] != 0 ) {
6573 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6574 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6575 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6576 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6582 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6584 for( l = 0; l < 2; l++ ) {
6586 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6587 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6588 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All four sub-edge strengths zero: nothing to filter on this edge. */
6597 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6602 // Do not use s->qscale as luma quantizer because it has not the same
6603 // value in IPCM macroblocks.
6604 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6605 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6606 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6607 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Chroma edges exist only at even luma edge positions (4:2:0 subsampling). */
6609 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6610 if( (edge&1) == 0 ) {
6611 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6612 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6613 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6614 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6617 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6618 if( (edge&1) == 0 ) {
6619 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6620 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6621 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6622 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode one slice (thread entry point): runs the macroblock loop using
 * either the CABAC or the CAVLC entropy decoder, reports decoded/errored
 * MB ranges to the error concealment via ff_er_add_slice().
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
6629 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6630 H264Context *h = *(void**)arg;
6631 MpegEncContext * const s = &h->s;
/* Data-partitioned frames only mark AC errors/ends; otherwise all bits. */
6632 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6636 if( h->pps.cabac ) {
/* CABAC path: CABAC data is byte-aligned after the slice header. */
6640 align_get_bits( &s->gb );
6643 ff_init_cabac_states( &h->cabac);
6644 ff_init_cabac_decoder( &h->cabac,
6645 s->gb.buffer + get_bits_count(&s->gb)/8,
6646 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6647 /* calculate pre-state */
6648 for( i= 0; i < 460; i++ ) {
6650 if( h->slice_type_nos == FF_I_TYPE )
6651 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6653 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6656 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6658 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6663 int ret = decode_mb_cabac(h);
6665 //STOP_TIMER("decode_mb_cabac")
6667 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the second MB of the vertical pair as well. */
6669 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6672 if(ret>=0) ret = decode_mb_cabac(h);
6674 if(ret>=0) hl_decode_mb(h);
6677 eos = get_cabac_terminate( &h->cabac );
/* Overread beyond 2 bytes past the end means the bitstream is broken. */
6679 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6680 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6681 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6685 if( ++s->mb_x >= s->mb_width ) {
6687 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6689 if(FIELD_OR_MBAFF_PICTURE) {
6694 if( eos || s->mb_y >= s->mb_height ) {
6695 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6696 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path. */
6703 int ret = decode_mb_cavlc(h);
6705 if(ret>=0) hl_decode_mb(h);
6707 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6709 ret = decode_mb_cavlc(h);
6711 if(ret>=0) hl_decode_mb(h);
6716 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6717 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6722 if(++s->mb_x >= s->mb_width){
6724 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6726 if(FIELD_OR_MBAFF_PICTURE) {
6729 if(s->mb_y >= s->mb_height){
6730 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Slice ends exactly at the bitstream end => clean end; otherwise error. */
6732 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6733 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6737 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6744 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6745 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6746 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6747 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6751 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6760 for(;s->mb_y < s->mb_height; s->mb_y++){
6761 for(;s->mb_x < s->mb_width; s->mb_x++){
6762 int ret= decode_mb(h);
6767 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d", s->mb_x, s->mb_y);
6768 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6773 if(++s->mb_x >= s->mb_width){
6775 if(++s->mb_y >= s->mb_height){
6776 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6777 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6781 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6788 if(get_bits_count(&s->gb) >= s->gb.size_in_bits){
6789 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6790 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6794 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6801 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6804 return -1; //not reached
/* Parse an SEI picture-timing message: CPB/DPB delays (when HRD parameters
 * are present in the SPS), pic_struct, and optional per-field clock
 * timestamps. Returns 0 on success (visible paths).
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
6807 static int decode_picture_timing(H264Context *h){
6808 MpegEncContext * const s = &h->s;
6809 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6810 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6811 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6813 if(h->sps.pic_struct_present_flag){
6814 unsigned int i, num_clock_ts;
6815 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Values above FRAME_TRIPLING are reserved — reject them. */
6817 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6820 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6822 for (i = 0 ; i < num_clock_ts ; i++){
6823 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6824 unsigned int full_timestamp_flag;
6825 skip_bits(&s->gb, 2); /* ct_type */
6826 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6827 skip_bits(&s->gb, 5); /* counting_type */
6828 full_timestamp_flag = get_bits(&s->gb, 1);
6829 skip_bits(&s->gb, 1); /* discontinuity_flag */
6830 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6831 skip_bits(&s->gb, 8); /* n_frames */
6832 if(full_timestamp_flag){
6833 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6834 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6835 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* Without a full timestamp, each component is individually flagged. */
6837 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6838 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6839 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6840 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6841 if(get_bits(&s->gb, 1)) /* hours_flag */
6842 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6846 if(h->sps.time_offset_length > 0)
6847 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse SEI user_data_unregistered: copies up to sizeof(user_data)-1 bytes,
 * scans the payload (past its 16-byte UUID) for an x264 version banner to
 * record h->x264_build (used elsewhere for bug workarounds), then skips any
 * remaining payload bytes.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
6854 static int decode_unregistered_user_data(H264Context *h, int size){
6855 MpegEncContext * const s = &h->s;
6856 uint8_t user_data[16+256];
6862 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6863 user_data[i]= get_bits(&s->gb, 8);
/* user_data+16 skips the 16-byte UUID preceding the payload text. */
6867 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6868 if(e==1 && build>=0)
6869 h->x264_build= build;
6871 if(s->avctx->debug & FF_DEBUG_BUGS)
6872 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume any payload bytes beyond what was buffered above. */
6875 skip_bits(&s->gb, 8);
/* Parse all SEI messages in the current NAL unit. Each message's type and
 * size are coded as runs of 0xFF bytes plus a final byte; unknown payload
 * types are skipped.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
6880 static int decode_sei(H264Context *h){
6881 MpegEncContext * const s = &h->s;
6883 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* ff_parse_sei-style coding: 0xFF bytes accumulate, last byte terminates. */
6888 type+= show_bits(&s->gb, 8);
6889 }while(get_bits(&s->gb, 8) == 255);
6893 size+= show_bits(&s->gb, 8);
6894 }while(get_bits(&s->gb, 8) == 255);
6897 case 1: // Picture timing SEI
6898 if(decode_picture_timing(h) < 0)
6902 if(decode_unregistered_user_data(h, size) < 0)
6906 skip_bits(&s->gb, 8*size);
6909 //FIXME check bits here
6910 align_get_bits(&s->gb);
/* Parse HRD (hypothetical reference decoder) parameters from the VUI:
 * per-CPB rate/size entries are skipped; only the delay field lengths and
 * time_offset_length are stored into the SPS for later SEI parsing.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
6916 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6917 MpegEncContext * const s = &h->s;
6919 cpb_count = get_ue_golomb(&s->gb) + 1;
6920 get_bits(&s->gb, 4); /* bit_rate_scale */
6921 get_bits(&s->gb, 4); /* cpb_size_scale */
6922 for(i=0; i<cpb_count; i++){
6923 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6924 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6925 get_bits1(&s->gb); /* cbr_flag */
6927 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6928 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6929 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6930 sps->time_offset_length = get_bits(&s->gb, 5);
/* Parse VUI parameters attached to an SPS: sample aspect ratio, video
 * signal description, timing info, HRD parameters, and the bitstream
 * restriction section (num_reorder_frames). Fields not needed by the
 * decoder are read and discarded.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
6933 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6934 MpegEncContext * const s = &h->s;
6935 int aspect_ratio_info_present_flag;
6936 unsigned int aspect_ratio_idc;
6938 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6940 if( aspect_ratio_info_present_flag ) {
6941 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16+16 bit num/den pair; other idc
 * values index the predefined pixel_aspect table. */
6942 if( aspect_ratio_idc == EXTENDED_SAR ) {
6943 sps->sar.num= get_bits(&s->gb, 16);
6944 sps->sar.den= get_bits(&s->gb, 16);
6945 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6946 sps->sar= pixel_aspect[aspect_ratio_idc];
6948 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6955 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6957 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6958 get_bits1(&s->gb); /* overscan_appropriate_flag */
6961 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6962 get_bits(&s->gb, 3); /* video_format */
6963 get_bits1(&s->gb); /* video_full_range_flag */
6964 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6965 get_bits(&s->gb, 8); /* colour_primaries */
6966 get_bits(&s->gb, 8); /* transfer_characteristics */
6967 get_bits(&s->gb, 8); /* matrix_coefficients */
6971 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6972 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6973 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6976 sps->timing_info_present_flag = get_bits1(&s->gb);
6977 if(sps->timing_info_present_flag){
6978 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6979 sps->time_scale = get_bits_long(&s->gb, 32);
6980 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear once for NAL and once for VCL streams. */
6983 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6984 if(sps->nal_hrd_parameters_present_flag)
6985 decode_hrd_parameters(h, sps);
6986 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6987 if(sps->vcl_hrd_parameters_present_flag)
6988 decode_hrd_parameters(h, sps);
6989 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6990 get_bits1(&s->gb); /* low_delay_hrd_flag */
6991 sps->pic_struct_present_flag = get_bits1(&s->gb);
6993 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6994 if(sps->bitstream_restriction_flag){
6995 unsigned int num_reorder_frames;
6996 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6997 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6998 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6999 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7000 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7001 num_reorder_frames= get_ue_golomb(&s->gb);
7002 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Sanity bound: the DPB cannot hold more than 16 frames. */
7004 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7005 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7009 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list (16 or 64 entries) in zigzag order. If the list is
 * not present in the bitstream, copy the fallback; if the first delta makes
 * the first entry zero, use the JVT default list instead. Entries are
 * delta-coded and a value of 0 repeats the previous ('last') entry.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
7015 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7016 const uint8_t *jvt_list, const uint8_t *fallback_list){
7017 MpegEncContext * const s = &h->s;
7018 int i, last = 8, next = 8;
7019 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7020 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7021 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7023 for(i=0;i<size;i++){
7025 next = (last + get_se_golomb(&s->gb)) & 0xff;
7026 if(!i && !next){ /* matrix not written, we use the preset one */
7027 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7030 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS. Fallback rules:
 * a PPS falls back to the SPS matrices when the SPS carried any; otherwise
 * (and for an SPS) the JVT defaults are used. Within the set, each chroma
 * list falls back to the previously decoded list of the same intra/inter
 * class.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
7034 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7035 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7036 MpegEncContext * const s = &h->s;
7037 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7038 const uint8_t *fallback[4] = {
7039 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7040 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7041 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7042 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7044 if(get_bits1(&s->gb)){
7045 sps->scaling_matrix_present |= is_sps;
7046 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7047 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7048 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7049 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7050 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7051 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables the 8x8 DCT. */
7052 if(is_sps || pps->transform_8x8_mode){
7053 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7054 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Helper shared by SPS/PPS parsing: validates 'id' against 'max', allocates
 * a zeroed structure of 'size' bytes into vec[id] if needed, and logs with
 * 'name' on failure. (Listing heavily elided at this point.) */
7060 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7063 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7064 const size_t size, const char *name)
7067 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7072 vec[id] = av_mallocz(size);
7074 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL unit into h->sps_buffers[sps_id].
 * Reads profile/level, POC configuration, reference frame count, picture
 * dimensions in MBs, interlace flags, cropping and optional VUI data, with
 * range checks on each count that sizes a table or allocation.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
7079 static inline int decode_seq_parameter_set(H264Context *h){
7080 MpegEncContext * const s = &h->s;
7081 int profile_idc, level_idc;
7082 unsigned int sps_id, tmp, mb_width, mb_height;
7086 profile_idc= get_bits(&s->gb, 8);
7087 get_bits1(&s->gb); //constraint_set0_flag
7088 get_bits1(&s->gb); //constraint_set1_flag
7089 get_bits1(&s->gb); //constraint_set2_flag
7090 get_bits1(&s->gb); //constraint_set3_flag
7091 get_bits(&s->gb, 4); // reserved
7092 level_idc= get_bits(&s->gb, 8);
7093 sps_id= get_ue_golomb(&s->gb);
7095 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7099 sps->profile_idc= profile_idc;
7100 sps->level_idc= level_idc;
/* Default: flat scaling matrices (all 16) until/unless parsed below. */
7102 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7103 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7104 sps->scaling_matrix_present = 0;
7106 if(sps->profile_idc >= 100){ //high profile
7107 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7108 if(sps->chroma_format_idc == 3)
7109 get_bits1(&s->gb); //residual_color_transform_flag
7110 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7111 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7112 sps->transform_bypass = get_bits1(&s->gb);
7113 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are always 4:2:0. */
7115 sps->chroma_format_idc= 1;
7118 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7119 sps->poc_type= get_ue_golomb(&s->gb);
7121 if(sps->poc_type == 0){ //FIXME #define
7122 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7123 } else if(sps->poc_type == 1){//FIXME #define
7124 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7125 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7126 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7127 tmp= get_ue_golomb(&s->gb);
/* Bound check before it is used as the loop limit below. */
7129 if(tmp >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7130 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7133 sps->poc_cycle_length= tmp;
7135 for(i=0; i<sps->poc_cycle_length; i++)
7136 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7137 }else if(sps->poc_type != 2){
7138 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7142 tmp= get_ue_golomb(&s->gb);
7143 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7144 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7147 sps->ref_frame_count= tmp;
7148 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7149 mb_width= get_ue_golomb(&s->gb) + 1;
7150 mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb_width/height against integer overflow before validating. */
7151 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7152 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7153 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7156 sps->mb_width = mb_width;
7157 sps->mb_height= mb_height;
7159 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7160 if(!sps->frame_mbs_only_flag)
7161 sps->mb_aff= get_bits1(&s->gb);
7165 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7167 #ifndef ALLOW_INTERLACE
7169 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7171 sps->crop= get_bits1(&s->gb);
7173 sps->crop_left = get_ue_golomb(&s->gb);
7174 sps->crop_right = get_ue_golomb(&s->gb);
7175 sps->crop_top = get_ue_golomb(&s->gb);
7176 sps->crop_bottom= get_ue_golomb(&s->gb);
7177 if(sps->crop_left || sps->crop_top){
7178 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7180 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7181 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7187 sps->crop_bottom= 0;
7190 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7191 if( sps->vui_parameters_present_flag )
7192 decode_vui_parameters(h, sps);
7194 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7195 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7196 sps_id, sps->profile_idc, sps->level_idc,
7198 sps->ref_frame_count,
7199 sps->mb_width, sps->mb_height,
7200 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7201 sps->direct_8x8_inference_flag ? "8B8" : "",
7202 sps->crop_left, sps->crop_right,
7203 sps->crop_top, sps->crop_bottom,
7204 sps->vui_parameters_present_flag ? "VUI" : "",
7205 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Precompute the luma-QP -> chroma-QP lookup table for chroma plane 't',
 * applying this PPS's chroma_qp_index_offset and clipping to [0,51]. */
7212 build_qp_table(PPS *pps, int t, int index)
7215 for(i = 0; i < 52; i++)
7216 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL unit into h->pps_buffers[pps_id].
 * Validates the referenced sps_id, reads entropy-coding mode, slice group
 * info (FMO unsupported), reference counts, QP offsets, deblocking and
 * transform flags, then derives the chroma QP tables.
 * NOTE(review): this listing has elided lines; comments describe only the
 * visible code. */
7219 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7220 MpegEncContext * const s = &h->s;
7221 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7224 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* The referenced SPS must already have been decoded. */
7228 tmp= get_ue_golomb(&s->gb);
7229 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7230 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7235 pps->cabac= get_bits1(&s->gb);
7236 pps->pic_order_present= get_bits1(&s->gb);
7237 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7238 if(pps->slice_group_count > 1 ){
7239 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7240 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7241 switch(pps->mb_slice_group_map_type){
7244 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7245 | run_length[ i ] |1 |ue(v) |
7250 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7252 | top_left_mb[ i ] |1 |ue(v) |
7253 | bottom_right_mb[ i ] |1 |ue(v) |
7261 | slice_group_change_direction_flag |1 |u(1) |
7262 | slice_group_change_rate_minus1 |1 |ue(v) |
7267 | slice_group_id_cnt_minus1 |1 |ue(v) |
7268 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7270 | slice_group_id[ i ] |1 |u(v) |
7275 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7276 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7277 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7278 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7279 pps->ref_count[0]= pps->ref_count[1]= 1;
7283 pps->weighted_pred= get_bits1(&s->gb);
7284 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7285 pps->init_qp= get_se_golomb(&s->gb) + 26;
7286 pps->init_qs= get_se_golomb(&s->gb) + 26;
7287 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7288 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7289 pps->constrained_intra_pred= get_bits1(&s->gb);
7290 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7292 pps->transform_8x8_mode= 0;
7293 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7294 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7295 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Optional trailing extension fields (transform_8x8_mode etc.) are only
 * present if bits remain before bit_length. */
7297 if(get_bits_count(&s->gb) < bit_length){
7298 pps->transform_8x8_mode= get_bits1(&s->gb);
7299 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7300 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7302 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7305 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7306 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
/* NOTE(review): this writes h->pps (the active PPS), not the 'pps' being
 * parsed — looks suspicious; verify against upstream before changing. */
7307 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7308 h->pps.chroma_qp_diff= 1;
7310 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7311 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7312 pps_id, pps->sps_id,
7313 pps->cabac ? "CABAC" : "CAVLC",
7314 pps->slice_group_count,
7315 pps->ref_count[0], pps->ref_count[1],
7316 pps->weighted_pred ? "weighted" : "",
7317 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7318 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7319 pps->constrained_intra_pred ? "CONSTR" : "",
7320 pps->redundant_pic_cnt_present ? "REDU" : "",
7321 pps->transform_8x8_mode ? "8x8DCT" : ""
7329 * Call decode_slice() for each context.
7331 * @param h h264 master context
7332 * @param context_count number of contexts to execute
7334 static void execute_decode_slices(H264Context *h, int context_count){
7335     MpegEncContext * const s = &h->s;
7336     AVCodecContext * const avctx= s->avctx;
     /* Single-context case: decode the slice directly on the master context. */
7340     if(context_count == 1) {
7341         decode_slice(avctx, &h);
     /* Multi-context case: propagate error-handling settings into each
      * worker context and reset its per-slice error counter before
      * handing the contexts to avctx->execute(). */
7343         for(i = 1; i < context_count; i++) {
7344             hx = h->thread_context[i];
7345             hx->s.error_recognition = avctx->error_recognition;
7346             hx->s.error_count = 0;
     /* Run decode_slice() over all thread contexts in parallel. */
7349         avctx->execute(avctx, (void *)decode_slice,
7350                        (void **)h->thread_context, NULL, context_count, sizeof(void*));
7352         /* pull back stuff from slices to master context */
     /* The last context holds the most advanced decode position; copy its
      * macroblock coordinates and picture state back to the master. */
7353         hx = h->thread_context[context_count - 1];
7354         s->mb_x = hx->s.mb_x;
7355         s->mb_y = hx->s.mb_y;
7356         s->dropable = hx->s.dropable;
7357         s->picture_structure = hx->s.picture_structure;
     /* Accumulate error counts from all worker contexts into the master. */
7358         for(i = 1; i < context_count; i++)
7359             h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units (either length-prefixed AVC/avcC
 * format or Annex-B start-code format), unescape each one, and dispatch it
 * by nal_unit_type (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
 * Slices are queued onto thread contexts and flushed in batches through
 * execute_decode_slices(). Returns the number of bytes consumed (negative
 * on error — full return paths not visible in this view). */
7364 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7365     MpegEncContext * const s = &h->s;
7366     AVCodecContext * const avctx= s->avctx;
7368     H264Context *hx; ///< thread context
7369     int context_count = 0;
     /* One decoding context per configured thread. */
7371     h->max_contexts = avctx->thread_count;
     /* Debug hex dump of the first bytes of the input buffer. */
7374         for(i=0; i<50; i++){
7375             av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* Without CHUNKS mode each call starts a new access unit, so reset the
      * slice counter; keep current_picture_ptr only while awaiting the
      * second field of an interlaced pair. */
7378     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7379         h->current_slice = 0;
7380         if (!s->first_field)
7381             s->current_picture_ptr= NULL;
7393             if(buf_index >= buf_size) break;
     /* AVC format: read the big-endian NAL size prefix (nal_length_size
      * bytes) and validate it against the remaining buffer. */
7395             for(i = 0; i < h->nal_length_size; i++)
7396                 nalsize = (nalsize << 8) | buf[buf_index++];
7397             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7402                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7407             // start code prefix search
7408             for(; buf_index + 3 < buf_size; buf_index++){
7409                 // This should always succeed in the first iteration.
7410                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7414             if(buf_index+3 >= buf_size) break;
     /* Decode this NAL on the next free thread context. */
7419         hx = h->thread_context[context_count];
     /* decode_nal() strips emulation-prevention bytes and fills in
      * nal_unit_type / nal_ref_idc on hx. */
7421         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7422         if (ptr==NULL || dst_length < 0){
     /* Trim trailing zero bytes before locating the rbsp_stop_one_bit. */
7425         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7427         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7429         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7430             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
     /* In AVC mode the declared NAL size must match what was consumed. */
7433         if (h->is_avc && (nalsize != consumed)){
7434             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7438         buf_index += consumed;
     /* Skip non-reference NALs when hurry_up / skip_frame asks for it. */
7440         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7441            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7446         switch(hx->nal_unit_type){
     /* IDR slice: an IDR must not be mixed with non-IDR slices of the same
      * picture; idr() flushes reference state. */
7448             if (h->nal_unit_type != NAL_IDR_SLICE) {
7449                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7452             idr(h); //FIXME ensure we don't loose some frames if there is reordering
     /* Regular slice: no data partitioning, all data read from s.gb. */
7454             init_get_bits(&hx->s.gb, ptr, bit_length);
7456             hx->inter_gb_ptr= &hx->s.gb;
7457             hx->s.data_partitioning = 0;
7459             if((err = decode_slice_header(hx, h)))
7462                 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
     /* Queue the slice for decoding unless skip settings discard it. */
7463             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7464                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7465                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7466                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7467                && avctx->skip_frame < AVDISCARD_ALL)
     /* DPA partition: carries the slice header; inter data arrives later. */
7471             init_get_bits(&hx->s.gb, ptr, bit_length);
7473             hx->inter_gb_ptr= NULL;
7474             hx->s.data_partitioning = 1;
7476             err = decode_slice_header(hx, h);
     /* DPB partition: intra residual data. */
7479             init_get_bits(&hx->intra_gb, ptr, bit_length);
7480             hx->intra_gb_ptr= &hx->intra_gb;
     /* DPC partition: inter residual data; once present the partitioned
      * slice can be decoded (subject to the same skip settings). */
7483             init_get_bits(&hx->inter_gb, ptr, bit_length);
7484             hx->inter_gb_ptr= &hx->inter_gb;
7486             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7487                && s->context_initialized
7489                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7490                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7491                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7492                && avctx->skip_frame < AVDISCARD_ALL)
     /* SEI message: parsed on the master context's bitstream reader. */
7496             init_get_bits(&s->gb, ptr, bit_length);
     /* Sequence parameter set; may update low_delay / has_b_frames. */
7500             init_get_bits(&s->gb, ptr, bit_length);
7501             decode_seq_parameter_set(h);
7503             if(s->flags& CODEC_FLAG_LOW_DELAY)
7506             if(avctx->has_b_frames < 2)
7507                 avctx->has_b_frames= !s->low_delay;
     /* Picture parameter set. */
7510             init_get_bits(&s->gb, ptr, bit_length);
7512             decode_picture_parameter_set(h, bit_length);
     /* NAL types that are recognized but intentionally ignored. */
7516         case NAL_END_SEQUENCE:
7517         case NAL_END_STREAM:
7518         case NAL_FILLER_DATA:
7520         case NAL_AUXILIARY_SLICE:
7523             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
     /* Flush a full batch of queued slices through the thread contexts. */
7526         if(context_count == h->max_contexts) {
7527             execute_decode_slices(h, context_count);
7532             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7534             /* Slice could not be decoded in parallel mode, copy down
7535              * NAL unit stuff to context 0 and restart. Note that
7536              * rbsp_buffer is not transferred, but since we no longer
7537              * run in parallel mode this should not be an issue. */
7538             h->nal_unit_type = hx->nal_unit_type;
7539             h->nal_ref_idc   = hx->nal_ref_idc;
     /* Flush any slices still queued when the buffer is exhausted. */
7545         execute_decode_slices(h, context_count);
7550 * returns the number of bytes consumed for building the current frame
/* Return the number of bytes consumed for building the current frame,
 * clamping the reported position to sane bounds: never 0 (so callers
 * always make progress) and never reaching into the last few bytes
 * without consuming the whole buffer. */
7552 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7553     if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7554     if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback: parse extradata (avcC) on first use,
 * decode the NAL units of one packet, then run the delayed-picture
 * reordering machinery to emit at most one frame in display order.
 * *data_size is set to sizeof(AVFrame) when a frame is output, 0 otherwise;
 * returns the number of input bytes consumed. */
7559 static int decode_frame(AVCodecContext *avctx,
7560                         void *data, int *data_size,
7561                         const uint8_t *buf, int buf_size)
7563     H264Context *h = avctx->priv_data;
7564     MpegEncContext *s = &h->s;
7565     AVFrame *pict = data;
7568     s->flags= avctx->flags;
7569     s->flags2= avctx->flags2;
7571 /* end of stream, output what is still in the buffers */
7572     if (buf_size == 0) {
7576 //FIXME factorize this with the output code below
         /* Pick the delayed picture with the smallest POC, stopping the
          * scan at a zero-POC or keyframe boundary. */
7577         out = h->delayed_pic[0];
7579         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7580             if(h->delayed_pic[i]->poc < out->poc){
7581                 out = h->delayed_pic[i];
         /* Compact the delayed-picture list over the emitted slot. */
7585         for(i=out_idx; h->delayed_pic[i]; i++)
7586             h->delayed_pic[i] = h->delayed_pic[i+1];
7589             *data_size = sizeof(AVFrame);
7590             *pict= *(AVFrame*)out;
     /* First call with avcC-style extradata: parse the embedded SPS/PPS. */
7596     if(h->is_avc && !h->got_avcC) {
7597         int i, cnt, nalsize;
7598         unsigned char *p = avctx->extradata;
7599         if(avctx->extradata_size < 7) {
7600             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7604             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7607         /* sps and pps in the avcC always have length coded with 2 bytes,
7608            so put a fake nal_length_size = 2 while parsing them */
7609         h->nal_length_size = 2;
7610         // Decode sps from avcC
7611         cnt = *(p+5) & 0x1f; // Number of sps
7613         for (i = 0; i < cnt; i++) {
7614             nalsize = AV_RB16(p) + 2;
7615             if(decode_nal_units(h, p, nalsize) < 0) {
7616                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7621         // Decode pps from avcC
7622         cnt = *(p++); // Number of pps
7623         for (i = 0; i < cnt; i++) {
7624             nalsize = AV_RB16(p) + 2;
7625             if(decode_nal_units(h, p, nalsize) != nalsize) {
7626                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7631         // Now store right nal length size, that will be use to parse all other nals
7632         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7633         // Do not reparse avcC
     /* Annex-B style extradata: feed it through the NAL parser once. */
7637     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7638         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
     /* Decode the actual packet payload. */
7643     buf_index=decode_nal_units(h, buf, buf_size);
7647     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7648         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7649         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* A complete picture has been decoded: finish it and run output
      * reordering. In CHUNKS mode this is gated on reaching the last MB row. */
7653     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7654         Picture *out = s->current_picture_ptr;
7655         Picture *cur = s->current_picture_ptr;
7656         int i, pics, cross_idr, out_of_order, out_idx;
7660         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7661         s->current_picture_ptr->pict_type= s->pict_type;
         /* Apply memory-management control operations and roll POC /
          * frame_num state forward for the next picture. */
7664             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7665         h->prev_poc_msb= h->poc_msb;
7666         h->prev_poc_lsb= h->poc_lsb;
7668         h->prev_frame_num_offset= h->frame_num_offset;
7669         h->prev_frame_num= h->frame_num;
7672          * FIXME: Error handling code does not seem to support interlaced
7673          * when slices span multiple rows
7674          * The ff_er_add_slice calls don't work right for bottom
7675          * fields; they cause massive erroneous error concealing
7676          * Error marking covers both fields (top and bottom).
7677          * This causes a mismatched s->error_count
7678          * and a bad error table. Further, the error count goes to
7679          * INT_MAX when called for bottom field, because mb_y is
7680          * past end by one (callers fault) and resync_mb_y != 0
7681          * causes problems for the first MB line, too.
         /* Only one field decoded so far: nothing to output yet. */
7688         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7689             /* Wait for second field. */
7693         cur->repeat_pict = 0;
7695         /* Signal interlacing information externally. */
7696         /* Prioritize picture timing SEI information over used decoding process if it exists. */
7697         if(h->sps.pic_struct_present_flag){
7698             switch (h->sei_pic_struct)
7700             case SEI_PIC_STRUCT_FRAME:
7701                 cur->interlaced_frame = 0;
7703             case SEI_PIC_STRUCT_TOP_FIELD:
7704             case SEI_PIC_STRUCT_BOTTOM_FIELD:
7705             case SEI_PIC_STRUCT_TOP_BOTTOM:
7706             case SEI_PIC_STRUCT_BOTTOM_TOP:
7707                 cur->interlaced_frame = 1;
7709             case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7710             case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7711                 // Signal the possibility of telecined film externally (pic_struct 5,6)
7712                 // From these hints, let the applications decide if they apply deinterlacing.
7713                 cur->repeat_pict = 1;
7714                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7716             case SEI_PIC_STRUCT_FRAME_DOUBLING:
7717                 // Force progressive here, as doubling interlaced frame is a bad idea.
7718                 cur->interlaced_frame = 0;
7719                 cur->repeat_pict = 2;
7721             case SEI_PIC_STRUCT_FRAME_TRIPLING:
7722                 cur->interlaced_frame = 0;
7723                 cur->repeat_pict = 4;
7727             /* Derive interlacing flag from used decoding process. */
7728             cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7731         if (cur->field_poc[0] != cur->field_poc[1]){
7732             /* Derive top_field_first from field pocs. */
7733             cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7735             if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7736                 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7737                 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7738                   || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7739                     cur->top_field_first = 1;
7741                     cur->top_field_first = 0;
7743                 /* Most likely progressive */
7744                 cur->top_field_first = 0;
7748     //FIXME do something with unavailable reference frames
7750         /* Sort B-frames into display order */
         /* Grow the reorder delay per the stream's declared or inferred
          * requirements. */
7752         if(h->sps.bitstream_restriction_flag
7753            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7754             s->avctx->has_b_frames = h->sps.num_reorder_frames;
7758         if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7759            && !h->sps.bitstream_restriction_flag){
7760             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
         /* Append the current picture to the delayed list, pinning it with
          * DELAYED_PIC_REF so it is not recycled before output. */
7765         while(h->delayed_pic[pics]) pics++;
7767         assert(pics <= MAX_DELAYED_PIC_COUNT);
7769         h->delayed_pic[pics++] = cur;
7770         if(cur->reference == 0)
7771             cur->reference = DELAYED_PIC_REF;
         /* Select the lowest-POC delayed picture as the output candidate. */
7773         out = h->delayed_pic[0];
7775         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7776             if(h->delayed_pic[i]->poc < out->poc){
7777                 out = h->delayed_pic[i];
         /* cross_idr: the candidate sits at/over an IDR boundary, so its POC
          * cannot be compared against the previously output POC. */
7780         cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7782         out_of_order = !cross_idr && out->poc < h->outputed_poc;
         /* Adaptively bump has_b_frames when output would be out of order
          * and the stream gave no restriction info. */
7784         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7786         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7788                  ((!cross_idr && out->poc > h->outputed_poc + 2)
7789                   || cur->pict_type == FF_B_TYPE)))
7792             s->avctx->has_b_frames++;
         /* Pop the candidate once enough pictures are buffered (or it is
          * out of order anyway); emit it only in the in-order case. */
7795         if(out_of_order || pics > s->avctx->has_b_frames){
7796             out->reference &= ~DELAYED_PIC_REF;
7797             for(i=out_idx; h->delayed_pic[i]; i++)
7798                 h->delayed_pic[i] = h->delayed_pic[i+1];
7800         if(!out_of_order && pics > s->avctx->has_b_frames){
7801             *data_size = sizeof(AVFrame);
7803                 h->outputed_poc = out->poc;
7804             *pict= *(AVFrame*)out;
7806             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7811     assert(pict->data[0] || !*data_size);
7812     ff_print_debug_info(s, pict);
7813 //printf("out %d\n", (int)pict->data[0]);
7816     /* Return the Picture timestamp as the frame number */
7817     /* we subtract 1 because it is added on utils.c */
7818     avctx->frame_number = s->picture_number - 1;
7820     return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with the availability of the macroblocks neighboring
 * the current one: a neighbor is available only if it lies inside the
 * frame and belongs to the same slice (same entry in slice_table).
 * Index layout used here: 0=top-left, 1=top, 2=top-right, 3=left;
 * 4 and 5 are hard-wired (see FIXMEs). */
7823 static inline void fill_mb_avail(H264Context *h){
7824     MpegEncContext * const s = &h->s;
7825     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
         /* Row above: top-left also requires mb_x > 0, top-right requires
          * mb_x+1 still inside the row. */
7828         h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7829         h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride     ] == h->slice_num;
7830         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
     /* Left neighbor on the current row. */
7836     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7837     h->mb_avail[4]= 1; //FIXME move out
7838     h->mb_avail[5]= 0; //FIXME move out
7846 #define SIZE (COUNT*40)
7852 // int int_temp[10000];
7854 AVCodecContext avctx;
7856 dsputil_init(&dsp, &avctx);
7858 init_put_bits(&pb, temp, SIZE);
7859 printf("testing unsigned exp golomb\n");
7860 for(i=0; i<COUNT; i++){
7862 set_ue_golomb(&pb, i);
7863 STOP_TIMER("set_ue_golomb");
7865 flush_put_bits(&pb);
7867 init_get_bits(&gb, temp, 8*SIZE);
7868 for(i=0; i<COUNT; i++){
7871 s= show_bits(&gb, 24);
7874 j= get_ue_golomb(&gb);
7876 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7879 STOP_TIMER("get_ue_golomb");
7883 init_put_bits(&pb, temp, SIZE);
7884 printf("testing signed exp golomb\n");
7885 for(i=0; i<COUNT; i++){
7887 set_se_golomb(&pb, i - COUNT/2);
7888 STOP_TIMER("set_se_golomb");
7890 flush_put_bits(&pb);
7892 init_get_bits(&gb, temp, 8*SIZE);
7893 for(i=0; i<COUNT; i++){
7896 s= show_bits(&gb, 24);
7899 j= get_se_golomb(&gb);
7900 if(j != i - COUNT/2){
7901 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7904 STOP_TIMER("get_se_golomb");
7908 printf("testing 4x4 (I)DCT\n");
7911 uint8_t src[16], ref[16];
7912 uint64_t error= 0, max_error=0;
7914 for(i=0; i<COUNT; i++){
7916 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7917 for(j=0; j<16; j++){
7918 ref[j]= random()%255;
7919 src[j]= random()%255;
7922 h264_diff_dct_c(block, src, ref, 4);
7925 for(j=0; j<16; j++){
7926 // printf("%d ", block[j]);
7927 block[j]= block[j]*4;
7928 if(j&1) block[j]= (block[j]*4 + 2)/5;
7929 if(j&4) block[j]= (block[j]*4 + 2)/5;
7933 s->dsp.h264_idct_add(ref, block, 4);
7934 /* for(j=0; j<16; j++){
7935 printf("%d ", ref[j]);
7939 for(j=0; j<16; j++){
7940 int diff= FFABS(src[j] - ref[j]);
7943 max_error= FFMAX(max_error, diff);
7946 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7947 printf("testing quantizer\n");
7948 for(qp=0; qp<52; qp++){
7950 src1_block[i]= src2_block[i]= random()%255;
7953 printf("Testing NAL layer\n");
7955 uint8_t bitstream[COUNT];
7956 uint8_t nal[COUNT*2];
7958 memset(&h, 0, sizeof(H264Context));
7960 for(i=0; i<COUNT; i++){
7968 for(j=0; j<COUNT; j++){
7969 bitstream[j]= (random() % 255) + 1;
7972 for(j=0; j<zeros; j++){
7973 int pos= random() % COUNT;
7974 while(bitstream[pos] == 0){
7983 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7985 printf("encoding failed\n");
7989 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7993 if(out_length != COUNT){
7994 printf("incorrect length %d %d\n", out_length, COUNT);
7998 if(consumed != nal_length){
7999 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8003 if(memcmp(bitstream, out, COUNT)){
8004 printf("mismatch\n");
8010 printf("Testing RBSP\n");
/* AVCodec close callback: release all decoder-owned allocations — the two
 * RBSP unescape buffers, the per-picture tables, and every cached SPS/PPS.
 * av_freep() also NULLs the freed pointers, making double-close safe. */
8018 static av_cold int decode_end(AVCodecContext *avctx)
8020     H264Context *h = avctx->priv_data;
8021     MpegEncContext *s = &h->s;
8024     av_freep(&h->rbsp_buffer[0]);
8025     av_freep(&h->rbsp_buffer[1]);
8026     free_tables(h); //FIXME cleanup init stuff perhaps
     /* Free all cached parameter sets (unused slots are already NULL). */
8028     for(i = 0; i < MAX_SPS_COUNT; i++)
8029         av_freep(h->sps_buffers + i);
8031     for(i = 0; i < MAX_PPS_COUNT; i++)
8032         av_freep(h->pps_buffers + i);
8036 //    memset(h, 0, sizeof(H264Context));
8042 AVCodec h264_decoder = {
8046 sizeof(H264Context),
8051 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8053 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),