2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC coeff_token VLCs: one table per nC context class; the per-class
 * static table sizes below match the combined allocation on the next line. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* coeff_token VLC used for chroma DC blocks */
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros VLCs, one per possible total_coeff value (15 tables) */
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
/* total_zeros VLCs for chroma DC (3 tables) */
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before VLCs (6 tables) */
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
/* run_before table for the remaining run context
 * NOTE(review): the VLC struct declaration for run7 is not visible in this
 * elided listing, only its table storage. */
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations. The svq3_* helpers are presumably shared with the
 * SVQ3 decoder — TODO confirm against the rest of the file. filter_mb and
 * filter_mb_fast implement per-macroblock deblocking. */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into a single uint32_t, with the byte layout
 * chosen per host endianness so that the pair can be stored/loaded as one
 * 32-bit word.
 * NOTE(review): the #else / #endif and closing brace of this function are
 * missing from this elided listing. */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* Lookup table for qp % 6, for qp in [0,51] — avoids a runtime division. */
89 const uint8_t ff_rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table for qp / 6, for qp in [0,51]. */
93 const uint8_t ff_div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Left-neighbour 4x4 block index remappings, one row per MBAFF pairing
 * case; selected in fill_caches() below.
 * NOTE(review): the initializer rows are missing from this elided listing. */
97 static const int left_block_options[4][8]={
/* Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero coefficient counts, motion vectors, reference indices, mvd and
 * direct flags) from the frame-wide tables, handling slice boundaries and
 * MBAFF field/frame pairing. When for_deblock is set, a reduced variant is
 * used for the loop filter.
 * NOTE(review): this listing is elided — many lines (if-conditions, else
 * branches, braces) are missing; comments below describe only what is
 * visible. */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
110 int topleft_partition= -1;
/* top neighbour address; with field pictures the stride is doubled */
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* default (non-MBAFF) neighbour addresses */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF path (guard not visible in this listing): recompute neighbour
 * addresses in macroblock-pair units; *_mb_frame_flag is 1 when the
 * respective pair is coded frame (non-interlaced). */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
/* adjust top neighbour depending on field/frame pairing of current MB */
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
/* same adjustment for the top-left neighbour */
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
/* same adjustment for the top-right neighbour */
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
/* left neighbour: when field/frame coding differs across the pair, both
 * left_xy entries point into the left pair and a remap row is selected */
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
/* publish neighbour addresses for later use (e.g. by the loop filter) */
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* deblock path: a slice_table value < 255 marks a decoded MB, so types are
 * taken across slice boundaries here */
181 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF inter deblock: undo the ref_cache transform applied for MC */
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* decode path: neighbours are valid only within the same slice */
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra MBs: compute sample-availability bitmasks; with constrained intra
 * prediction only intra neighbours count as available */
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
/* MBAFF: left availability differs when field/frame coding mismatches */
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
/* intra4x4: import neighbouring prediction modes into the cache, or a
 * default when the neighbour is unavailable */
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
/* left intra4x4 modes, remapped through left_block for MBAFF */
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non-zero-count cache: copy neighbour nnz values, or the CABAC/CAVLC
 * defaults when the neighbour is missing */
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of top/left neighbours (used by CABAC context modelling) */
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter/direct MBs: fill motion vector and reference index caches */
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
/* top row of the mv/ref cache from the bottom row of the top neighbour */
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* left column of the mv/ref cache, remapped through left_block */
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
/* top-left and top-right corners of the mv/ref cache */
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* mark never-filled cache cells as not available */
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* mvd cache (motion vector differences), used by CABAC context modelling */
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flags of neighbours */
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: remap cached refs/mvs between frame and field units. MAP_F2F is
 * applied once per neighbour cache cell via the MAP_MVS expansion below. */
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame->field: double the ref index, halve the vertical mv/mvd */
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
/* field->frame: halve the ref index, double the vertical mv/mvd */
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
/* count of 8x8-DCT neighbours, used for transform-size flag prediction */
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Writes the bottom row and right column of the intra4x4 prediction mode
 * cache back to the frame-wide table so that subsequent macroblocks can
 * read them as neighbours.
 * NOTE(review): index [7] store and closing brace are not visible in this
 * elided listing. */
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
/* remap tables: index = requested mode, value = substitute mode when the
 * corresponding neighbour is unavailable; -1 marks an illegal request */
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* fix up modes along the top edge when top samples are missing */
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* fix up modes along the left edge when left samples are missing */
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
/* substitute modes when a neighbour is unavailable; -1 marks an
 * illegal request (error paths below) */
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/* left samples only partially available (MBAFF + constrained intra) */
627 if((h->left_samples_available&0x8080) != 0x8080){
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
/* prediction is the minimum of left and top modes; a negative value
 * means a neighbour is unavailable, forcing DC_PRED */
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
/* Writes the edge cells of the non-zero-count cache back to the frame-wide
 * table (luma bottom row / right column, then the chroma cells), so later
 * macroblocks can read them as neighbours.
 * NOTE(review): some trailing stores and the closing brace are not visible
 * in this elided listing. */
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
/* i presumably holds left+top here (computation elided from this listing);
 * < 64 means both neighbours were available, so round their average */
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" predictor (top-right, falling back to top-left) motion
 * vector for MV prediction, returning its reference index. Writes the
 * chosen MV into *C. Contains special handling for MBAFF, where field and
 * frame neighbours need their vertical components and ref indices rescaled.
 * NOTE(review): several guard conditions and braces are missing from this
 * elided listing. */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scratch cache cell used to return a rescaled MV through *C */
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: load the MV/ref at 4x4 coords (X4,Y4) from the frame-wide
 * tables, rescale the vertical component (MV_OP) and ref index (REF_OP)
 * between field and frame units, and return */
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* top-right unavailable: try the left neighbour in the other field */
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF path: use top-right if available, else fall back to top-left */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
/* neighbour predictors: A = left, B = top, C from fetch_diagonal_mv() */
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture */
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
/* >1 matches (or 0): median of the three neighbour MVs;
 * exactly 1 match: take that neighbour's MV directly */
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
/* only the left neighbour exists: use A, otherwise fall back to median */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top partition: shortcut to the top neighbour's MV when refs match */
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom partition: shortcut to the left neighbour's MV when refs match */
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* fallback: generic median prediction */
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left partition: shortcut to the left neighbour's MV when refs match */
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* right partition: shortcut to the diagonal (C) predictor when refs match */
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
/* fallback: generic median prediction */
875 pred_motion(h, n, 2, list, ref, mx, my);
/* Predicts the motion vector for a P-skip macroblock: zero MV when a
 * neighbour is unavailable or a neighbour uses ref 0 with a zero MV,
 * otherwise the generic median prediction.
 * NOTE(review): the zero-MV assignment branch is elided from this listing. */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct distance scale factor for list0 reference i
 * relative to the POC pair (poc, poc1), with td/tb clipped to [-128,127]
 * and the result clipped to [-1024,1023].
 * NOTE(review): the td==0 / long_ref branch body is elided from this
 * listing. */
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Fills the dist_scale_factor tables used by temporal direct prediction:
 * a per-field table (indices offset by 16 into the field reference lists)
 * and the frame-level table for each list0 reference. */
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
/* per-field scale factors (MBAFF field references live at index 16+) */
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
/* frame-level scale factors */
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* Records the current picture's reference counts/POCs and, for temporal
 * direct B prediction, builds map_col_to_list0[_field]: a mapping from the
 * co-located picture's reference indices to this slice's list0 indices.
 * NOTE(review): several loop headers/braces are elided from this listing. */
925 static inline void direct_ref_list_init(H264Context * const h){
926 MpegEncContext * const s = &h->s;
927 Picture * const ref1 = &h->ref_list[1][0];
928 Picture * const cur = s->current_picture_ptr;
929 int list, i, j, field, rfield;
/* sidx selects the field slot (top/bottom) in the per-picture tables */
930 int sidx= s->picture_structure&1;
931 int ref1sidx= ref1->reference&1;
/* store this slice's ref counts and encoded POCs (4*frame_num + ref bits)
 * on the current picture for later co-located lookup */
932 for(list=0; list<2; list++){
933 cur->ref_count[sidx][list] = h->ref_count[list];
934 for(j=0; j<h->ref_count[list]; j++)
935 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
937 if(s->picture_structure == PICT_FRAME){
938 memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
939 memcpy(cur->ref_poc [0], cur->ref_poc [1], sizeof(cur->ref_poc [0]));
/* mapping only needed for temporal direct B slices */
941 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
/* field-pair variant of the col->list0 mapping (field refs at index 16+) */
943 for(list=0; list<2; list++){
944 for(field=0; field<2; field++){
945 for(i=0; i<ref1->ref_count[field][list]; i++){
946 for(rfield=0; rfield<2; rfield++){
947 int poc = ref1->ref_poc[field][list][i];
949 poc= (poc&~3) + rfield + 1;
951 h->map_col_to_list0_field[field][list][2*i+rfield] = 0; /* bogus; fills in for missing frames */
952 for(j=16; j<16+2*h->ref_count[list]; j++)
953 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
954 h->map_col_to_list0_field[field][list][2*i+rfield] = j-16;
/* frame/field-level col->list0 mapping */
961 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
962 int poc = ref1->ref_poc[ref1sidx][list][i];
963 if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
964 poc= (poc&~3) + s->picture_structure;
965 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
966 for(j=0; j<h->ref_count[list]; j++)
967 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
968 h->map_col_to_list0[list][i] = j;
/**
 * Derives motion vectors and reference indices for a B_DIRECT macroblock
 * (or the direct 8x8 partitions of a B_8x8 MB), filling h->mv_cache /
 * h->ref_cache and updating *mb_type / h->sub_mb_type.
 * Two modes: spatial direct (ref = min of neighbours, MVs from pred_motion,
 * zeroed where the colocated block is "still") and temporal direct
 * (colocated list-1 MVs scaled by dist_scale_factor).
 * NOTE(review): many lines are absent from this excerpt (declarations of
 * mb_type_col/ref/mv/i8/x8/y8/ref0/scale, else branches, closing braces);
 * comments below describe only the code that is visible.
 */
975 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
976 MpegEncContext * const s = &h->s;
977 int b8_stride = h->b8_stride;
978 int b4_stride = h->b_stride;
979 int mb_xy = h->mb_xy;
981 const int16_t (*l1mv0)[2], (*l1mv1)[2];
982 const int8_t *l1ref0, *l1ref1;
983 const int is_b8x8 = IS_8X8(*mb_type);
984 unsigned int sub_mb_type;
987 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// --- locate the colocated macroblock, compensating for frame/field mismatch ---
989 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
990 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
991 int cur_poc = s->current_picture_ptr->poc;
992 int *col_poc = h->ref_list[1]->field_poc;
// pick the colocated field whose POC is closer to the current picture
993 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
994 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
996 }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
997 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
998 mb_xy += s->mb_stride*fieldoff;
1001 }else{ // AFL/AFR/FR/FL -> AFR/FR
1002 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
// one field MB maps onto a vertical pair of frame MBs
1003 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1004 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1005 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1008 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1009 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1010 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1012 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1013 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1015 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1016 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1018 }else{ // AFR/FR -> AFR/FR
1021 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1022 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1023 /* FIXME save sub mb types from previous frames (or derive from MVs)
1024 * so we know exactly what block size to use */
1025 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1026 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1027 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1028 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1029 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1031 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1032 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
// pointers into the colocated picture's MV and ref-index planes
1037 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1038 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1039 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1040 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// (guarded by a condition not in this excerpt) step down to the second half
1043 l1ref0 += h->b8_stride;
1044 l1ref1 += h->b8_stride;
1045 l1mv0 += 2*b4_stride;
1046 l1mv1 += 2*b4_stride;
// ========================= spatial direct mode =========================
1050 if(h->direct_spatial_mv_pred){
1055 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1057 /* ref = min(neighbors) */
1058 for(list=0; list<2; list++){
1059 int refa = h->ref_cache[list][scan8[0] - 1];
1060 int refb = h->ref_cache[list][scan8[0] - 8];
1061 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1062 if(refc == PART_NOT_AVAILABLE)
1063 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned min: PART_NOT_AVAILABLE (negative) compares large, so it loses
1064 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
// no valid neighbour in either list -> zero refs and zero MVs
1069 if(ref[0] < 0 && ref[1] < 0){
1070 ref[0] = ref[1] = 0;
1071 mv[0][0] = mv[0][1] =
1072 mv[1][0] = mv[1][1] = 0;
1074 for(list=0; list<2; list++){
1076 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1078 mv[list][0] = mv[list][1] = 0;
// a list with no valid ref is dropped from the MB/sub-MB type
1084 *mb_type &= ~MB_TYPE_L1;
1085 sub_mb_type &= ~MB_TYPE_L1;
1086 }else if(ref[0] < 0){
1088 *mb_type &= ~MB_TYPE_L0;
1089 sub_mb_type &= ~MB_TYPE_L0;
// frame/field mismatch with the colocated MB: per-8x8 handling
1092 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1093 for(i8=0; i8<4; i8++){
1096 int xy8 = x8+y8*b8_stride;
1097 int xy4 = 3*x8+y8*b4_stride;
1100 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1102 h->sub_mb_type[i8] = sub_mb_type;
1104 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1105 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// "still" colocated block (ref 0, |mv|<=1) -> force zero MV per spatial direct
1106 if(!IS_INTRA(mb_type_col[y8])
1107 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1108 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1110 a= pack16to32(mv[0][0],mv[0][1]);
1112 b= pack16to32(mv[1][0],mv[1][1]);
1114 a= pack16to32(mv[0][0],mv[0][1]);
1115 b= pack16to32(mv[1][0],mv[1][1]);
1117 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1118 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// whole-MB 16x16 spatial direct
1120 }else if(IS_16X16(*mb_type)){
1123 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1124 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1125 if(!IS_INTRA(mb_type_col[0])
1126 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1127 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <=33 had a different interpretation here; gate on build number
1128 && (h->x264_build>33 || !h->x264_build)))){
1130 a= pack16to32(mv[0][0],mv[0][1]);
1132 b= pack16to32(mv[1][0],mv[1][1]);
1134 a= pack16to32(mv[0][0],mv[0][1]);
1135 b= pack16to32(mv[1][0],mv[1][1]);
1137 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1138 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// per-8x8 spatial direct (same-structure colocated MB)
1140 for(i8=0; i8<4; i8++){
1141 const int x8 = i8&1;
1142 const int y8 = i8>>1;
1144 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1146 h->sub_mb_type[i8] = sub_mb_type;
1148 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1150 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1151 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1154 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1155 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1156 && (h->x264_build>33 || !h->x264_build)))){
1157 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1158 if(IS_SUB_8X8(sub_mb_type)){
1159 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1160 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1162 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
// 4x4 granularity: zero each still sub-block individually
1167 for(i4=0; i4<4; i4++){
1168 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1169 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1171 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1173 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ========================= temporal direct mode ========================
1179 }else{ /* direct temporal mv pred */
1180 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1181 const int *dist_scale_factor = h->dist_scale_factor;
// field MBs in MBAFF frames use the per-field maps/scale factors
1183 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1184 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1185 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1186 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1188 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1189 /* FIXME assumes direct_8x8_inference == 1 */
// y_shift/ref_shift rescale vertical MVs and ref indices across frame/field
1190 int y_shift = 2*!IS_INTERLACED(*mb_type);
1191 int ref_shift= FRAME_MBAFF ? y_shift : 1;
1193 for(i8=0; i8<4; i8++){
1194 const int x8 = i8&1;
1195 const int y8 = i8>>1;
1197 const int16_t (*l1mv)[2]= l1mv0;
1199 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1201 h->sub_mb_type[i8] = sub_mb_type;
// temporal direct always uses list-1 ref 0
1203 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1204 if(IS_INTRA(mb_type_col[y8])){
1205 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1206 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1207 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1211 ref0 = l1ref0[x8 + y8*b8_stride];
1213 ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
1215 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
1218 scale = dist_scale_factor[ref0];
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1222 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1223 int my_col = (mv_col[1]<<y_shift)/2;
// scaled list-0 MV; list-1 MV is the difference (mv_l0 - mv_col)
1224 int mx = (scale * mv_col[0] + 128) >> 8;
1225 int my = (scale * my_col + 128) >> 8;
1226 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1227 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1233 /* one-to-one mv scaling */
1235 if(IS_16X16(*mb_type)){
1238 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1239 if(IS_INTRA(mb_type_col[0])){
1242 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1243 : map_col_to_list0[1][l1ref1[0]];
1244 const int scale = dist_scale_factor[ref0];
1245 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1247 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1248 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1250 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1251 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1253 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1254 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1255 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// per-8x8 temporal direct, same structure as colocated MB
1257 for(i8=0; i8<4; i8++){
1258 const int x8 = i8&1;
1259 const int y8 = i8>>1;
1261 const int16_t (*l1mv)[2]= l1mv0;
1263 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1265 h->sub_mb_type[i8] = sub_mb_type;
1266 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1267 if(IS_INTRA(mb_type_col[0])){
1268 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1269 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1270 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1274 ref0 = l1ref0[x8 + y8*b8_stride];
1276 ref0 = map_col_to_list0[0][ref0];
1278 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]];
1281 scale = dist_scale_factor[ref0];
1283 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1284 if(IS_SUB_8X8(sub_mb_type)){
1285 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1286 int mx = (scale * mv_col[0] + 128) >> 8;
1287 int my = (scale * mv_col[1] + 128) >> 8;
1288 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1289 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1291 for(i4=0; i4<4; i4++){
1292 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1293 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1294 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1295 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1296 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1297 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (mv_cache/ref_cache/mvd_cache) back into
 * the picture-wide arrays (motion_val, ref_index, mvd_table, direct_table).
 * NOTE(review): this excerpt is missing lines (the y-loops around the
 * uint64_t copies, continue statements, closing braces).
 */
1304 static inline void write_back_motion(H264Context *h, int mb_type){
1305 MpegEncContext * const s = &h->s;
// b_xy / b8_xy: top-left 4x4 and 8x8 block coordinates of this MB
1306 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1307 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// list 0 unused: mark its ref indices explicitly (needed by direct-mode readers)
1310 if(!USES_LIST(mb_type, 0))
1311 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1313 for(list=0; list<h->list_count; list++){
1315 if(!USES_LIST(mb_type, list))
// copy MVs two at a time (each 4x4 MV is 32 bits -> 64-bit pair copies)
1319 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1320 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC additionally needs the MV differences for context modelling
1322 if( h->pps.cabac ) {
1323 if(IS_SKIP(mb_type))
1324 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1327 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1328 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// write the four per-8x8 reference indices
1333 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1334 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1335 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1336 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1337 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B slices + CABAC: record which 8x8 partitions used direct prediction
1341 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1342 if(IS_8X8(mb_type)){
1343 uint8_t *direct_table = &h->direct_table[b8_xy];
1344 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1345 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1346 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1352 * Decodes a network abstraction layer unit.
1353 * @param consumed is the number of bytes used as input
1354 * @param length is the length of the array
1355 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1356 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Strips the NAL header byte and removes 00 00 03 emulation-prevention
 * escapes, returning a pointer to the RBSP payload (src+1 when no escape
 * bytes are present, otherwise an internal rbsp_buffer).
 * NOTE(review): this excerpt is missing lines (variable declarations,
 * return statements, the copy-loop scaffolding, closing braces).
 */
1358 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
// parse the NAL header byte: ref idc (2 bits) and unit type (5 bits)
1363 // src[0]&0x80; //forbidden bit
1364 h->nal_ref_idc= src[0]>>5;
1365 h->nal_unit_type= src[0]&0x1F;
// (debug dump of the raw NAL bytes)
1369 for(i=0; i<length; i++)
1370 printf("%2X ", src[i]);
// scan two bytes at a time looking for a 00 00 0x (x<=3) pattern
1372 for(i=0; i+1<length; i+=2){
1373 if(src[i]) continue;
1374 if(i>0 && src[i-1]==0) i--;
1375 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1377 /* startcode, so we must be past the end */
// fast path: no escapes found, payload can be used in place
1384 if(i>=length-1){ //no escaped 0
1385 *dst_length= length;
1386 *consumed= length+1; //+1 for the header
1390 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1391 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1392 dst= h->rbsp_buffer[bufidx];
1398 //printf("decoding esc\n");
1401 //remove escapes (very rare 1:2^22)
1402 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
// 00 00 03 -> drop the 03 byte; anything else after 00 00 is a start code
1403 if(src[si+2]==3){ //escape
1408 }else //next start code
1412 dst[di++]= src[si++];
1416 *consumed= si + 1;//+1 for the header
1417 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1422 * identifies the exact end of the bitstream
1423 * @return the length of the trailing, or 0 if damaged
/**
 * Locates the rbsp_stop_one_bit that terminates the bitstream.
 * NOTE(review): the body of this function (including the declaration of v
 * and the return) is absent from this excerpt; only the trace line is visible.
 */
1425 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1429 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1439 * IDCT transforms the 16 dc values and dequantizes them.
1440 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequantization of the 16 luma DC
 * coefficients, operating in place on the strided block layout.
 * NOTE(review): this excerpt is missing lines (the stride constant, the two
 * i-loops, the temp[] stores of the first pass, closing braces).
 */
1442 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1445 int temp[16]; //FIXME check if this is a good idea
// offsets that map the 4x4 DC grid onto the strided macroblock layout
1446 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1447 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1449 //memset(block, 64, 2*256);
// pass 1: vertical butterflies into temp[]
1452 const int offset= y_offset[i];
1453 const int z0= block[offset+stride*0] + block[offset+stride*4];
1454 const int z1= block[offset+stride*0] - block[offset+stride*4];
1455 const int z2= block[offset+stride*1] - block[offset+stride*5];
1456 const int z3= block[offset+stride*1] + block[offset+stride*5];
// pass 2: horizontal butterflies, then dequant with rounding (+128 >> 8)
1465 const int offset= x_offset[i];
1466 const int z0= temp[4*0+i] + temp[4*2+i];
1467 const int z1= temp[4*0+i] - temp[4*2+i];
1468 const int z2= temp[4*1+i] - temp[4*3+i];
1469 const int z3= temp[4*1+i] + temp[4*3+i];
1471 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1472 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1473 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1474 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1480 * DCT transforms the 16 dc values.
1481 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
 * the final >>1 applies the forward-transform scaling.
 * NOTE(review): this excerpt is missing lines (stride constant, the i-loops,
 * temp[] stores, closing braces).
 */
1483 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1484 // const int qmul= dequant_coeff[qp][0];
1486 int temp[16]; //FIXME check if this is a good idea
// same strided layout as the inverse transform above
1487 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1488 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// pass 1: vertical butterflies into temp[]
1491 const int offset= y_offset[i];
1492 const int z0= block[offset+stride*0] + block[offset+stride*4];
1493 const int z1= block[offset+stride*0] - block[offset+stride*4];
1494 const int z2= block[offset+stride*1] - block[offset+stride*5];
1495 const int z3= block[offset+stride*1] + block[offset+stride*5];
// pass 2: horizontal butterflies, halved on output
1504 const int offset= x_offset[i];
1505 const int z0= temp[4*0+i] + temp[4*2+i];
1506 const int z1= temp[4*0+i] - temp[4*2+i];
1507 const int z2= temp[4*1+i] - temp[4*3+i];
1508 const int z3= temp[4*1+i] + temp[4*3+i];
1510 block[stride*0 +offset]= (z0 + z3)>>1;
1511 block[stride*2 +offset]= (z1 + z2)>>1;
1512 block[stride*8 +offset]= (z1 - z2)>>1;
1513 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, in place. NOTE(review): the declarations of a,b,c,d,e and
 * the computation of e, plus the closing brace, are absent from this excerpt.
 */
1521 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1522 const int stride= 16*2;
1523 const int xStride= 16;
// load the 2x2 DC values
1526 a= block[stride*0 + xStride*0];
1527 b= block[stride*0 + xStride*1];
1528 c= block[stride*1 + xStride*0];
1529 d= block[stride*1 + xStride*1];
// butterfly result scaled by qmul, >>7 normalization
1536 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1537 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1538 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1539 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients (encoder
 * side), in place, no scaling. NOTE(review): the declarations of a,b,c,d,e
 * and the computation of e, plus the closing brace, are absent from this excerpt.
 */
1543 static void chroma_dc_dct_c(DCTELEM *block){
1544 const int stride= 16*2;
1545 const int xStride= 16;
// load the 2x2 DC values
1548 a= block[stride*0 + xStride*0];
1549 b= block[stride*0 + xStride*1];
1550 c= block[stride*1 + xStride*0];
1551 d= block[stride*1 + xStride*1];
// unscaled butterfly outputs
1558 block[stride*0 + xStride*0]= (a+c);
1559 block[stride*0 + xStride*1]= (e+b);
1560 block[stride*1 + xStride*0]= (a-c);
1561 block[stride*1 + xStride*1]= (e-b);
1566 * gets the chroma qp.
/**
 * Looks up the chroma QP for table t (Cb/Cr) at the given luma qscale,
 * using the PPS-derived mapping. (Closing brace not in this excerpt.)
 */
1568 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1569 return h->pps.chroma_qp_table[t][qscale];
1572 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1573 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Encoder-side quantization of a 4x4 block with dead-zone bias (intra: 1/3,
 * inter: 1/6 of a quant step). separate_dc selects a DC-specific shift
 * (QUANT_SHIFT-2 when the DC is quantized separately, QUANT_SHIFT+1
 * otherwise); returns the index of the last nonzero coefficient.
 * NOTE(review): lines are missing from this excerpt (last_non_zero/i
 * declarations, the sign branches' surrounding if/else, the scan loop header,
 * stores back into block[], closing braces).
 */
1574 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1576 const int * const quant_table= quant_coeff[qscale];
// dead-zone bias and the matching skip thresholds for AC coefficients
1577 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1578 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1579 const unsigned int threshold2= (threshold1<<1);
// --- DC with separate quantization (shift reduced by 2) ---
1585 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1586 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1587 const unsigned int dc_threshold2= (dc_threshold1<<1);
// qscale+18 selects the DC-specific quant row — TODO confirm against table layout
1589 int level= block[0]*quant_coeff[qscale+18][0];
// unsigned trick: |level| > threshold1 in a single compare
1590 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1592 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1595 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1598 // last_non_zero = i;
// --- DC quantized with the AC table (shift increased by 1) ---
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607 int level= block[0]*quant_table[0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1613 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1616 // last_non_zero = i;
// --- AC coefficients in scan order ---
1629 const int j= scantable[i];
1630 int level= block[j]*quant_table[j];
1632 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1633 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1634 if(((unsigned)(level+threshold1))>threshold2){
1636 level= (bias + level)>>QUANT_SHIFT;
1639 level= (bias - level)>>QUANT_SHIFT;
1648 return last_non_zero;
/**
 * Single-direction motion compensation for one partition: fetches the
 * quarter-pel luma and eighth-pel chroma prediction from pic into
 * dest_y/cb/cr, using ff_emulated_edge_mc when the MV points outside the
 * padded picture. NOTE(review): lines are missing from this excerpt
 * (the emu flag declaration/setup, several if/else bodies, MB_FIELD chroma
 * conditionals, closing braces).
 */
1651 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1652 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1653 int src_x_offset, int src_y_offset,
1654 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1655 MpegEncContext * const s = &h->s;
// MV in quarter-pel units, offset to this partition's position
1656 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1657 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
// sub-pel phase selects one of the 16 qpel interpolation functions
1658 const int luma_xy= (mx&3) + ((my&3)<<2);
1659 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1660 uint8_t * src_cb, * src_cr;
1661 int extra_width= h->emu_edge_width;
1662 int extra_height= h->emu_edge_height;
1664 const int full_mx= mx>>2;
1665 const int full_my= my>>2;
1666 const int pic_width = 16*s->mb_width;
1667 const int pic_height = 16*s->mb_height >> MB_FIELD;
1669 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
// sub-pel filters read 3 extra pixels; shrink the usable edge margin
1672 if(mx&7) extra_width -= 3;
1673 if(my&7) extra_height -= 3;
// MV reaches outside the padded frame: synthesize edge pixels
1675 if( full_mx < 0-extra_width
1676 || full_my < 0-extra_height
1677 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1678 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1679 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1680 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1684 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
// non-square partitions: second half at +delta
1686 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1689 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1692 // chroma offset when predicting from a field of opposite parity
1693 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1694 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1696 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1697 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
// chroma edge emulation + eighth-pel chroma MC for Cb, then Cr
1700 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1701 src_cb= s->edge_emu_buffer;
1703 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1706 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1707 src_cr= s->edge_emu_buffer;
1709 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: list-0 prediction with
 * the put functions, then (for bi-prediction) list-1 averaged in with the
 * avg functions. NOTE(review): the if(list0)/if(list1) lines and the switch
 * of qpix_op to qpix_avg are absent from this excerpt.
 */
1712 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1713 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1714 int x_offset, int y_offset,
1715 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1716 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1717 int list0, int list1){
1718 MpegEncContext * const s = &h->s;
// first pass writes (put); second pass averages (avg)
1719 qpel_mc_func *qpix_op= qpix_put;
1720 h264_chroma_mc_func chroma_op= chroma_put;
// advance destinations to this partition's pixel position
1722 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1723 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1724 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1725 x_offset += 8*s->mb_x;
1726 y_offset += 8*(s->mb_y >> MB_FIELD);
// list-0 prediction
1729 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1730 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1731 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1732 qpix_op, chroma_op);
// switch to averaging for the list-1 pass
1735 chroma_op= chroma_avg;
1739 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1740 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1741 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1742 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bi-directional: both
 * predictions are rendered (list-1 into a scratchpad) and then combined with
 * biweight functions — implicit (use_weight==2) or explicit weights.
 * Uni-directional: a single prediction is rendered then scaled in place with
 * the weight functions. NOTE(review): the if(list0&&list1)/else scaffolding
 * and closing braces are absent from this excerpt.
 */
1746 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1747 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1748 int x_offset, int y_offset,
1749 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1750 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1751 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1752 int list0, int list1){
1753 MpegEncContext * const s = &h->s;
// advance destinations to this partition's pixel position
1755 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1756 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1757 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1758 x_offset += 8*s->mb_x;
1759 y_offset += 8*(s->mb_y >> MB_FIELD);
1762 /* don't optimize for luma-only case, since B-frames usually
1763 * use implicit weights => chroma too. */
// list-1 prediction goes into the obmc scratchpad, combined afterwards
1764 uint8_t *tmp_cb = s->obmc_scratchpad;
1765 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1766 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1767 int refn0 = h->ref_cache[0][ scan8[n] ];
1768 int refn1 = h->ref_cache[1][ scan8[n] ];
1770 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1771 dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put);
1773 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1774 tmp_y, tmp_cb, tmp_cr,
1775 x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: weights per ref pair sum to 64, log2 denom 5, offset 0
1777 if(h->use_weight == 2){
1778 int weight0 = h->implicit_weight[refn0][refn1];
1779 int weight1 = 64 - weight0;
1780 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1781 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1782 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit weighting: per-list weights and summed offsets from the slice header
1784 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1785 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1786 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1787 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1788 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1789 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1790 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1791 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1792 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// uni-directional: predict, then weight in place
1795 int list = list1 ? 1 : 0;
1796 int refn = h->ref_cache[list][ scan8[n] ];
1797 Picture *ref= &h->ref_list[list][refn];
1798 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1799 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1800 qpix_put, chroma_put);
1802 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1804 if(h->use_weight_chroma){
1805 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1807 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1808 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation:
 * weighted when explicit weights are active (use_weight==1) or when implicit
 * bi-prediction has a non-trivial weight pair (!= 32/32); otherwise standard.
 */
1813 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1814 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1815 int x_offset, int y_offset,
1816 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1817 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1818 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1819 int list0, int list1){
// implicit 32/32 weighting degenerates to a plain average -> standard path
1820 if((h->use_weight==2 && list0 && list1
1821 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1822 || h->use_weight==1)
1823 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1824 x_offset, y_offset, qpix_put, chroma_put,
1825 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1827 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1828 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * 16x16 MV points to, roughly four macroblocks ahead of the current one.
 * NOTE(review): a guard around refn (and the closing brace) is not in this
 * excerpt.
 */
1831 static inline void prefetch_motion(H264Context *h, int list){
1832 /* fetch pixels for estimated mv 4 macroblocks ahead
1833 * optimized for 64byte cache lines */
1834 MpegEncContext * const s = &h->s;
1835 const int refn = h->ref_cache[list][scan8[0]];
// full-pel target of the MB's first MV, +8 centers within the row
1837 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1838 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1839 uint8_t **src= h->ref_list[list][refn].data;
1840 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1841 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// chroma: half resolution; Cb/Cr planes assumed contiguous (src[2]-src[1] stride)
1842 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1843 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs inter prediction for a whole macroblock: dispatches to mc_part()
 * per partition according to the MB type (16x16, 16x8, 8x16, or 8x8 with
 * per-8x8 sub-types), with prefetches for both lists.
 * NOTE(review): lines are missing from this excerpt (the i/j loop headers of
 * the 8x8 path, the n = 4*i assignment, closing braces).
 */
1847 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1848 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1849 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1850 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1851 MpegEncContext * const s = &h->s;
1852 const int mb_xy= h->mb_xy;
1853 const int mb_type= s->current_picture.mb_type[mb_xy];
1855 assert(IS_INTER(mb_type));
1857 prefetch_motion(h, 0);
// one 16x16 partition
1859 if(IS_16X16(mb_type)){
1860 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1861 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1862 &weight_op[0], &weight_avg[0],
1863 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
// two 16x8 partitions (top, bottom)
1864 }else if(IS_16X8(mb_type)){
1865 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1866 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1867 &weight_op[1], &weight_avg[1],
1868 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1869 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1870 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1871 &weight_op[1], &weight_avg[1],
1872 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// two 8x16 partitions (left, right)
1873 }else if(IS_8X16(mb_type)){
1874 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1875 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1876 &weight_op[2], &weight_avg[2],
1877 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1878 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1879 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1880 &weight_op[2], &weight_avg[2],
1881 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x8 mode: each 8x8 block has its own sub-partitioning
1885 assert(IS_8X8(mb_type));
1888 const int sub_mb_type= h->sub_mb_type[i];
1890 int x_offset= (i&1)<<2;
1891 int y_offset= (i&2)<<1;
1893 if(IS_SUB_8X8(sub_mb_type)){
1894 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1895 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1896 &weight_op[3], &weight_avg[3],
1897 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1898 }else if(IS_SUB_8X4(sub_mb_type)){
1899 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1900 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1901 &weight_op[4], &weight_avg[4],
1902 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1903 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1904 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1905 &weight_op[4], &weight_avg[4],
1906 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1907 }else if(IS_SUB_4X8(sub_mb_type)){
1908 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1909 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1910 &weight_op[5], &weight_avg[5],
1911 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1912 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1913 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1914 &weight_op[5], &weight_avg[5],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
// smallest granularity: four 4x4 sub-blocks
1918 assert(IS_SUB_4X4(sub_mb_type));
1920 int sub_x_offset= x_offset + 2*(j&1);
1921 int sub_y_offset= y_offset + (j&2);
1922 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1923 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1924 &weight_op[6], &weight_avg[6],
1925 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1931 prefetch_motion(h, 1);
/* One-time construction of all CAVLC VLC decode tables (coeff_token,
 * total_zeros, run, and their chroma-DC variants) into the fixed-size
 * static buffers declared at file scope.  INIT_VLC_USE_NEW_STATIC makes
 * init_vlc() fill the preassigned .table/.table_allocated storage rather
 * than allocating on the heap.
 * NOTE(review): this listing has gaps (original line numbers jump), so
 * guard logic around `done` and some loop headers are not visible here. */
1934 static av_cold void decode_init_vlc(void){
1935 static int done = 0;
1942 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1943 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1944 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1945 &chroma_dc_coeff_token_len [0], 1, 1,
1946 &chroma_dc_coeff_token_bits[0], 1, 1,
1947 INIT_VLC_USE_NEW_STATIC);
/* The four coeff_token tables are packed back-to-back in one array;
 * `offset` tracks the running start of each table. */
1951 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1952 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1953 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1954 &coeff_token_len [i][0], 1, 1,
1955 &coeff_token_bits[i][0], 1, 1,
1956 INIT_VLC_USE_NEW_STATIC);
1957 offset += coeff_token_vlc_tables_size[i];
1960 * This is a one time safety check to make sure that
1961 * the packed static coeff_token_vlc table sizes
1962 * were initialized correctly.
1964 assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
1967 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1968 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1969 init_vlc(&chroma_dc_total_zeros_vlc[i],
1970 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1971 &chroma_dc_total_zeros_len [i][0], 1, 1,
1972 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1973 INIT_VLC_USE_NEW_STATIC);
1975 for(i=0; i<15; i++){
1976 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1977 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1978 init_vlc(&total_zeros_vlc[i],
1979 TOTAL_ZEROS_VLC_BITS, 16,
1980 &total_zeros_len [i][0], 1, 1,
1981 &total_zeros_bits[i][0], 1, 1,
1982 INIT_VLC_USE_NEW_STATIC);
1986 run_vlc[i].table = run_vlc_tables[i];
1987 run_vlc[i].table_allocated = run_vlc_tables_size;
1988 init_vlc(&run_vlc[i],
1990 &run_len [i][0], 1, 1,
1991 &run_bits[i][0], 1, 1,
1992 INIT_VLC_USE_NEW_STATIC);
/* NOTE(review): trailing comma below (instead of `;`) is in the original
 * source; the comma operator joins the two assignments into one
 * statement, so behavior is unchanged — cosmetic only. */
1994 run7_vlc.table = run7_vlc_table,
1995 run7_vlc.table_allocated = run7_vlc_table_size;
1996 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1997 &run_len [6][0], 1, 1,
1998 &run_bits[6][0], 1, 1,
1999 INIT_VLC_USE_NEW_STATIC);
/* Release every per-context table allocated by alloc_tables() /
 * context_init(), the cached SPS/PPS parameter sets, and the per-thread
 * border/scratch buffers.  av_freep() also NULLs each pointer, so this
 * is safe to call on a partially-allocated context. */
2003 static void free_tables(H264Context *h){
2006 av_freep(&h->intra4x4_pred_mode);
2007 av_freep(&h->chroma_pred_mode_table);
2008 av_freep(&h->cbp_table);
2009 av_freep(&h->mvd_table[0]);
2010 av_freep(&h->mvd_table[1]);
2011 av_freep(&h->direct_table);
2012 av_freep(&h->non_zero_count);
2013 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (see alloc_tables), so only
 * the base is freed and the alias is cleared. */
2014 h->slice_table= NULL;
2016 av_freep(&h->mb2b_xy);
2017 av_freep(&h->mb2b8_xy);
2019 for(i = 0; i < MAX_SPS_COUNT; i++)
2020 av_freep(h->sps_buffers + i);
2022 for(i = 0; i < MAX_PPS_COUNT; i++)
2023 av_freep(h->pps_buffers + i);
/* Per-thread buffers were allocated per context clone (context_init /
 * frame_start), so free them on every thread context. */
2025 for(i = 0; i < h->s.avctx->thread_count; i++) {
2026 hx = h->thread_context[i];
2028 av_freep(&hx->top_borders[1]);
2029 av_freep(&hx->top_borders[0]);
2030 av_freep(&hx->s.obmc_scratchpad);
/* Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices.  If both 8x8 scaling matrices are identical, the
 * second table aliases the first to save the recomputation. */
2034 static void init_dequant8_coeff_table(H264Context *h){
/* Transpose the coefficient layout when a non-C IDCT (different scan
 * order) is in use. */
2036 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2037 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2038 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2040 for(i=0; i<2; i++ ){
2041 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2042 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2046 for(q=0; q<52; q++){
/* q = 6*shift + idx: dequant scale doubles every 6 QP steps. */
2047 int shift = ff_div6[q];
2048 int idx = ff_rem6[q];
2050 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2051 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2052 h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the six 4x4 dequantization tables (Intra/Inter x Y/Cb/Cr)
 * for all 52 QP values.  Tables whose scaling matrices match an earlier
 * one are aliased to the earlier buffer instead of recomputed. */
2057 static void init_dequant4_coeff_table(H264Context *h){
/* As in the 8x8 case: transpose when a non-C IDCT is active. */
2059 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2060 for(i=0; i<6; i++ ){
2061 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2063 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2064 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2071 for(q=0; q<52; q++){
/* +2 bias keeps extra precision relative to the 8x8 path. */
2072 int shift = ff_div6[q] + 2;
2073 int idx = ff_rem6[q];
2075 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2076 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2077 h->pps.scaling_matrix4[i][x]) << shift;
/* Build all dequant tables for the current PPS; with lossless
 * (transform_bypass) streams QP 0 entries are forced to the identity
 * scale (1<<6) so bypass blocks pass through unscaled. */
2082 static void init_dequant_tables(H264Context *h){
2084 init_dequant4_coeff_table(h);
2085 if(h->pps.transform_8x8_mode)
2086 init_dequant8_coeff_table(h);
2087 if(h->sps.transform_bypass){
2090 h->dequant4_coeff[i][0][x] = 1<<6;
2091 if(h->pps.transform_8x8_mode)
2094 h->dequant8_coeff[i][0][x] = 1<<6;
2101 * needs width/height
2103 static int alloc_tables(H264Context *h){
2104 MpegEncContext * const s = &h->s;
/* +1 row of macroblocks as padding for edge accesses. */
2105 const int big_mb_num= s->mb_stride * (s->mb_height+1);
/* CHECKED_ALLOCZ jumps to a cleanup label on failure (macro defined
 * elsewhere in the file), so each allocation below is failure-checked. */
2108 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2110 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2111 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2112 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2114 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2115 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2116 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2117 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* slice_table entries start at -1 (0xFF) = "no slice"; the working
 * pointer is offset past the padding row/column. */
2119 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2120 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2122 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2123 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* Precompute macroblock index -> 4x4-block / 8x8-block index maps. */
2124 for(y=0; y<s->mb_height; y++){
2125 for(x=0; x<s->mb_width; x++){
2126 const int mb_xy= x + y*s->mb_stride;
2127 const int b_xy = 4*x + 4*y*h->b_stride;
2128 const int b8_xy= 2*x + 2*y*h->b8_stride;
2130 h->mb2b_xy [mb_xy]= b_xy;
2131 h->mb2b8_xy[mb_xy]= b8_xy;
/* obmc_scratchpad is allocated lazily in frame_start() because it
 * depends on linesize, unknown here. */
2135 s->obmc_scratchpad = NULL;
2137 if(!h->dequant4_coeff[0])
2138 init_dequant_tables(h);
2147 * Mimic alloc_tables(), but for every context thread.
/* Shallow-share the big per-picture tables between the master context
 * and a slice-thread clone; nothing here is reallocated, so only the
 * master must free them (see free_tables). */
2149 static void clone_tables(H264Context *dst, H264Context *src){
2150 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2151 dst->non_zero_count = src->non_zero_count;
2152 dst->slice_table = src->slice_table;
2153 dst->cbp_table = src->cbp_table;
2154 dst->mb2b_xy = src->mb2b_xy;
2155 dst->mb2b8_xy = src->mb2b8_xy;
2156 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2157 dst->mvd_table[0] = src->mvd_table[0];
2158 dst->mvd_table[1] = src->mvd_table[1];
2159 dst->direct_table = src->direct_table;
/* Per-thread scratchpad stays private; allocated lazily in frame_start. */
2161 dst->s.obmc_scratchpad = NULL;
2162 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2167 * Allocate buffers which are not shared amongst multiple threads.
2169 static int context_init(H264Context *h){
/* Two top-border rows (luma 16 + two chroma 8 bytes per MB) — one per
 * field for MBAFF. */
2170 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2171 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2175 return -1; // free_tables will clean up for us
/* Shared decoder/encoder context setup: copies dimensions from the
 * AVCodecContext, initializes intra prediction, and seeds the PPS
 * scaling matrices with the flat default (all 16). */
2178 static av_cold void common_init(H264Context *h){
2179 MpegEncContext * const s = &h->s;
2181 s->width = s->avctx->width;
2182 s->height = s->avctx->height;
2183 s->codec_id= s->avctx->codec->id;
2185 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 marks "no PPS dequant tables computed yet". */
2187 h->dequant_coeff_pps= -1;
2188 s->unrestricted_mv=1;
2189 s->decode=1; //FIXME
2191 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2192 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: sets up the MpegEncContext defaults, output
 * pixel format (JPEG-range 420 for SVQ3, standard 420 otherwise), and
 * detects avcC-style ("AVC1"/mp4) extradata via its leading version
 * byte of 1. */
2195 static av_cold int decode_init(AVCodecContext *avctx){
2196 H264Context *h= avctx->priv_data;
2197 MpegEncContext * const s = &h->s;
2199 MPV_decode_defaults(s);
2204 s->out_format = FMT_H264;
2205 s->workaround_bugs= avctx->workaround_bugs;
2208 // s->decode_mb= ff_h263_decode_mb;
2209 s->quarter_sample = 1;
2212 if(avctx->codec_id == CODEC_ID_SVQ3)
2213 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2215 avctx->pix_fmt= PIX_FMT_YUV420P;
/* avcC extradata starts with configurationVersion == 1, unlike Annex-B
 * start-code streams. */
2219 if(avctx->extradata_size > 0 && avctx->extradata &&
2220 *(char *)avctx->extradata == 1){
2227 h->thread_context[0] = h;
2228 h->outputed_poc = INT_MIN;
2229 h->prev_poc_msb= 1<<16;
/* Per-frame setup: starts the MPV frame, precomputes the per-block
 * destination offsets (frame and field variants), lazily allocates the
 * bipred scratchpad, and resets reference/POC state for the new frame. */
2233 static int frame_start(H264Context *h){
2234 MpegEncContext * const s = &h->s;
2237 if(MPV_frame_start(s, s->avctx) < 0)
2239 ff_er_frame_start(s);
2241 * MPV_frame_start uses pict_type to derive key_frame.
2242 * This is incorrect for H.264; IDR markings must be used.
2243 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2244 * See decode_nal_units().
2246 s->current_picture_ptr->key_frame= 0;
2248 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-coded offsets; [24..47]: field-coded
 * (doubled stride) offsets for the same blocks. */
2250 for(i=0; i<16; i++){
2251 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2252 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2255 h->block_offset[16+i]=
2256 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2257 h->block_offset[24+16+i]=
2258 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2261 /* can't be in alloc_tables because linesize isn't known there.
2262 * FIXME: redo bipred weight to not require extra buffer? */
2263 for(i = 0; i < s->avctx->thread_count; i++)
2264 if(!h->thread_context[i]->s.obmc_scratchpad)
2265 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2267 /* some macroblocks will be accessed before they're available */
2268 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2269 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2271 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2273 // We mark the current picture as non-reference after allocating it, so
2274 // that if we break out due to an error it can be released automatically
2275 // in the next MPV_frame_start().
2276 // SVQ3 as well as most other codecs have only last/next/current and thus
2277 // get released even with set reference, besides SVQ3 and others do not
2278 // mark frames as reference later "naturally".
2279 if(s->codec_id != CODEC_ID_SVQ3)
2280 s->current_picture_ptr->reference= 0;
/* Field POCs start at INT_MAX = "not yet decoded". */
2282 s->current_picture_ptr->field_poc[0]=
2283 s->current_picture_ptr->field_poc[1]= INT_MAX;
2284 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row and right-edge columns of the just-decoded
 * macroblock into top_borders[]/left_border[] before the deblocking
 * filter overwrites them; the next MB row/column restores them via
 * xchg_mb_border().  `simple` skips the MBAFF handling. */
2289 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2290 MpegEncContext * const s = &h->s;
2299 src_cb -= uvlinesize;
2300 src_cr -= uvlinesize;
2302 if(!simple && FRAME_MBAFF){
2304 offset = MB_MBAFF ? 1 : 17;
2305 uvoffset= MB_MBAFF ? 1 : 9;
/* Copy the last luma line (2x 8 bytes) and last chroma lines. */
2307 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2308 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2309 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2310 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2311 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2316 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2317 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2318 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2319 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2325 top_idx = MB_MBAFF ? 0 : 1;
2327 step= MB_MBAFF ? 2 : 1;
2330 // There are two lines saved, the line above the top macroblock of a pair,
2331 // and the line above the bottom macroblock
2332 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2333 for(i=1; i<17 - skiplast; i++){
2334 h->left_border[offset+i*step]= src_y[15+i* linesize];
2337 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2338 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2340 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2341 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2342 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2343 for(i=1; i<9 - skiplast; i++){
2344 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2345 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2347 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2348 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved neighbor border samples
 * with the picture data around the current MB, so intra prediction can
 * read unfiltered neighbors while deblocking runs on filtered data.
 * With deblocking_filter==2 the swap is limited to same-slice
 * neighbors. */
2352 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2353 MpegEncContext * const s = &h->s;
2364 if(!simple && FRAME_MBAFF){
2366 offset = MB_MBAFF ? 1 : 17;
2367 uvoffset= MB_MBAFF ? 1 : 9;
2371 top_idx = MB_MBAFF ? 0 : 1;
2373 step= MB_MBAFF ? 2 : 1;
2376 if(h->deblocking_filter == 2) {
/* Mode 2: filter only within a slice, so compare slice ids. */
2378 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2379 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2381 deblock_left = (s->mb_x > 0);
2382 deblock_top = (s->mb_y > 0);
/* Step back one row/column to address the border samples themselves. */
2385 src_y -= linesize + 1;
2386 src_cb -= uvlinesize + 1;
2387 src_cr -= uvlinesize + 1;
2389 #define XCHG(a,b,t,xchg)\
2396 for(i = !deblock_top; i<16; i++){
2397 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2399 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2403 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2404 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2405 if(s->mb_x+1 < s->mb_width){
2406 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2410 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2412 for(i = !deblock_top; i<8; i++){
2413 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2414 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2416 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2417 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2420 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2421 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: intra prediction
 * or inter motion compensation, IDCT residual addition (4x4/8x8/bypass/
 * SVQ3 variants), then deblocking.  `simple` is a compile-time constant
 * (via av_always_inline) that strips the MBAFF/PCM/gray/SVQ3 paths for
 * the common progressive-H.264 case — see hl_decode_mb_simple/complex. */
2426 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2427 MpegEncContext * const s = &h->s;
2428 const int mb_x= s->mb_x;
2429 const int mb_y= s->mb_y;
2430 const int mb_xy= h->mb_xy;
2431 const int mb_type= s->current_picture.mb_type[mb_xy];
2432 uint8_t *dest_y, *dest_cb, *dest_cr;
2433 int linesize, uvlinesize /*dct_offset*/;
2435 int *block_offset = &h->block_offset[0];
2436 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2437 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2438 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2440 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2441 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2442 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2444 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2445 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* Field macroblock: double the strides and use the field variant of the
 * block offsets; the bottom field starts one line down. */
2447 if (!simple && MB_FIELD) {
2448 linesize = h->mb_linesize = s->linesize * 2;
2449 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2450 block_offset = &h->block_offset[24];
2451 if(mb_y&1){ //FIXME move out of this function?
2452 dest_y -= s->linesize*15;
2453 dest_cb-= s->uvlinesize*7;
2454 dest_cr-= s->uvlinesize*7;
/* Re-tag ref_cache entries with field parity for the deblock filter. */
2458 for(list=0; list<h->list_count; list++){
2459 if(!USES_LIST(mb_type, list))
2461 if(IS_16X16(mb_type)){
2462 int8_t *ref = &h->ref_cache[list][scan8[0]];
2463 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2465 for(i=0; i<16; i+=4){
2466 int ref = h->ref_cache[list][scan8[i]];
2468 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2474 linesize = h->mb_linesize = s->linesize;
2475 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2476 // dct_offset = s->linesize * 16;
/* Select the residual-add functions for this MB's transform size. */
2479 if(transform_bypass){
2481 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2482 }else if(IS_8x8DCT(mb_type)){
2483 idct_dc_add = s->dsp.h264_idct8_dc_add;
2484 idct_add = s->dsp.h264_idct8_add;
2486 idct_dc_add = s->dsp.h264_idct_dc_add;
2487 idct_add = s->dsp.h264_idct_add;
/* I_PCM: raw samples were parsed into h->mb; copy them out verbatim. */
2490 if (!simple && IS_INTRA_PCM(mb_type)) {
2491 for (i=0; i<16; i++) {
2492 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2494 for (i=0; i<8; i++) {
2495 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2496 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2499 if(IS_INTRA(mb_type)){
/* Swap in unfiltered neighbor samples before intra prediction. */
2500 if(h->deblocking_filter)
2501 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2503 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2504 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2505 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2508 if(IS_INTRA4x4(mb_type)){
2509 if(simple || !s->encoding){
2510 if(IS_8x8DCT(mb_type)){
2511 for(i=0; i<16; i+=4){
2512 uint8_t * const ptr= dest_y + block_offset[i];
2513 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2514 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2515 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2516 (h->topright_samples_available<<i)&0x4000, linesize);
2518 if(nnz == 1 && h->mb[i*16])
2519 idct_dc_add(ptr, h->mb + i*16, linesize);
2521 idct_add(ptr, h->mb + i*16, linesize);
2525 for(i=0; i<16; i++){
2526 uint8_t * const ptr= dest_y + block_offset[i];
2528 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* These two modes read top-right samples; when unavailable,
 * replicate the last available top sample (spec behavior). */
2531 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2532 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2533 assert(mb_y || linesize <= block_offset[i]);
2534 if(!topright_avail){
2535 tr= ptr[3 - linesize]*0x01010101;
2536 topright= (uint8_t*) &tr;
2538 topright= ptr + 4 - linesize;
2542 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2543 nnz = h->non_zero_count_cache[ scan8[i] ];
2546 if(nnz == 1 && h->mb[i*16])
2547 idct_dc_add(ptr, h->mb + i*16, linesize);
2549 idct_add(ptr, h->mb + i*16, linesize);
2551 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2556 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2558 if(!transform_bypass)
2559 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2561 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2563 if(h->deblocking_filter)
2564 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* Inter macroblock: motion compensation via hl_motion(). */
2566 hl_motion(h, dest_y, dest_cb, dest_cr,
2567 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2568 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2569 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* Add luma residuals (intra16x16 and inter paths; intra4x4 already
 * added them interleaved with prediction above). */
2573 if(!IS_INTRA4x4(mb_type)){
2575 if(IS_INTRA16x16(mb_type)){
2576 for(i=0; i<16; i++){
2577 if(h->non_zero_count_cache[ scan8[i] ])
2578 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2579 else if(h->mb[i*16])
2580 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2583 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2584 for(i=0; i<16; i+=di){
2585 int nnz = h->non_zero_count_cache[ scan8[i] ];
2587 if(nnz==1 && h->mb[i*16])
2588 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2590 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2595 for(i=0; i<16; i++){
2596 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2597 uint8_t * const ptr= dest_y + block_offset[i];
2598 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* Chroma residuals (blocks 16..23, Cb then Cr). */
2604 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2605 uint8_t *dest[2] = {dest_cb, dest_cr};
2606 if(transform_bypass){
2607 idct_add = idct_dc_add = s->dsp.add_pixels4;
2609 idct_add = s->dsp.h264_idct_add;
2610 idct_dc_add = s->dsp.h264_idct_dc_add;
2611 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2612 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2615 for(i=16; i<16+8; i++){
2616 if(h->non_zero_count_cache[ scan8[i] ])
2617 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2618 else if(h->mb[i*16])
2619 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2622 for(i=16; i<16+8; i++){
2623 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2624 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2625 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* Finally, deblock: save borders first, then run the (fast) filter. */
2631 if(h->deblocking_filter) {
2632 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2633 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2634 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2635 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2636 if (!simple && FRAME_MBAFF) {
2637 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2639 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2645 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple=1 specialization of hl_decode_mb_internal (constant-folded). */
2647 static void hl_decode_mb_simple(H264Context *h){
2648 hl_decode_mb_internal(h, 1);
2652 * Process a macroblock; this handles edge cases, such as interlacing.
/* simple=0 specialization; av_noinline keeps the rare path out of the
 * hot code. */
2654 static void av_noinline hl_decode_mb_complex(H264Context *h){
2655 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple fast path or the
 * complex path (MBAFF, field MBs, PCM, non-H264 codec ids, gray-only
 * decode, encoder, or size-optimized builds). */
2658 static void hl_decode_mb(H264Context *h){
2659 MpegEncContext * const s = &h->s;
2660 const int mb_xy= h->mb_xy;
2661 const int mb_type= s->current_picture.mb_type[mb_xy];
2662 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2663 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2665 if(ENABLE_H264_ENCODER && !s->decode)
2669 hl_decode_mb_complex(h);
2670 else hl_decode_mb_simple(h);
/* Convert a frame Picture in-place into a single-field view: double the
 * strides, point data[] at the requested field's lines, and take that
 * field's POC. */
2673 static void pic_as_field(Picture *pic, const int parity){
2675 for (i = 0; i < 4; ++i) {
/* Bottom field starts one line down in each plane. */
2676 if (parity == PICT_BOTTOM_FIELD)
2677 pic->data[i] += pic->linesize[i];
2678 pic->reference = parity;
2679 pic->linesize[i] *= 2;
2681 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy `src` into `dest` if src is referenced with the wanted parity;
 * for field parities the copy is converted to a field picture and its
 * pic_id adjusted by id_add.  Returns whether the parity matched.
 * NOTE(review): lines are missing from this listing (numbering jumps
 * 2686->2690), so the copy itself is not visible here. */
2684 static int split_field_copy(Picture *dest, Picture *src,
2685 int parity, int id_add){
2686 int match = !!(src->reference & parity);
2690 if(parity != PICT_FRAME){
2691 pic_as_field(dest, parity);
2693 dest->pic_id += id_add;
/* Build the default reference list `def` from candidate array `in`,
 * interleaving same-parity (sel) and opposite-parity (sel^3) fields as
 * the spec's field-list construction requires; pic_id is the long-term
 * index or frame_num depending on is_long.  Returns entries written
 * (via `index`; return statement not visible in this gapped listing). */
2700 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0]/i[1] scan for the next same-/opposite-parity candidate. */
2704 while(i[0]<len || i[1]<len){
2705 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2707 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2710 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2711 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2714 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2715 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort `src` into `sorted` by POC relative to `limit`:
 * dir==0 picks decreasing POCs below limit, dir==1 picks increasing
 * POCs above limit (the two halves of a B-frame reference list). */
2722 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2727 best_poc= dir ? INT_MIN : INT_MAX;
2729 for(i=0; i<len; i++){
2730 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the ascending pass. */
2731 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2733 sorted[out_i]= src[i];
2736 if(best_poc == (dir ? INT_MIN : INT_MAX))
2738 limit= sorted[out_i++]->poc - dir;
2744 * fills the default_ref_list.
2746 static int fill_default_ref_list(H264Context *h){
2747 MpegEncContext * const s = &h->s;
/* B slices: short-term refs sorted by POC distance around the current
 * POC (past then future for list0, reversed for list1), then long-term
 * refs appended. */
2750 if(h->slice_type_nos==FF_B_TYPE){
2751 Picture *sorted[32];
2756 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2758 cur_poc= s->current_picture_ptr->poc;
2760 for(list= 0; list<2; list++){
2761 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2762 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2764 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2765 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2768 if(len < h->ref_count[list])
2769 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* Spec: if both lists come out identical with >1 entry, swap the first
 * two entries of list1. */
2773 if(lens[0] == lens[1] && lens[1] > 1){
2774 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2776 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* P slices: short-term by decoding order, then long-term. */
2779 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2780 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2782 if(len < h->ref_count[0])
2783 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2786 for (i=0; i<h->ref_count[0]; i++) {
2787 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2789 if(h->slice_type_nos==FF_B_TYPE){
2790 for (i=0; i<h->ref_count[1]; i++) {
2791 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2798 static void print_short_term(H264Context *h);
2799 static void print_long_term(H264Context *h);
2802 * Extract structure information about the picture described by pic_num in
2803 * the current decoding context (frame or field). Note that pic_num is
2804 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2805 * @param pic_num picture number for which to extract structure information
2806 * @param structure one of PICT_XXX describing structure of picture
2808 * @return frame number (short term) or long term index of picture
2809 * described by pic_num
2811 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2812 MpegEncContext * const s = &h->s;
2814 *structure = s->picture_structure;
2817 /* opposite field */
2818 *structure ^= PICT_FRAME;
/* Parse the slice-header ref_pic_list_reordering syntax and apply it:
 * starting from the default lists, move the named short-/long-term
 * pictures to the front positions as the bitstream dictates.  Returns
 * negative on syntax/overflow errors. */
2825 static int decode_ref_pic_list_reordering(H264Context *h){
2826 MpegEncContext * const s = &h->s;
2827 int list, index, pic_structure;
2829 print_short_term(h);
2832 for(list=0; list<h->list_count; list++){
2833 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
2835 if(get_bits1(&s->gb)){
2836 int pred= h->curr_pic_num;
2838 for(index=0; ; index++){
2839 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2840 unsigned int pic_id;
2842 Picture *ref = NULL;
/* idc 3 terminates the reordering loop. */
2844 if(reordering_of_pic_nums_idc==3)
2847 if(index >= h->ref_count[list]){
2848 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2852 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term, differential pic num (0 subtracts,
 * 1 adds), modulo max_pic_num. */
2853 if(reordering_of_pic_nums_idc<2){
2854 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2857 if(abs_diff_pic_num > h->max_pic_num){
2858 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2862 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2863 else pred+= abs_diff_pic_num;
2864 pred &= h->max_pic_num - 1;
2866 frame_num = pic_num_extract(h, pred, &pic_structure);
2868 for(i= h->short_ref_count-1; i>=0; i--){
2869 ref = h->short_ref[i];
2870 assert(ref->reference);
2871 assert(!ref->long_ref);
2873 ref->frame_num == frame_num &&
2874 (ref->reference & pic_structure)
/* idc 2: long-term, addressed by long_term_pic_idx. */
2882 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2884 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2887 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2890 ref = h->long_ref[long_idx];
2891 assert(!(ref && !ref->reference));
2892 if(ref && (ref->reference & pic_structure)){
2893 ref->pic_id= pic_id;
2894 assert(ref->long_ref);
2902 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2903 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Shift the list down to insert the found picture at `index`. */
2905 for(i=index; i+1<h->ref_count[list]; i++){
2906 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2909 for(; i > index; i--){
2910 h->ref_list[list][i]= h->ref_list[list][i-1];
2912 h->ref_list[list][index]= *ref;
2914 pic_as_field(&h->ref_list[list][index], pic_structure);
2918 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* Fill any holes with the current picture so the decoder never
 * dereferences a NULL reference (error concealment). */
2924 for(list=0; list<h->list_count; list++){
2925 for(index= 0; index < h->ref_count[list]; index++){
2926 if(!h->ref_list[list][index].data[0]){
2927 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2928 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF: derive per-field reference entries (stored at offset 16+)
 * from each frame reference — top field at 16+2i, bottom at 16+2i+1 —
 * and duplicate the explicit/implicit weighting parameters for them. */
2936 static void fill_mbaff_ref_list(H264Context *h){
2938 for(list=0; list<2; list++){ //FIXME try list_count
2939 for(i=0; i<h->ref_count[list]; i++){
2940 Picture *frame = &h->ref_list[list][i];
2941 Picture *field = &h->ref_list[list][16+2*i];
2944 field[0].linesize[j] <<= 1;
2945 field[0].reference = PICT_TOP_FIELD;
2946 field[0].poc= field[0].field_poc[0];
2947 field[1] = field[0];
/* Bottom field: same planes, shifted one (original) line down. */
2949 field[1].data[j] += frame->linesize[j];
2950 field[1].reference = PICT_BOTTOM_FIELD;
2951 field[1].poc= field[1].field_poc[1];
2953 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2954 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2956 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2957 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2961 for(j=0; j<h->ref_count[1]; j++){
2962 for(i=0; i<h->ref_count[0]; i++)
2963 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2964 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2965 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse the explicit weighted-prediction table from the slice header:
 * per-list, per-reference luma and chroma weights/offsets, defaulting
 * to (1<<denom, 0) when the corresponding flag is absent.  Sets
 * h->use_weight/use_weight_chroma when any non-default value appears. */
2969 static int pred_weight_table(H264Context *h){
2970 MpegEncContext * const s = &h->s;
2972 int luma_def, chroma_def;
2975 h->use_weight_chroma= 0;
2976 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2977 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2978 luma_def = 1<<h->luma_log2_weight_denom;
2979 chroma_def = 1<<h->chroma_log2_weight_denom;
2981 for(list=0; list<2; list++){
2982 for(i=0; i<h->ref_count[list]; i++){
2983 int luma_weight_flag, chroma_weight_flag;
2985 luma_weight_flag= get_bits1(&s->gb);
2986 if(luma_weight_flag){
2987 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2988 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2989 if( h->luma_weight[list][i] != luma_def
2990 || h->luma_offset[list][i] != 0)
2993 h->luma_weight[list][i]= luma_def;
2994 h->luma_offset[list][i]= 0;
2998 chroma_weight_flag= get_bits1(&s->gb);
2999 if(chroma_weight_flag){
3002 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3003 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3004 if( h->chroma_weight[list][i][j] != chroma_def
3005 || h->chroma_offset[list][i][j] != 0)
3006 h->use_weight_chroma= 1;
3011 h->chroma_weight[list][i][j]= chroma_def;
3012 h->chroma_offset[list][i][j]= 0;
/* List1 weights only exist for B slices. */
3017 if(h->slice_type_nos != FF_B_TYPE) break;
3019 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Initialize the implicit (POC-distance based) weight table for B slices.
 * For the trivial symmetric case (one ref per list, equidistant in POC)
 * weighting is disabled; otherwise each (ref0, ref1) pair gets a weight
 * derived from the temporal distance scale factor, clamped to 32/32
 * (i.e. plain averaging) when out of range.
 * NOTE(review): some lines are elided in this extract.
 */
3023 static void implicit_weight_table(H264Context *h){
3024 MpegEncContext * const s = &h->s;
3026 int cur_poc = s->current_picture_ptr->poc;
/* one reference each way, equidistant from the current picture:
 * implicit weighting degenerates to a plain average — disable it */
3028 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3029 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3031 h->use_weight_chroma= 0;
/* use_weight_chroma==2 marks the implicit mode; denominators fixed at 5
 * so weights are out of 64 (1<<6 across both refs) */
3036 h->use_weight_chroma= 2;
3037 h->luma_log2_weight_denom= 5;
3038 h->chroma_log2_weight_denom= 5;
3040 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3041 int poc0 = h->ref_list[0][ref0].poc;
3042 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3043 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the H.264 8.4.2.3.2 derivation */
3044 int td = av_clip(poc1 - poc0, -128, 127);
3046 int tb = av_clip(cur_poc - poc0, -128, 127);
3047 int tx = (16384 + (FFABS(td) >> 1)) / td;
3048 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factor: fall back to equal 32/32 weighting */
3049 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3050 h->implicit_weight[ref0][ref1] = 32;
3052 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3054 h->implicit_weight[ref0][ref1] = 32;
3060 * Mark a picture as no longer needed for reference. The refmask
3061 * argument allows unreferencing of individual fields or the whole frame.
3062 * If the picture becomes entirely unreferenced, but is being held for
3063 * display purposes, it is marked as such.
3064 * @param refmask mask of fields to unreference; the mask is bitwise
3065 * anded with the reference marking of pic
3066 * @return non-zero if pic becomes entirely unreferenced (except possibly
3067 * for display purposes) zero if one of the fields remains in
3070 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* keep only the field reference bits allowed by refmask; nonzero result
 * means a field is still referenced, so the pic is not fully released */
3072 if (pic->reference &= refmask) {
/* fully unreferenced: if the pic is still queued for output, keep it
 * alive by marking it as held for delayed display */
3075 for(i = 0; h->delayed_pic[i]; i++)
3076 if(pic == h->delayed_pic[i]){
3077 pic->reference=DELAYED_PIC_REF;
3085 * instantaneous decoder refresh.
/* IDR: drop every long-term and short-term reference and reset the
 * frame-number prediction state, as required at an IDR access unit. */
3087 static void idr(H264Context *h){
/* release all 16 possible long-term reference slots */
3090 for(i=0; i<16; i++){
3091 remove_long(h, i, 0);
3093 assert(h->long_ref_count==0);
/* release every short-term reference (refmask 0 = unreference fully) */
3095 for(i=0; i<h->short_ref_count; i++){
3096 unreference_pic(h, h->short_ref[i], 0);
3097 h->short_ref[i]= NULL;
3099 h->short_ref_count=0;
3100 h->prev_frame_num= 0;
3101 h->prev_frame_num_offset= 0;
3106 /* forget old pics after a seek */
/* forget old pics after a seek */
3107 static void flush_dpb(AVCodecContext *avctx){
3108 H264Context *h= avctx->priv_data;
/* drop everything queued for delayed output */
3110 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3111 if(h->delayed_pic[i])
3112 h->delayed_pic[i]->reference= 0;
3113 h->delayed_pic[i]= NULL;
/* reset output ordering so any future POC is accepted */
3115 h->outputed_poc= INT_MIN;
3117 if(h->s.current_picture_ptr)
3118 h->s.current_picture_ptr->reference= 0;
3119 h->s.first_field= 0;
/* flush the shared mpegvideo state (buffers, delayed frames) as well */
3120 ff_mpeg_flush(avctx);
3124 * Find a Picture in the short term reference list by frame number.
3125 * @param frame_num frame number to search for
3126 * @param idx the index into h->short_ref where returned picture is found
3127 * undefined if no picture found.
3128 * @return pointer to the found picture, or NULL if no pic with the provided
3129 * frame number is found
3131 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3132 MpegEncContext * const s = &h->s;
/* linear scan of the short-term list; it is small (<= ref_frame_count) */
3135 for(i=0; i<h->short_ref_count; i++){
3136 Picture *pic= h->short_ref[i];
3137 if(s->avctx->debug&FF_DEBUG_MMCO)
3138 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
/* match found: (elided lines presumably store i via *idx and return pic) */
3139 if(pic->frame_num == frame_num) {
3148 * Remove a picture from the short term reference list by its index in
3149 * that list. This does no checking on the provided index; it is assumed
3150 * to be valid. Other list entries are shifted down.
3151 * @param i index into h->short_ref of picture to remove.
3153 static void remove_short_at_index(H264Context *h, int i){
3154 assert(i >= 0 && i < h->short_ref_count);
3155 h->short_ref[i]= NULL;
/* close the gap: shift the remaining entries down by one */
3156 if (--h->short_ref_count)
3157 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3162 * @return the removed picture or NULL if an error occurs
/* Remove a picture from the short-term list by frame number.
 * The entry is only taken off the list if unreference_pic() reports the
 * picture became fully unreferenced under ref_mask. */
3164 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3165 MpegEncContext * const s = &h->s;
3169 if(s->avctx->debug&FF_DEBUG_MMCO)
3170 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3172 pic = find_short(h, frame_num, &i);
3174 if(unreference_pic(h, pic, ref_mask))
3175 remove_short_at_index(h, i);
3182 * Remove a picture from the long term reference list by its index in
3184 * @return the removed picture or NULL if an error occurs
3186 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3189 pic= h->long_ref[i];
/* only clear the slot once the picture is fully unreferenced */
3191 if(unreference_pic(h, pic, ref_mask)){
3192 assert(h->long_ref[i]->long_ref == 1);
3193 h->long_ref[i]->long_ref= 0;
3194 h->long_ref[i]= NULL;
3195 h->long_ref_count--;
3203 * print short term list
/* Debug helper: dump the short-term reference list (FF_DEBUG_MMCO only). */
3205 static void print_short_term(H264Context *h) {
3207 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3208 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3209 for(i=0; i<h->short_ref_count; i++){
3210 Picture *pic= h->short_ref[i];
3211 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3217 * print long term list
/* Debug helper: dump all 16 long-term reference slots (FF_DEBUG_MMCO only). */
3219 static void print_long_term(H264Context *h) {
3221 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3222 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
/* the long-term list is a fixed 16-slot array, possibly sparse */
3223 for(i = 0; i < 16; i++){
3224 Picture *pic= h->long_ref[i];
3226 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3233 * Executes the reference picture marking (memory management control operations).
/**
 * Execute the decoded MMCO operations against the DPB reference lists,
 * then apply the default sliding-window / second-field bookkeeping and
 * finally enforce the SPS reference-count limit.
 * NOTE(review): some original lines are elided in this extract; comments
 * describe the visible code only.
 */
3235 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3236 MpegEncContext * const s = &h->s;
3238 int current_ref_assigned=0;
3241 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3242 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3244 for(i=0; i<mmco_count; i++){
3245 int structure, frame_num;
3246 if(s->avctx->debug&FF_DEBUG_MMCO)
3247 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* opcodes addressing a short-term picture: resolve pic_num to a frame_num
 * and field structure, then locate the picture in the short-term list */
3249 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3250 || mmco[i].opcode == MMCO_SHORT2LONG){
3251 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3252 pic = find_short(h, frame_num, &j);
/* not found: only an error unless the target long slot already holds it */
3254 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3255 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3256 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3261 switch(mmco[i].opcode){
3262 case MMCO_SHORT2UNUSED:
3263 if(s->avctx->debug&FF_DEBUG_MMCO)
3264 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME keeps the opposite-field reference bits */
3265 remove_short(h, frame_num, structure ^ PICT_FRAME);
3267 case MMCO_SHORT2LONG:
/* evict whatever currently occupies the target long-term slot */
3268 if (h->long_ref[mmco[i].long_arg] != pic)
3269 remove_long(h, mmco[i].long_arg, 0);
3271 remove_short_at_index(h, j);
3272 h->long_ref[ mmco[i].long_arg ]= pic;
3273 if (h->long_ref[ mmco[i].long_arg ]){
3274 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3275 h->long_ref_count++;
3278 case MMCO_LONG2UNUSED:
3279 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3280 pic = h->long_ref[j];
3282 remove_long(h, j, structure ^ PICT_FRAME);
3283 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3284 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3287 // Comment below left from previous code as it is an interesting note.
3288 /* First field in pair is in short term list or
3289 * at a different long term index.
3290 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3291 * Report the problem and keep the pair where it is,
3292 * and mark this field valid.
/* MMCO_LONG: assign the current picture to the given long-term slot */
3295 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3296 remove_long(h, mmco[i].long_arg, 0);
3298 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3299 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3300 h->long_ref_count++;
3303 s->current_picture_ptr->reference |= s->picture_structure;
3304 current_ref_assigned=1;
3306 case MMCO_SET_MAX_LONG:
3307 assert(mmco[i].long_arg <= 16);
3308 // just remove the long term which index is greater than new max
3309 for(j = mmco[i].long_arg; j<16; j++){
3310 remove_long(h, j, 0);
/* MMCO_RESET (presumably — opcode label elided): empty the whole DPB
 * and restart POC/frame_num numbering from zero */
3314 while(h->short_ref_count){
3315 remove_short(h, h->short_ref[0]->frame_num, 0);
3317 for(j = 0; j < 16; j++) {
3318 remove_long(h, j, 0);
3320 s->current_picture_ptr->poc=
3321 s->current_picture_ptr->field_poc[0]=
3322 s->current_picture_ptr->field_poc[1]=
3326 s->current_picture_ptr->frame_num= 0;
/* no MMCO made the current picture a reference — apply the default
 * short-term insertion / second-field pairing rules */
3332 if (!current_ref_assigned) {
3333 /* Second field of complementary field pair; the first field of
3334 * which is already referenced. If short referenced, it
3335 * should be first entry in short_ref. If not, it must exist
3336 * in long_ref; trying to put it on the short list here is an
3337 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3339 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3340 /* Just mark the second field valid */
3341 s->current_picture_ptr->reference = PICT_FRAME;
3342 } else if (s->current_picture_ptr->long_ref) {
3343 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3344 "assignment for second field "
3345 "in complementary field pair "
3346 "(first field is long term)\n");
/* guard against a stale entry with the same frame_num before insertion */
3348 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3350 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* prepend the current picture to the short-term list */
3353 if(h->short_ref_count)
3354 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3356 h->short_ref[0]= s->current_picture_ptr;
3357 h->short_ref_count++;
3358 s->current_picture_ptr->reference |= s->picture_structure;
3362 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3364 /* We have too many reference frames, probably due to corrupted
3365 * stream. Need to discard one frame. Prevents overrun of the
3366 * short_ref and long_ref buffers.
3368 av_log(h->s.avctx, AV_LOG_ERROR,
3369 "number of reference frames exceeds max (probably "
3370 "corrupt input), discarding one\n");
/* prefer dropping a long-term ref only when no short-term ref exists */
3372 if (h->long_ref_count && !h->short_ref_count) {
3373 for (i = 0; i < 16; ++i)
3378 remove_long(h, i, 0);
/* otherwise drop the oldest (last) short-term reference */
3380 pic = h->short_ref[h->short_ref_count - 1];
3381 remove_short(h, pic->frame_num, 0);
3385 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get an implicit "mark current as long-term 0" operation;
 * otherwise explicit MMCOs are read (adaptive mode) or a sliding-window
 * SHORT2UNUSED is synthesized when the DPB is full.
 * NOTE(review): some original lines are elided in this extract.
 */
3390 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3391 MpegEncContext * const s = &h->s;
3395 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, mapped onto broken_link (0 or -1) */
3396 s->broken_link= get_bits1(gb) -1;
3398 h->mmco[0].opcode= MMCO_LONG;
3399 h->mmco[0].long_arg= 0;
3403 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3404 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3405 MMCOOpcode opcode= get_ue_golomb(gb);
3407 h->mmco[i].opcode= opcode;
3408 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 → absolute pic num, modulo max_pic_num */
3409 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3410 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3411 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3415 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3416 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures use doubled indices, hence the 32/16 split below */
3417 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3418 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3421 h->mmco[i].long_arg= long_arg;
3424 if(opcode > (unsigned)MMCO_LONG){
3425 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3428 if(opcode == MMCO_END)
/* sliding-window mode (no explicit MMCOs) */
3433 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* DPB full: synthesize removal of the oldest short-term reference,
 * unless this is the second field of an already-referenced pair */
3435 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3436 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3437 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3438 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* in field mode both fields of that frame must be unreferenced */
3440 if (FIELD_PICTURE) {
3441 h->mmco[0].short_pic_num *= 2;
3442 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3443 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Derive the picture order count (POC) for the current picture,
 * implementing all three SPS poc_type modes (0: MSB/LSB wraparound,
 * 1: cyclic offsets, 2: frame_num-derived — tail elided in this extract),
 * and store the per-field and frame POC on the current Picture.
 */
3453 static int init_poc(H264Context *h){
3454 MpegEncContext * const s = &h->s;
3455 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3457 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture → bump the offset */
3459 h->frame_num_offset= h->prev_frame_num_offset;
3460 if(h->frame_num < h->prev_frame_num)
3461 h->frame_num_offset += max_frame_num;
3463 if(h->sps.poc_type==0){
3464 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect LSB wraparound in either direction and adjust the MSB */
3466 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3467 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3468 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3469 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3471 h->poc_msb = h->prev_poc_msb;
3472 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3474 field_poc[1] = h->poc_msb + h->poc_lsb;
3475 if(s->picture_structure == PICT_FRAME)
3476 field_poc[1] += h->delta_poc_bottom;
3477 }else if(h->sps.poc_type==1){
3478 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3481 if(h->sps.poc_cycle_length != 0)
3482 abs_frame_num = h->frame_num_offset + h->frame_num;
3486 if(h->nal_ref_idc==0 && abs_frame_num > 0)
/* sum of the per-cycle reference offsets from the SPS */
3489 expected_delta_per_poc_cycle = 0;
3490 for(i=0; i < h->sps.poc_cycle_length; i++)
3491 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3493 if(abs_frame_num > 0){
3494 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3495 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3497 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3498 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3499 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3503 if(h->nal_ref_idc == 0)
3504 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3506 field_poc[0] = expectedpoc + h->delta_poc[0];
3507 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3509 if(s->picture_structure == PICT_FRAME)
3510 field_poc[1] += h->delta_poc[1];
/* poc_type==2: POC derived directly from frame_num (tail elided) */
3512 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the field POCs that this picture structure actually covers */
3521 if(s->picture_structure != PICT_BOTTOM_FIELD)
3522 s->current_picture_ptr->field_poc[0]= field_poc[0];
3523 if(s->picture_structure != PICT_TOP_FIELD)
3524 s->current_picture_ptr->field_poc[1]= field_poc[1];
3525 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3532 * initialize scan tables
/**
 * initialize scan tables
 * When the C IDCT is in use the canonical zigzag/field scans are copied
 * verbatim; otherwise each entry is permuted (macro T) to match the
 * coefficient layout expected by the optimized IDCT.  The *_q0 pointers
 * always reference the unpermuted tables, used for lossless
 * (transform-bypass) blocks.
 */
3534 static void init_scan_tables(H264Context *h){
3535 MpegEncContext * const s = &h->s;
3537 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3538 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3539 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3541 for(i=0; i<16; i++){
/* 4x4: swap the two 2-bit halves of the scan index */
3542 #define T(x) (x>>2) | ((x<<2) & 0xF)
3543 h->zigzag_scan[i] = T(zigzag_scan[i]);
3544 h-> field_scan[i] = T( field_scan[i]);
3548 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3549 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3550 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3551 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3552 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3554 for(i=0; i<64; i++){
/* 8x8: swap the two 3-bit halves (row/column transpose of the index) */
3555 #define T(x) (x>>3) | ((x&7)<<3)
3556 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3557 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3558 h->field_scan8x8[i] = T(field_scan8x8[i]);
3559 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3563 if(h->sps.transform_bypass){ //FIXME same ugly
3564 h->zigzag_scan_q0 = zigzag_scan;
3565 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3566 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3567 h->field_scan_q0 = field_scan;
3568 h->field_scan8x8_q0 = field_scan8x8;
3569 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3571 h->zigzag_scan_q0 = h->zigzag_scan;
3572 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3573 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3574 h->field_scan_q0 = h->field_scan;
3575 h->field_scan8x8_q0 = h->field_scan8x8;
3576 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3581 * Replicates H264 "master" context to thread contexts.
/**
 * Replicates H264 "master" context state to a per-thread slave context:
 * current-picture pointers, POC/frame_num prediction state, reference
 * lists and dequant tables.  Pointer members are shared (shallow copy),
 * so the slave must not free them.
 */
3583 static void clone_slice(H264Context *dst, H264Context *src)
3585 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3586 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3587 dst->s.current_picture = src->s.current_picture;
3588 dst->s.linesize = src->s.linesize;
3589 dst->s.uvlinesize = src->s.uvlinesize;
3590 dst->s.first_field = src->s.first_field;
3592 dst->prev_poc_msb = src->prev_poc_msb;
3593 dst->prev_poc_lsb = src->prev_poc_lsb;
3594 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3595 dst->prev_frame_num = src->prev_frame_num;
3596 dst->short_ref_count = src->short_ref_count;
3598 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3599 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3600 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3601 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3603 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3604 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3608 * decodes a slice header.
3609 * This will also call MPV_common_init() and frame_start() as needed.
3611 * @param h h264context
3612 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
3614 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* NOTE(review): a number of original lines (closing braces, else branches,
 * error-return paths) are elided in this extract; the comments below
 * annotate only the code that is visible. */
3616 static int decode_slice_header(H264Context *h, H264Context *h0){
3617 MpegEncContext * const s = &h->s;
3618 MpegEncContext * const s0 = &h0->s;
3619 unsigned int first_mb_in_slice;
3620 unsigned int pps_id;
3621 int num_ref_idx_active_override_flag;
3622 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3623 unsigned int slice_type, tmp, i, j;
3624 int default_ref_list_done = 0;
3625 int last_pic_structure;
/* non-reference slices may use cheaper qpel functions in "fast" mode */
3627 s->dropable= h->nal_ref_idc == 0;
3629 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3630 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3631 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3633 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3634 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3637 first_mb_in_slice= get_ue_golomb(&s->gb);
/* first slice of a new picture: reset the slice counter */
3639 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3640 h0->current_slice = 0;
3641 if (!s0->first_field)
3642 s->current_picture_ptr= NULL;
/* ---- slice_type parsing (values >4 mean "type fixed for the picture") */
3645 slice_type= get_ue_golomb(&s->gb);
3647 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3652 h->slice_type_fixed=1;
3654 h->slice_type_fixed=0;
3656 slice_type= slice_type_map[ slice_type ];
/* I slices (and repeated slice types) can reuse the default ref list */
3657 if (slice_type == FF_I_TYPE
3658 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3659 default_ref_list_done = 1;
3661 h->slice_type= slice_type;
3662 h->slice_type_nos= slice_type & 3;
3664 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3665 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3666 av_log(h->s.avctx, AV_LOG_ERROR,
3667 "B picture before any references, skipping\n");
/* ---- PPS / SPS activation */
3671 pps_id= get_ue_golomb(&s->gb);
3672 if(pps_id>=MAX_PPS_COUNT){
3673 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3676 if(!h0->pps_buffers[pps_id]) {
3677 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3680 h->pps= *h0->pps_buffers[pps_id];
3682 if(!h0->sps_buffers[h->pps.sps_id]) {
3683 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3686 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* dequant tables depend on the PPS; rebuild only in the master context */
3688 if(h == h0 && h->dequant_coeff_pps != pps_id){
3689 h->dequant_coeff_pps = pps_id;
3690 init_dequant_tables(h);
/* ---- geometry from the SPS (mb_height doubled for field coding) */
3693 s->mb_width= h->sps.mb_width;
3694 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3696 h->b_stride= s->mb_width*4;
3697 h->b8_stride= s->mb_width*2;
3699 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3700 if(h->sps.frame_mbs_only_flag)
3701 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3703 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3705 if (s->context_initialized
3706 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3708 return -1; // width / height changed during parallelized decoding
/* ---- (re)initialize the mpegvideo context and thread slaves */
3712 if (!s->context_initialized) {
3714 return -1; // we cant (re-)initialize context during parallel decoding
3715 if (MPV_common_init(s) < 0)
3719 init_scan_tables(h);
3722 for(i = 1; i < s->avctx->thread_count; i++) {
/* each slave context shares the MpegEncContext part but gets a
 * zeroed H264-specific tail, then its own scan tables */
3724 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3725 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3726 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3729 init_scan_tables(c);
3733 for(i = 0; i < s->avctx->thread_count; i++)
3734 if(context_init(h->thread_context[i]) < 0)
3737 s->avctx->width = s->width;
3738 s->avctx->height = s->height;
3739 s->avctx->sample_aspect_ratio= h->sps.sar;
3740 if(!s->avctx->sample_aspect_ratio.den)
3741 s->avctx->sample_aspect_ratio.den = 1;
3743 if(h->sps.timing_info_present_flag){
3744 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* workaround for old x264 builds that wrote a halved time_scale */
3745 if(h->x264_build > 0 && h->x264_build < 44)
3746 s->avctx->time_base.den *= 2;
3747 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3748 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
/* ---- frame_num and picture structure (frame / field / MBAFF) */
3752 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3755 h->mb_aff_frame = 0;
3756 last_pic_structure = s0->picture_structure;
3757 if(h->sps.frame_mbs_only_flag){
3758 s->picture_structure= PICT_FRAME;
3760 if(get_bits1(&s->gb)) { //field_pic_flag
3761 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3763 s->picture_structure= PICT_FRAME;
3764 h->mb_aff_frame = h->sps.mb_aff;
3767 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3769 if(h0->current_slice == 0){
/* fill frame_num gaps with synthetic reference pictures so that the
 * sliding-window state stays consistent with the encoder's */
3770 while(h->frame_num != h->prev_frame_num &&
3771 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3772 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3774 h->prev_frame_num++;
3775 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3776 s->current_picture_ptr->frame_num= h->prev_frame_num;
3777 execute_ref_pic_marking(h, NULL, 0);
3780 /* See if we have a decoded first field looking for a pair... */
3781 if (s0->first_field) {
3782 assert(s0->current_picture_ptr);
3783 assert(s0->current_picture_ptr->data[0]);
3784 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3786 /* figure out if we have a complementary field pair */
3787 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3789 * Previous field is unmatched. Don't display it, but let it
3790 * remain for reference if marked as such.
3792 s0->current_picture_ptr = NULL;
3793 s0->first_field = FIELD_PICTURE;
3796 if (h->nal_ref_idc &&
3797 s0->current_picture_ptr->reference &&
3798 s0->current_picture_ptr->frame_num != h->frame_num) {
3800 * This and previous field were reference, but had
3801 * different frame_nums. Consider this field first in
3802 * pair. Throw away previous field except for reference
3805 s0->first_field = 1;
3806 s0->current_picture_ptr = NULL;
3809 /* Second field in complementary pair */
3810 s0->first_field = 0;
3815 /* Frame or first field in a potentially complementary pair */
3816 assert(!s0->current_picture_ptr);
3817 s0->first_field = FIELD_PICTURE;
3820 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3821 s0->first_field = 0;
3828 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
/* ---- map first_mb_in_slice to resync mb_x/mb_y */
3830 assert(s->mb_num == s->mb_width * s->mb_height);
3831 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3832 first_mb_in_slice >= s->mb_num){
3833 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3836 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3837 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3838 if (s->picture_structure == PICT_BOTTOM_FIELD)
3839 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3840 assert(s->mb_y < s->mb_height);
/* pic_num space is doubled (plus 1) for field pictures */
3842 if(s->picture_structure==PICT_FRAME){
3843 h->curr_pic_num= h->frame_num;
3844 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3846 h->curr_pic_num= 2*h->frame_num + 1;
3847 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3850 if(h->nal_unit_type == NAL_IDR_SLICE){
3851 get_ue_golomb(&s->gb); /* idr_pic_id */
/* ---- POC syntax elements, fed to init_poc() */
3854 if(h->sps.poc_type==0){
3855 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3857 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3858 h->delta_poc_bottom= get_se_golomb(&s->gb);
3862 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3863 h->delta_poc[0]= get_se_golomb(&s->gb);
3865 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3866 h->delta_poc[1]= get_se_golomb(&s->gb);
3871 if(h->pps.redundant_pic_cnt_present){
3872 h->redundant_pic_count= get_ue_golomb(&s->gb);
3875 //set defaults, might be overridden a few lines later
3876 h->ref_count[0]= h->pps.ref_count[0];
3877 h->ref_count[1]= h->pps.ref_count[1];
/* ---- reference counts and direct-mode flag (P/B slices only) */
3879 if(h->slice_type_nos != FF_I_TYPE){
3880 if(h->slice_type_nos == FF_B_TYPE){
3881 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3883 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3885 if(num_ref_idx_active_override_flag){
3886 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3887 if(h->slice_type_nos==FF_B_TYPE)
3888 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned trick: catches both 0 and >32 in one comparison */
3890 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3891 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3892 h->ref_count[0]= h->ref_count[1]= 1;
3896 if(h->slice_type_nos == FF_B_TYPE)
/* ---- reference list construction / reordering / weighting */
3903 if(!default_ref_list_done){
3904 fill_default_ref_list(h);
3907 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3910 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3911 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3912 pred_weight_table(h);
3913 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3914 implicit_weight_table(h);
/* ref pic marking always operates on the master context h0 */
3919 decode_ref_pic_marking(h0, &s->gb);
3922 fill_mbaff_ref_list(h);
3924 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3925 direct_dist_scale_factor(h);
3926 direct_ref_list_init(h);
3928 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3929 tmp = get_ue_golomb(&s->gb);
3931 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3934 h->cabac_init_idc= tmp;
/* ---- QP and SP/SI extras */
3937 h->last_qscale_diff = 0;
3938 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3940 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3944 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3945 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3946 //FIXME qscale / qp ... stuff
3947 if(h->slice_type == FF_SP_TYPE){
3948 get_bits1(&s->gb); /* sp_for_switch_flag */
3950 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3951 get_se_golomb(&s->gb); /* slice_qs_delta */
/* ---- deblocking filter control */
3954 h->deblocking_filter = 1;
3955 h->slice_alpha_c0_offset = 0;
3956 h->slice_beta_offset = 0;
3957 if( h->pps.deblocking_filter_parameters_present ) {
3958 tmp= get_ue_golomb(&s->gb);
3960 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream idc 0 means "filter on", 1 means "off": invert 0<->1 */
3963 h->deblocking_filter= tmp;
3964 if(h->deblocking_filter < 2)
3965 h->deblocking_filter^= 1; // 1<->0
3967 if( h->deblocking_filter ) {
3968 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3969 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3973 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3974 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3975 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3976 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3977 h->deblocking_filter= 0;
/* cross-slice deblocking is incompatible with sliced multithreading */
3979 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3980 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3981 /* Cheat slightly for speed:
3982 Do not bother to deblock across slices. */
3983 h->deblocking_filter = 2;
3985 h0->max_contexts = 1;
3986 if(!h0->single_decode_warning) {
3987 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3988 h0->single_decode_warning = 1;
3991 return 1; // deblocking switched inside frame
3996 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
/* NOTE(review): the '?' is not valid C — in the full file this line is
 * presumably disabled by the preprocessor (FMO is unsupported); confirm
 * against the complete source */
3997 slice_group_change_cycle= get_bits(&s->gb, ?);
4000 h0->last_slice_type = slice_type;
4001 h->slice_num = ++h0->current_slice;
/* ---- build the ref->frame mapping used by the loop filter */
4004 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4008 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4009 +(h->ref_list[j][i].reference&3);
4012 for(i=16; i<48; i++)
4013 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4014 +(h->ref_list[j][i].reference&3);
4017 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4018 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4020 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4021 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4023 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4025 av_get_pict_type_char(h->slice_type),
4026 pps_id, h->frame_num,
4027 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4028 h->ref_count[0], h->ref_count[1],
4030 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4032 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4033 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Read the CAVLC level_prefix: count leading zero bits before the first
 * one-bit in the cached bitstream word.  Uses the raw bitreader macros
 * for speed; the debug av_log path is only in trace builds. */
4043 static inline int get_level_prefix(GetBitContext *gb){
4047 OPEN_READER(re, gb);
4048 UPDATE_CACHE(re, gb);
4049 buf=GET_CACHE(re, gb);
/* position of the terminating one-bit from the MSB side */
4051 log= 32 - av_log2(buf);
4053 print_bin(buf>>(32-log), log);
4054 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4057 LAST_SKIP_BITS(re, gb, log);
4058 CLOSE_READER(re, gb);
/* Whether the 8x8 transform may be used for the current macroblock:
 * disallowed if any sub-block is smaller than 8x8, or is a direct-mode
 * sub-block without direct_8x8_inference (tail of loop elided here). */
4063 static inline int get_dct8x8_allowed(H264Context *h){
4066 if(!IS_SUB_8X8(h->sub_mb_type[i])
4067 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4074 * decodes a residual block.
4075 * @param n block index
4076 * @param scantable scantable
4077 * @param max_coeff number of coefficients in the block
4078 * @return <0 if an error occurred
4080 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
/* CAVLC residual decode for one block: reads coeff_token (total coefficients
 * + trailing ones), then the level values, then total_zeros and run_before,
 * and finally scatters the levels into block[] via scantable, optionally
 * dequantizing with qmul. Returns <0 on a corrupt bitstream.
 * NOTE(review): several original lines (else branches, early-return paths,
 * closing braces) are missing from this chunk; control flow below is shown
 * incompletely. */
4081     MpegEncContext * const s = &h->s;
/* maps total_coeff of the neighbours (0..16) to one of 4 coeff_token VLC tables */
4082     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4084     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4086     //FIXME put trailing_onex into the context
/* chroma DC uses its own coeff_token table; luma DC and luma/chroma AC pick a
 * table based on the predicted non-zero count of the neighbours */
4088     if(n == CHROMA_DC_BLOCK_INDEX){
4089         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4090         total_coeff= coeff_token>>2;
4092         if(n == LUMA_DC_BLOCK_INDEX){
4093             total_coeff= pred_non_zero_count(h, 0);
4094             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4095             total_coeff= coeff_token>>2;
4097             total_coeff= pred_non_zero_count(h, n);
4098             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4099             total_coeff= coeff_token>>2;
4100             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4104     //FIXME set last_non_zero?
/* sanity check against corrupt streams: cannot have more coefficients than
 * the block holds */
4108     if(total_coeff > (unsigned)max_coeff) {
4109         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low 2 bits of coeff_token are the number of trailing +/-1 coefficients;
 * each needs only a sign bit */
4113     trailing_ones= coeff_token&3;
4114     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4115     assert(total_coeff<=16);
4117         for(i=0; i<trailing_ones; i++){
4118             level[i]= 1 - 2*get_bits1(gb);
/* first non-trailing-one level: suffix_length starts at 0, or 1 when there
 * are many coefficients and few trailing ones */
4122             int level_code, mask;
4123             int suffix_length = total_coeff > 10 && trailing_ones < 3;
4124             int prefix= get_level_prefix(gb);
4126             //first coefficient has suffix_length equal to 0 or 1
4127             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4129                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4131                     level_code= (prefix<<suffix_length); //part
4132             }else if(prefix==14){
4134                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4136                     level_code= prefix + get_bits(gb, 4); //part
4138                 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4139                 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
/* escape for very large levels (prefix >= 16) */
4141                     level_code += (1<<(prefix-3))-4096;
4144             if(trailing_ones < 3) level_code += 2;
/* map the unsigned level_code to a signed level: even -> positive, odd -> negative */
4149             mask= -(level_code&1);
4150             level[i]= (((2+level_code)>>1) ^ mask) - mask;
4153             //remaining coefficients have suffix_length > 0
4154             for(;i<total_coeff;i++) {
/* suffix_length grows when |level| exceeds the threshold for the current length */
4155                 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4156                 prefix = get_level_prefix(gb);
4158                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4160                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4162                         level_code += (1<<(prefix-3))-4096;
4164                 mask= -(level_code&1);
4165                 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4166                 if(level_code > suffix_limit[suffix_length])
/* total_zeros: how many zero coefficients precede the last non-zero one; it
 * is implicitly 0 when the block is completely filled */
4171     if(total_coeff == max_coeff)
4174         if(n == CHROMA_DC_BLOCK_INDEX)
4175             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4177             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* scatter the decoded levels back-to-front through the scantable; this copy
 * of the loop is the qmul==NULL (no dequant) variant */
4180     coeff_num = zeros_left + total_coeff - 1;
4181     j = scantable[coeff_num];
4183         block[j] = level[0];
4184         for(i=1;i<total_coeff;i++) {
4187             else if(zeros_left < 7){
4188                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4190                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4192             zeros_left -= run_before;
4193             coeff_num -= 1 + run_before;
4194             j= scantable[ coeff_num ];
/* same placement loop, but dequantizing: (level*qmul + 32) >> 6 */
4199         block[j] = (level[0] * qmul[j] + 32)>>6;
4200         for(i=1;i<total_coeff;i++) {
4203             else if(zeros_left < 7){
4204                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4206                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4208             zeros_left -= run_before;
4209             coeff_num -= 1 + run_before;
4210             j= scantable[ coeff_num ];
4212             block[j]= (level[i] * qmul[j] + 32)>>6;
/* a negative zeros_left means run_before values overran total_zeros -> corrupt */
4217         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4224 static void predict_field_decoding_flag(H264Context *h){
/* For a skipped MB pair in MBAFF, infer the field/frame decoding flag from
 * the left neighbour if it is in the same slice, else from the top
 * neighbour; sets both mb_mbaff and mb_field_decoding_flag. */
4225     MpegEncContext * const s = &h->s;
4226     const int mb_xy= h->mb_xy;
4227     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4228                 ? s->current_picture.mb_type[mb_xy-1]
4229                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4230                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
/* NOTE(review): the fallback value when neither neighbour is available is on
 * a line not visible in this chunk. */
4232     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4236 * decodes a P_SKIP or B_SKIP macroblock
4238 static void decode_mb_skip(H264Context *h){
/* Reconstructs a skipped macroblock: zeroes the coefficient counts, derives
 * the motion (direct prediction for B skips, pskip prediction for P skips),
 * and writes the MB state back into the current picture. */
4239     MpegEncContext * const s = &h->s;
4240     const int mb_xy= h->mb_xy;
4243     memset(h->non_zero_count[mb_xy], 0, 16);
4244     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4247         mb_type|= MB_TYPE_INTERLACED;
4249     if( h->slice_type_nos == FF_B_TYPE )
4251         // just for fill_caches. pred_direct_motion will set the real mb_type
4252         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4254         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4255         pred_direct_motion(h, &mb_type);
4256         mb_type|= MB_TYPE_SKIP;
/* P_SKIP path: 16x16 L0 prediction with the inferred pskip motion vector */
4261         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4263         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4264         pred_pskip_motion(h, &mx, &my);
4265         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4266         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit motion, type, qscale and slice id for this MB */
4269     write_back_motion(h, mb_type);
4270     s->current_picture.mb_type[mb_xy]= mb_type;
4271     s->current_picture.qscale_table[mb_xy]= s->qscale;
4272     h->slice_table[ mb_xy ]= h->slice_num;
4273     h->prev_mb_skipped= 1;
4277 * decodes a macroblock
4278 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4280 static int decode_mb_cavlc(H264Context *h){
/* Decodes one macroblock with CAVLC entropy coding: skip-run handling,
 * mb_type, intra prediction modes or inter partitioning + refs + motion
 * vectors, cbp, dquant, and finally all residual blocks via
 * decode_residual(). Returns 0 on success, <0 on error.
 * NOTE(review): this chunk omits many original lines (else branches, error
 * returns, closing braces); comments below describe only what is visible. */
4281     MpegEncContext * const s = &h->s;
4283     int partition_count;
4284     unsigned int mb_type, cbp;
4285     int dct8x8_allowed= h->pps.transform_8x8_mode;
4287     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4289     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4291     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4292     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* non-intra slices carry an mb_skip_run before each coded MB */
4294     if(h->slice_type_nos != FF_I_TYPE){
4295         if(s->mb_skip_run==-1)
4296             s->mb_skip_run= get_ue_golomb(&s->gb);
4298         if (s->mb_skip_run--) {
4299             if(FRAME_MBAFF && (s->mb_y&1) == 0){
/* field decoding flag is read on the last skip of a pair, predicted otherwise */
4300                 if(s->mb_skip_run==0)
4301                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4303                     predict_field_decoding_flag(h);
4310         if( (s->mb_y&1) == 0 )
4311             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4313     h->prev_mb_skipped= 0;
/* read mb_type; B and P slices remap large values to intra types */
4316     mb_type= get_ue_golomb(&s->gb);
4317     if(h->slice_type_nos == FF_B_TYPE){
4319             partition_count= b_mb_type_info[mb_type].partition_count;
4320             mb_type= b_mb_type_info[mb_type].type;
4323             goto decode_intra_mb;
4325     }else if(h->slice_type_nos == FF_P_TYPE){
4327             partition_count= p_mb_type_info[mb_type].partition_count;
4328             mb_type= p_mb_type_info[mb_type].type;
4331             goto decode_intra_mb;
4334        assert(h->slice_type_nos == FF_I_TYPE);
4335         if(h->slice_type == FF_SI_TYPE && mb_type)
4339             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4343         cbp= i_mb_type_info[mb_type].cbp;
4344         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4345         mb_type= i_mb_type_info[mb_type].type;
4349         mb_type |= MB_TYPE_INTERLACED;
4351     h->slice_table[ mb_xy ]= h->slice_num;
/* PCM macroblock: raw samples follow, byte aligned, copied straight into h->mb */
4353     if(IS_INTRA_PCM(mb_type)){
4356         // We assume these blocks are very rare so we do not optimize it.
4357         align_get_bits(&s->gb);
4359         // The pixels are stored in the same order as levels in h->mb array.
4360         for(x=0; x < (CHROMA ? 384 : 256); x++){
4361             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4364         // In deblocking, the quantizer is 0
4365         s->current_picture.qscale_table[mb_xy]= 0;
4366         // All coeffs are present
4367         memset(h->non_zero_count[mb_xy], 16, 16);
4369         s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF: temporarily double ref counts while decoding a field MB pair
 * (undone at the end of this function) */
4374             h->ref_count[0] <<= 1;
4375             h->ref_count[1] <<= 1;
4378     fill_caches(h, mb_type, 0);
4381     if(IS_INTRA(mb_type)){
4383 //            init_top_left_availability(h);
4384         if(IS_INTRA4x4(mb_type)){
/* optional per-MB 8x8 transform flag for intra4x4 */
4387             if(dct8x8_allowed && get_bits1(&s->gb)){
4388                 mb_type |= MB_TYPE_8x8DCT;
4392 //                fill_intra4x4_pred_table(h);
/* read the 4x4 (or 8x8) intra prediction modes, each either predicted or
 * signalled as a 3-bit remainder */
4393             for(i=0; i<16; i+=di){
4394                 int mode= pred_intra_mode(h, i);
4396                 if(!get_bits1(&s->gb)){
4397                     const int rem_mode= get_bits(&s->gb, 3);
4398                     mode = rem_mode + (rem_mode >= mode);
4402                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4404                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4406             write_back_intra_pred_mode(h);
4407             if( check_intra4x4_pred_mode(h) < 0)
4410             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4411             if(h->intra16x16_pred_mode < 0)
4415             pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4418             h->chroma_pred_mode= pred_mode;
/* 8x8 partitioned inter MB: read the four sub_mb_types, then refs and MVs */
4420     }else if(partition_count==4){
4421         int i, j, sub_partition_count[4], list, ref[2][4];
4423         if(h->slice_type_nos == FF_B_TYPE){
4425                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4426                 if(h->sub_mb_type[i] >=13){
4427                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4430                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4431                 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4433             if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4434                 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4435                 pred_direct_motion(h, &mb_type);
4436                 h->ref_cache[0][scan8[4]] =
4437                 h->ref_cache[1][scan8[4]] =
4438                 h->ref_cache[0][scan8[12]] =
4439                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4442             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4444                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4445                 if(h->sub_mb_type[i] >=4){
4446                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4449                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4450                 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each non-direct 8x8 partition, per list */
4454         for(list=0; list<h->list_count; list++){
4455             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4457                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4458                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4459                     unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4461                         av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4473             dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors per sub-partition; cache layout filled according to the
 * sub-partition shape (8x8 / 8x4 / 4x8 / 4x4) */
4475         for(list=0; list<h->list_count; list++){
4477                 if(IS_DIRECT(h->sub_mb_type[i])) {
4478                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4481                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4482                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4484                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4485                     const int sub_mb_type= h->sub_mb_type[i];
4486                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4487                     for(j=0; j<sub_partition_count[i]; j++){
4489                         const int index= 4*i + block_width*j;
4490                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4491                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4492                         mx += get_se_golomb(&s->gb);
4493                         my += get_se_golomb(&s->gb);
4494                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4496                         if(IS_SUB_8X8(sub_mb_type)){
4498                                 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4500                                 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4501                         }else if(IS_SUB_8X4(sub_mb_type)){
4502                             mv_cache[ 1 ][0]= mx;
4503                             mv_cache[ 1 ][1]= my;
4504                         }else if(IS_SUB_4X8(sub_mb_type)){
4505                             mv_cache[ 8 ][0]= mx;
4506                             mv_cache[ 8 ][1]= my;
4508                         mv_cache[ 0 ][0]= mx;
4509                         mv_cache[ 0 ][1]= my;
4512                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4518     }else if(IS_DIRECT(mb_type)){
4519         pred_direct_motion(h, &mb_type);
4520         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* non-partitioned inter MB: 16x16, 16x8 or 8x16; refs first, then MVs */
4522         int list, mx, my, i;
4523         //FIXME we should set ref_idx_l? to 0 if we use that later ...
4524         if(IS_16X16(mb_type)){
4525             for(list=0; list<h->list_count; list++){
4527                     if(IS_DIR(mb_type, 0, list)){
4528                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4529                         if(val >= h->ref_count[list]){
4530                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4534                    val= LIST_NOT_USED&0xFF;
4535                 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4537             for(list=0; list<h->list_count; list++){
4539                     if(IS_DIR(mb_type, 0, list)){
4540                         pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4541                         mx += get_se_golomb(&s->gb);
4542                         my += get_se_golomb(&s->gb);
4543                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4545                         val= pack16to32(mx,my);
4548                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4551         else if(IS_16X8(mb_type)){
4552             for(list=0; list<h->list_count; list++){
4555                         if(IS_DIR(mb_type, i, list)){
4556                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4557                             if(val >= h->ref_count[list]){
4558                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4562                             val= LIST_NOT_USED&0xFF;
4563                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4566             for(list=0; list<h->list_count; list++){
4569                         if(IS_DIR(mb_type, i, list)){
4570                             pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4571                             mx += get_se_golomb(&s->gb);
4572                             my += get_se_golomb(&s->gb);
4573                             tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4575                             val= pack16to32(mx,my);
4578                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4582             assert(IS_8X16(mb_type));
4583             for(list=0; list<h->list_count; list++){
4586                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4587                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4588                             if(val >= h->ref_count[list]){
4589                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4593                             val= LIST_NOT_USED&0xFF;
4594                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4597             for(list=0; list<h->list_count; list++){
4600                         if(IS_DIR(mb_type, i, list)){
4601                             pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4602                             mx += get_se_golomb(&s->gb);
4603                             my += get_se_golomb(&s->gb);
4604                             tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4606                             val= pack16to32(mx,my);
4609                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4615     if(IS_INTER(mb_type))
4616         write_back_motion(h, mb_type);
/* coded block pattern (not signalled for intra16x16, where it is implied by
 * the mb_type) */
4618     if(!IS_INTRA16x16(mb_type)){
4619         cbp= get_ue_golomb(&s->gb);
4621             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4626             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4627             else                     cbp= golomb_to_inter_cbp   [cbp];
4629             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4630             else                     cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with luma residual */
4635     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4636         if(get_bits1(&s->gb)){
4637             mb_type |= MB_TYPE_8x8DCT;
4638     h->cbp_table[mb_xy]= cbp;
4641     s->current_picture.mb_type[mb_xy]= mb_type;
/* residuals: dquant, then luma (DC+AC for intra16x16, per-8x8 otherwise),
 * then chroma DC and AC */
4643     if(cbp || IS_INTRA16x16(mb_type)){
4644         int i8x8, i4x4, chroma_idx;
4646         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4647         const uint8_t *scan, *scan8x8, *dc_scan;
4649 //        fill_non_zero_count_cache(h);
/* q0 scan variants avoid the dequant multiply when qscale==0 */
4651         if(IS_INTERLACED(mb_type)){
4652             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4653             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4654             dc_scan= luma_dc_field_scan;
4656             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4657             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4658             dc_scan= luma_dc_zigzag_scan;
4661         dquant= get_se_golomb(&s->gb);
4663         if( dquant > 25 || dquant < -26 ){
4664             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's mb_qp_delta accumulation */
4668         s->qscale += dquant;
4669         if(((unsigned)s->qscale) > 51){
4670             if(s->qscale<0) s->qscale+= 52;
4671             else            s->qscale-= 52;
4674         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4675         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4676         if(IS_INTRA16x16(mb_type)){
4677             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4678                 return -1; //FIXME continue if partitioned and other return -1 too
4681             assert((cbp&15) == 0 || (cbp&15) == 15);
4684                 for(i8x8=0; i8x8<4; i8x8++){
4685                     for(i4x4=0; i4x4<4; i4x4++){
4686                         const int index= i4x4 + 4*i8x8;
4687                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4693                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4696             for(i8x8=0; i8x8<4; i8x8++){
4697                 if(cbp & (1<<i8x8)){
4698                     if(IS_8x8DCT(mb_type)){
/* 8x8 transform: four interleaved 4x4 CAVLC scans share one 8x8 buffer */
4699                         DCTELEM *buf = &h->mb[64*i8x8];
4701                         for(i4x4=0; i4x4<4; i4x4++){
4702                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4703                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4706                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4707                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4709                         for(i4x4=0; i4x4<4; i4x4++){
4710                             const int index= i4x4 + 4*i8x8;
4712                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4718                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4719                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma: DC blocks (no dequant here), then AC blocks with per-plane qmul */
4725             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4726                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4732             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4733                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4734                 for(i4x4=0; i4x4<4; i4x4++){
4735                     const int index= 16 + 4*chroma_idx + i4x4;
4736                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4742             uint8_t * const nnz= &h->non_zero_count_cache[0];
4743             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4744             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4747         uint8_t * const nnz= &h->non_zero_count_cache[0];
4748         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4749         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4750         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4752     s->current_picture.qscale_table[mb_xy]= s->qscale;
4753     write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling from the top of the function */
4756         h->ref_count[0] >>= 1;
4757         h->ref_count[1] >>= 1;
4763 static int decode_cabac_field_decoding_flag(H264Context *h) {
/* CABAC mb_field_decoding_flag: context 70..72, incremented once for each
 * same-slice interlaced neighbour (left MB pair, above MB pair). */
4764     MpegEncContext * const s = &h->s;
4765     const int mb_x = s->mb_x;
4766     const int mb_y = s->mb_y & ~1;
4767     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
4768     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
4770     unsigned int ctx = 0;
4772     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4775     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4779     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4782 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
/* Decodes an intra mb_type with CABAC. Returns 0 for I4x4, 25 for PCM, or
 * 1..24 for the I16x16 variants (encoding cbp_luma, cbp_chroma and the
 * 16x16 prediction mode into the value). intra_slice selects the context
 * offsets used in I slices vs. intra types inside P/B slices. */
4783     uint8_t *state= &h->cabac_state[ctx_base];
4787         MpegEncContext * const s = &h->s;
4788         const int mba_xy = h->left_mb_xy[0];
4789         const int mbb_xy = h->top_mb_xy;
/* context increments when a same-slice neighbour is not I4x4 */
4791         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4793         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4795         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4796             return 0;   /* I4x4 */
4799         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4800             return 0;   /* I4x4 */
/* terminate bin distinguishes PCM from I16x16 */
4803     if( get_cabac_terminate( &h->cabac ) )
4804         return 25;  /* PCM */
4806     mb_type = 1; /* I16x16 */
4807     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4808     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4809         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4810     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4811     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4815 static int decode_cabac_mb_type( H264Context *h ) {
/* Decodes mb_type with CABAC for I, P and B slices, delegating to
 * decode_cabac_intra_mb_type() for intra types (with slice-dependent
 * context base and result offset). */
4816     MpegEncContext * const s = &h->s;
4818     if( h->slice_type_nos == FF_I_TYPE ) {
4819         return decode_cabac_intra_mb_type(h, 3, 1);
4820     } else if( h->slice_type_nos == FF_P_TYPE ) {
4821         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4823             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4824                 /* P_L0_D16x16, P_8x8 */
4825                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4827                 /* P_L0_D8x16, P_L0_D16x8 */
4828                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4831             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4833     } else if( h->slice_type_nos == FF_B_TYPE ) {
4834         const int mba_xy = h->left_mb_xy[0];
4835         const int mbb_xy = h->top_mb_xy;
/* ctx grows for each same-slice non-direct neighbour */
4839         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4841         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4844         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4845             return 0; /* B_Direct_16x16 */
4847         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4848             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects the remaining B types; 13 escapes to intra */
4851         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4852         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4853         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4854         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4856             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4857         else if( bits == 13 ) {
4858             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4859         } else if( bits == 14 )
4860             return 11; /* B_L1_L0_8x16 */
4861         else if( bits == 15 )
4862             return 22; /* B_8x8 */
4864         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4865         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4867         /* TODO SI/SP frames? */
4872 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
/* CABAC mb_skip_flag. The neighbour MB addresses are adjusted for MBAFF
 * field/frame pairing; the context (11..13 for P, shifted for B) grows for
 * each same-slice non-skipped neighbour. */
4873     MpegEncContext * const s = &h->s;
4877     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4878         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4881                 && h->slice_table[mba_xy] == h->slice_num
4882                 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4883             mba_xy += s->mb_stride;
4885             mbb_xy = mb_xy - s->mb_stride;
4887                     && h->slice_table[mbb_xy] == h->slice_num
4888                     && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4889                 mbb_xy -= s->mb_stride;
4891             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4893         int mb_xy = h->mb_xy;
4895         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4898     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4900     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4903     if( h->slice_type_nos == FF_B_TYPE )
4905     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4908 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
/* CABAC intra4x4 prediction mode: first bin (ctx 68) signals "use the
 * predicted mode"; otherwise a 3-bit remainder (ctx 69) is read and mapped
 * around pred_mode. NOTE(review): the return statements are on lines not
 * visible in this chunk. */
4911     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4914     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4915     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4916     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4918     if( mode >= pred_mode )
4924 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
/* CABAC intra chroma prediction mode: ctx 64..66 chosen from the neighbours'
 * stored chroma_pred_mode, then a truncated-unary suffix with ctx 64+3. */
4925     const int mba_xy = h->left_mb_xy[0];
4926     const int mbb_xy = h->top_mb_xy;
4930     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4931     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4934     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4937     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4940     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4942     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4948 static int decode_cabac_mb_cbp_luma( H264Context *h) {
/* CABAC luma coded_block_pattern: one bin per 8x8 block (ctx 73..76), each
 * context derived from the left/top neighbour cbp bits (-1 when the
 * neighbour is outside the slice, which makes the !(cbp & ...) tests 0). */
4949     int cbp_b, cbp_a, ctx, cbp = 0;
4951     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4952     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
4954     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4955     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4956     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
4957     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4958     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
4959     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4960     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
4961     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4964 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
/* CABAC chroma coded_block_pattern (0 = none, 1 = DC only, 2 = DC+AC),
 * decoded as up to two bins with ctx 77..80 derived from the neighbours'
 * chroma cbp bits. */
4968     cbp_a = (h->left_cbp>>4)&0x03;
4969     cbp_b = (h-> top_cbp>>4)&0x03;
4972     if( cbp_a > 0 ) ctx++;
4973     if( cbp_b > 0 ) ctx += 2;
4974     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4978     if( cbp_a == 2 ) ctx++;
4979     if( cbp_b == 2 ) ctx += 2;
4980     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
4982 static int decode_cabac_mb_dqp( H264Context *h) {
/* CABAC mb_qp_delta: unary-coded magnitude with ctx 60..63 (initial context
 * depends on whether the previous MB had a nonzero delta); odd/even value
 * maps to sign. Capped at 102 iterations to stop corrupt streams looping. */
4986     if( h->last_qscale_diff != 0 )
4989     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4995         if(val > 102) //prevent infinite loop
5002         return -(val + 1)/2;
5004 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
/* CABAC P-slice sub_mb_type: a small binary tree over ctx 21..23.
 * NOTE(review): the leaf return values are on lines not visible here. */
5005     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5007     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5009     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5013 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
/* CABAC B-slice sub_mb_type: binary tree over ctx 36..39; returns the
 * sub_mb_type index (0 = B_Direct_8x8, then the list/shape combinations). */
5015     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5016         return 0;   /* B_Direct_8x8 */
5017     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5018         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5020     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5021         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5022             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5025     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5026     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
5030 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
/* CABAC transform_size_8x8_flag; context 399..401 selected by the number of
 * neighbouring MBs already using the 8x8 transform. */
5031    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5034 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
/* CABAC ref_idx: context from left/top cached ref indices (direct-predicted
 * neighbours are treated as ref 0 in B slices), then a unary suffix with
 * ctx 54.., clamped at 32 as overflow protection. */
5035     int refa = h->ref_cache[list][scan8[n] - 1];
5036     int refb = h->ref_cache[list][scan8[n] - 8];
5040     if( h->slice_type_nos == FF_B_TYPE) {
5041         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5043         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5052     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5058     if(ref >= 32 /*h->ref_list[list]*/){
5059         av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5060         return 0; //FIXME we should return -1 and check the return everywhere
5066 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* CABAC motion vector difference for component l (0=x ctx base 40, 1=y ctx
 * base 47): context from the summed neighbour |mvd|, unary prefix up to 9,
 * then exp-golomb-style bypass suffix, then a bypass sign. */
5067     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5068                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5069     int ctxbase = (l == 0) ? 40 : 47;
5074     else if( amvd > 32 )
5079     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5084     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5092         while( get_cabac_bypass( &h->cabac ) ) {
/* k grows without bound only on corrupt data; log and bail */
5096                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5101             if( get_cabac_bypass( &h->cabac ) )
5105     return get_cabac_bypass_sign( &h->cabac, -mvd );
5108 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* Computes the coded_block_flag context: looks up the left/top neighbour
 * non-zero status (from cbp_table bits for DC categories, from the
 * non_zero_count cache for AC/luma4x4), then returns ctx + 4*cat. */
5114             nza = h->left_cbp&0x100;
5115             nzb = h-> top_cbp&0x100;
5117             nza = (h->left_cbp>>(6+idx))&0x01;
5118             nzb = (h-> top_cbp>>(6+idx))&0x01;
5122             nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5123             nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5125             assert(cat == 1 || cat == 2);
5126             nza = h->non_zero_count_cache[scan8[idx] - 1];
5127             nzb = h->non_zero_count_cache[scan8[idx] - 8];
5137     return ctx + 4 * cat;
/* Context offset for the last_significant_coeff flag of each of the 63
 * non-final positions in an 8x8 block (used by DECODE_SIGNIFICANCE below;
 * exported as an asm constant for the x86 significance decoder). */
5140 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5141     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5142     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5143     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5144     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5147 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* CABAC residual decode for one block of category cat (see table below):
 * coded_block_flag, then the significance map, then the coefficient levels
 * (back to front) with optional dequantization. is_dc is a compile-time
 * constant in the _dc/_nondc wrappers so dead branches fold away. */
5148     static const int significant_coeff_flag_offset[2][6] = {
5149       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5150       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5152     static const int last_coeff_flag_offset[2][6] = {
5153       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5154       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5156     static const int coeff_abs_level_m1_offset[6] = {
5157         227+0, 227+10, 227+20, 227+30, 227+39, 426
5159     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5160       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5161         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5162         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5163        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5164       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5165         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5166         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5167         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5169     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5170      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5171      * map node ctx => cabac ctx for level=1 */
5172     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5173     /* map node ctx => cabac ctx for level>1 */
5174     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5175     static const uint8_t coeff_abs_level_transition[2][8] = {
5176     /* update node ctx after decoding a level=1 */
5177         { 1, 2, 3, 3, 4, 5, 6, 7 },
5178     /* update node ctx after decoding a level>1 */
5179         { 4, 4, 4, 4, 5, 6, 7, 7 }
5185     int coeff_count = 0;
5188     uint8_t *significant_coeff_ctx_base;
5189     uint8_t *last_coeff_ctx_base;
5190     uint8_t *abs_level_m1_ctx_base;
/* copy the CABAC state into a stack local so the compiler can keep it in
 * registers; written back before every return */
5193 #define CABAC_ON_STACK
5195 #ifdef CABAC_ON_STACK
5198     cc.range     = h->cabac.range;
5199     cc.low       = h->cabac.low;
5200     cc.bytestream= h->cabac.bytestream;
5202 #define CC &h->cabac
5206     /* cat: 0-> DC 16x16  n = 0
5207      *      1-> AC 16x16  n = luma4x4idx
5208      *      2-> Luma4x4   n = luma4x4idx
5209      *      3-> DC Chroma n = iCbCr
5210      *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5211      *      5-> Luma8x8   n = 4 * luma8x8idx
5214     /* read coded block flag */
5215     if( is_dc || cat != 5 ) {
5216         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* no coefficients: clear the nnz cache entry and restore CABAC state */
5219                     h->non_zero_count_cache[scan8[16+n]] = 0;
5221                     h->non_zero_count_cache[scan8[n]] = 0;
5224 #ifdef CABAC_ON_STACK
5225             h->cabac.range     = cc.range     ;
5226             h->cabac.low       = cc.low       ;
5227             h->cabac.bytestream= cc.bytestream;
/* context bases depend on field/frame coding (MB_FIELD) and category */
5233     significant_coeff_ctx_base = h->cabac_state
5234         + significant_coeff_flag_offset[MB_FIELD][cat];
5235     last_coeff_ctx_base = h->cabac_state
5236         + last_coeff_flag_offset[MB_FIELD][cat];
5237     abs_level_m1_ctx_base = h->cabac_state
5238         + coeff_abs_level_m1_offset[cat];
5240     if( !is_dc && cat == 5 ) {
/* significance map: per position, significant_coeff_flag then (if set)
 * last_significant_coeff_flag; records indices of non-zero coefficients */
5241 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5242         for(last= 0; last < coefs; last++) { \
5243             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5244             if( get_cabac( CC, sig_ctx )) { \
5245                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5246                 index[coeff_count++] = last; \
5247                 if( get_cabac( CC, last_ctx ) ) { \
5253         if( last == max_coeff -1 ) {\
5254             index[coeff_count++] = last;\
5256         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5257 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5258         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5260             coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5262             DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5264             DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5267     assert(coeff_count > 0);
/* record non-zero status: cbp_table bits for DC categories, nnz cache otherwise */
5271             h->cbp_table[h->mb_xy] |= 0x100;
5273             h->cbp_table[h->mb_xy] |= 0x40 << n;
5276             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5278             h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5280             assert( cat == 1 || cat == 2 );
5281             h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode levels back-to-front; node_ctx tracks the abslevel state machine
 * defined by the tables above */
5286         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5288         int j= scantable[index[--coeff_count]];
5290         if( get_cabac( CC, ctx ) == 0 ) {
5291             node_ctx = coeff_abs_level_transition[0][node_ctx];
5293                 block[j] = get_cabac_bypass_sign( CC, -1);
5295                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5299             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5300             node_ctx = coeff_abs_level_transition[1][node_ctx];
5302             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* levels >= 15 continue with an exp-golomb-style bypass escape */
5306             if( coeff_abs >= 15 ) {
5308                 while( get_cabac_bypass( CC ) ) {
5314                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5320                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5322                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5325     } while( coeff_count );
5326 #ifdef CABAC_ON_STACK
5327             h->cabac.range     = cc.range     ;
5328             h->cabac.low       = cc.low       ;
5329             h->cabac.bytestream= cc.bytestream;
5334 #ifndef CONFIG_SMALL
/* Decodes a DC residual block: thin wrapper that calls the shared CABAC
 * residual decoder with is_dc=1 so the compiler can specialize the DC path
 * (built only when CONFIG_SMALL is not defined).
 * NOTE(review): the closing brace of this function is not visible in this
 * extraction (embedded source line 5337 missing); only comments added. */
5335 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5336 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Decodes a non-DC (AC / luma4x4 / 8x8) residual block: wrapper around the
 * shared CABAC residual decoder with is_dc=0.
 * NOTE(review): closing brace not visible in this extraction; comments only. */
5339 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5340 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatches residual decoding: cat 0 (luma DC) and cat 3 (chroma DC) take the
 * DC path, everything else the non-DC path.  The direct call to
 * decode_cabac_residual_internal is the size-optimized variant.
 * NOTE(review): the #ifdef CONFIG_SMALL / #else / #endif lines that originally
 * separated these two variants (embedded lines 5345/5347/5350-5351) are
 * missing from this extraction -- confirm against upstream before editing. */
5344 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5346 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5348 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5349 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the mb indices of the top and left neighbour macroblocks and
 * stores them in h->top_mb_xy / h->left_mb_xy[0].  The default frame-coded
 * neighbours are adjusted when MBAFF mixes field/frame macroblock pairs
 * (different interlacing between current pair and top/left pair), and for
 * field pictures the top neighbour is one extra mb_stride up.
 * NOTE(review): several lines are missing from this extraction (the embedded
 * line numbers jump, e.g. 5358->5360, 5368->5370) -- the conditional
 * structure around the adjustments is incomplete here. */
5353 static inline void compute_mb_neighbors(H264Context *h)
5355 MpegEncContext * const s = &h->s;
5356 const int mb_xy = h->mb_xy;
5357 h->top_mb_xy = mb_xy - s->mb_stride;
5358 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF case: neighbours are computed per macroblock *pair*. */
5360 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5361 const int top_pair_xy = pair_xy - s->mb_stride;
5362 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5363 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5364 const int curr_mb_frame_flag = !MB_FIELD;
5365 const int bottom = (s->mb_y & 1);
5367 ? !curr_mb_frame_flag // bottom macroblock
5368 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5370 h->top_mb_xy -= s->mb_stride;
5372 if (left_mb_frame_flag != curr_mb_frame_flag) {
5373 h->left_mb_xy[0] = pair_xy - 1;
5375 } else if (FIELD_PICTURE) {
5376 h->top_mb_xy -= s->mb_stride;
/* NOTE(review): this function is incomplete in this extraction -- the
 * embedded source line numbers are non-contiguous, so many statements,
 * braces and #ifdef lines are missing.  Only comments were added; compare
 * against upstream FFmpeg h264.c before modifying any logic. */
5382 * decodes a macroblock
5383 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5385 static int decode_mb_cabac(H264Context *h) {
5386 MpegEncContext * const s = &h->s;
5388 int mb_type, partition_count, cbp = 0;
5389 int dct8x8_allowed= h->pps.transform_8x8_mode;
5391 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5393 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5395 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (only in non-I slices). With MBAFF, a skipped MB
 * needs the field-decoding flag from the following MB of the pair, hence
 * the look-ahead via h->next_mb_skipped / predict_field_decoding_flag(). */
5396 if( h->slice_type_nos != FF_I_TYPE ) {
5398 /* a skipped mb needs the aff flag from the following mb */
5399 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5400 predict_field_decoding_flag(h);
5401 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5402 skip = h->next_mb_skipped;
5404 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5405 /* read skip flags */
5407 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5408 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5409 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5410 if(h->next_mb_skipped)
5411 predict_field_decoding_flag(h);
5413 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: clear per-MB state that the deblocker reads later */
5418 h->cbp_table[mb_xy] = 0;
5419 h->chroma_pred_mode_table[mb_xy] = 0;
5420 h->last_qscale_diff = 0;
5427 if( (s->mb_y&1) == 0 )
5429 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5432 h->prev_mb_skipped = 0;
/* --- mb_type decoding; table lookups map the CABAC index to the internal
 * MB_TYPE_* flags and the partition count */
5434 compute_mb_neighbors(h);
5435 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5436 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5440 if( h->slice_type_nos == FF_B_TYPE ) {
5442 partition_count= b_mb_type_info[mb_type].partition_count;
5443 mb_type= b_mb_type_info[mb_type].type;
5446 goto decode_intra_mb;
5448 } else if( h->slice_type_nos == FF_P_TYPE ) {
5450 partition_count= p_mb_type_info[mb_type].partition_count;
5451 mb_type= p_mb_type_info[mb_type].type;
5454 goto decode_intra_mb;
5457 if(h->slice_type == FF_SI_TYPE && mb_type)
5459 assert(h->slice_type_nos == FF_I_TYPE);
5461 partition_count = 0;
5462 cbp= i_mb_type_info[mb_type].cbp;
5463 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5464 mb_type= i_mb_type_info[mb_type].type;
5467 mb_type |= MB_TYPE_INTERLACED;
5469 h->slice_table[ mb_xy ]= h->slice_num;
/* --- PCM macroblock: raw samples follow in the bitstream; copy them into
 * h->mb and re-initialize the CABAC decoder past them */
5471 if(IS_INTRA_PCM(mb_type)) {
5474 // We assume these blocks are very rare so we do not optimize it.
5475 // FIXME The two following lines get the bitstream position in the cabac
5476 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5477 ptr= h->cabac.bytestream;
5478 if(h->cabac.low&0x1) ptr--;
5480 if(h->cabac.low&0x1FF) ptr--;
5483 // The pixels are stored in the same order as levels in h->mb array.
5484 memcpy(h->mb, ptr, 256); ptr+=256;
5486 memcpy(h->mb+128, ptr, 128); ptr+=128;
5489 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5491 // All blocks are present
5492 h->cbp_table[mb_xy] = 0x1ef;
5493 h->chroma_pred_mode_table[mb_xy] = 0;
5494 // In deblocking, the quantizer is 0
5495 s->current_picture.qscale_table[mb_xy]= 0;
5496 // All coeffs are present
5497 memset(h->non_zero_count[mb_xy], 16, 16);
5498 s->current_picture.mb_type[mb_xy]= mb_type;
5499 h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled because field MBs address twice as many
 * reference fields; undone near the end of the function (>>= 1) */
5504 h->ref_count[0] <<= 1;
5505 h->ref_count[1] <<= 1;
5508 fill_caches(h, mb_type, 0);
/* --- intra prediction mode decoding */
5510 if( IS_INTRA( mb_type ) ) {
5512 if( IS_INTRA4x4( mb_type ) ) {
5513 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5514 mb_type |= MB_TYPE_8x8DCT;
5515 for( i = 0; i < 16; i+=4 ) {
5516 int pred = pred_intra_mode( h, i );
5517 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5518 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5521 for( i = 0; i < 16; i++ ) {
5522 int pred = pred_intra_mode( h, i );
5523 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5525 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5528 write_back_intra_pred_mode(h);
5529 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5531 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5532 if( h->intra16x16_pred_mode < 0 ) return -1;
5535 h->chroma_pred_mode_table[mb_xy] =
5536 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5538 pred_mode= check_intra_pred_mode( h, pred_mode );
5539 if( pred_mode < 0 ) return -1;
5540 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions: decode sub_mb_types, refs and MVs per block */
5542 } else if( partition_count == 4 ) {
5543 int i, j, sub_partition_count[4], list, ref[2][4];
5545 if( h->slice_type_nos == FF_B_TYPE ) {
5546 for( i = 0; i < 4; i++ ) {
5547 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5548 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5549 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5551 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5552 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5553 pred_direct_motion(h, &mb_type);
5554 h->ref_cache[0][scan8[4]] =
5555 h->ref_cache[1][scan8[4]] =
5556 h->ref_cache[0][scan8[12]] =
5557 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5558 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5559 for( i = 0; i < 4; i++ )
5560 if( IS_DIRECT(h->sub_mb_type[i]) )
5561 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5565 for( i = 0; i < 4; i++ ) {
5566 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5567 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5568 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5572 for( list = 0; list < h->list_count; list++ ) {
5573 for( i = 0; i < 4; i++ ) {
5574 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5575 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5576 if( h->ref_count[list] > 1 )
5577 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5583 h->ref_cache[list][ scan8[4*i]+1 ]=
5584 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5589 dct8x8_allowed = get_dct8x8_allowed(h);
5591 for(list=0; list<h->list_count; list++){
5593 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5594 if(IS_DIRECT(h->sub_mb_type[i])){
5595 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5599 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5600 const int sub_mb_type= h->sub_mb_type[i];
5601 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5602 for(j=0; j<sub_partition_count[i]; j++){
5605 const int index= 4*i + block_width*j;
5606 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5607 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5608 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5610 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5611 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5612 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the decoded MV/MVD into all 4x4 cells of the sub-partition */
5614 if(IS_SUB_8X8(sub_mb_type)){
5616 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5618 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5621 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5623 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5624 }else if(IS_SUB_8X4(sub_mb_type)){
5625 mv_cache[ 1 ][0]= mx;
5626 mv_cache[ 1 ][1]= my;
5628 mvd_cache[ 1 ][0]= mx - mpx;
5629 mvd_cache[ 1 ][1]= my - mpy;
5630 }else if(IS_SUB_4X8(sub_mb_type)){
5631 mv_cache[ 8 ][0]= mx;
5632 mv_cache[ 8 ][1]= my;
5634 mvd_cache[ 8 ][0]= mx - mpx;
5635 mvd_cache[ 8 ][1]= my - mpy;
5637 mv_cache[ 0 ][0]= mx;
5638 mv_cache[ 0 ][1]= my;
5640 mvd_cache[ 0 ][0]= mx - mpx;
5641 mvd_cache[ 0 ][1]= my - mpy;
5644 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5645 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5646 p[0] = p[1] = p[8] = p[9] = 0;
5647 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- inter, direct 16x16 */
5651 } else if( IS_DIRECT(mb_type) ) {
5652 pred_direct_motion(h, &mb_type);
5653 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5654 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5655 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter, 16x16 / 16x8 / 8x16 partitions */
5657 int list, mx, my, i, mpx, mpy;
5658 if(IS_16X16(mb_type)){
5659 for(list=0; list<h->list_count; list++){
5660 if(IS_DIR(mb_type, 0, list)){
5661 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5662 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5664 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5666 for(list=0; list<h->list_count; list++){
5667 if(IS_DIR(mb_type, 0, list)){
5668 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5670 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5671 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5672 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5674 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5675 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5677 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5680 else if(IS_16X8(mb_type)){
5681 for(list=0; list<h->list_count; list++){
5683 if(IS_DIR(mb_type, i, list)){
5684 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5685 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5687 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5690 for(list=0; list<h->list_count; list++){
5692 if(IS_DIR(mb_type, i, list)){
5693 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5694 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5695 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5696 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5698 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5699 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5701 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5702 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5707 assert(IS_8X16(mb_type));
5708 for(list=0; list<h->list_count; list++){
5710 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5711 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5712 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5714 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5717 for(list=0; list<h->list_count; list++){
5719 if(IS_DIR(mb_type, i, list)){
5720 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5721 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5722 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5724 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5725 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5726 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5728 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5729 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* --- write-back of motion, then CBP / transform size / residuals */
5736 if( IS_INTER( mb_type ) ) {
5737 h->chroma_pred_mode_table[mb_xy] = 0;
5738 write_back_motion( h, mb_type );
5741 if( !IS_INTRA16x16( mb_type ) ) {
5742 cbp = decode_cabac_mb_cbp_luma( h );
5744 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5747 h->cbp_table[mb_xy] = h->cbp = cbp;
5749 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5750 if( decode_cabac_mb_transform_size( h ) )
5751 mb_type |= MB_TYPE_8x8DCT;
5753 s->current_picture.mb_type[mb_xy]= mb_type;
5755 if( cbp || IS_INTRA16x16( mb_type ) ) {
5756 const uint8_t *scan, *scan8x8, *dc_scan;
5757 const uint32_t *qmul;
/* pick field or frame scan orders (the *_q0 variants are for qscale==0) */
5760 if(IS_INTERLACED(mb_type)){
5761 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5762 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5763 dc_scan= luma_dc_field_scan;
5765 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5766 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5767 dc_scan= luma_dc_zigzag_scan;
5770 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5771 if( dqp == INT_MIN ){
5772 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into [0,51] per the spec's modular delta-QP arithmetic */
5776 if(((unsigned)s->qscale) > 51){
5777 if(s->qscale<0) s->qscale+= 52;
5778 else s->qscale-= 52;
5780 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5781 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* --- residual decoding: luma DC+AC for intra16x16, else per-8x8 luma,
 * then chroma DC and chroma AC */
5783 if( IS_INTRA16x16( mb_type ) ) {
5785 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5786 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5789 qmul = h->dequant4_coeff[0][s->qscale];
5790 for( i = 0; i < 16; i++ ) {
5791 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5792 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5795 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5799 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5800 if( cbp & (1<<i8x8) ) {
5801 if( IS_8x8DCT(mb_type) ) {
5802 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5803 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5805 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5806 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5807 const int index = 4*i8x8 + i4x4;
5808 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5810 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5811 //STOP_TIMER("decode_residual")
5815 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5816 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5823 for( c = 0; c < 2; c++ ) {
5824 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5825 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5831 for( c = 0; c < 2; c++ ) {
5832 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5833 for( i = 0; i < 4; i++ ) {
5834 const int index = 16 + 4 * c + i;
5835 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5836 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5840 uint8_t * const nnz= &h->non_zero_count_cache[0];
5841 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5842 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp==0 and not intra16x16: clear the nnz caches */
5845 uint8_t * const nnz= &h->non_zero_count_cache[0];
5846 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5847 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5848 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5849 h->last_qscale_diff = 0;
5852 s->current_picture.qscale_table[mb_xy]= s->qscale;
5853 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling done earlier */
5856 h->ref_count[0] >>= 1;
5857 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (filters across columns, pix[-1]|pix[0]).
 * bS < 4: normal filtering via the DSP hook with per-4-line tc values
 * (tc[i] = -1 disables that segment).  bS == 4 (intra MB edge): the strong
 * filter below, applied to all 16 rows.
 * NOTE(review): several lines are missing from this extraction (loop headers,
 * some braces and else lines -- embedded numbers jump, e.g. 5868->5873);
 * code left byte-identical, comments only. */
5864 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5866 const int index_a = qp + h->slice_alpha_c0_offset;
5867 const int alpha = (alpha_table+52)[index_a];
5868 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5873 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5874 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5876 /* 16px edge length, because bS=4 is triggered by being at
5877 * the edge of an intra MB, so all 4 bS are the same */
5878 for( d = 0; d < 16; d++ ) {
5879 const int p0 = pix[-1];
5880 const int p1 = pix[-2];
5881 const int p2 = pix[-3];
5883 const int q0 = pix[0];
5884 const int q1 = pix[1];
5885 const int q2 = pix[2];
5887 if( FFABS( p0 - q0 ) < alpha &&
5888 FFABS( p1 - p0 ) < beta &&
5889 FFABS( q1 - q0 ) < beta ) {
/* strong filter: modifies up to 3 pixels on each side of the edge */
5891 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5892 if( FFABS( p2 - p0 ) < beta)
5894 const int p3 = pix[-4];
5896 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5897 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5898 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5901 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5903 if( FFABS( q2 - q0 ) < beta)
5905 const int q3 = pix[3];
5907 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5908 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5909 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5912 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant when the strong-filter threshold is not met */
5916 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5917 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5919 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge: normal strengths go through the DSP
 * chroma filter with tc0+1 per segment (0 disables), bS==4 through the
 * intra chroma filter.
 * NOTE(review): the loop/if lines between these statements are missing from
 * this extraction; comments only. */
5925 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5927 const int index_a = qp + h->slice_alpha_c0_offset;
5928 const int alpha = (alpha_table+52)[index_a];
5929 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5934 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5935 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5937 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks the first vertical luma edge of an MBAFF macroblock pair, row by
 * row: 8 bS values and 2 QPs apply because the left neighbour pair may have
 * the opposite field/frame coding.  Per row it selects bS and qp, then runs
 * either the normal (bS<4, tc0-clipped) or strong (bS==4) luma filter --
 * same formulas as filter_mb_edgev but scalar per line.
 * NOTE(review): multiple lines are missing from this extraction (the bS_index
 * computation, if/else lines, tc setup); code left byte-identical. */
5941 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5943 for( i = 0; i < 16; i++, pix += stride) {
5949 int bS_index = (i >> 1);
5952 bS_index |= (i & 1);
5955 if( bS[bS_index] == 0 ) {
/* select which of the two QPs applies to this row (field vs frame split) */
5959 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5960 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5961 alpha = (alpha_table+52)[index_a];
5962 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5964 if( bS[bS_index] < 4 ) {
5965 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5966 const int p0 = pix[-1];
5967 const int p1 = pix[-2];
5968 const int p2 = pix[-3];
5969 const int q0 = pix[0];
5970 const int q1 = pix[1];
5971 const int q2 = pix[2];
5973 if( FFABS( p0 - q0 ) < alpha &&
5974 FFABS( p1 - p0 ) < beta &&
5975 FFABS( q1 - q0 ) < beta ) {
5979 if( FFABS( p2 - p0 ) < beta ) {
5980 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5983 if( FFABS( q2 - q0 ) < beta ) {
5984 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5988 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5989 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5990 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5991 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filter path */
5994 const int p0 = pix[-1];
5995 const int p1 = pix[-2];
5996 const int p2 = pix[-3];
5998 const int q0 = pix[0];
5999 const int q1 = pix[1];
6000 const int q2 = pix[2];
6002 if( FFABS( p0 - q0 ) < alpha &&
6003 FFABS( p1 - p0 ) < beta &&
6004 FFABS( q1 - q0 ) < beta ) {
6006 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6007 if( FFABS( p2 - p0 ) < beta)
6009 const int p3 = pix[-4];
6011 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6012 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6013 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6016 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6018 if( FFABS( q2 - q0 ) < beta)
6020 const int q3 = pix[3];
6022 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6023 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6024 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6027 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6031 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6032 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6034 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks the first vertical chroma edge of an MBAFF macroblock pair, row
 * by row (8 chroma rows).  Chroma modifies only p0/q0: normal path clips by
 * tc0+1, bS==4 path uses the fixed intra chroma formula.
 * NOTE(review): the bS_index computation and several if/else lines are
 * missing from this extraction; code left byte-identical, comments only. */
6039 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6041 for( i = 0; i < 8; i++, pix += stride) {
6049 if( bS[bS_index] == 0 ) {
6053 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6054 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6055 alpha = (alpha_table+52)[index_a];
6056 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6058 if( bS[bS_index] < 4 ) {
6059 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6060 const int p0 = pix[-1];
6061 const int p1 = pix[-2];
6062 const int q0 = pix[0];
6063 const int q1 = pix[1];
6065 if( FFABS( p0 - q0 ) < alpha &&
6066 FFABS( p1 - p0 ) < beta &&
6067 FFABS( q1 - q0 ) < beta ) {
6068 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6070 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6071 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6072 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: intra chroma formula (no tc clipping) */
6075 const int p0 = pix[-1];
6076 const int p1 = pix[-2];
6077 const int q0 = pix[0];
6078 const int q1 = pix[1];
6080 if( FFABS( p0 - q0 ) < alpha &&
6081 FFABS( p1 - p0 ) < beta &&
6082 FFABS( q1 - q0 ) < beta ) {
6084 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6085 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6086 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge (filters across rows, using pix_next ==
 * stride as the row step).  Same structure as filter_mb_edgev: DSP hook for
 * bS < 4, inline strong filter for bS == 4.
 * NOTE(review): loop headers and some if/else lines are missing from this
 * extraction (embedded line numbers jump); code left byte-identical. */
6092 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6094 const int index_a = qp + h->slice_alpha_c0_offset;
6095 const int alpha = (alpha_table+52)[index_a];
6096 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6097 const int pix_next = stride;
6102 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6103 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6105 /* 16px edge length, see filter_mb_edgev */
6106 for( d = 0; d < 16; d++ ) {
6107 const int p0 = pix[-1*pix_next];
6108 const int p1 = pix[-2*pix_next];
6109 const int p2 = pix[-3*pix_next];
6110 const int q0 = pix[0];
6111 const int q1 = pix[1*pix_next];
6112 const int q2 = pix[2*pix_next];
6114 if( FFABS( p0 - q0 ) < alpha &&
6115 FFABS( p1 - p0 ) < beta &&
6116 FFABS( q1 - q0 ) < beta ) {
6118 const int p3 = pix[-4*pix_next];
6119 const int q3 = pix[ 3*pix_next];
6121 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6122 if( FFABS( p2 - p0 ) < beta) {
6124 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6125 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6126 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6129 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6131 if( FFABS( q2 - q0 ) < beta) {
6133 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6134 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6135 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6138 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6142 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6143 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6145 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblocks one horizontal chroma edge: DSP chroma filter with tc0+1 per
 * segment (0 disables) for normal strengths, intra chroma filter for bS==4.
 * NOTE(review): the loop/if lines between these statements are missing from
 * this extraction; comments only. */
6152 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6154 const int index_a = qp + h->slice_alpha_c0_offset;
6155 const int alpha = (alpha_table+52)[index_a];
6156 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6161 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6162 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6164 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path macroblock deblocking.  Falls back to the full filter_mb() for
 * cases it cannot handle (picture edges, missing DSP strength function,
 * per-plane chroma QP offsets, cross-slice filtering with
 * deblocking_filter==2).  Otherwise: early-out when all averaged QPs are at
 * or below the threshold where the filter is a no-op; intra MBs get fixed
 * strengths (bS 4 on MB edges, 3 inside); inter MBs get strengths from the
 * DSP h264_loop_filter_strength helper, overridden to 4/3 against intra
 * neighbours.
 * NOTE(review): this function is truncated in this extraction -- the FILTER
 * macro invocations and the 8x8DCT tail (embedded lines 6258-6281) are
 * missing.  Code left byte-identical; comments only. */
6168 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6169 MpegEncContext * const s = &h->s;
6170 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6172 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6176 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6178 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6179 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6180 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6183 assert(!FRAME_MBAFF);
/* edge QPs are averaged with the neighbour MB's QP per the spec */
6185 mb_type = s->current_picture.mb_type[mb_xy];
6186 qp = s->current_picture.qscale_table[mb_xy];
6187 qp0 = s->current_picture.qscale_table[mb_xy-1];
6188 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6189 qpc = get_chroma_qp( h, 0, qp );
6190 qpc0 = get_chroma_qp( h, 0, qp0 );
6191 qpc1 = get_chroma_qp( h, 0, qp1 );
6192 qp0 = (qp + qp0 + 1) >> 1;
6193 qp1 = (qp + qp1 + 1) >> 1;
6194 qpc0 = (qpc + qpc0 + 1) >> 1;
6195 qpc1 = (qpc + qpc1 + 1) >> 1;
6196 qp_thresh = 15 - h->slice_alpha_c0_offset;
6197 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6198 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed strengths; bSH is 3 instead of 4 on the top edge of a
 * field picture */
6201 if( IS_INTRA(mb_type) ) {
6202 int16_t bS4[4] = {4,4,4,4};
6203 int16_t bS3[4] = {3,3,3,3};
6204 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6205 if( IS_8x8DCT(mb_type) ) {
6206 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6207 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6208 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6209 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6211 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6212 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6213 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6214 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6215 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6216 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6217 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6218 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6220 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6221 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6222 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6223 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6224 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6225 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6226 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6227 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge via the DSP helper, viewing bS as 64-bit
 * groups of four int16 for fast all-zero / all-equal handling */
6230 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6231 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6233 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6235 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6237 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6238 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6239 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6240 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6242 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6243 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6244 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6245 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6247 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6248 bSv[0][0] = 0x0004000400040004ULL;
6249 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6250 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6252 #define FILTER(hv,dir,edge)\
6253 if(bSv[dir][edge]) {\
6254 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6256 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6257 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6263 } else if( IS_8x8DCT(mb_type) ) {
/*
 * Apply the H.264 in-loop deblocking filter to one macroblock: derive
 * the boundary strength (bS) of every edge that must be filtered and
 * invoke the filter_mb_edge*() helpers on the luma and both chroma
 * planes.
 * NOTE(review): this region of the file is a sampled excerpt; several
 * original lines (closing braces, a few opening conditions) are not
 * visible here, so the code below is intentionally left untouched.
 */
6282 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6283 MpegEncContext * const s = &h->s;
6284 const int mb_xy= mb_x + mb_y*s->mb_stride;
6285 const int mb_type = s->current_picture.mb_type[mb_xy];
// interlaced MBs use a tighter vertical-mv difference threshold
6286 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6287 int first_vertical_edge_done = 0;
6290 //for sufficiently low qp, filtering wouldn't do anything
6291 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6293 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6294 int qp = s->current_picture.qscale_table[mb_xy];
// early-skip qp test continues below; the opening if() of this
// condition chain is not visible in this excerpt
6296 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6297 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6302 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6303 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6304 int top_type, left_type[2];
6305 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6306 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6307 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// rebuild nnz cache entries of 8x8-transformed neighbours from their
// coded-block-pattern bits, which is what the loop filter expects
6309 if(IS_8x8DCT(top_type)){
6310 h->non_zero_count_cache[4+8*0]=
6311 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6312 h->non_zero_count_cache[6+8*0]=
6313 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6315 if(IS_8x8DCT(left_type[0])){
6316 h->non_zero_count_cache[3+8*1]=
6317 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6319 if(IS_8x8DCT(left_type[1])){
6320 h->non_zero_count_cache[3+8*3]=
6321 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6324 if(IS_8x8DCT(mb_type)){
6325 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6326 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6328 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6329 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6331 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6332 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6334 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6335 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
// MBAFF first-vertical-edge special case; the opening line of this
// if() chain (presumably testing FRAME_MBAFF) is not visible here
6340 // left mb is in picture
6341 && h->slice_table[mb_xy-1] != 255
6342 // and current and left pair do not have the same interlaced type
6343 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6344 // and left mb is in the same slice if deblocking_filter == 2
6345 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6346 /* First vertical edge is different in MBAFF frames
6347 * There are 8 different bS to compute and 2 different Qp
6349 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6350 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6355 int mb_qp, mbn0_qp, mbn1_qp;
6357 first_vertical_edge_done = 1;
6359 if( IS_INTRA(mb_type) )
6360 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6362 for( i = 0; i < 8; i++ ) {
6363 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6365 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6367 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6368 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6369 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
// per-pair qp: average current MB qp with each left-pair MB, for luma
// and each chroma plane separately
6376 mb_qp = s->current_picture.qscale_table[mb_xy];
6377 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6378 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6379 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6380 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6381 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6382 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6383 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6384 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6385 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6386 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6387 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6388 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6391 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6392 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6393 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6394 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6395 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6397 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6398 for( dir = 0; dir < 2; dir++ )
6401 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6402 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6403 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &15 ][0] + (MB_MBAFF ? 20 : 2);
6404 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&15 ][0] + (MB_MBAFF ? 20 : 2);
// slice_table==255 marks "neighbour outside the picture": skip edge 0
6405 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6407 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6408 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6409 // how often to recheck mv-based bS when iterating between edges
6410 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6411 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6412 // how often to recheck mv-based bS when iterating along each edge
6413 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6415 if (first_vertical_edge_done) {
6417 first_vertical_edge_done = 0;
6420 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6423 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6424 && !IS_INTERLACED(mb_type)
6425 && IS_INTERLACED(mbm_type)
6427 // This is a special case in the norm where the filtering must
6428 // be done twice (one each of the field) even if we are in a
6429 // frame macroblock.
6431 static const int nnz_idx[4] = {4,5,6,3};
6432 unsigned int tmp_linesize = 2 * linesize;
6433 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6434 int mbn_xy = mb_xy - 2 * s->mb_stride;
6439 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6440 if( IS_INTRA(mb_type) ||
6441 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6442 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6444 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6445 for( i = 0; i < 4; i++ ) {
6446 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6447 mbn_nnz[nnz_idx[i]] != 0 )
6453 // Do not use s->qscale as luma quantizer because it has not the same
6454 // value in IPCM macroblocks.
6455 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6456 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6457 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6458 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6459 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6460 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6461 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6462 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// main edge loop: edge 0 is the MB boundary (neighbour mbm_xy),
// edges 1..3 are internal 4x4 boundaries of the current MB
6469 for( edge = start; edge < edges; edge++ ) {
6470 /* mbn_xy: neighbor macroblock */
6471 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6472 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6473 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
// with the 8x8 transform the odd internal edges do not exist
6477 if( (edge&1) && IS_8x8DCT(mb_type) )
6480 if( IS_INTRA(mb_type) ||
6481 IS_INTRA(mbn_type) ) {
6484 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6485 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6494 bS[0] = bS[1] = bS[2] = bS[3] = value;
6499 if( edge & mask_edge ) {
6500 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6503 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6504 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6507 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6508 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6509 int bn_idx= b_idx - (dir ? 8:1);
// bS becomes 1 if references or motion vectors differ across the edge
6512 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6513 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6514 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6515 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6518 if(h->slice_type_nos == FF_B_TYPE && v){
6520 for( l = 0; !v && l < 2; l++ ) {
6522 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6523 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6524 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6528 bS[0] = bS[1] = bS[2] = bS[3] = v;
// slow path: derive bS separately for each 4x4 sub-edge
6534 for( i = 0; i < 4; i++ ) {
6535 int x = dir == 0 ? edge : i;
6536 int y = dir == 0 ? i : edge;
6537 int b_idx= 8 + 4 + x + 8*y;
6538 int bn_idx= b_idx - (dir ? 8:1);
6540 if( h->non_zero_count_cache[b_idx] != 0 ||
6541 h->non_zero_count_cache[bn_idx] != 0 ) {
6547 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6548 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6549 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6550 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6556 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6558 for( l = 0; l < 2; l++ ) {
6560 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6561 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6562 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6571 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6576 // Do not use s->qscale as luma quantizer because it has not the same
6577 // value in IPCM macroblocks.
6578 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6579 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6580 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6581 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// chroma is filtered only on even edges (chroma is half resolution)
6583 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6584 if( (edge&1) == 0 ) {
6585 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6586 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6587 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6588 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6591 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6592 if( (edge&1) == 0 ) {
6593 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6594 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6595 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6596 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/*
 * Decode the macroblocks of one slice, using either the CABAC or the
 * CAVLC entropy path, and report the decoded region to the
 * error-resilience layer via ff_er_add_slice().
 * NOTE(review): sampled excerpt -- some lines of this function are
 * not visible here.
 */
6603 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6604 MpegEncContext * const s = &h->s;
6605 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6609 if( h->pps.cabac ) {
// CABAC slice data starts byte-aligned after the slice header
6613 align_get_bits( &s->gb );
6616 ff_init_cabac_states( &h->cabac);
6617 ff_init_cabac_decoder( &h->cabac,
6618 s->gb.buffer + get_bits_count(&s->gb)/8,
6619 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6620 /* calculate pre-state */
// initialize the 460 CABAC context states from the qp-dependent
// init tables (I tables vs. P/B tables selected by cabac_init_idc)
6621 for( i= 0; i < 460; i++ ) {
6623 if( h->slice_type_nos == FF_I_TYPE )
6624 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6626 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6629 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6631 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6636 int ret = decode_mb_cabac(h);
6638 //STOP_TIMER("decode_mb_cabac")
6640 if(ret>=0) hl_decode_mb(h);
// MBAFF: decode the bottom MB of the pair right away
6642 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6645 if(ret>=0) ret = decode_mb_cabac(h);
6647 if(ret>=0) hl_decode_mb(h);
6650 eos = get_cabac_terminate( &h->cabac );
6652 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6653 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6654 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6658 if( ++s->mb_x >= s->mb_width ) {
6660 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6662 if(FIELD_OR_MBAFF_PICTURE) {
6667 if( eos || s->mb_y >= s->mb_height ) {
6668 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6669 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC entropy path
6676 int ret = decode_mb_cavlc(h);
6678 if(ret>=0) hl_decode_mb(h);
6680 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6682 ret = decode_mb_cavlc(h);
6684 if(ret>=0) hl_decode_mb(h);
6689 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6690 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6695 if(++s->mb_x >= s->mb_width){
6697 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6699 if(FIELD_OR_MBAFF_PICTURE) {
6702 if(s->mb_y >= s->mb_height){
6703 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// distinguish clean end-of-slice from trailing-garbage overread
6705 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6706 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6710 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6717 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6718 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6719 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6720 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6724 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6733 for(;s->mb_y < s->mb_height; s->mb_y++){
6734 for(;s->mb_x < s->mb_width; s->mb_x++){
6735 int ret= decode_mb(h);
6740 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6741 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6746 if(++s->mb_x >= s->mb_width){
6748 if(++s->mb_y >= s->mb_height){
6749 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6754 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// NOTE(review): the next line contains stray '?' bytes (mojibake);
// by analogy with the two lines above/below it should read
// "if(get_bits_count(s->gb) >= s->gb.size_in_bits){" -- fix upstream
6761 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6762 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6763 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6767 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6774 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6777 return -1; //not reached
/*
 * Parse the SEI "user data unregistered" payload: scan it for the
 * x264 encoder version string and store the build number in
 * h->x264_build so encoder-specific bug workarounds can be applied.
 */
6780 static int decode_unregistered_user_data(H264Context *h, int size){
6781 MpegEncContext * const s = &h->s;
// 16-byte UUID prefix followed by up to 256 payload bytes
6782 uint8_t user_data[16+256];
6788 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6789 user_data[i]= get_bits(&s->gb, 8);
// payload text starts after the 16-byte UUID
6793 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6794 if(e==1 && build>=0)
6795 h->x264_build= build;
6797 if(s->avctx->debug & FF_DEBUG_BUGS)
6798 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// skip whatever payload bytes were not copied into user_data
6801 skip_bits(&s->gb, 8);
/*
 * Parse an SEI NAL unit.  Each SEI message carries a (type, size)
 * pair coded as runs of 0xFF bytes plus a final byte; payload types
 * not handled here are skipped wholesale.
 */
6806 static int decode_sei(H264Context *h){
6807 MpegEncContext * const s = &h->s;
6809 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// accumulate type: every 0xFF byte adds 255, the final byte terminates
6814 type+= show_bits(&s->gb, 8);
6815 }while(get_bits(&s->gb, 8) == 255);
// accumulate size the same way
6819 size+= show_bits(&s->gb, 8);
6820 }while(get_bits(&s->gb, 8) == 255);
6824 if(decode_unregistered_user_data(h, size) < 0)
// unhandled payload type: skip its bytes
6828 skip_bits(&s->gb, 8*size);
6831 //FIXME check bits here
6832 align_get_bits(&s->gb);
/*
 * Parse hrd_parameters() (H.264 Annex E).  All fields are read only
 * to keep the bitstream position correct; the parsed values are
 * currently discarded.
 */
6838 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6839 MpegEncContext * const s = &h->s;
6841 cpb_count = get_ue_golomb(&s->gb) + 1;
6842 get_bits(&s->gb, 4); /* bit_rate_scale */
6843 get_bits(&s->gb, 4); /* cpb_size_scale */
// one (bit_rate, cpb_size, cbr) triple per coded picture buffer
6844 for(i=0; i<cpb_count; i++){
6845 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6846 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6847 get_bits1(&s->gb); /* cbr_flag */
6849 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6850 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6851 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6852 get_bits(&s->gb, 5); /* time_offset_length */
/*
 * Parse the SPS VUI parameters (H.264 Annex E): sample aspect ratio,
 * video signal description, timing information, HRD parameters and
 * the bitstream restriction info.  Only the fields the decoder uses
 * (sar, timing, num_reorder_frames, ...) are stored in *sps; the rest
 * is read and discarded.
 */
6855 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6856 MpegEncContext * const s = &h->s;
6857 int aspect_ratio_info_present_flag;
6858 unsigned int aspect_ratio_idc;
6859 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6861 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6863 if( aspect_ratio_info_present_flag ) {
6864 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR: explicit 16-bit numerator/denominator follow
6865 if( aspect_ratio_idc == EXTENDED_SAR ) {
6866 sps->sar.num= get_bits(&s->gb, 16);
6867 sps->sar.den= get_bits(&s->gb, 16);
// otherwise the idc indexes the predefined aspect-ratio table
6868 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6869 sps->sar= pixel_aspect[aspect_ratio_idc];
6871 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6878 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6880 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6881 get_bits1(&s->gb); /* overscan_appropriate_flag */
6884 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6885 get_bits(&s->gb, 3); /* video_format */
6886 get_bits1(&s->gb); /* video_full_range_flag */
6887 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6888 get_bits(&s->gb, 8); /* colour_primaries */
6889 get_bits(&s->gb, 8); /* transfer_characteristics */
6890 get_bits(&s->gb, 8); /* matrix_coefficients */
6894 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6895 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6896 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6899 sps->timing_info_present_flag = get_bits1(&s->gb);
6900 if(sps->timing_info_present_flag){
6901 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6902 sps->time_scale = get_bits_long(&s->gb, 32);
6903 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6906 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6907 if(nal_hrd_parameters_present_flag)
6908 decode_hrd_parameters(h, sps);
6909 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6910 if(vcl_hrd_parameters_present_flag)
6911 decode_hrd_parameters(h, sps);
6912 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6913 get_bits1(&s->gb); /* low_delay_hrd_flag */
6914 get_bits1(&s->gb); /* pic_struct_present_flag */
6916 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6917 if(sps->bitstream_restriction_flag){
6918 unsigned int num_reorder_frames;
6919 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6920 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6921 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6922 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6923 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6924 num_reorder_frames= get_ue_golomb(&s->gb);
6925 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// reject values the DPB cannot possibly hold
6927 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6928 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6932 sps->num_reorder_frames= num_reorder_frames;
/*
 * Parse one scaling list (4x4 when size==16, 8x8 when size==64) in
 * zigzag order.  Coefficients are delta-coded; if the list is absent
 * the fallback list is copied, and a first delta yielding 0 selects
 * the JVT default matrix instead.
 */
6938 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6939 const uint8_t *jvt_list, const uint8_t *fallback_list){
6940 MpegEncContext * const s = &h->s;
6941 int i, last = 8, next = 8;
6942 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6943 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6944 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6946 for(i=0;i<size;i++){
6948 next = (last + get_se_golomb(&s->gb)) & 0xff;
6949 if(!i && !next){ /* matrix not written, we use the preset one */
6950 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// next==0 after the first coeff means "repeat the last value"
6953 last = factors[scan[i]] = next ? next : last;
/*
 * Parse the full set of scaling matrices for an SPS (is_sps) or PPS.
 * For a PPS the SPS matrices act as fallback when the SPS carried
 * matrices; otherwise the JVT defaults are used.  Chroma lists fall
 * back to the previously decoded list of the same prediction type.
 */
6957 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6958 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6959 MpegEncContext * const s = &h->s;
6960 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6961 const uint8_t *fallback[4] = {
6962 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6963 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6964 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6965 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6967 if(get_bits1(&s->gb)){
6968 sps->scaling_matrix_present |= is_sps;
6969 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6970 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6971 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6972 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6973 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6974 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists are only coded when the 8x8 transform can be in use
6975 if(is_sps || pps->transform_8x8_mode){
6976 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6977 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
6983 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
6986 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
6987 const size_t size, const char *name)
// reject ids outside the legal range, naming the parameter-set kind
6990 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
// lazily allocate the slot on first use (zero-initialized)
6995 vec[id] = av_mallocz(size);
6997 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/*
 * Parse a sequence parameter set NAL unit into h->sps_buffers[sps_id].
 * Validates POC type, reference-frame count, picture dimensions and
 * cropping; error-return paths are partly outside this sampled view.
 */
7002 static inline int decode_seq_parameter_set(H264Context *h){
7003 MpegEncContext * const s = &h->s;
7004 int profile_idc, level_idc;
7005 unsigned int sps_id, tmp, mb_width, mb_height;
7009 profile_idc= get_bits(&s->gb, 8);
7010 get_bits1(&s->gb); //constraint_set0_flag
7011 get_bits1(&s->gb); //constraint_set1_flag
7012 get_bits1(&s->gb); //constraint_set2_flag
7013 get_bits1(&s->gb); //constraint_set3_flag
7014 get_bits(&s->gb, 4); // reserved
7015 level_idc= get_bits(&s->gb, 8);
7016 sps_id= get_ue_golomb(&s->gb);
7018 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7022 sps->profile_idc= profile_idc;
7023 sps->level_idc= level_idc;
// default to flat scaling matrices (all 16) unless overridden below
7025 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7026 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7027 sps->scaling_matrix_present = 0;
7029 if(sps->profile_idc >= 100){ //high profile
7030 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7031 if(sps->chroma_format_idc == 3)
7032 get_bits1(&s->gb); //residual_color_transform_flag
7033 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7034 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7035 sps->transform_bypass = get_bits1(&s->gb);
7036 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// non-high profiles imply 4:2:0
7038 sps->chroma_format_idc= 1;
7041 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7042 sps->poc_type= get_ue_golomb(&s->gb);
7044 if(sps->poc_type == 0){ //FIXME #define
7045 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7046 } else if(sps->poc_type == 1){//FIXME #define
7047 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7048 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7049 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7050 tmp= get_ue_golomb(&s->gb);
// bound the POC cycle length by the array capacity
7052 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7053 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7056 sps->poc_cycle_length= tmp;
7058 for(i=0; i<sps->poc_cycle_length; i++)
7059 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7060 }else if(sps->poc_type != 2){
7061 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7065 tmp= get_ue_golomb(&s->gb);
7066 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7067 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7070 sps->ref_frame_count= tmp;
7071 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7072 mb_width= get_ue_golomb(&s->gb) + 1;
7073 mb_height= get_ue_golomb(&s->gb) + 1;
// guard against 16*mb_* overflowing int and against absurd sizes
7074 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7075 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7076 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7079 sps->mb_width = mb_width;
7080 sps->mb_height= mb_height;
7082 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7083 if(!sps->frame_mbs_only_flag)
7084 sps->mb_aff= get_bits1(&s->gb);
7088 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7090 #ifndef ALLOW_INTERLACE
7092 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7094 sps->crop= get_bits1(&s->gb);
7096 sps->crop_left = get_ue_golomb(&s->gb);
7097 sps->crop_right = get_ue_golomb(&s->gb);
7098 sps->crop_top = get_ue_golomb(&s->gb);
7099 sps->crop_bottom= get_ue_golomb(&s->gb);
7100 if(sps->crop_left || sps->crop_top){
7101 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7103 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7104 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7110 sps->crop_bottom= 0;
7113 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7114 if( sps->vui_parameters_present_flag )
7115 decode_vui_parameters(h, sps);
7117 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7118 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7119 sps_id, sps->profile_idc, sps->level_idc,
7121 sps->ref_frame_count,
7122 sps->mb_width, sps->mb_height,
7123 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7124 sps->direct_8x8_inference_flag ? "8B8" : "",
7125 sps->crop_left, sps->crop_right,
7126 sps->crop_top, sps->crop_bottom,
7127 sps->vui_parameters_present_flag ? "VUI" : "",
7128 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/*
 * Precompute the luma-QP -> chroma-QP lookup table for chroma plane t,
 * applying the PPS chroma_qp_index_offset and clipping the index to
 * the legal QP range [0,51].
 */
7135 build_qp_table(PPS *pps, int t, int index)
7138 for(i = 0; i < 52; i++)
7139 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/*
 * Parse a picture parameter set NAL unit into h->pps_buffers[pps_id].
 * FMO (slice_group_count > 1) is recognised but not supported; the
 * optional trailing fields (transform_8x8_mode, PPS scaling matrices,
 * second chroma qp offset) are parsed only if bits remain before
 * bit_length.
 */
7142 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7143 MpegEncContext * const s = &h->s;
7144 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7147 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
// the referenced SPS must already have been decoded
7151 tmp= get_ue_golomb(&s->gb);
7152 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7153 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7158 pps->cabac= get_bits1(&s->gb);
7159 pps->pic_order_present= get_bits1(&s->gb);
7160 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7161 if(pps->slice_group_count > 1 ){
7162 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7163 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7164 switch(pps->mb_slice_group_map_type){
// the table excerpts below quote the FMO slice-group syntax from the
// H.264 spec; the corresponding parsing is not implemented
7167 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7168 | run_length[ i ] |1 |ue(v) |
7173 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7175 | top_left_mb[ i ] |1 |ue(v) |
7176 | bottom_right_mb[ i ] |1 |ue(v) |
7184 | slice_group_change_direction_flag |1 |u(1) |
7185 | slice_group_change_rate_minus1 |1 |ue(v) |
7190 | slice_group_id_cnt_minus1 |1 |ue(v) |
7191 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7193 | slice_group_id[ i ] |1 |u(v) |
7198 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7199 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7200 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7201 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7202 pps->ref_count[0]= pps->ref_count[1]= 1;
7206 pps->weighted_pred= get_bits1(&s->gb);
7207 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7208 pps->init_qp= get_se_golomb(&s->gb) + 26;
7209 pps->init_qs= get_se_golomb(&s->gb) + 26;
7210 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7211 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7212 pps->constrained_intra_pred= get_bits1(&s->gb);
7213 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7215 pps->transform_8x8_mode= 0;
7216 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// inherit the scaling matrices of the referenced SPS as defaults
7217 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7218 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// optional extension fields: present only if bits remain in the RBSP
7220 if(get_bits_count(&s->gb) < bit_length){
7221 pps->transform_8x8_mode= get_bits1(&s->gb);
7222 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7223 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7225 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7228 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7229 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7230 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7231 h->pps.chroma_qp_diff= 1;
7233 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7234 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7235 pps_id, pps->sps_id,
7236 pps->cabac ? "CABAC" : "CAVLC",
7237 pps->slice_group_count,
7238 pps->ref_count[0], pps->ref_count[1],
7239 pps->weighted_pred ? "weighted" : "",
7240 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7241 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7242 pps->constrained_intra_pred ? "CONSTR" : "",
7243 pps->redundant_pic_cnt_present ? "REDU" : "",
7244 pps->transform_8x8_mode ? "8x8DCT" : ""
7252 * Call decode_slice() for each context.
7254 * @param h h264 master context
7255 * @param context_count number of contexts to execute
7257 static void execute_decode_slices(H264Context *h, int context_count){
7258 MpegEncContext * const s = &h->s;
7259 AVCodecContext * const avctx= s->avctx;
// single context: decode inline without going through avctx->execute()
7263 if(context_count == 1) {
7264 decode_slice(avctx, h);
// propagate error-handling settings to every worker context and
// reset their per-slice error counters
7266 for(i = 1; i < context_count; i++) {
7267 hx = h->thread_context[i];
7268 hx->s.error_recognition = avctx->error_recognition;
7269 hx->s.error_count = 0;
7272 avctx->execute(avctx, (void *)decode_slice,
7273 (void **)h->thread_context, NULL, context_count);
7275 /* pull back stuff from slices to master context */
7276 hx = h->thread_context[context_count - 1];
7277 s->mb_x = hx->s.mb_x;
7278 s->mb_y = hx->s.mb_y;
7279 s->dropable = hx->s.dropable;
7280 s->picture_structure = hx->s.picture_structure;
// accumulate the workers' error counts into the master context
7281 for(i = 1; i < context_count; i++)
7282 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units (Annex-B start codes or avcC
 * length-prefixed, depending on h->is_avc), unescape each with decode_nal()
 * and dispatch on nal_unit_type (slice / DPA-DPB-DPC / SEI / SPS / PPS / ...).
 * Slices are queued into thread contexts and flushed through
 * execute_decode_slices() whenever max_contexts are pending.
 * NOTE(review): several interior lines are elided in this view; only the
 * visible lines are documented/changed here.
 */
7287 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7288 MpegEncContext * const s = &h->s;
7289 AVCodecContext * const avctx= s->avctx;
7291 H264Context *hx; ///< thread context
7292 int context_count = 0;
7294 h->max_contexts = avctx->thread_count;
/* Debug dump of the first 50 input bytes (guarded by elided debug condition). */
7297 for(i=0; i<50; i++){
7298 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Without CODEC_FLAG2_CHUNKS each call starts a fresh access unit;
 * keep current_picture_ptr across calls only while waiting for the
 * second field of an interlaced pair. */
7301 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7302 h->current_slice = 0;
7303 if (!s->first_field)
7304 s->current_picture_ptr= NULL;
7316 if(buf_index >= buf_size) break;
/* avcC mode: NAL size is big-endian, nal_length_size bytes wide. */
7318 for(i = 0; i < h->nal_length_size; i++)
7319 nalsize = (nalsize << 8) | buf[buf_index++];
7320 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7325 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7330 // start code prefix search
7331 for(; buf_index + 3 < buf_size; buf_index++){
7332 // This should always succeed in the first iteration.
7333 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7337 if(buf_index+3 >= buf_size) break;
7342 hx = h->thread_context[context_count];
7344 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7345 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes. BUGFIX: test dst_length > 0 BEFORE reading
 * ptr[dst_length - 1]; the original order dereferenced ptr[-1] when the
 * payload shrank to zero length. */
7348 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7350 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7352 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7353 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7356 if (h->is_avc && (nalsize != consumed)){
7357 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7361 buf_index += consumed;
/* Skip non-reference NALs entirely when hurrying or AVDISCARD_NONREF. */
7363 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7364 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7369 switch(hx->nal_unit_type){
7371 if (h->nal_unit_type != NAL_IDR_SLICE) {
7372 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7375 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7377 init_get_bits(&hx->s.gb, ptr, bit_length);
7379 hx->inter_gb_ptr= &hx->s.gb;
7380 hx->s.data_partitioning = 0;
7382 if((err = decode_slice_header(hx, h)))
7385 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Only count the slice for decoding if it survives every skip filter. */
7386 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7387 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7388 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7389 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7390 && avctx->skip_frame < AVDISCARD_ALL)
/* Data-partitioned slices (DPA/DPB/DPC): intra and inter coefficients
 * arrive in separate NALs, parsed through intra_gb/inter_gb. */
7394 init_get_bits(&hx->s.gb, ptr, bit_length);
7396 hx->inter_gb_ptr= NULL;
7397 hx->s.data_partitioning = 1;
7399 err = decode_slice_header(hx, h);
7402 init_get_bits(&hx->intra_gb, ptr, bit_length);
7403 hx->intra_gb_ptr= &hx->intra_gb;
7406 init_get_bits(&hx->inter_gb, ptr, bit_length);
7407 hx->inter_gb_ptr= &hx->inter_gb;
7409 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7410 && s->context_initialized
7412 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7413 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7414 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7415 && avctx->skip_frame < AVDISCARD_ALL)
7419 init_get_bits(&s->gb, ptr, bit_length);
7423 init_get_bits(&s->gb, ptr, bit_length);
7424 decode_seq_parameter_set(h);
7426 if(s->flags& CODEC_FLAG_LOW_DELAY)
/* has_b_frames < 2: allow the SPS to raise the delay once, not shrink it. */
7429 if(avctx->has_b_frames < 2)
7430 avctx->has_b_frames= !s->low_delay;
7433 init_get_bits(&s->gb, ptr, bit_length);
7435 decode_picture_parameter_set(h, bit_length);
7439 case NAL_END_SEQUENCE:
7440 case NAL_END_STREAM:
7441 case NAL_FILLER_DATA:
7443 case NAL_AUXILIARY_SLICE:
7446 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush queued slices once every thread context is occupied. */
7449 if(context_count == h->max_contexts) {
7450 execute_decode_slices(h, context_count);
7455 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7457 /* Slice could not be decoded in parallel mode, copy down
7458 * NAL unit stuff to context 0 and restart. Note that
7459 * rbsp_buffer is not transferred, but since we no longer
7460 * run in parallel mode this should not be an issue. */
7461 h->nal_unit_type = hx->nal_unit_type;
7462 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any slices still queued after the NAL loop ends. */
7468 execute_decode_slices(h, context_count);
7473 * returns the number of bytes consumed for building the current frame
7475 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7476 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
/* Within 10 bytes of the end, report the whole buffer as consumed so the
 * caller does not loop on a trailing sliver. (The return statement falls in
 * lines elided from this view.) */
7477 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback: parse one input packet into NAL units,
 * decode them, and (re)order decoded pictures for output in display order.
 * Returns bytes consumed via get_consumed_bytes(); *data_size is set to
 * sizeof(AVFrame) only when a picture is actually output.
 * NOTE(review): many interior lines are elided in this view. */
7482 static int decode_frame(AVCodecContext *avctx,
7483 void *data, int *data_size,
7484 const uint8_t *buf, int buf_size)
7486 H264Context *h = avctx->priv_data;
7487 MpegEncContext *s = &h->s;
7488 AVFrame *pict = data;
7491 s->flags= avctx->flags;
7492 s->flags2= avctx->flags2;
7494 /* end of stream, output what is still in the buffers */
7495 if (buf_size == 0) {
7499 //FIXME factorize this with the output code below
/* Drain path: pick the lowest-poc delayed picture (stopping early at a
 * key frame or zero poc) and shift the queue down. */
7500 out = h->delayed_pic[0];
7502 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7503 if(h->delayed_pic[i]->poc < out->poc){
7504 out = h->delayed_pic[i];
7508 for(i=out_idx; h->delayed_pic[i]; i++)
7509 h->delayed_pic[i] = h->delayed_pic[i+1];
7512 *data_size = sizeof(AVFrame);
7513 *pict= *(AVFrame*)out;
/* One-time parse of the avcC extradata (MP4/MOV-style streams): decode the
 * embedded SPS and PPS NALs, then record the real NAL length-prefix size. */
7519 if(h->is_avc && !h->got_avcC) {
7520 int i, cnt, nalsize;
7521 unsigned char *p = avctx->extradata;
7522 if(avctx->extradata_size < 7) {
7523 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7527 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7530 /* sps and pps in the avcC always have length coded with 2 bytes,
7531 so put a fake nal_length_size = 2 while parsing them */
7532 h->nal_length_size = 2;
7533 // Decode sps from avcC
7534 cnt = *(p+5) & 0x1f; // Number of sps
7536 for (i = 0; i < cnt; i++) {
7537 nalsize = AV_RB16(p) + 2;
7538 if(decode_nal_units(h, p, nalsize) < 0) {
7539 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7544 // Decode pps from avcC
7545 cnt = *(p++); // Number of pps
7546 for (i = 0; i < cnt; i++) {
7547 nalsize = AV_RB16(p) + 2;
/* NOTE(review): pps uses a stricter check (!= nalsize) than the sps loop's
 * (< 0) above — an inconsistency in the original code, preserved here. */
7548 if(decode_nal_units(h, p, nalsize) != nalsize) {
7549 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7554 // Now store right nal length size, that will be use to parse all other nals
7555 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7556 // Do not reparse avcC
/* Annex-B extradata (e.g. from raw .h264): feed it straight through once. */
7560 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7561 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7566 buf_index=decode_nal_units(h, buf, buf_size);
7570 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7571 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7572 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Picture-complete path: either non-chunked mode, or the chunked decode has
 * reached the bottom macroblock row. */
7576 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7577 Picture *out = s->current_picture_ptr;
7578 Picture *cur = s->current_picture_ptr;
7579 int i, pics, cross_idr, out_of_order, out_idx;
7583 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7584 s->current_picture_ptr->pict_type= s->pict_type;
/* Apply memory-management control operations and roll POC state forward. */
7587 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7588 h->prev_poc_msb= h->poc_msb;
7589 h->prev_poc_lsb= h->poc_lsb;
7591 h->prev_frame_num_offset= h->frame_num_offset;
7592 h->prev_frame_num= h->frame_num;
7595 * FIXME: Error handling code does not seem to support interlaced
7596 * when slices span multiple rows
7597 * The ff_er_add_slice calls don't work right for bottom
7598 * fields; they cause massive erroneous error concealing
7599 * Error marking covers both fields (top and bottom).
7600 * This causes a mismatched s->error_count
7601 * and a bad error table. Further, the error count goes to
7602 * INT_MAX when called for bottom field, because mb_y is
7603 * past end by one (callers fault) and resync_mb_y != 0
7604 * causes problems for the first MB line, too.
7611 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7612 /* Wait for second field. */
7616 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7617 /* Derive top_field_first from field pocs. */
7618 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7620 //FIXME do something with unavailable reference frames
7622 /* Sort B-frames into display order */
7624 if(h->sps.bitstream_restriction_flag
7625 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7626 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Strict compliance without bitstream restrictions: assume worst-case delay. */
7630 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7631 && !h->sps.bitstream_restriction_flag){
7632 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7637 while(h->delayed_pic[pics]) pics++;
7639 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Queue the new picture; keep it referenced while it waits for output. */
7641 h->delayed_pic[pics++] = cur;
7642 if(cur->reference == 0)
7643 cur->reference = DELAYED_PIC_REF;
7645 out = h->delayed_pic[0];
7647 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7648 if(h->delayed_pic[i]->poc < out->poc){
7649 out = h->delayed_pic[i];
7652 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7654 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7656 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
/* Grow the assumed reorder delay when an out-of-order poc proves it larger. */
7658 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7660 ((!cross_idr && out->poc > h->outputed_poc + 2)
7661 || cur->pict_type == FF_B_TYPE)))
7664 s->avctx->has_b_frames++;
7667 if(out_of_order || pics > s->avctx->has_b_frames){
7668 out->reference &= ~DELAYED_PIC_REF;
7669 for(i=out_idx; h->delayed_pic[i]; i++)
7670 h->delayed_pic[i] = h->delayed_pic[i+1];
7672 if(!out_of_order && pics > s->avctx->has_b_frames){
7673 *data_size = sizeof(AVFrame);
7675 h->outputed_poc = out->poc;
7676 *pict= *(AVFrame*)out;
7678 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7683 assert(pict->data[0] || !*data_size);
7684 ff_print_debug_info(s, pict);
7685 //printf("out %d\n", (int)pict->data[0]);
7688 /* Return the Picture timestamp as the frame number */
7689 /* we subtract 1 because it is added on utils.c */
7690 avctx->frame_number = s->picture_number - 1;
7692 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with the availability of the macroblocks neighbouring
 * the current one: a neighbour counts as available only when it exists
 * inside the picture AND belongs to the same slice (same slice_num in
 * h->slice_table). Indices 0..2 are the top-left, top and top-right
 * neighbours; 3 is the left neighbour. (Lines between index 2 and 3 are
 * elided in this view.) */
7695 static inline void fill_mb_avail(H264Context *h){
7696 MpegEncContext * const s = &h->s;
7697 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7700 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7701 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7702 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7708 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7709 h->mb_avail[4]= 1; //FIXME move out
7710 h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness (the enclosing function header and its #ifdef guard are
 * elided in this view). It round-trips Exp-Golomb coding, the 4x4 (I)DCT,
 * the quantizer and the NAL escape/unescape layer, printing mismatches. */
7718 #define SIZE (COUNT*40)
7724 // int int_temp[10000];
7726 AVCodecContext avctx;
7728 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: write values 0..COUNT-1, read them back --- */
7730 init_put_bits(&pb, temp, SIZE);
7731 printf("testing unsigned exp golomb\n");
7732 for(i=0; i<COUNT; i++){
7734 set_ue_golomb(&pb, i);
7735 STOP_TIMER("set_ue_golomb");
7737 flush_put_bits(&pb);
7739 init_get_bits(&gb, temp, 8*SIZE);
7740 for(i=0; i<COUNT; i++){
7743 s= show_bits(&gb, 24);
7746 j= get_ue_golomb(&gb);
7748 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7751 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb: same round-trip, centred on zero --- */
7755 init_put_bits(&pb, temp, SIZE);
7756 printf("testing signed exp golomb\n");
7757 for(i=0; i<COUNT; i++){
7759 set_se_golomb(&pb, i - COUNT/2);
7760 STOP_TIMER("set_se_golomb");
7762 flush_put_bits(&pb);
7764 init_get_bits(&gb, temp, 8*SIZE);
7765 for(i=0; i<COUNT; i++){
7768 s= show_bits(&gb, 24);
7771 j= get_se_golomb(&gb);
7772 if(j != i - COUNT/2){
7773 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7776 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT: diff-transform random blocks, reconstruct, measure error --- */
7780 printf("testing 4x4 (I)DCT\n");
7783 uint8_t src[16], ref[16];
7784 uint64_t error= 0, max_error=0;
7786 for(i=0; i<COUNT; i++){
7788 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7789 for(j=0; j<16; j++){
7790 ref[j]= random()%255;
7791 src[j]= random()%255;
7794 h264_diff_dct_c(block, src, ref, 4);
/* Normalize coefficients; odd rows/columns carry an extra 4/5 scale factor. */
7797 for(j=0; j<16; j++){
7798 // printf("%d ", block[j]);
7799 block[j]= block[j]*4;
7800 if(j&1) block[j]= (block[j]*4 + 2)/5;
7801 if(j&4) block[j]= (block[j]*4 + 2)/5;
7805 s->dsp.h264_idct_add(ref, block, 4);
7806 /* for(j=0; j<16; j++){
7807 printf("%d ", ref[j]);
7811 for(j=0; j<16; j++){
7812 int diff= FFABS(src[j] - ref[j]);
7815 max_error= FFMAX(max_error, diff);
7818 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7819 printf("testing quantizer\n");
7820 for(qp=0; qp<52; qp++){
7822 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: escape a random bitstream, unescape it, compare --- */
7825 printf("Testing NAL layer\n");
7827 uint8_t bitstream[COUNT];
7828 uint8_t nal[COUNT*2];
7830 memset(&h, 0, sizeof(H264Context));
7832 for(i=0; i<COUNT; i++){
/* Non-zero payload bytes, then punch in random zeros to exercise escaping. */
7840 for(j=0; j<COUNT; j++){
7841 bitstream[j]= (random() % 255) + 1;
7844 for(j=0; j<zeros; j++){
7845 int pos= random() % COUNT;
7846 while(bitstream[pos] == 0){
7855 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7857 printf("encoding failed\n");
7861 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7865 if(out_length != COUNT){
7866 printf("incorrect length %d %d\n", out_length, COUNT);
7870 if(consumed != nal_length){
7871 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7875 if(memcmp(bitstream, out, COUNT)){
7876 printf("mismatch\n");
7882 printf("Testing RBSP\n");
/* AVCodec close callback: release the decoder's per-instance allocations.
 * (The return statement is in lines elided from this view.) */
7890 static av_cold int decode_end(AVCodecContext *avctx)
7892 H264Context *h = avctx->priv_data;
7893 MpegEncContext *s = &h->s;
/* av_freep also NULLs the pointers, guarding against double free. */
7895 av_freep(&h->rbsp_buffer[0]);
7896 av_freep(&h->rbsp_buffer[1]);
7897 free_tables(h); //FIXME cleanup init stuff perhaps
7900 // memset(h, 0, sizeof(H264Context));
/* AVCodec registration entry for the native H.264 decoder. Several
 * initializer fields (name, type, codec id, callbacks) are elided in this
 * view; only the visible ones are shown below. */
7906 AVCodec h264_decoder = {
7910 sizeof(H264Context),
/* DR1: decoder can render into caller-provided buffers; DELAY: output may
 * lag input because of B-frame reordering (see decode_frame). */
7915 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7917 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),