2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
// CAVLC lookup tables (static, shared by all decoder instances).
// Each VLC has a companion fixed-size backing table so init_vlc can
// build into static storage instead of allocating.
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
// coeff_token for chroma DC blocks uses its own, smaller table.
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
// total_zeros: one VLC per possible total_coeff value (1..15).
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
// run_before: 6 short-run VLCs plus (below) a separate table for runs >= 7.
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
// Forward declarations for SVQ3 idct helpers, the in-loop deblocking
// filter (full and fast variants) and reference-picture management.
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
// Packs two 16-bit values into one 32-bit word, with the lane order
// chosen by host endianness so the pair can be stored with one write.
// NOTE(review): the #else/#endif and closing brace are missing from this
// extract of the listing — confirm against the full source.
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
// rem6[qp] == qp % 6 and div6[qp] == qp / 6 for the 52 H.264 QP values,
// precomputed as lookup tables to avoid division.
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
// Per-MBAFF-case orderings of the left-neighbour 4x4 block indices;
// selected in fill_caches(). NOTE(review): initializer rows are missing
// from this extract.
97 static const int left_block_options[4][8]={
// Fills the per-macroblock neighbour caches (intra pred modes, non-zero
// counts, cbp, ref indices, motion vectors, mvd, direct flags) from the
// already-decoded neighbouring macroblocks, handling the MBAFF
// frame/field neighbour-derivation special cases. When for_deblock is
// set, only the subset needed by the loop filter is prepared.
// NOTE(review): this extract of the listing is missing many interleaved
// lines (closing braces, else branches, some conditions) — the comments
// below describe only what is visible here.
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
// Default (non-MBAFF) neighbour positions: straight grid adjacency.
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
// MBAFF: neighbours are derived from the frame/field flags of the MB
// pairs around the current pair (spec clause 6.4.8 neighbour derivation).
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
// Remember resolved neighbour positions for later stages (loop filter).
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// Neighbour MB types are only taken from the same slice; otherwise 0
// (treated as unavailable).
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra prediction: compute bitmasks of which neighbouring samples are
// available, honouring constrained_intra_pred (only intra neighbours
// count when it is set).
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
// Cache the intra4x4 prediction modes of the top and left neighbours.
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Cache the non-zero coefficient counts of the neighbouring blocks;
// unavailable neighbours get 0 (CABAC inter) or 64 (otherwise).
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Cache coded_block_pattern bits of the top/left neighbours (CABAC).
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter/direct MBs: fill motion vector and reference index caches from
// the top, left, top-left and top-right neighbours.
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// Motion vector differences (mvd) are cached too, for CABAC context
// derivation.
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: cache the direct-mode flags of the neighbours.
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF frame<->field conversion of cached refs/mvs: MAP_F2F is applied
// to each cached neighbour position and is defined twice (one variant
// per conversion direction) before each expansion.
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
// Count of 8x8-DCT neighbours, used for transform-size flag contexts.
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Copies the current MB's intra4x4 prediction modes from the scratch
// cache back into the per-picture intra4x4_pred_mode array (only the
// right column and bottom row are needed by future neighbours).
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Validates the cached intra4x4 prediction modes against neighbour
// availability; modes needing unavailable top/left samples are remapped
// via the `top`/`left` tables (negative entries mean the mode is invalid
// and an error is logged). NOTE(review): several lines (loop headers,
// error-return paths, closing braces) are missing from this extract.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Validates a 16x16/chroma intra prediction mode against neighbour
// availability, remapping DC modes when top/left samples are missing
// (including the MBAFF + constrained_intra_pred half-available case).
// NOTE(review): the range check condition and return paths are missing
// from this extract — see the full source.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 if((h->left_samples_available&0x8080) != 0x8080){
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
// Returns the predicted intra4x4 mode for block n: the minimum of the
// left and top neighbours' cached modes, or DC_PRED when either
// neighbour is unavailable (negative cache value).
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
// Copies the current MB's non-zero coefficient counts from the scratch
// cache back into the per-picture non_zero_count array (luma right
// column / bottom row plus the chroma entries future neighbours read).
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
// Predicts the non-zero coefficient count for block n from the cached
// left and top counts (the combination/averaging line is missing from
// this extract; the i<64 branch rounds the averaged value).
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetches the "C" (top-right, falling back to top-left) neighbour MV
// for MV prediction and returns its reference index. Handles the MBAFF
// cases where the neighbour has a different frame/field coding than the
// current MB: SET_DIAG_MV rescales the vertical MV component and ref
// index between frame and field units.
// NOTE(review): several condition lines and closing braces are missing
// from this extract.
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF path: use top-right if available, else fall back to
// top-left (spec 8.4.1.3 neighbour substitution).
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
// Median MV prediction (spec 8.4.1.3): predicts (*mx,*my) for block n
// from neighbours A (left), B (top) and C (diagonal, via
// fetch_diagonal_mv). With exactly one neighbour matching `ref` that
// neighbour's MV is used; otherwise the component-wise median of A,B,C.
// NOTE(review): some branch bodies and the single-match else-paths are
// missing from this extract.
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
// No match: if only the left neighbour is available use A, else median.
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
// Directional MV prediction for 16x8 partitions (spec 8.4.1.3.1): the
// top partition prefers neighbour B, the bottom prefers A; otherwise it
// falls back to the generic median predictor.
// NOTE(review): the partition-selection conditions and early returns
// are missing from this extract.
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Fallback: generic median prediction.
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
// Directional MV prediction for 8x16 partitions (spec 8.4.1.3.1): the
// left partition prefers neighbour A, the right prefers the diagonal C;
// otherwise it falls back to the generic median predictor.
// NOTE(review): the partition-selection conditions and early returns
// are missing from this extract.
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
// Fallback: generic median prediction.
875 pred_motion(h, n, 2, list, ref, mx, my);
// MV prediction for P-Skip macroblocks (spec 8.4.1.1): the predictor is
// (0,0) when either the top or left neighbour is unavailable or is a
// zero-MV ref-0 block; otherwise the normal median predictor is used.
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
// Neighbours usable: fall through to the median predictor for list 0,
// ref 0, full-MB partition.
892 pred_motion(h, 0, 4, 0, 0, mx, my);
// Computes the temporal-direct distance scale factor for list-0
// reference i (spec 8.4.1.2.3): DistScaleFactor = clip((tb*tx+32)>>6),
// with tx derived from the POC distance td. Long-term references (and
// td==0) use the constant path whose lines are missing from this
// extract.
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
// Precomputes the temporal-direct distance scale factors for all list-0
// references, plus per-field tables for the MBAFF/field case.
// NOTE(review): the branch structure between the field loop and the
// frame loop is missing from this extract.
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
// Builds the map from the co-located picture's reference indices to the
// current slice's list-0 indices (temporal direct mode). Matching is
// done on 4*frame_num + reference "poc" keys; the mbafi/field variants
// fill the +16 field-index section of the map.
// NOTE(review): some condition lines and braces are missing from this
// extract.
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Per-slice initialization for B-frame direct prediction: records the current
 * picture's reference counts/POC keys (per field side), then builds the
 * col-to-list0 maps via fill_colmap() for both the frame and the MBAFF field
 * cases. Early-outs for non-B slices / spatial direct (return elided here).
 * NOTE(review): loop-variable declarations and some braces are elided in this
 * excerpt.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960     MpegEncContext * const s = &h->s;
961     Picture * const ref1 = &h->ref_list[1][0];
962     Picture * const cur = s->current_picture_ptr;
/* sidx/ref1sidx select which field side's ref bookkeeping to fill */
964     int sidx= (s->picture_structure&1)^1;
965     int ref1sidx= (ref1->reference&1)^1;
967     for(list=0; list<2; list++){
968         cur->ref_count[sidx][list] = h->ref_count[list];
969         for(j=0; j<h->ref_count[list]; j++)
970             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
973     if(s->picture_structure == PICT_FRAME){
/* progressive frame: both field sides share the same ref info */
974         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975         memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978     cur->mbaff= FRAME_MBAFF;
980     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983     for(list=0; list<2; list++){
984         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985         for(field=0; field<2; field++)
986             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Motion vector / reference prediction for B-direct macroblocks (both
 * spatial and temporal direct modes). Selects the co-located macroblock of
 * ref_list[1][0] (with field/frame crossing adjustments), chooses the
 * sub-macroblock partitioning, then fills h->mv_cache / h->ref_cache for the
 * direct-predicted partitions.
 * NOTE(review): this excerpt is heavily elided (many statements, brace
 * closures and variable declarations are missing); comments below describe
 * only what the visible lines establish.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991     MpegEncContext * const s = &h->s;
992     int b8_stride = h->b8_stride;
993     int b4_stride = h->b_stride;
994     int mb_xy = h->mb_xy;
996     const int16_t (*l1mv0)[2], (*l1mv1)[2];
997     const int8_t *l1ref0, *l1ref1;
998     const int is_b8x8 = IS_8X8(*mb_type);
999     unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* ---- locate the co-located MB, crossing frame<->field layouts ---- */
1004     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005         if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1006             int cur_poc = s->current_picture_ptr->poc;
1007             int *col_poc = h->ref_list[1]->field_poc;
/* pick the colocated field closer in POC to the current picture */
1008             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013             mb_xy += s->mb_stride*fieldoff;
1016     }else{ // AFL/AFR/FR/FL -> AFR/FR
1017         if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1018             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024             if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028                 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031                 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033         }else{ // AFR/FR -> AFR/FR
1036             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1037             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038                 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039                 * so we know exactly what block size to use */
1040                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041                 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044                 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047                 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* ---- fetch the co-located motion vectors / reference indices ---- */
1052     l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053     l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1058         l1ref0 += h->b8_stride;
1059         l1ref1 += h->b8_stride;
1060         l1mv0 += 2*b4_stride;
1061         l1mv1 += 2*b4_stride;
/* ---- spatial direct mode ---- */
1065     if(h->direct_spatial_mv_pred){
1070         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072         /* ref = min(neighbors) */
1073         for(list=0; list<2; list++){
1074             int refa = h->ref_cache[list][scan8[0] - 1];
1075             int refb = h->ref_cache[list][scan8[0] - 8];
1076             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077             if(refc == PART_NOT_AVAILABLE)
1078                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare makes PART_NOT_AVAILABLE (negative) lose the min */
1079             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1084         if(ref[0] < 0 && ref[1] < 0){
1085             ref[0] = ref[1] = 0;
1086             mv[0][0] = mv[0][1] =
1087             mv[1][0] = mv[1][1] = 0;
1089             for(list=0; list<2; list++){
1091                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093                     mv[list][0] = mv[list][1] = 0;
1099             *mb_type &= ~MB_TYPE_L1;
1100             sub_mb_type &= ~MB_TYPE_L1;
1101         }else if(ref[0] < 0){
1103             *mb_type &= ~MB_TYPE_L0;
1104             sub_mb_type &= ~MB_TYPE_L0;
/* frame/field mismatch with the co-located MB: per-8x8 handling */
1107         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108             for(i8=0; i8<4; i8++){
1111                 int xy8 = x8+y8*b8_stride;
1112                 int xy4 = 3*x8+y8*b4_stride;
1115                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117                 h->sub_mb_type[i8] = sub_mb_type;
1119                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated near-zero MV with ref 0 forces the (0,0)-like candidate */
1121                 if(!IS_INTRA(mb_type_col[y8])
1122                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125                         a= pack16to32(mv[0][0],mv[0][1]);
1127                         b= pack16to32(mv[1][0],mv[1][1]);
1129                     a= pack16to32(mv[0][0],mv[0][1]);
1130                     b= pack16to32(mv[1][0],mv[1][1]);
1132                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1135         }else if(IS_16X16(*mb_type)){
1138             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1140             if(!IS_INTRA(mb_type_col[0])
1141                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <=33 had a different colocated-MV rule; worked around here */
1143                        && (h->x264_build>33 || !h->x264_build)))){
1145                     a= pack16to32(mv[0][0],mv[0][1]);
1147                     b= pack16to32(mv[1][0],mv[1][1]);
1149                 a= pack16to32(mv[0][0],mv[0][1]);
1150                 b= pack16to32(mv[1][0],mv[1][1]);
1152             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1155             for(i8=0; i8<4; i8++){
1156                 const int x8 = i8&1;
1157                 const int y8 = i8>>1;
1159                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161                 h->sub_mb_type[i8] = sub_mb_type;
1163                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1169                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1170                                                  || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171                                                      && (h->x264_build>33 || !h->x264_build)))){
1172                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173                     if(IS_SUB_8X8(sub_mb_type)){
1174                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182                     for(i4=0; i4<4; i4++){
1183                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct mode: scale colocated MVs by dist_scale_factor ---- */
1194     }else{ /* direct temporal mv pred */
1195         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196         const int *dist_scale_factor = h->dist_scale_factor;
1199         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1204         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1207         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208             /* FIXME assumes direct_8x8_inference == 1 */
1209             int y_shift  = 2*!IS_INTERLACED(*mb_type);
1211             for(i8=0; i8<4; i8++){
1212                 const int x8 = i8&1;
1213                 const int y8 = i8>>1;
1215                 const int16_t (*l1mv)[2]= l1mv0;
1217                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219                 h->sub_mb_type[i8] = sub_mb_type;
1221                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222                 if(IS_INTRA(mb_type_col[y8])){
1223                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229                 ref0 = l1ref0[x8 + y8*b8_stride];
1231                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236                 scale = dist_scale_factor[ref0];
1237                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241                     int my_col = (mv_col[1]<<y_shift)/2;
/* scale is 8.8 fixed point, hence the +128 rounding and >>8 */
1242                     int mx = (scale * mv_col[0] + 128) >> 8;
1243                     int my = (scale * my_col + 128) >> 8;
1244                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251         /* one-to-one mv scaling */
1253         if(IS_16X16(*mb_type)){
1256             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257             if(IS_INTRA(mb_type_col[0])){
1260                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262                 const int scale = dist_scale_factor[ref0];
1263                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
/* list1 MV is the residual: scaled MV minus the colocated MV */
1269                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275             for(i8=0; i8<4; i8++){
1276                 const int x8 = i8&1;
1277                 const int y8 = i8>>1;
1279                 const int16_t (*l1mv)[2]= l1mv0;
1281                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283                 h->sub_mb_type[i8] = sub_mb_type;
1284                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285                 if(IS_INTRA(mb_type_col[0])){
1286                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294                     ref0 = map_col_to_list0[0][ref0];
1296                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299                 scale = dist_scale_factor[ref0];
1301                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302                 if(IS_SUB_8X8(sub_mb_type)){
1303                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304                     int mx = (scale * mv_col[0] + 128) >> 8;
1305                     int my = (scale * mv_col[1] + 128) >> 8;
1306                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309                     for(i4=0; i4<4; i4++){
1310                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311                         int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314                         *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315                             pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the decoded motion information for the current macroblock from the
 * per-MB caches (h->mv_cache / h->mvd_cache / h->ref_cache / h->sub_mb_type)
 * back into the picture-wide arrays (motion_val, mvd_table, ref_index,
 * direct_table). NOTE(review): loop headers and some statements are elided in
 * this excerpt.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323     MpegEncContext * const s = &h->s;
/* b_xy/b8_xy: top-left 4x4 resp. 8x8 block index of this MB */
1324     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1328     if(!USES_LIST(mb_type, 0))
1329         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331     for(list=0; list<h->list_count; list++){
1333         if(!USES_LIST(mb_type, list))
/* copy 4 MVs (2x 64-bit) per row from the cache */
1337             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340         if( h->pps.cabac ) {
1341             if(IS_SKIP(mb_type))
1342                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345                     *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346                     *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1351             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B slices: record which 8x8 partitions are direct-predicted */
1359     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360         if(IS_8X8(mb_type)){
1361             uint8_t *direct_table = &h->direct_table[b8_xy];
1362             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes one NAL unit: parses the header byte (nal_ref_idc, nal_unit_type),
 * then removes the 00 00 03 emulation-prevention escapes. If no escaped zero
 * is found the input pointer itself is returned; otherwise the unescaped data
 * is copied into h->rbsp_buffer. NOTE(review): several lines (declarations,
 * the escape-copy loop header, the final return) are elided in this excerpt.
 */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 //    src[0]&0x80;                //forbidden bit
1382     h->nal_ref_idc= src[0]>>5;
1383     h->nal_unit_type= src[0]&0x1F;
1387     for(i=0; i<length; i++)
1388         printf("%2X ", src[i]);
/* scan even positions for a zero byte; a 00 00 0[0-3] run needs unescaping */
1390     for(i=0; i+1<length; i+=2){
1391         if(src[i]) continue;
1392         if(i>0 && src[i-1]==0) i--;
1393         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395                 /* startcode, so we must be past the end */
1402     if(i>=length-1){ //no escaped 0
1403         *dst_length= length;
1404         *consumed= length+1; //+1 for the header
1408     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410     dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419         //remove escapes (very rare 1:2^22)
1420         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421             if(src[si+2]==3){ //escape
1426             }else //next start code
1430         dst[di++]= src[si++];
1434     *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit at the end of the bitstream.
 * NOTE(review): the body of this function is almost entirely elided in this
 * excerpt; only the trace statement is visible. */
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * IDCT transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard transform + dequantization of the 16 luma DC values
 * (Intra16x16 macroblocks). Works in-place on the scattered DC positions of
 * the block array (see x_offset/y_offset tables). The column pass writes into
 * temp[], the row pass writes back scaled by qmul with +128 rounding.
 * NOTE(review): the pass loop headers and temp[] stores are elided here.
 */
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463     int temp[16]; //FIXME check if this is a good idea
1464     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
/* horizontal (column-gathering) butterfly pass */
1470         const int offset= y_offset[i];
1471         const int z0= block[offset+stride*0] + block[offset+stride*4];
1472         const int z1= block[offset+stride*0] - block[offset+stride*4];
1473         const int z2= block[offset+stride*1] - block[offset+stride*5];
1474         const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass; scale by qmul with rounding */
1483         const int offset= x_offset[i];
1484         const int z0= temp[4*0+i] + temp[4*2+i];
1485         const int z1= temp[4*0+i] - temp[4*2+i];
1486         const int z2= temp[4*1+i] - temp[4*3+i];
1487         const int z3= temp[4*1+i] + temp[4*3+i];
1489         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * DCT transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side,
 * mirror of h264_luma_dc_dequant_idct_c). Output is halved (>>1).
 * NOTE(review): the pass loop headers and temp[] stores are elided here.
 */
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 //    const int qmul= dequant_coeff[qp][0];
1504     int temp[16]; //FIXME check if this is a good idea
1505     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first butterfly pass over the scattered DC positions */
1509         const int offset= y_offset[i];
1510         const int z0= block[offset+stride*0] + block[offset+stride*4];
1511         const int z1= block[offset+stride*0] - block[offset+stride*4];
1512         const int z2= block[offset+stride*1] - block[offset+stride*5];
1513         const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass, halved result written back */
1522         const int offset= x_offset[i];
1523         const int z0= temp[4*0+i] + temp[4*2+i];
1524         const int z1= temp[4*0+i] - temp[4*2+i];
1525         const int z2= temp[4*1+i] - temp[4*3+i];
1526         const int z3= temp[4*1+i] + temp[4*3+i];
1528         block[stride*0 +offset]= (z0 + z3)>>1;
1529         block[stride*2 +offset]= (z1 + z2)>>1;
1530         block[stride*8 +offset]= (z1 - z2)>>1;
1531         block[stride*10+offset]= (z0 - z3)>>1;
/**
 * Inverse 2x2 transform + dequantization of the 4 chroma DC values, in-place
 * on the DC positions of the chroma block array (16-element strides).
 * NOTE(review): the declaration of a/b/c/d/e and the butterfly for e are
 * elided in this excerpt.
 */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540     const int stride= 16*2;
1541     const int xStride= 16;
1544     a= block[stride*0 + xStride*0];
1545     b= block[stride*0 + xStride*1];
1546     c= block[stride*1 + xStride*0];
1547     d= block[stride*1 + xStride*1];
/* 2x2 butterfly, then scale by qmul>>7 */
1554     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the 4 chroma DC values (encoder side, mirror of
 * chroma_dc_dequant_idct_c; no scaling). NOTE(review): the declaration of
 * a/b/c/d/e and the butterfly for e are elided in this excerpt.
 */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562     const int stride= 16*2;
1563     const int xStride= 16;
1566     a= block[stride*0 + xStride*0];
1567     b= block[stride*0 + xStride*1];
1568     c= block[stride*1 + xStride*0];
1569     d= block[stride*1 + xStride*1];
1576     block[stride*0 + xStride*0]= (a+c);
1577     block[stride*0 + xStride*1]= (e+b);
1578     block[stride*1 + xStride*0]= (a-c);
1579     block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
/* Looks up the chroma QP for chroma plane t (Cb/Cr have separate tables in
 * the PPS) at the given luma qscale. */
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587     return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one partition in one direction (one list):
 * computes the quarter-pel luma / eighth-pel chroma source positions from
 * mv_cache, falls back to ff_emulated_edge_mc() when the reference area
 * extends outside the picture, then applies the qpel and chroma MC functions.
 * square/delta control whether a second (non-square) half is filtered.
 * NOTE(review): some statements (emu flag setup, chroma emu branches' guards)
 * are elided in this excerpt.
 */
1590 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1591 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1592 int src_x_offset, int src_y_offset,
1593 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1594     MpegEncContext * const s = &h->s;
/* mx/my in quarter-pel units, offset to the partition position */
1595     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1596     int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1597     const int luma_xy= (mx&3) + ((my&3)<<2);
1598     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1599     uint8_t * src_cb, * src_cr;
1600     int extra_width= h->emu_edge_width;
1601     int extra_height= h->emu_edge_height;
1603     const int full_mx= mx>>2;
1604     const int full_my= my>>2;
1605     const int pic_width = 16*s->mb_width;
1606     const int pic_height = 16*s->mb_height >> MB_FIELD;
1608     if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* subpel filtering reads 3 extra pixels on each side */
1611     if(mx&7) extra_width -= 3;
1612     if(my&7) extra_height -= 3;
1614     if(   full_mx < 0-extra_width
1615        || full_my < 0-extra_height
1616        || full_mx + 16/*FIXME*/ > pic_width + extra_width
1617        || full_my + 16/*FIXME*/ > pic_height + extra_height){
1618         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1619             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1623     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1625         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1628     if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1631         // chroma offset when predicting from a field of opposite parity
1632         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1633         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1635     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1636     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1639         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1640             src_cb= s->edge_emu_buffer;
1642     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1645         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1646             src_cr= s->edge_emu_buffer;
1648     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: runs mc_dir_part() for
 * list 0 with the "put" functions, then (for bi-prediction) again for list 1
 * with the "avg" functions so the two predictions are averaged in place.
 * NOTE(review): the list0/list1 guards and the qpix_op reassignment are
 * partially elided in this excerpt.
 */
1651 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1652 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1653 int x_offset, int y_offset,
1654 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1655 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1656 int list0, int list1){
1657     MpegEncContext * const s = &h->s;
1658     qpel_mc_func *qpix_op= qpix_put;
1659     h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to the partition, then convert offsets to
 * picture-absolute coordinates */
1661     dest_y += 2*x_offset + 2*y_offset*h->  mb_linesize;
1662     dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1663     dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1664     x_offset += 8*s->mb_x;
1665     y_offset += 8*(s->mb_y >> MB_FIELD);
1668         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1669         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1670 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1671 qpix_op, chroma_op);
/* second direction averages into the first prediction */
1674         chroma_op= chroma_avg;
1678         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1679         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1680 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1681 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bi-directional case:
 * predict list 0 into the destination and list 1 into the obmc scratchpad,
 * then blend with either implicit weights (use_weight==2) or explicit
 * per-reference weights/offsets. Uni-directional case: predict then apply a
 * single weight. NOTE(review): the list0&&list1 guard and some braces are
 * elided in this excerpt.
 */
1685 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1686 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1687 int x_offset, int y_offset,
1688 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1689 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1690 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1691 int list0, int list1){
1692     MpegEncContext * const s = &h->s;
1694     dest_y += 2*x_offset + 2*y_offset*h->  mb_linesize;
1695     dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1696     dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1697     x_offset += 8*s->mb_x;
1698     y_offset += 8*(s->mb_y >> MB_FIELD);
1701         /* don't optimize for luma-only case, since B-frames usually
1702         * use implicit weights => chroma too. */
1703         uint8_t *tmp_cb = s->obmc_scratchpad;
1704         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1705         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1706         int refn0 = h->ref_cache[0][ scan8[n] ];
1707         int refn1 = h->ref_cache[1][ scan8[n] ];
1709         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1710 dest_y, dest_cb, dest_cr,
1711 x_offset, y_offset, qpix_put, chroma_put);
1712         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1713 tmp_y, tmp_cb, tmp_cr,
1714 x_offset, y_offset, qpix_put, chroma_put);
1716         if(h->use_weight == 2){
/* implicit weighting: weights per reference pair, summing to 64 */
1717             int weight0 = h->implicit_weight[refn0][refn1];
1718             int weight1 = 64 - weight0;
1719             luma_weight_avg(  dest_y, tmp_y, h->  mb_linesize, 5, weight0, weight1, 0);
1720             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1721             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1723             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1724 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1725 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1726             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1727 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1728 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1729             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1730 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1731 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict from whichever single list is used */
1734         int list = list1 ? 1 : 0;
1735         int refn = h->ref_cache[list][ scan8[n] ];
1736         Picture *ref= &h->ref_list[list][refn];
1737         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1738 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1739 qpix_put, chroma_put);
1741         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1742 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1743         if(h->use_weight_chroma){
1744             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1745 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1746             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1747 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation:
 * weighted MC is used for explicit weighting (use_weight==1), or for implicit
 * weighting when the weight pair is not the trivial 32/32 average.
 */
1752 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1753 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1754 int x_offset, int y_offset,
1755 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1756 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1757 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1758 int list0, int list1){
1759     if((h->use_weight==2 && list0 && list1
1760 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1761 || h->use_weight==1)
1762         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1763 x_offset, y_offset, qpix_put, chroma_put,
1764 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1766         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1767 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * current MB's MV points to, a few macroblocks ahead of the decode position.
 * NOTE(review): the guard around the refn lookup is elided in this excerpt.
 */
1770 static inline void prefetch_motion(H264Context *h, int list){
1771     /* fetch pixels for estimated mv 4 macroblocks ahead
1772     * optimized for 64byte cache lines */
1773     MpegEncContext * const s = &h->s;
1774     const int refn = h->ref_cache[list][scan8[0]];
1776     const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1777     const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1778     uint8_t **src= h->ref_list[list][refn].data;
1779     int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1780     s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous; one prefetch covers cb and cr */
1781     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1782     s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Top-level inter prediction for one macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and calls mc_part() with
 * the matching qpel/chroma function sizes and weight-function slots. Issues
 * prefetches for both reference lists around the work.
 * NOTE(review): the 8x8 loop header and some per-partition declarations are
 * elided in this excerpt.
 */
1786 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1788 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1789 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1790     MpegEncContext * const s = &h->s;
1791     const int mb_xy= h->mb_xy;
1792     const int mb_type= s->current_picture.mb_type[mb_xy];
1794     assert(IS_INTER(mb_type));
1796     prefetch_motion(h, 0);
1798     if(IS_16X16(mb_type)){
1799         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1800 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1801 &weight_op[0], &weight_avg[0],
1802 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1803     }else if(IS_16X8(mb_type)){
1804         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1805 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1806 &weight_op[1], &weight_avg[1],
1807 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1809 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1810 &weight_op[1], &weight_avg[1],
1811 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1812     }else if(IS_8X16(mb_type)){
1813         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1815 &weight_op[2], &weight_avg[2],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1818 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1819 &weight_op[2], &weight_avg[2],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1824         assert(IS_8X8(mb_type));
/* per-8x8 sub-partition dispatch */
1827             const int sub_mb_type= h->sub_mb_type[i];
1829             int x_offset= (i&1)<<2;
1830             int y_offset= (i&2)<<1;
1832             if(IS_SUB_8X8(sub_mb_type)){
1833                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1834 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1835 &weight_op[3], &weight_avg[3],
1836 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1837             }else if(IS_SUB_8X4(sub_mb_type)){
1838                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1840 &weight_op[4], &weight_avg[4],
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1843 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1844 &weight_op[4], &weight_avg[4],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846             }else if(IS_SUB_4X8(sub_mb_type)){
1847                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1849 &weight_op[5], &weight_avg[5],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1852 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1853 &weight_op[5], &weight_avg[5],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1857                 assert(IS_SUB_4X4(sub_mb_type));
1859                     int sub_x_offset= x_offset + 2*(j&1);
1860                     int sub_y_offset= y_offset +   (j&2);
1861                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1862 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1863 &weight_op[6], &weight_avg[6],
1864 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1870     prefetch_motion(h, 1);
/**
 * One-time initialization of all static CAVLC VLC tables (coeff_token,
 * chroma DC coeff_token, total_zeros, run). Each VLC is pointed at its
 * preallocated static table (declared at the top of this file) and built with
 * INIT_VLC_USE_NEW_STATIC. NOTE(review): the "done" guard, loop headers and
 * the offset declaration are elided in this excerpt.
 */
1873 static av_cold void decode_init_vlc(void){
1874     static int done = 0;
1881         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1882         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1883         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1884 &chroma_dc_coeff_token_len [0], 1, 1,
1885 &chroma_dc_coeff_token_bits[0], 1, 1,
1886 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token tables are packed into one static array */
1890             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1891             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1892             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1893 &coeff_token_len [i][0], 1, 1,
1894 &coeff_token_bits[i][0], 1, 1,
1895 INIT_VLC_USE_NEW_STATIC);
1896             offset += coeff_token_vlc_tables_size[i];
1899         * This is a one time safety check to make sure that
1900         * the packed static coeff_token_vlc table sizes
1901         * were initialized correctly.
1903         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1906             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1907             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1908             init_vlc(&chroma_dc_total_zeros_vlc[i],
1909 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1910 &chroma_dc_total_zeros_len [i][0], 1, 1,
1911 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1912 INIT_VLC_USE_NEW_STATIC);
1914         for(i=0; i<15; i++){
1915             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1916             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1917             init_vlc(&total_zeros_vlc[i],
1918 TOTAL_ZEROS_VLC_BITS, 16,
1919 &total_zeros_len [i][0], 1, 1,
1920 &total_zeros_bits[i][0], 1, 1,
1921 INIT_VLC_USE_NEW_STATIC);
1925             run_vlc[i].table = run_vlc_tables[i];
1926             run_vlc[i].table_allocated = run_vlc_tables_size;
1927             init_vlc(&run_vlc[i],
1929 &run_len [i][0], 1, 1,
1930 &run_bits[i][0], 1, 1,
1931 INIT_VLC_USE_NEW_STATIC);
1933         run7_vlc.table = run7_vlc_table,
1934         run7_vlc.table_allocated = run7_vlc_table_size;
1935         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1936 &run_len [6][0], 1, 1,
1937 &run_bits[6][0], 1, 1,
1938 INIT_VLC_USE_NEW_STATIC);
/* Free all per-context tables allocated by alloc_tables()/context_init(),
 * plus the per-thread border buffers and scratchpads of every slice-thread
 * context.  av_freep() NULLs each pointer, so a subsequent free is safe. */
1942 static void free_tables(H264Context *h){
1945 av_freep(&h->intra4x4_pred_mode);
1946 av_freep(&h->chroma_pred_mode_table);
1947 av_freep(&h->cbp_table);
1948 av_freep(&h->mvd_table[0]);
1949 av_freep(&h->mvd_table[1]);
1950 av_freep(&h->direct_table);
1951 av_freep(&h->non_zero_count);
1952 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base, so it must be cleared too */
1953 h->slice_table= NULL;
1955 av_freep(&h->mb2b_xy);
1956 av_freep(&h->mb2b8_xy);
/* per-thread buffers allocated by context_init()/frame_start() */
1958 for(i = 0; i < h->s.avctx->thread_count; i++) {
1959 hx = h->thread_context[i];
1961 av_freep(&hx->top_borders[1]);
1962 av_freep(&hx->top_borders[0]);
1963 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables (one per QP, 0..51) from the PPS
 * scaling matrices.  If both 8x8 scaling matrices are identical, the second
 * table aliases the first instead of being recomputed. */
1967 static void init_dequant8_coeff_table(H264Context *h){
/* transposed layout is needed when a SIMD idct8 (non-C) is in use */
1969 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1970 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1971 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1973 for(i=0; i<2; i++ ){
/* reuse table 0 when intra and inter scaling matrices match */
1974 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1975 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1979 for(q=0; q<52; q++){
1980 int shift = div6[q];
1983 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1984 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1985 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the six 4x4 dequantization tables (one per QP, 0..51) from the PPS
 * scaling matrices.  Tables with identical scaling matrices are shared by
 * aliasing the earlier buffer instead of recomputing. */
1990 static void init_dequant4_coeff_table(H264Context *h){
/* transposed layout is needed when a SIMD idct (non-C) is in use */
1992 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1993 for(i=0; i<6; i++ ){
1994 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* share the table of an earlier matrix that compares equal */
1996 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1997 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2004 for(q=0; q<52; q++){
2005 int shift = div6[q] + 2;
2008 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2009 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2010 h->pps.scaling_matrix4[i][x]) << shift;
/* Initialize all dequant tables: always the 4x4 set, the 8x8 set only when
 * the PPS enables 8x8 transforms.  With lossless transform_bypass, QP 0
 * entries are forced to the neutral value 1<<6 so dequant is a no-op. */
2015 static void init_dequant_tables(H264Context *h){
2017 init_dequant4_coeff_table(h);
2018 if(h->pps.transform_8x8_mode)
2019 init_dequant8_coeff_table(h);
2020 if(h->sps.transform_bypass){
2023 h->dequant4_coeff[i][0][x] = 1<<6;
2024 if(h->pps.transform_8x8_mode)
2027 h->dequant8_coeff[i][0][x] = 1<<6;
2034 * needs width/height
/* Allocate all per-sequence tables sized from mb_width/mb_height/mb_stride.
 * Returns 0 on success; CHECKED_ALLOCZ jumps to the (elided) fail path on
 * allocation failure, where free_tables() cleans up. */
2036 static int alloc_tables(H264Context *h){
2037 MpegEncContext * const s = &h->s;
/* one extra row of macroblocks for edge handling */
2038 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2041 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2043 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2044 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2045 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2047 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2048 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2049 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2050 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset into the base so that
 * out-of-frame neighbours read the -1 border */
2052 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2053 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2055 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2056 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* precompute macroblock-index -> motion-block-index lookup tables */
2057 for(y=0; y<s->mb_height; y++){
2058 for(x=0; x<s->mb_width; x++){
2059 const int mb_xy= x + y*s->mb_stride;
2060 const int b_xy = 4*x + 4*y*h->b_stride;
2061 const int b8_xy= 2*x + 2*y*h->b8_stride;
2063 h->mb2b_xy [mb_xy]= b_xy;
2064 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2068 s->obmc_scratchpad = NULL;
2070 if(!h->dequant4_coeff[0])
2071 init_dequant_tables(h);
2080 * Mimic alloc_tables(), but for every context thread.
/* Share the tables allocated by alloc_tables() with a per-thread context:
 * all pointers are aliased (dst does not own them), except the scratchpad,
 * which stays per-thread and is allocated later in frame_start(). */
2082 static void clone_tables(H264Context *dst, H264Context *src){
2083 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2084 dst->non_zero_count = src->non_zero_count;
2085 dst->slice_table = src->slice_table;
2086 dst->cbp_table = src->cbp_table;
2087 dst->mb2b_xy = src->mb2b_xy;
2088 dst->mb2b8_xy = src->mb2b8_xy;
2089 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2090 dst->mvd_table[0] = src->mvd_table[0];
2091 dst->mvd_table[1] = src->mvd_table[1];
2092 dst->direct_table = src->direct_table;
/* per-thread buffer: must not be shared, allocated lazily */
2094 dst->s.obmc_scratchpad = NULL;
2095 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2100 * Allocate buffers which are not shared amongst multiple threads.
/* Per-thread top-border buffers: 16 luma + 8 + 8 chroma bytes per mb column.
 * Returns 0 on success, -1 on allocation failure (free_tables cleans up). */
2102 static int context_init(H264Context *h){
2103 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2108 return -1; // free_tables will clean up for us
/* Initialization shared by the decoder (and encoder) init paths: copies
 * dimensions from the AVCodecContext, sets up intra prediction, and seeds
 * flat (all-16) scaling matrices as the default until a PPS overrides them. */
2111 static av_cold void common_init(H264Context *h){
2112 MpegEncContext * const s = &h->s;
2114 s->width = s->avctx->width;
2115 s->height = s->avctx->height;
2116 s->codec_id= s->avctx->codec->id;
2118 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 marks the dequant tables as not yet built for any PPS */
2120 h->dequant_coeff_pps= -1;
2121 s->unrestricted_mv=1;
2122 s->decode=1; //FIXME
/* default "flat" scaling lists (all 16) per the spec */
2124 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2125 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up the MpegEncContext defaults, pick the output
 * pixel format (SVQ3 uses full-range YUVJ420P), detect AVC-format ("avcC")
 * extradata, and initialize thread/POC bookkeeping. */
2128 static av_cold int decode_init(AVCodecContext *avctx){
2129 H264Context *h= avctx->priv_data;
2130 MpegEncContext * const s = &h->s;
2132 MPV_decode_defaults(s);
2137 s->out_format = FMT_H264;
2138 s->workaround_bugs= avctx->workaround_bugs;
2141 // s->decode_mb= ff_h263_decode_mb;
2142 s->quarter_sample = 1;
2145 if(avctx->codec_id == CODEC_ID_SVQ3)
2146 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2148 avctx->pix_fmt= PIX_FMT_YUV420P;
/* a leading 1 byte identifies avcC-style (length-prefixed NAL) extradata */
2152 if(avctx->extradata_size > 0 && avctx->extradata &&
2153 *(char *)avctx->extradata == 1){
2160 h->thread_context[0] = h;
/* INT_MIN: nothing output yet; large prev_poc_msb forces POC re-derivation */
2161 h->outputed_poc = INT_MIN;
2162 h->prev_poc_msb= 1<<16;
/* Per-frame setup: start the MPV frame and error resilience, precompute the
 * per-block destination offsets (progressive in [0..23], field/MBAFF in
 * [24..47]), allocate the per-thread scratchpads (needs linesize, so it
 * cannot happen in alloc_tables), and reset POC/reference state. */
2166 static int frame_start(H264Context *h){
2167 MpegEncContext * const s = &h->s;
2170 if(MPV_frame_start(s, s->avctx) < 0)
2172 ff_er_frame_start(s);
2174 * MPV_frame_start uses pict_type to derive key_frame.
2175 * This is incorrect for H.264; IDR markings must be used.
2176 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2177 * See decode_nal_units().
2179 s->current_picture_ptr->key_frame= 0;
2181 assert(s->linesize && s->uvlinesize);
/* luma block offsets: frame layout at [i], field layout at [24+i] */
2183 for(i=0; i<16; i++){
2184 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2185 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma block offsets, Cb at [16..19]/Cr at [20..23], same split */
2188 h->block_offset[16+i]=
2189 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2190 h->block_offset[24+16+i]=
2191 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2194 /* can't be in alloc_tables because linesize isn't known there.
2195 * FIXME: redo bipred weight to not require extra buffer? */
2196 for(i = 0; i < s->avctx->thread_count; i++)
2197 if(!h->thread_context[i]->s.obmc_scratchpad)
2198 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2200 /* some macroblocks will be accessed before they're available */
2201 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2202 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2204 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2206 // We mark the current picture as non-reference after allocating it, so
2207 // that if we break out due to an error it can be released automatically
2208 // in the next MPV_frame_start().
2209 // SVQ3 as well as most other codecs have only last/next/current and thus
2210 // get released even with set reference, besides SVQ3 and others do not
2211 // mark frames as reference later "naturally".
2212 if(s->codec_id != CODEC_ID_SVQ3)
2213 s->current_picture_ptr->reference= 0;
2215 s->current_picture_ptr->field_poc[0]=
2216 s->current_picture_ptr->field_poc[1]= INT_MAX;
2217 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row and right column of the just-decoded macroblock into
 * h->top_borders / h->left_border so the deblocking filter and the next
 * macroblock row can read pre-filter samples.  The MBAFF path stores
 * separate lines for the top and bottom macroblock of a pair. */
2222 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2223 MpegEncContext * const s = &h->s;
2232 src_cb -= uvlinesize;
2233 src_cr -= uvlinesize;
2235 if(!simple && FRAME_MBAFF){
2237 offset = MB_MBAFF ? 1 : 17;
2238 uvoffset= MB_MBAFF ? 1 : 9;
/* save the last line of the mb pair into top_borders[0] */
2240 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2242 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2243 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2249 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2250 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2251 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2252 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2258 top_idx = MB_MBAFF ? 0 : 1;
2260 step= MB_MBAFF ? 2 : 1;
2263 // There are two lines saved, the line above the the top macroblock of a pair,
2264 // and the line above the bottom macroblock
/* right-edge luma column for the left neighbour of the next mb */
2265 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2266 for(i=1; i<17 - skiplast; i++){
2267 h->left_border[offset+i*step]= src_y[15+i* linesize];
2270 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2273 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2274 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2275 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2276 for(i=1; i<9 - skiplast; i++){
2277 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2278 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2280 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved top/left border samples with
 * the picture edges around the current macroblock.  Used around intra
 * prediction so it reads pre-deblocking neighbour samples; the XCHG macro
 * (partially elided here) performs the conditional swap. */
2285 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2286 MpegEncContext * const s = &h->s;
2297 if(!simple && FRAME_MBAFF){
2299 offset = MB_MBAFF ? 1 : 17;
2300 uvoffset= MB_MBAFF ? 1 : 9;
2304 top_idx = MB_MBAFF ? 0 : 1;
2306 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter only within the current slice */
2309 if(h->deblocking_filter == 2) {
2311 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2312 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2314 deblock_left = (s->mb_x > 0);
2315 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back to the top-left corner sample */
2318 src_y -= linesize + 1;
2319 src_cb -= uvlinesize + 1;
2320 src_cr -= uvlinesize + 1;
2322 #define XCHG(a,b,t,xchg)\
2329 for(i = !deblock_top; i<16; i++){
2330 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2332 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2336 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* the top-right neighbour belongs to the next mb's border slot */
2338 if(s->mb_x+1 < s->mb_width){
2339 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2343 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2345 for(i = !deblock_top; i<8; i++){
2346 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2347 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2349 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2350 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2353 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Core macroblock reconstruction.  'simple' is a compile-time constant
 * (the function is av_always_inline and called from two wrappers) that lets
 * the compiler strip the MBAFF/PCM/SVQ3/gray paths for the common case.
 * Handles: destination setup, intra PCM copy, intra prediction + residual
 * IDCT (4x4/8x8, with lossless transform_bypass variants), inter motion
 * compensation via hl_motion(), chroma reconstruction, and deblocking. */
2359 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2360 MpegEncContext * const s = &h->s;
2361 const int mb_x= s->mb_x;
2362 const int mb_y= s->mb_y;
2363 const int mb_xy= h->mb_xy;
2364 const int mb_type= s->current_picture.mb_type[mb_xy];
2365 uint8_t *dest_y, *dest_cb, *dest_cr;
2366 int linesize, uvlinesize /*dct_offset*/;
2368 int *block_offset = &h->block_offset[0];
/* lossless mode: qscale 0 with SPS transform_bypass set */
2369 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2370 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2371 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2372 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2374 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2375 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2376 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2378 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2379 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use the field block offsets */
2381 if (!simple && MB_FIELD) {
2382 linesize = h->mb_linesize = s->linesize * 2;
2383 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2384 block_offset = &h->block_offset[24];
2385 if(mb_y&1){ //FIXME move out of this function?
2386 dest_y -= s->linesize*15;
2387 dest_cb-= s->uvlinesize*7;
2388 dest_cr-= s->uvlinesize*7;
/* remap reference indices for field parity in the ref cache */
2392 for(list=0; list<h->list_count; list++){
2393 if(!USES_LIST(mb_type, list))
2395 if(IS_16X16(mb_type)){
2396 int8_t *ref = &h->ref_cache[list][scan8[0]];
2397 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2399 for(i=0; i<16; i+=4){
2400 int ref = h->ref_cache[list][scan8[i]];
2402 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2408 linesize = h->mb_linesize = s->linesize;
2409 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2410 // dct_offset = s->linesize * 16;
/* intra PCM: raw samples stored in h->mb, just copy them out */
2413 if (!simple && IS_INTRA_PCM(mb_type)) {
2414 for (i=0; i<16; i++) {
2415 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2417 for (i=0; i<8; i++) {
2418 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2419 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2422 if(IS_INTRA(mb_type)){
/* temporarily swap in pre-deblocking border samples for prediction */
2423 if(h->deblocking_filter)
2424 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2426 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2427 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2428 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2431 if(IS_INTRA4x4(mb_type)){
2432 if(simple || !s->encoding){
2433 if(IS_8x8DCT(mb_type)){
2434 if(transform_bypass){
2436 idct_add = s->dsp.add_pixels8;
2438 idct_dc_add = s->dsp.h264_idct8_dc_add;
2439 idct_add = s->dsp.h264_idct8_add;
2441 for(i=0; i<16; i+=4){
2442 uint8_t * const ptr= dest_y + block_offset[i];
2443 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 (High 4:4:4): lossless prediction folds the residual in */
2444 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2445 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2447 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2448 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2449 (h->topright_samples_available<<i)&0x4000, linesize);
/* DC-only block: use the cheaper dc_add */
2451 if(nnz == 1 && h->mb[i*16])
2452 idct_dc_add(ptr, h->mb + i*16, linesize);
2454 idct_add (ptr, h->mb + i*16, linesize);
2459 if(transform_bypass){
2461 idct_add = s->dsp.add_pixels4;
2463 idct_dc_add = s->dsp.h264_idct_dc_add;
2464 idct_add = s->dsp.h264_idct_add;
2466 for(i=0; i<16; i++){
2467 uint8_t * const ptr= dest_y + block_offset[i];
2468 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2470 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2471 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* synthesize top-right samples when the neighbour is unavailable */
2475 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2476 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2477 assert(mb_y || linesize <= block_offset[i]);
2478 if(!topright_avail){
2479 tr= ptr[3 - linesize]*0x01010101;
2480 topright= (uint8_t*) &tr;
2482 topright= ptr + 4 - linesize;
2486 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2487 nnz = h->non_zero_count_cache[ scan8[i] ];
2490 if(nnz == 1 && h->mb[i*16])
2491 idct_dc_add(ptr, h->mb + i*16, linesize);
2493 idct_add (ptr, h->mb + i*16, linesize);
2495 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2502 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2504 if(!transform_bypass)
2505 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2507 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2509 if(h->deblocking_filter)
2510 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation */
2512 hl_motion(h, dest_y, dest_cb, dest_cr,
2513 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2514 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2515 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for non-intra4x4 macroblocks */
2519 if(!IS_INTRA4x4(mb_type)){
2521 if(IS_INTRA16x16(mb_type)){
2522 if(transform_bypass){
2523 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2524 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2526 for(i=0; i<16; i++){
2527 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2528 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2532 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2534 }else if(h->cbp&15){
2535 if(transform_bypass){
2536 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2537 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2538 for(i=0; i<16; i+=di){
2539 if(h->non_zero_count_cache[ scan8[i] ]){
2540 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2544 if(IS_8x8DCT(mb_type)){
2545 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2547 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2552 for(i=0; i<16; i++){
2553 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2554 uint8_t * const ptr= dest_y + block_offset[i];
2555 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (cbp bits 4-5) */
2561 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2562 uint8_t *dest[2] = {dest_cb, dest_cr};
2563 if(transform_bypass){
2564 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2565 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2566 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2568 idct_add = s->dsp.add_pixels4;
2569 for(i=16; i<16+8; i++){
2570 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2571 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2575 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2576 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2578 idct_add = s->dsp.h264_idct_add;
2579 idct_dc_add = s->dsp.h264_idct_dc_add;
2580 for(i=16; i<16+8; i++){
2581 if(h->non_zero_count_cache[ scan8[i] ])
2582 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2583 else if(h->mb[i*16])
2584 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2587 for(i=16; i<16+8; i++){
2588 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2589 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2590 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: save pre-filter borders first, then filter in place */
2597 if(h->deblocking_filter) {
2598 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2599 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2600 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2601 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2602 if (!simple && FRAME_MBAFF) {
2603 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2605 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2611 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple=1 specialization of hl_decode_mb_internal (MBAFF/PCM/gray stripped) */
2613 static void hl_decode_mb_simple(H264Context *h){
2614 hl_decode_mb_internal(h, 1);
2618 * Process a macroblock; this handles edge cases, such as interlacing.
/* simple=0 specialization; av_noinline keeps the rarely-taken path out of
 * the hot caller */
2620 static void av_noinline hl_decode_mb_complex(H264Context *h){
2621 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant,
 * based on per-stream/per-mb conditions (MBAFF, PCM, lossless, small build). */
2624 static void hl_decode_mb(H264Context *h){
2625 MpegEncContext * const s = &h->s;
2626 const int mb_xy= h->mb_xy;
2627 const int mb_type= s->current_picture.mb_type[mb_xy];
2628 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2630 if(ENABLE_H264_ENCODER && !s->decode)
2634 hl_decode_mb_complex(h);
2635 else hl_decode_mb_simple(h);
/* Convert a frame Picture into a single-field view: double the linesizes,
 * offset the data pointers for the bottom field, restrict the reference
 * marking to the given parity, and pick the matching field POC. */
2638 static void pic_as_field(Picture *pic, const int parity){
2640 for (i = 0; i < 4; ++i) {
/* bottom field starts one line down */
2641 if (parity == PICT_BOTTOM_FIELD)
2642 pic->data[i] += pic->linesize[i];
2643 pic->reference = parity;
2644 pic->linesize[i] *= 2;
2646 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy src into dest if src is referenced with the requested parity;
 * for field parities the copy is converted with pic_as_field() and its
 * pic_id adjusted by id_add.  Returns non-zero iff the parity matched. */
2649 static int split_field_copy(Picture *dest, Picture *src,
2650 int parity, int id_add){
2651 int match = !!(src->reference & parity);
2655 if(parity != PICT_FRAME){
2656 pic_as_field(dest, parity);
2658 dest->pic_id += id_add;
/* Build a default reference list from 'in', alternating between pictures
 * referenced with parity 'sel' and the opposite parity (sel^3), per the
 * field reference list construction rules.  pic_id is the long-term index
 * (is_long) or frame_num.  Returns the number of entries written. */
2665 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0]/i[1] scan independently for same-parity and opposite-parity refs */
2669 while(i[0]<len || i[1]<len){
2670 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2672 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2675 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2676 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2679 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2680 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort the pictures in src by POC into 'sorted', taking only POCs
 * on one side of 'limit': dir=0 picks POCs > limit in ascending order,
 * dir=1 picks POCs <= limit in descending order.  Returns the count added.
 * Used to order B-frame reference lists around the current POC. */
2687 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2692 best_poc= dir ? INT_MIN : INT_MAX;
2694 for(i=0; i<len; i++){
2695 const int poc= src[i]->poc;
2696 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2698 sorted[out_i]= src[i];
/* no candidate found on this pass -> done */
2701 if(best_poc == (dir ? INT_MIN : INT_MAX))
2703 limit= sorted[out_i++]->poc - dir;
2709 * fills the default_ref_list.
/* Construct the default reference picture lists per the spec:
 * B slices get short-term refs sorted by POC around the current picture
 * (L0 backwards, L1 forwards) then long-term refs; P slices get short-term
 * refs in decoding order then long-term refs.  Unfilled tail entries are
 * zeroed. */
2711 static int fill_default_ref_list(H264Context *h){
2712 MpegEncContext * const s = &h->s;
2715 if(h->slice_type_nos==FF_B_TYPE){
2716 Picture *sorted[32];
2721 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2723 cur_poc= s->current_picture_ptr->poc;
2725 for(list= 0; list<2; list++){
/* L0: past refs first (descending POC); L1: future refs first */
2726 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2727 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2729 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2730 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2733 if(len < h->ref_count[list])
2734 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if L0 and L1 come out identical, swap the first two L1 entries */
2738 if(lens[0] == lens[1] && lens[1] > 1){
2739 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2741 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2744 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2745 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2747 if(len < h->ref_count[0])
2748 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* debug trace of the constructed lists */
2751 for (i=0; i<h->ref_count[0]; i++) {
2752 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2754 if(h->slice_type_nos==FF_B_TYPE){
2755 for (i=0; i<h->ref_count[1]; i++) {
2756 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2763 static void print_short_term(H264Context *h);
2764 static void print_long_term(H264Context *h);
2767 * Extract structure information about the picture described by pic_num in
2768 * the current decoding context (frame or field). Note that pic_num is
2769 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2770 * @param pic_num picture number for which to extract structure information
2771 * @param structure one of PICT_XXX describing structure of picture
2773 * @return frame number (short term) or long term index of picture
2774 * described by pic_num
2776 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2777 MpegEncContext * const s = &h->s;
/* default: same structure as the current picture */
2779 *structure = s->picture_structure;
2782 /* opposite field */
2783 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering command move the
 * referenced short-term (idc 0/1, diff from pred) or long-term (idc 2)
 * picture to the current index, shifting the rest down.  Missing refs are
 * patched with the current picture as a last resort.  Returns 0 on
 * success, -1 on bitstream errors. */
2790 static int decode_ref_pic_list_reordering(H264Context *h){
2791 MpegEncContext * const s = &h->s;
2792 int list, index, pic_structure;
2794 print_short_term(h);
2797 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering rearranges in place */
2798 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2800 if(get_bits1(&s->gb)){
2801 int pred= h->curr_pic_num;
2803 for(index=0; ; index++){
2804 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2805 unsigned int pic_id;
2807 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
2809 if(reordering_of_pic_nums_idc==3)
2812 if(index >= h->ref_count[list]){
2813 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2817 if(reordering_of_pic_nums_idc<3){
2818 if(reordering_of_pic_nums_idc<2){
/* short-term: pic num predicted, offset by abs_diff_pic_num */
2819 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2822 if(abs_diff_pic_num > h->max_pic_num){
2823 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2827 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2828 else pred+= abs_diff_pic_num;
2829 pred &= h->max_pic_num - 1;
2831 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search short-term refs, newest first */
2833 for(i= h->short_ref_count-1; i>=0; i--){
2834 ref = h->short_ref[i];
2835 assert(ref->reference);
2836 assert(!ref->long_ref);
2838 ref->frame_num == frame_num &&
2839 (ref->reference & pic_structure)
/* idc 2: long-term reference selected by long_term_pic_idx */
2847 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2849 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2852 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2855 ref = h->long_ref[long_idx];
2856 assert(!(ref && !ref->reference));
2857 if(ref && (ref->reference & pic_structure)){
2858 ref->pic_id= pic_id;
2859 assert(ref->long_ref);
2867 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2868 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift entries down and insert the selected ref at 'index' */
2870 for(i=index; i+1<h->ref_count[list]; i++){
2871 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2874 for(; i > index; i--){
2875 h->ref_list[list][i]= h->ref_list[list][i-1];
2877 h->ref_list[list][index]= *ref;
2879 pic_as_field(&h->ref_list[list][index], pic_structure);
2883 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* sanity pass: no list entry may be left without picture data */
2889 for(list=0; list<h->list_count; list++){
2890 for(index= 0; index < h->ref_count[list]; index++){
2891 if(!h->ref_list[list][index].data[0]){
2892 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2893 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF: derive per-field reference entries from each frame reference.
 * Each frame ref i gets a top-field copy at [16+2*i] and a bottom-field
 * copy at [16+2*i+1] (double linesize, offset data for bottom), and the
 * explicit/implicit prediction weights are duplicated accordingly. */
2901 static void fill_mbaff_ref_list(H264Context *h){
2903 for(list=0; list<2; list++){ //FIXME try list_count
2904 for(i=0; i<h->ref_count[list]; i++){
2905 Picture *frame = &h->ref_list[list][i];
2906 Picture *field = &h->ref_list[list][16+2*i];
2909 field[0].linesize[j] <<= 1;
2910 field[0].reference = PICT_TOP_FIELD;
2911 field[0].poc= field[0].field_poc[0];
/* bottom field: same as top but data shifted one line down */
2912 field[1] = field[0];
2914 field[1].data[j] += frame->linesize[j];
2915 field[1].reference = PICT_BOTTOM_FIELD;
2916 field[1].poc= field[1].field_poc[1];
2918 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2919 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2921 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2922 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* duplicate implicit bipred weights for the field entries */
2926 for(j=0; j<h->ref_count[1]; j++){
2927 for(i=0; i<h->ref_count[0]; i++)
2928 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2929 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2930 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header (explicit weighted
 * prediction): log2 denominators, then per-reference luma and chroma
 * weight/offset pairs, with defaults (1<<denom, 0) when the per-ref flag
 * is absent.  Sets h->use_weight / h->use_weight_chroma when any entry
 * deviates from the defaults. */
2934 static int pred_weight_table(H264Context *h){
2935 MpegEncContext * const s = &h->s;
2937 int luma_def, chroma_def;
2940 h->use_weight_chroma= 0;
2941 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2942 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2943 luma_def = 1<<h->luma_log2_weight_denom;
2944 chroma_def = 1<<h->chroma_log2_weight_denom;
2946 for(list=0; list<2; list++){
2947 for(i=0; i<h->ref_count[list]; i++){
2948 int luma_weight_flag, chroma_weight_flag;
2950 luma_weight_flag= get_bits1(&s->gb);
2951 if(luma_weight_flag){
2952 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2953 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only non-default weights activate weighted prediction */
2954 if( h->luma_weight[list][i] != luma_def
2955 || h->luma_offset[list][i] != 0)
2958 h->luma_weight[list][i]= luma_def;
2959 h->luma_offset[list][i]= 0;
2963 chroma_weight_flag= get_bits1(&s->gb);
2964 if(chroma_weight_flag){
2967 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2968 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2969 if( h->chroma_weight[list][i][j] != chroma_def
2970 || h->chroma_offset[list][i][j] != 0)
2971 h->use_weight_chroma= 1;
2976 h->chroma_weight[list][i][j]= chroma_def;
2977 h->chroma_offset[list][i][j]= 0;
/* list 1 only exists for B slices */
2982 if(h->slice_type_nos != FF_B_TYPE) break;
2984 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute implicit bipred weights for B slices from POC distances, per the
 * spec's distScaleFactor derivation.  Falls back to equal 32/32 weighting
 * when the current POC is exactly between the two refs, or when the scale
 * factor leaves the valid range. */
2988 static void implicit_weight_table(H264Context *h){
2989 MpegEncContext * const s = &h->s;
2991 int cur_poc = s->current_picture_ptr->poc;
/* symmetric single-ref case: implicit weighting degenerates to equal */
2993 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2994 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2996 h->use_weight_chroma= 0;
3001 h->use_weight_chroma= 2;
3002 h->luma_log2_weight_denom= 5;
3003 h->chroma_log2_weight_denom= 5;
3005 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3006 int poc0 = h->ref_list[0][ref0].poc;
3007 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3008 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the spec's clip ranges */
3009 int td = av_clip(poc1 - poc0, -128, 127);
3011 int tb = av_clip(cur_poc - poc0, -128, 127);
3012 int tx = (16384 + (FFABS(td) >> 1)) / td;
3013 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3014 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3015 h->implicit_weight[ref0][ref1] = 32;
3017 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3019 h->implicit_weight[ref0][ref1] = 32;
3025 * Mark a picture as no longer needed for reference. The refmask
3026 * argument allows unreferencing of individual fields or the whole frame.
3027 * If the picture becomes entirely unreferenced, but is being held for
3028 * display purposes, it is marked as such.
3029 * @param refmask mask of fields to unreference; the mask is bitwise
3030 * anded with the reference marking of pic
3031 * @return non-zero if pic becomes entirely unreferenced (except possibly
3032 * for display purposes) zero if one of the fields remains in
/* (doc comment for this function is at the lines just above this block:
 * clears refmask bits from pic->reference; if the picture is still in the
 * delayed-output queue it is retained with DELAYED_PIC_REF.)
 * NOTE(review): the return statements of this function are missing from
 * this extracted chunk; per the comment above, it returns non-zero when
 * the picture becomes entirely unreferenced — TODO confirm. */
3035 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3037 if (pic->reference &= refmask) {
3040 for(i = 0; h->delayed_pic[i]; i++)
3041 if(pic == h->delayed_pic[i]){
/* picture is no longer a reference but still queued for display */
3042 pic->reference=DELAYED_PIC_REF;
3050 * instantaneous decoder refresh.
/**
 * Performs an instantaneous decoder refresh (IDR): drops every long- and
 * short-term reference picture and resets the frame_num prediction state,
 * as required at an IDR access unit.
 */
3052 static void idr(H264Context *h){
/* drop all 16 possible long-term reference slots */
3055 for(i=0; i<16; i++){
3056 remove_long(h, i, 0);
3058 assert(h->long_ref_count==0);
/* unreference and clear every short-term entry */
3060 for(i=0; i<h->short_ref_count; i++){
3061 unreference_pic(h, h->short_ref[i], 0);
3062 h->short_ref[i]= NULL;
3064 h->short_ref_count=0;
/* reset frame_num continuity tracking */
3065 h->prev_frame_num= 0;
3066 h->prev_frame_num_offset= 0;
3071 /* forget old pics after a seek */
/* forget old pics after a seek: clear the delayed-output queue and the
 * current picture's reference marking, then delegate the generic flush to
 * ff_mpeg_flush(). Installed as the codec's flush callback. */
3072 static void flush_dpb(AVCodecContext *avctx){
3073 H264Context *h= avctx->priv_data;
3075 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3076 if(h->delayed_pic[i])
3077 h->delayed_pic[i]->reference= 0;
3078 h->delayed_pic[i]= NULL;
/* INT_MIN so any future POC compares as newer than "last output" */
3080 h->outputed_poc= INT_MIN;
3082 if(h->s.current_picture_ptr)
3083 h->s.current_picture_ptr->reference= 0;
3084 h->s.first_field= 0;
3085 ff_mpeg_flush(avctx);
3089 * Find a Picture in the short term reference list by frame number.
3090 * @param frame_num frame number to search for
3091 * @param idx the index into h->short_ref where returned picture is found
3092 * undefined if no picture found.
3093 * @return pointer to the found picture, or NULL if no pic with the provided
3094 * frame number is found
/* (see doc comment above: linear search of h->short_ref for a picture with
 * the given frame_num; *idx receives its position; NULL fall-through on
 * miss is on lines missing from this extracted chunk.) */
3096 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3097 MpegEncContext * const s = &h->s;
3100 for(i=0; i<h->short_ref_count; i++){
3101 Picture *pic= h->short_ref[i];
/* MMCO debug tracing only */
3102 if(s->avctx->debug&FF_DEBUG_MMCO)
3103 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3104 if(pic->frame_num == frame_num) {
3113 * Remove a picture from the short term reference list by its index in
3114 * that list. This does no checking on the provided index; it is assumed
3115 * to be valid. Other list entries are shifted down.
3116 * @param i index into h->short_ref of picture to remove.
/* (see doc comment above: unconditionally removes entry i from the
 * short-term list, shifting later entries down; index is assumed valid.) */
3118 static void remove_short_at_index(H264Context *h, int i){
3119 assert(i >= 0 && i < h->short_ref_count);
3120 h->short_ref[i]= NULL;
/* compact the tail of the list over the removed slot */
3121 if (--h->short_ref_count)
3122 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3127 * @return the removed picture or NULL if an error occurs
/* Looks up a short-term reference by frame_num, unreferences the fields
 * selected by ref_mask, and removes it from the list only when it became
 * entirely unreferenced. Returns the picture (or NULL — return lines are
 * missing from this extracted chunk). */
3129 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3130 MpegEncContext * const s = &h->s;
3134 if(s->avctx->debug&FF_DEBUG_MMCO)
3135 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3137 pic = find_short(h, frame_num, &i);
/* only drop the list entry once no field of it is referenced anymore */
3139 if(unreference_pic(h, pic, ref_mask))
3140 remove_short_at_index(h, i);
3147 * Remove a picture from the long term reference list by its index in
3149 * @return the removed picture or NULL if an error occurs
/* Unreferences the fields selected by ref_mask of long-term slot i and
 * clears the slot when the picture became entirely unreferenced.
 * Returns the removed picture or NULL (return lines missing from this
 * extracted chunk). */
3151 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3154 pic= h->long_ref[i];
3156 if(unreference_pic(h, pic, ref_mask)){
3157 assert(h->long_ref[i]->long_ref == 1);
3158 h->long_ref[i]->long_ref= 0;
3159 h->long_ref[i]= NULL;
3160 h->long_ref_count--;
3168 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * debugging is enabled; no effect otherwise. */
3170 static void print_short_term(H264Context *h) {
3172 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3173 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3174 for(i=0; i<h->short_ref_count; i++){
3175 Picture *pic= h->short_ref[i];
3176 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3182 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
 * debugging is enabled (a NULL-slot guard between 3189 and 3191 is missing
 * from this extracted chunk). */
3184 static void print_long_term(H264Context *h) {
3186 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3187 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3188 for(i = 0; i < 16; i++){
3189 Picture *pic= h->long_ref[i];
3191 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3198 * Executes the reference picture marking (memory management control operations).
/**
 * Executes the decoded reference picture marking commands (MMCOs, H.264
 * spec 8.2.5): moves pictures between the short- and long-term lists,
 * unreferences them, handles the implicit sliding-window case, and finally
 * enforces the sps.ref_frame_count DPB bound.
 * NOTE(review): incompletely extracted chunk — `break`s between switch
 * cases, several braces and the MMCO_RESET / MMCO_LONG case labels are
 * missing here; code left byte-identical.
 */
3200 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3201 MpegEncContext * const s = &h->s;
3203 int current_ref_assigned=0;
3206 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3207 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3209 for(i=0; i<mmco_count; i++){
3210 int structure, frame_num;
3211 if(s->avctx->debug&FF_DEBUG_MMCO)
3212 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* for short-term ops, resolve pic_num into frame_num + field structure
 * and look up the target picture first */
3214 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3215 || mmco[i].opcode == MMCO_SHORT2LONG){
3216 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3217 pic = find_short(h, frame_num, &j);
3219 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3220 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3221 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3226 switch(mmco[i].opcode){
3227 case MMCO_SHORT2UNUSED:
3228 if(s->avctx->debug&FF_DEBUG_MMCO)
3229 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3230 remove_short(h, frame_num, structure ^ PICT_FRAME);
3232 case MMCO_SHORT2LONG:
/* evict whatever currently occupies the target long-term slot */
3233 if (h->long_ref[mmco[i].long_arg] != pic)
3234 remove_long(h, mmco[i].long_arg, 0);
3236 remove_short_at_index(h, j);
3237 h->long_ref[ mmco[i].long_arg ]= pic;
3238 if (h->long_ref[ mmco[i].long_arg ]){
3239 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3240 h->long_ref_count++;
3243 case MMCO_LONG2UNUSED:
3244 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3245 pic = h->long_ref[j];
3247 remove_long(h, j, structure ^ PICT_FRAME);
3248 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3249 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3252 // Comment below left from previous code as it is an interesting note.
3253 /* First field in pair is in short term list or
3254 * at a different long term index.
3255 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3256 * Report the problem and keep the pair where it is,
3257 * and mark this field valid.
/* (MMCO_LONG case — label line missing from this chunk: assign the
 * current picture to a long-term slot) */
3260 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3261 remove_long(h, mmco[i].long_arg, 0);
3263 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3264 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3265 h->long_ref_count++;
3268 s->current_picture_ptr->reference |= s->picture_structure;
3269 current_ref_assigned=1;
3271 case MMCO_SET_MAX_LONG:
3272 assert(mmco[i].long_arg <= 16);
3273 // just remove the long term which index is greater than new max
3274 for(j = mmco[i].long_arg; j<16; j++){
3275 remove_long(h, j, 0);
/* (MMCO_RESET case — label missing from this chunk: drop everything
 * and reset POC/frame_num of the current picture) */
3279 while(h->short_ref_count){
3280 remove_short(h, h->short_ref[0]->frame_num, 0);
3282 for(j = 0; j < 16; j++) {
3283 remove_long(h, j, 0);
3285 s->current_picture_ptr->poc=
3286 s->current_picture_ptr->field_poc[0]=
3287 s->current_picture_ptr->field_poc[1]=
3291 s->current_picture_ptr->frame_num= 0;
/* (sliding-window branch when no MMCO assigned the current picture) */
3297 if (!current_ref_assigned) {
3298 /* Second field of complementary field pair; the first field of
3299 * which is already referenced. If short referenced, it
3300 * should be first entry in short_ref. If not, it must exist
3301 * in long_ref; trying to put it on the short list here is an
3302 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3304 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3305 /* Just mark the second field valid */
3306 s->current_picture_ptr->reference = PICT_FRAME;
3307 } else if (s->current_picture_ptr->long_ref) {
3308 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3309 "assignment for second field "
3310 "in complementary field pair "
3311 "(first field is long term)\n");
3313 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3315 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* prepend current picture to the short-term list (most recent first) */
3318 if(h->short_ref_count)
3319 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3321 h->short_ref[0]= s->current_picture_ptr;
3322 h->short_ref_count++;
3323 s->current_picture_ptr->reference |= s->picture_structure;
3327 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3329 /* We have too many reference frames, probably due to corrupted
3330 * stream. Need to discard one frame. Prevents overrun of the
3331 * short_ref and long_ref buffers.
3333 av_log(h->s.avctx, AV_LOG_ERROR,
3334 "number of reference frames exceeds max (probably "
3335 "corrupt input), discarding one\n");
3337 if (h->long_ref_count && !h->short_ref_count) {
3338 for (i = 0; i < 16; ++i)
3343 remove_long(h, i, 0);
/* otherwise discard the oldest short-term reference */
3345 pic = h->short_ref[h->short_ref_count - 1];
3346 remove_short(h, pic->frame_num, 0);
3350 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[] /
 * h->mmco_index: IDR handling, explicit adaptive MMCO commands, or the
 * implicit sliding-window removal of the oldest short-term reference.
 * NOTE(review): incompletely extracted chunk — mmco_index assignments and
 * several braces between the visible lines are missing.
 */
3355 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3356 MpegEncContext * const s = &h->s;
3360 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; -1/0 encoding for broken_link */
3361 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag set: keep IDR picture as long-term slot 0 */
3363 h->mmco[0].opcode= MMCO_LONG;
3364 h->mmco[0].long_arg= 0;
3368 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3369 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3370 MMCOOpcode opcode= get_ue_golomb(gb);
3372 h->mmco[i].opcode= opcode;
3373 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo wrap */
3374 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3375 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3376 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3380 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3381 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures address 32 long-term pic nums; frames only 16 */
3382 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3383 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3386 h->mmco[i].long_arg= long_arg;
3389 if(opcode > (unsigned)MMCO_LONG){
3390 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3393 if(opcode == MMCO_END)
/* sliding window mode: synthesize a SHORT2UNUSED for the oldest
 * short-term reference once the DPB is full */
3398 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3400 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3401 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3402 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3403 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3405 if (FIELD_PICTURE) {
/* in field mode both fields of the frame must be unreferenced */
3406 h->mmco[0].short_pic_num *= 2;
3407 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3408 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types of H.264 spec 8.2.1, storing per-field POCs in
 * field_poc[] of the current picture and the frame POC as their minimum.
 * NOTE(review): incompletely extracted chunk — the local field_poc[]
 * declaration, field_poc[0] assignment for type 0, and parts of the
 * type-2 branch are missing here.
 */
3418 static int init_poc(H264Context *h){
3419 MpegEncContext * const s = &h->s;
3420 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3422 Picture *cur = s->current_picture_ptr;
/* extend frame_num into a monotonically increasing offset across wraps */
3424 h->frame_num_offset= h->prev_frame_num_offset;
3425 if(h->frame_num < h->prev_frame_num)
3426 h->frame_num_offset += max_frame_num;
3428 if(h->sps.poc_type==0){
3429 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect poc_lsb wraparound in either direction (spec 8.2.1.1) */
3431 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3432 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3433 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3434 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3436 h->poc_msb = h->prev_poc_msb;
3437 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3439 field_poc[1] = h->poc_msb + h->poc_lsb;
3440 if(s->picture_structure == PICT_FRAME)
3441 field_poc[1] += h->delta_poc_bottom;
3442 }else if(h->sps.poc_type==1){
3443 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3446 if(h->sps.poc_cycle_length != 0)
3447 abs_frame_num = h->frame_num_offset + h->frame_num;
3451 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3454 expected_delta_per_poc_cycle = 0;
3455 for(i=0; i < h->sps.poc_cycle_length; i++)
3456 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3458 if(abs_frame_num > 0){
3459 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3460 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3462 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3463 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3464 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3468 if(h->nal_ref_idc == 0)
3469 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3471 field_poc[0] = expectedpoc + h->delta_poc[0];
3472 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3474 if(s->picture_structure == PICT_FRAME)
3475 field_poc[1] += h->delta_poc[1];
/* poc_type==2: POC follows decoding order (2*extended frame_num) */
3477 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of fields that are actually present */
3486 if(s->picture_structure != PICT_BOTTOM_FIELD)
3487 s->current_picture_ptr->field_poc[0]= field_poc[0];
3488 if(s->picture_structure != PICT_TOP_FIELD)
3489 s->current_picture_ptr->field_poc[1]= field_poc[1];
3490 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3497 * initialize scan tables
/**
 * initialize scan tables: copies the canonical zigzag/field scan orders
 * into the context, permuting indices when a non-C (SIMD) IDCT with a
 * different coefficient layout is in use, and sets the *_q0 pointers used
 * for lossless (transform-bypass) blocks to the unpermuted tables.
 */
3499 static void init_scan_tables(H264Context *h){
3500 MpegEncContext * const s = &h->s;
/* 4x4 tables: plain copy for the C IDCT, permuted otherwise */
3502 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3503 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3504 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3506 for(i=0; i<16; i++){
/* swap the two 2-bit halves of the 4-bit index (row<->column) */
3507 #define T(x) (x>>2) | ((x<<2) & 0xF)
3508 h->zigzag_scan[i] = T(zigzag_scan[i]);
3509 h-> field_scan[i] = T( field_scan[i]);
/* 8x8 tables: same idea with 3-bit halves of the 6-bit index */
3513 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3514 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3515 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3516 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3517 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3519 for(i=0; i<64; i++){
3520 #define T(x) (x>>3) | ((x&7)<<3)
3521 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3522 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3523 h->field_scan8x8[i] = T(field_scan8x8[i]);
3524 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* qp==0 (bypass) blocks skip the IDCT, so they always need the
 * unpermuted scan order */
3528 if(h->sps.transform_bypass){ //FIXME same ugly
3529 h->zigzag_scan_q0 = zigzag_scan;
3530 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3531 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3532 h->field_scan_q0 = field_scan;
3533 h->field_scan8x8_q0 = field_scan8x8;
3534 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3536 h->zigzag_scan_q0 = h->zigzag_scan;
3537 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3538 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3539 h->field_scan_q0 = h->field_scan;
3540 h->field_scan8x8_q0 = h->field_scan8x8;
3541 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3546 * Replicates H264 "master" context to thread contexts.
/**
 * Replicates H264 "master" context state to a thread context so that a
 * worker can decode a slice of the same frame: current picture pointers,
 * line sizes, POC/frame_num prediction state, reference lists and dequant
 * tables. Shallow copy only — the shared pictures themselves are not
 * duplicated.
 */
3548 static void clone_slice(H264Context *dst, H264Context *src)
3550 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3551 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3552 dst->s.current_picture = src->s.current_picture;
3553 dst->s.linesize = src->s.linesize;
3554 dst->s.uvlinesize = src->s.uvlinesize;
3555 dst->s.first_field = src->s.first_field;
3557 dst->prev_poc_msb = src->prev_poc_msb;
3558 dst->prev_poc_lsb = src->prev_poc_lsb;
3559 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3560 dst->prev_frame_num = src->prev_frame_num;
3561 dst->short_ref_count = src->short_ref_count;
3563 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3564 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3565 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3566 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3568 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3569 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3573 * decodes a slice header.
3574 * This will also call MPV_common_init() and frame_start() as needed.
3576 * @param h h264context
3577 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3579 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/**
 * Decodes a slice header (see doc comment above this block): parses slice
 * type, PPS/SPS selection, picture geometry, field/frame structure, POC
 * syntax, reference counts and reordering, weight tables, ref pic marking,
 * CABAC init, QP and deblocking parameters; starts a new frame when this
 * is the first slice. Returns 0 on success, <0 on error, 1 when decoding
 * must fall back to sequential (deblocking across slices).
 * NOTE(review): incompletely extracted chunk — many error-`return` lines,
 * braces and else-branches between the visible lines are missing; code
 * left byte-identical.
 */
3581 static int decode_slice_header(H264Context *h, H264Context *h0){
3582 MpegEncContext * const s = &h->s;
3583 MpegEncContext * const s0 = &h0->s;
3584 unsigned int first_mb_in_slice;
3585 unsigned int pps_id;
3586 int num_ref_idx_active_override_flag;
3587 unsigned int slice_type, tmp, i, j;
3588 int default_ref_list_done = 0;
3589 int last_pic_structure;
3591 s->dropable= h->nal_ref_idc == 0;
/* non-reference slices may use the cheaper 2-tap qpel filters in FAST mode */
3593 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3594 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3595 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3597 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3598 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3601 first_mb_in_slice= get_ue_golomb(&s->gb);
3603 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3604 h0->current_slice = 0;
3605 if (!s0->first_field)
3606 s->current_picture_ptr= NULL;
/* slice_type values 5..9 mean "same type for whole picture" (fixed) */
3609 slice_type= get_ue_golomb(&s->gb);
3611 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3616 h->slice_type_fixed=1;
3618 h->slice_type_fixed=0;
3620 slice_type= golomb_to_pict_type[ slice_type ];
3621 if (slice_type == FF_I_TYPE
3622 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3623 default_ref_list_done = 1;
3625 h->slice_type= slice_type;
3626 h->slice_type_nos= slice_type & 3;
3628 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3629 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3630 av_log(h->s.avctx, AV_LOG_ERROR,
3631 "B picture before any references, skipping\n");
/* activate the referenced PPS and its SPS (copied by value into h) */
3635 pps_id= get_ue_golomb(&s->gb);
3636 if(pps_id>=MAX_PPS_COUNT){
3637 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3640 if(!h0->pps_buffers[pps_id]) {
3641 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3644 h->pps= *h0->pps_buffers[pps_id];
3646 if(!h0->sps_buffers[h->pps.sps_id]) {
3647 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3650 h->sps = *h0->sps_buffers[h->pps.sps_id];
3652 if(h == h0 && h->dequant_coeff_pps != pps_id){
3653 h->dequant_coeff_pps = pps_id;
3654 init_dequant_tables(h);
/* derive picture geometry from the SPS (mb units and cropped pixels) */
3657 s->mb_width= h->sps.mb_width;
3658 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3660 h->b_stride= s->mb_width*4;
3661 h->b8_stride= s->mb_width*2;
3663 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3664 if(h->sps.frame_mbs_only_flag)
3665 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3667 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3669 if (s->context_initialized
3670 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3672 return -1; // width / height changed during parallelized decoding
3674 flush_dpb(s->avctx);
3677 if (!s->context_initialized) {
3679 return -1; // we cant (re-)initialize context during parallel decoding
3680 if (MPV_common_init(s) < 0)
3684 init_scan_tables(h);
/* allocate and initialize per-thread H264 contexts on first init */
3687 for(i = 1; i < s->avctx->thread_count; i++) {
3689 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3690 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3691 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3694 init_scan_tables(c);
3698 for(i = 0; i < s->avctx->thread_count; i++)
3699 if(context_init(h->thread_context[i]) < 0)
3702 s->avctx->width = s->width;
3703 s->avctx->height = s->height;
3704 s->avctx->sample_aspect_ratio= h->sps.sar;
3705 if(!s->avctx->sample_aspect_ratio.den)
3706 s->avctx->sample_aspect_ratio.den = 1;
3708 if(h->sps.timing_info_present_flag){
3709 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* old x264 builds wrote half the correct time_scale — compensate */
3710 if(h->x264_build > 0 && h->x264_build < 44)
3711 s->avctx->time_base.den *= 2;
3712 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3713 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3717 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* field/frame structure of this picture */
3720 h->mb_aff_frame = 0;
3721 last_pic_structure = s0->picture_structure;
3722 if(h->sps.frame_mbs_only_flag){
3723 s->picture_structure= PICT_FRAME;
3725 if(get_bits1(&s->gb)) { //field_pic_flag
3726 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3728 s->picture_structure= PICT_FRAME;
3729 h->mb_aff_frame = h->sps.mb_aff;
3732 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3734 if(h0->current_slice == 0){
/* conceal frame_num gaps by generating placeholder reference frames */
3735 while(h->frame_num != h->prev_frame_num &&
3736 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3737 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3739 h->prev_frame_num++;
3740 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3741 s->current_picture_ptr->frame_num= h->prev_frame_num;
3742 execute_ref_pic_marking(h, NULL, 0);
3745 /* See if we have a decoded first field looking for a pair... */
3746 if (s0->first_field) {
3747 assert(s0->current_picture_ptr);
3748 assert(s0->current_picture_ptr->data[0]);
3749 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3751 /* figure out if we have a complementary field pair */
3752 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3754 * Previous field is unmatched. Don't display it, but let it
3755 * remain for reference if marked as such.
3757 s0->current_picture_ptr = NULL;
3758 s0->first_field = FIELD_PICTURE;
3761 if (h->nal_ref_idc &&
3762 s0->current_picture_ptr->reference &&
3763 s0->current_picture_ptr->frame_num != h->frame_num) {
3765 * This and previous field were reference, but had
3766 * different frame_nums. Consider this field first in
3767 * pair. Throw away previous field except for reference
3770 s0->first_field = 1;
3771 s0->current_picture_ptr = NULL;
3774 /* Second field in complementary pair */
3775 s0->first_field = 0;
3780 /* Frame or first field in a potentially complementary pair */
3781 assert(!s0->current_picture_ptr);
3782 s0->first_field = FIELD_PICTURE;
3785 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3786 s0->first_field = 0;
3793 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3795 assert(s->mb_num == s->mb_width * s->mb_height);
3796 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3797 first_mb_in_slice >= s->mb_num){
3798 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3801 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3802 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3803 if (s->picture_structure == PICT_BOTTOM_FIELD)
3804 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3805 assert(s->mb_y < s->mb_height);
/* pic num space doubles in field mode (spec 8.2.4.1) */
3807 if(s->picture_structure==PICT_FRAME){
3808 h->curr_pic_num= h->frame_num;
3809 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3811 h->curr_pic_num= 2*h->frame_num + 1;
3812 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3815 if(h->nal_unit_type == NAL_IDR_SLICE){
3816 get_ue_golomb(&s->gb); /* idr_pic_id */
/* POC-related slice header syntax, by poc_type */
3819 if(h->sps.poc_type==0){
3820 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3822 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3823 h->delta_poc_bottom= get_se_golomb(&s->gb);
3827 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3828 h->delta_poc[0]= get_se_golomb(&s->gb);
3830 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3831 h->delta_poc[1]= get_se_golomb(&s->gb);
3836 if(h->pps.redundant_pic_cnt_present){
3837 h->redundant_pic_count= get_ue_golomb(&s->gb);
3840 //set defaults, might be overridden a few lines later
3841 h->ref_count[0]= h->pps.ref_count[0];
3842 h->ref_count[1]= h->pps.ref_count[1];
3844 if(h->slice_type_nos != FF_I_TYPE){
3845 if(h->slice_type_nos == FF_B_TYPE){
3846 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3848 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3850 if(num_ref_idx_active_override_flag){
3851 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3852 if(h->slice_type_nos==FF_B_TYPE)
3853 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned trick also catches ref_count==0 after the -1 */
3855 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3856 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3857 h->ref_count[0]= h->ref_count[1]= 1;
3861 if(h->slice_type_nos == FF_B_TYPE)
3868 if(!default_ref_list_done){
3869 fill_default_ref_list(h);
3872 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3875 if(h->slice_type_nos!=FF_I_TYPE){
3876 s->last_picture_ptr= &h->ref_list[0][0];
3877 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3879 if(h->slice_type_nos==FF_B_TYPE){
3880 s->next_picture_ptr= &h->ref_list[1][0];
3881 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* explicit weights for weighted P / bipred idc 1, implicit for idc 2 */
3884 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3885 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3886 pred_weight_table(h);
3887 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3888 implicit_weight_table(h);
3893 decode_ref_pic_marking(h0, &s->gb);
3896 fill_mbaff_ref_list(h);
3898 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3899 direct_dist_scale_factor(h);
3900 direct_ref_list_init(h);
3902 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3903 tmp = get_ue_golomb(&s->gb);
3905 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3908 h->cabac_init_idc= tmp;
3911 h->last_qscale_diff = 0;
3912 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3914 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3918 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3919 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3920 //FIXME qscale / qp ... stuff
3921 if(h->slice_type == FF_SP_TYPE){
3922 get_bits1(&s->gb); /* sp_for_switch_flag */
3924 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3925 get_se_golomb(&s->gb); /* slice_qs_delta */
/* deblocking filter parameters */
3928 h->deblocking_filter = 1;
3929 h->slice_alpha_c0_offset = 0;
3930 h->slice_beta_offset = 0;
3931 if( h->pps.deblocking_filter_parameters_present ) {
3932 tmp= get_ue_golomb(&s->gb);
3934 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3937 h->deblocking_filter= tmp;
/* bitstream idc 0=on,1=off -> internal 1=on,0=off */
3938 if(h->deblocking_filter < 2)
3939 h->deblocking_filter^= 1; // 1<->0
3941 if( h->deblocking_filter ) {
3942 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3943 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3947 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3948 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3949 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3950 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3951 h->deblocking_filter= 0;
3953 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3954 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3955 /* Cheat slightly for speed:
3956 Do not bother to deblock across slices. */
3957 h->deblocking_filter = 2;
3959 h0->max_contexts = 1;
3960 if(!h0->single_decode_warning) {
3961 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3962 h0->single_decode_warning = 1;
3965 return 1; // deblocking switched inside frame
/* NOTE(review): the `?` bit count below suggests this statement is
 * unfinished/disabled code (likely guarded by #if 0 on lines missing
 * from this chunk) — confirm against the complete source */
3970 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3971 slice_group_change_cycle= get_bits(&s->gb, ?);
3974 h0->last_slice_type = slice_type;
3975 h->slice_num = ++h0->current_slice;
3976 if(h->slice_num >= MAX_SLICES){
3977 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* build the per-slice ref->frame mapping used by the loop filter */
3981 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3985 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3986 +(h->ref_list[j][i].reference&3);
3989 for(i=16; i<48; i++)
3990 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3991 +(h->ref_list[j][i].reference&3);
3994 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3995 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3997 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3998 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4000 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4002 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4003 pps_id, h->frame_num,
4004 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4005 h->ref_count[0], h->ref_count[1],
4007 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4009 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4010 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: counts leading zero bits before the first
 * set bit using the cached bitstream reader macros. The return statement
 * is on a line missing from this extracted chunk (presumably log-1 —
 * TODO confirm against the complete source).
 */
4020 static inline int get_level_prefix(GetBitContext *gb){
4024 OPEN_READER(re, gb);
4025 UPDATE_CACHE(re, gb);
4026 buf=GET_CACHE(re, gb);
/* position of the first 1 bit from the MSB side of the 32-bit cache */
4028 log= 32 - av_log2(buf);
4030 print_bin(buf>>(32-log), log);
4031 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4034 LAST_SKIP_BITS(re, gb, log);
4035 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * disallowed if any sub-partition is smaller than 8x8, or if a direct
 * sub-partition is present without direct_8x8_inference. The surrounding
 * loop and return statements are on lines missing from this extracted
 * chunk.
 */
4040 static inline int get_dct8x8_allowed(H264Context *h){
4043 if(!IS_SUB_8X8(h->sub_mb_type[i])
4044 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4051 * decodes a residual block.
4052 * @param n block index
4053 * @param scantable scantable
4054 * @param max_coeff number of coefficients in the block
4055 * @return <0 if an error occurred
/* CAVLC residual decoding: reads coeff_token (trailing ones + total coeffs),
 * the level of each coefficient, total_zeros and run_before, then scatters
 * the levels into 'block' via 'scantable', optionally dequantizing with
 * 'qmul'. NOTE(review): this chunk is missing several original lines
 * (else-branches, closing braces); comments added only, code untouched. */
4057 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4058     MpegEncContext * const s = &h->s;
4059     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4061     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4063     //FIXME put trailing_ones into the context
4065     if(n == CHROMA_DC_BLOCK_INDEX){
4066         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4067         total_coeff= coeff_token>>2;
4069         if(n == LUMA_DC_BLOCK_INDEX){
4070             total_coeff= pred_non_zero_count(h, 0);
4071             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4072             total_coeff= coeff_token>>2;
4074             total_coeff= pred_non_zero_count(h, n);
4075             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4076             total_coeff= coeff_token>>2;
4077             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4081     //FIXME set last_non_zero?
4085     if(total_coeff > (unsigned)max_coeff) {
4086         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4090     trailing_ones= coeff_token&3;
4091     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4092     assert(total_coeff<=16);
     /* trailing ones carry only a sign bit each; decode up to 3 at once */
4094     i = show_bits(gb, 3);
4095     skip_bits(gb, trailing_ones);
4096     level[0] = 1-((i&4)>>1);
4097     level[1] = 1-((i&2)   );
4098     level[2] = 1-((i&1)<<1);
4100     if(trailing_ones<total_coeff) {
4101         int level_code, mask;
4102         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4103         int prefix= get_level_prefix(gb);
4105         //first coefficient has suffix_length equal to 0 or 1
4106         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4108                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4110                 level_code= (prefix<<suffix_length); //part
4111         }else if(prefix==14){
4113                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4115                 level_code= prefix + get_bits(gb, 4); //part
4117             level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4118             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4120                 level_code += (1<<(prefix-3))-4096;
4123         if(trailing_ones < 3) level_code += 2;
         /* map unsigned level_code to signed level: even -> positive */
4128         mask= -(level_code&1);
4129         level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4131         //remaining coefficients have suffix_length > 0
4132         for(i=trailing_ones+1;i<total_coeff;i++) {
4133             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4134             prefix = get_level_prefix(gb);
4136                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4138                 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4140                     level_code += (1<<(prefix-3))-4096;
4142             mask= -(level_code&1);
4143             level[i]= (((2+level_code)>>1) ^ mask) - mask;
             /* adaptively grow suffix_length as levels get larger */
4144             if(level_code > suffix_limit[suffix_length])
4149     if(total_coeff == max_coeff)
4152         if(n == CHROMA_DC_BLOCK_INDEX)
4153             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4155             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4158     coeff_num = zeros_left + total_coeff - 1;
4159     j = scantable[coeff_num];
     /* first path: no dequantization (qmul == NULL, e.g. chroma DC) */
4161         block[j] = level[0];
4162         for(i=1;i<total_coeff;i++) {
4165             else if(zeros_left < 7){
4166                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4168                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4170             zeros_left -= run_before;
4171             coeff_num -= 1 + run_before;
4172             j= scantable[ coeff_num ];
     /* second path: dequantize with qmul (rounded >>6 fixed-point scale) */
4177         block[j] = (level[0] * qmul[j] + 32)>>6;
4178         for(i=1;i<total_coeff;i++) {
4181             else if(zeros_left < 7){
4182                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4184                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4186             zeros_left -= run_before;
4187             coeff_num -= 1 + run_before;
4188             j= scantable[ coeff_num ];
4190             block[j]= (level[i] * qmul[j] + 32)>>6;
4195         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* For a skipped MB pair in MBAFF, predict the field/frame decoding flag
 * from the left neighbour (preferred) or the top neighbour, falling back
 * when the neighbour belongs to a different slice. */
4202 static void predict_field_decoding_flag(H264Context *h){
4203     MpegEncContext * const s = &h->s;
4204     const int mb_xy= h->mb_xy;
4205     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4206                 ? s->current_picture.mb_type[mb_xy-1]
4207                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4208                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4210     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4214 * decodes a P_SKIP or B_SKIP macroblock
/* Reconstructs a skipped macroblock: clears the coefficient caches, builds
 * a synthetic mb_type, derives the motion (direct prediction for B_SKIP,
 * pskip prediction for P_SKIP) and writes the result back to the picture.
 * NOTE(review): several original lines (else branches/braces) are missing
 * from this chunk. */
4216 static void decode_mb_skip(H264Context *h){
4217     MpegEncContext * const s = &h->s;
4218     const int mb_xy= h->mb_xy;
4221     memset(h->non_zero_count[mb_xy], 0, 16);
4222     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4225         mb_type|= MB_TYPE_INTERLACED;
4227     if( h->slice_type_nos == FF_B_TYPE )
4229         // just for fill_caches. pred_direct_motion will set the real mb_type
4230         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4232         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4233         pred_direct_motion(h, &mb_type);
4234         mb_type|= MB_TYPE_SKIP;
4239         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4241         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4242         pred_pskip_motion(h, &mx, &my);
4243         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4244         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4247     write_back_motion(h, mb_type);
4248     s->current_picture.mb_type[mb_xy]= mb_type;
4249     s->current_picture.qscale_table[mb_xy]= s->qscale;
4250     h->slice_table[ mb_xy ]= h->slice_num;
4251     h->prev_mb_skipped= 1;
4255 * decodes a macroblock
4256  * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Decodes one macroblock from a CAVLC-coded slice: skip-run handling,
 * mb_type, intra prediction modes or inter motion data, CBP, delta-QP,
 * and finally the residual blocks via decode_residual().
 * NOTE(review): this chunk is missing many original lines (else branches,
 * closing braces, some statements); comments added only, code untouched. */
4258 static int decode_mb_cavlc(H264Context *h){
4259     MpegEncContext * const s = &h->s;
4261     int partition_count;
4262     unsigned int mb_type, cbp;
4263     int dct8x8_allowed= h->pps.transform_8x8_mode;
4265     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4267     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4269     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4270     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
     /* ---- skip-run handling (P/B slices only) ---- */
4272     if(h->slice_type_nos != FF_I_TYPE){
4273         if(s->mb_skip_run==-1)
4274             s->mb_skip_run= get_ue_golomb(&s->gb);
4276         if (s->mb_skip_run--) {
4277             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4278                 if(s->mb_skip_run==0)
4279                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4281                     predict_field_decoding_flag(h);
4288         if( (s->mb_y&1) == 0 )
4289             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4292     h->prev_mb_skipped= 0;
     /* ---- mb_type: remap the coded value per slice type ---- */
4294     mb_type= get_ue_golomb(&s->gb);
4295     if(h->slice_type_nos == FF_B_TYPE){
4297             partition_count= b_mb_type_info[mb_type].partition_count;
4298             mb_type= b_mb_type_info[mb_type].type;
4301             goto decode_intra_mb;
4303     }else if(h->slice_type_nos == FF_P_TYPE){
4305             partition_count= p_mb_type_info[mb_type].partition_count;
4306             mb_type= p_mb_type_info[mb_type].type;
4309             goto decode_intra_mb;
4312        assert(h->slice_type_nos == FF_I_TYPE);
4313         if(h->slice_type == FF_SI_TYPE && mb_type)
4317             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4321         cbp= i_mb_type_info[mb_type].cbp;
4322         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4323         mb_type= i_mb_type_info[mb_type].type;
4327         mb_type |= MB_TYPE_INTERLACED;
4329     h->slice_table[ mb_xy ]= h->slice_num;
     /* ---- I_PCM: raw byte-aligned samples, no prediction/transform ---- */
4331     if(IS_INTRA_PCM(mb_type)){
4334         // We assume these blocks are very rare so we do not optimize it.
4335         align_get_bits(&s->gb);
4337         // The pixels are stored in the same order as levels in h->mb array.
4338         for(x=0; x < (CHROMA ? 384 : 256); x++){
4339             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4342         // In deblocking, the quantizer is 0
4343         s->current_picture.qscale_table[mb_xy]= 0;
4344         // All coeffs are present
4345         memset(h->non_zero_count[mb_xy], 16, 16);
4347         s->current_picture.mb_type[mb_xy]= mb_type;
     /* MBAFF field MB: ref counts are doubled (fields), restored at the end */
4352         h->ref_count[0] <<= 1;
4353         h->ref_count[1] <<= 1;
4356     fill_caches(h, mb_type, 0);
     /* ---- intra prediction modes ---- */
4359     if(IS_INTRA(mb_type)){
4361 //            init_top_left_availability(h);
4362         if(IS_INTRA4x4(mb_type)){
4365             if(dct8x8_allowed && get_bits1(&s->gb)){
4366                 mb_type |= MB_TYPE_8x8DCT;
4370 //                fill_intra4x4_pred_table(h);
4371             for(i=0; i<16; i+=di){
4372                 int mode= pred_intra_mode(h, i);
4374                 if(!get_bits1(&s->gb)){
4375                     const int rem_mode= get_bits(&s->gb, 3);
                     /* rem_mode skips the predicted mode (per the spec) */
4376                     mode = rem_mode + (rem_mode >= mode);
4380                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4382                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4384             write_back_intra_pred_mode(h);
4385             if( check_intra4x4_pred_mode(h) < 0)
4388             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4389             if(h->intra16x16_pred_mode < 0)
4393             pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4396             h->chroma_pred_mode= pred_mode;
     /* ---- inter, 8x8 partitions: sub_mb_type + refs + MVs ---- */
4398     }else if(partition_count==4){
4399         int i, j, sub_partition_count[4], list, ref[2][4];
4401         if(h->slice_type_nos == FF_B_TYPE){
4403                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4404                 if(h->sub_mb_type[i] >=13){
4405                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4408                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4409                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4411             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4412                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4413                 pred_direct_motion(h, &mb_type);
4414                 h->ref_cache[0][scan8[4]] =
4415                 h->ref_cache[1][scan8[4]] =
4416                 h->ref_cache[0][scan8[12]] =
4417                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4420             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4422                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4423                 if(h->sub_mb_type[i] >=4){
4424                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4427                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4428                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4432         for(list=0; list<h->list_count; list++){
4433             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4435                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4436                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4437                     unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4439                         av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4451             dct8x8_allowed = get_dct8x8_allowed(h);
4453         for(list=0; list<h->list_count; list++){
4455                 if(IS_DIRECT(h->sub_mb_type[i])) {
4456                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4459                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4460                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4462                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4463                     const int sub_mb_type= h->sub_mb_type[i];
4464                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4465                     for(j=0; j<sub_partition_count[i]; j++){
4467                         const int index= 4*i + block_width*j;
4468                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4469                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4470                         mx += get_se_golomb(&s->gb);
4471                         my += get_se_golomb(&s->gb);
4472                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
                     /* replicate the MV over the cells the partition covers */
4474                         if(IS_SUB_8X8(sub_mb_type)){
4476                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4478                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4479                         }else if(IS_SUB_8X4(sub_mb_type)){
4480                             mv_cache[ 1 ][0]= mx;
4481                             mv_cache[ 1 ][1]= my;
4482                         }else if(IS_SUB_4X8(sub_mb_type)){
4483                             mv_cache[ 8 ][0]= mx;
4484                             mv_cache[ 8 ][1]= my;
4486                             mv_cache[ 0 ][0]= mx;
4487                             mv_cache[ 0 ][1]= my;
4490                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4496     }else if(IS_DIRECT(mb_type)){
4497         pred_direct_motion(h, &mb_type);
4498         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     /* ---- inter, 16x16 / 16x8 / 8x16 partitions ---- */
4500         int list, mx, my, i;
4501         //FIXME we should set ref_idx_l? to 0 if we use that later ...
4502         if(IS_16X16(mb_type)){
4503             for(list=0; list<h->list_count; list++){
4505                     if(IS_DIR(mb_type, 0, list)){
4506                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4507                         if(val >= h->ref_count[list]){
4508                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4512                     val= LIST_NOT_USED&0xFF;
4513                 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4515             for(list=0; list<h->list_count; list++){
4517                 if(IS_DIR(mb_type, 0, list)){
4518                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4519                     mx += get_se_golomb(&s->gb);
4520                     my += get_se_golomb(&s->gb);
4521                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4523                     val= pack16to32(mx,my);
4526                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4529         else if(IS_16X8(mb_type)){
4530             for(list=0; list<h->list_count; list++){
4533                         if(IS_DIR(mb_type, i, list)){
4534                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4535                             if(val >= h->ref_count[list]){
4536                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4540                         val= LIST_NOT_USED&0xFF;
4541                     fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4544             for(list=0; list<h->list_count; list++){
4547                     if(IS_DIR(mb_type, i, list)){
4548                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4549                         mx += get_se_golomb(&s->gb);
4550                         my += get_se_golomb(&s->gb);
4551                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4553                         val= pack16to32(mx,my);
4556                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4560             assert(IS_8X16(mb_type));
4561             for(list=0; list<h->list_count; list++){
4564                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4565                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4566                             if(val >= h->ref_count[list]){
4567                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4571                         val= LIST_NOT_USED&0xFF;
4572                     fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4575             for(list=0; list<h->list_count; list++){
4578                     if(IS_DIR(mb_type, i, list)){
4579                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4580                         mx += get_se_golomb(&s->gb);
4581                         my += get_se_golomb(&s->gb);
4582                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4584                         val= pack16to32(mx,my);
4587                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4593     if(IS_INTER(mb_type))
4594         write_back_motion(h, mb_type);
     /* ---- coded block pattern ---- */
4596     if(!IS_INTRA16x16(mb_type)){
4597         cbp= get_ue_golomb(&s->gb);
4599             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4604             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4605             else                     cbp= golomb_to_inter_cbp   [cbp];
4607             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4608             else                     cbp= golomb_to_inter_cbp_gray[cbp];
4613     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4614         if(get_bits1(&s->gb)){
4615             mb_type |= MB_TYPE_8x8DCT;
4616             h->cbp_table[mb_xy]= cbp;
4619     s->current_picture.mb_type[mb_xy]= mb_type;
     /* ---- residuals: delta-QP then luma/chroma coefficient blocks ---- */
4621     if(cbp || IS_INTRA16x16(mb_type)){
4622         int i8x8, i4x4, chroma_idx;
4624         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4625         const uint8_t *scan, *scan8x8, *dc_scan;
4627 //        fill_non_zero_count_cache(h);
4629         if(IS_INTERLACED(mb_type)){
4630             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4631             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4632             dc_scan= luma_dc_field_scan;
4634             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4635             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4636             dc_scan= luma_dc_zigzag_scan;
4639         dquant= get_se_golomb(&s->gb);
4641         if( dquant > 25 || dquant < -26 ){
4642             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4646         s->qscale += dquant;
         /* wrap qscale into [0,51] per the spec's modulo arithmetic */
4647         if(((unsigned)s->qscale) > 51){
4648             if(s->qscale<0) s->qscale+= 52;
4649             else            s->qscale-= 52;
4652         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4653         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4654         if(IS_INTRA16x16(mb_type)){
4655             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4656                 return -1; //FIXME continue if partitioned and other return -1 too
4659             assert((cbp&15) == 0 || (cbp&15) == 15);
4662                 for(i8x8=0; i8x8<4; i8x8++){
4663                     for(i4x4=0; i4x4<4; i4x4++){
4664                         const int index= i4x4 + 4*i8x8;
4665                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4671                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4674             for(i8x8=0; i8x8<4; i8x8++){
4675                 if(cbp & (1<<i8x8)){
4676                     if(IS_8x8DCT(mb_type)){
4677                         DCTELEM *buf = &h->mb[64*i8x8];
4679                         for(i4x4=0; i4x4<4; i4x4++){
4680                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4681                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4684                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4685                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4687                         for(i4x4=0; i4x4<4; i4x4++){
4688                             const int index= i4x4 + 4*i8x8;
4690                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4696                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4697                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4703             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4704                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4710             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4711                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4712                 for(i4x4=0; i4x4<4; i4x4++){
4713                     const int index= 16 + 4*chroma_idx + i4x4;
4714                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4720             uint8_t * const nnz= &h->non_zero_count_cache[0];
4721             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4722             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4725         uint8_t * const nnz= &h->non_zero_count_cache[0];
4726         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4727         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4728         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4730     s->current_picture.qscale_table[mb_xy]= s->qscale;
4731     write_back_non_zero_count(h);
     /* undo the MBAFF field ref-count doubling from above */
4734         h->ref_count[0] >>= 1;
4735         h->ref_count[1] >>= 1;
/* CABAC: decode mb_field_decoding_flag for an MB pair; the context (0..2)
 * counts how many of the left/top neighbouring pairs are field-coded. */
4741 static int decode_cabac_field_decoding_flag(H264Context *h) {
4742     MpegEncContext * const s = &h->s;
4743     const int mb_x = s->mb_x;
4744     const int mb_y = s->mb_y & ~1;  /* top MB of the current pair */
4745     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
4746     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
4748     unsigned int ctx = 0;
4750     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4753     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4757     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* CABAC: decode an intra mb_type starting at context 'ctx_base'.
 * Returns 0 (I4x4), 25 (I_PCM), or 1..24 encoding the I16x16 variant
 * (pred mode, chroma cbp, luma cbp). 'intra_slice' selects the extra
 * neighbour-based context used only in I slices. */
4760 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4761     uint8_t *state= &h->cabac_state[ctx_base];
4765         MpegEncContext * const s = &h->s;
4766         const int mba_xy = h->left_mb_xy[0];
4767         const int mbb_xy = h->top_mb_xy;
4769         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4771         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4773         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4774             return 0;   /* I4x4 */
4777         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4778             return 0;   /* I4x4 */
4781     if( get_cabac_terminate( &h->cabac ) )
4782         return 25;  /* PCM */
4784     mb_type = 1; /* I16x16 */
4785     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4786     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4787         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4788     mb_type +=  2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4789     mb_type +=  1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* CABAC: decode mb_type for the current slice type. For I slices this is
 * just the intra tree; for P slices a small binary tree selects among
 * P_L0_16x16/P_8x8/P_L0_8x16/P_L0_16x8 or falls through to intra; for B
 * slices a 4-bit (sometimes 5-bit) suffix tree maps onto the B mb_types.
 * NOTE(review): some lines (else branches/braces) are missing from this
 * chunk. */
4793 static int decode_cabac_mb_type( H264Context *h ) {
4794     MpegEncContext * const s = &h->s;
4796     if( h->slice_type_nos == FF_I_TYPE ) {
4797         return decode_cabac_intra_mb_type(h, 3, 1);
4798     } else if( h->slice_type_nos == FF_P_TYPE ) {
4799         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4801             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4802                 /* P_L0_D16x16, P_8x8 */
4803                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4805                 /* P_L0_D8x16, P_L0_D16x8 */
4806                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4809             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4812         const int mba_xy = h->left_mb_xy[0];
4813         const int mbb_xy = h->top_mb_xy;
4816         assert(h->slice_type_nos == FF_B_TYPE);
         /* ctx counts non-direct neighbours in the same slice */
4818         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4820         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4823         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4824             return 0; /* B_Direct_16x16 */
4826         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4827             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4830         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4831         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4832         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4833         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4835             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4836         else if( bits == 13 ) {
4837             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4838         } else if( bits == 14 )
4839             return 11; /* B_L1_L0_8x16 */
4840         else if( bits == 15 )
4841             return 22; /* B_8x8 */
4843         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4844         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* CABAC: decode mb_skip_flag; ctx (0..2) counts non-skipped left/top
 * neighbours, with MBAFF-aware neighbour selection. B slices use a
 * separate context set (offset +13 via 11+ctx selection below). */
4848 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4849     MpegEncContext * const s = &h->s;
4853     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4854         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
         /* pick the neighbour MB of the pair with matching field/frame mode */
4857            && h->slice_table[mba_xy] == h->slice_num
4858            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4859             mba_xy += s->mb_stride;
4861             mbb_xy = mb_xy - s->mb_stride;
4863                && h->slice_table[mbb_xy] == h->slice_num
4864                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4865                 mbb_xy -= s->mb_stride;
4867             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4869         int mb_xy = h->mb_xy;
4871         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4874     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4876     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4879     if( h->slice_type_nos == FF_B_TYPE )
4881     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* CABAC: decode one intra4x4 prediction mode. A first bin selects "use
 * predicted mode"; otherwise 3 bins give rem_intra4x4_pred_mode, which
 * skips over the predicted mode (hence the >= adjustment). */
4884 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4887     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4890     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4891     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4892     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4894     if( mode >= pred_mode )
/* CABAC: decode intra_chroma_pred_mode (0..3) as a truncated unary code;
 * the first bin's context depends on the neighbours' chroma pred modes. */
4900 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4901     const int mba_xy = h->left_mb_xy[0];
4902     const int mbb_xy = h->top_mb_xy;
4906     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4907     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4910     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4913     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4916     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4918     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* CABAC: decode the 4 luma CBP bits (one per 8x8 block); each bin's context
 * depends on whether the left/top neighbouring 8x8 blocks are coded
 * (neighbour cbp set to -1, i.e. all-ones, when outside the slice). */
4924 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4925     int cbp_b, cbp_a, ctx, cbp = 0;
4927     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4928     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
4930     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4931     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4932     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
4933     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4934     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
4935     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4936     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
4937     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* CABAC: decode chroma CBP (0 = none, 1 = DC only, 2 = DC+AC) as two bins
 * whose contexts depend on the neighbours' chroma CBP values. */
4940 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4944     cbp_a = (h->left_cbp>>4)&0x03;
4945     cbp_b = (h-> top_cbp>>4)&0x03;
4948     if( cbp_a > 0 ) ctx++;
4949     if( cbp_b > 0 ) ctx += 2;
4950     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4954     if( cbp_a == 2 ) ctx++;
4955     if( cbp_b == 2 ) ctx += 2;
4956     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* CABAC: decode mb_qp_delta as a unary code; first bin's context depends
 * on whether the previous MB had a non-zero delta. The decoded magnitude
 * is mapped to a signed delta (odd -> positive, even -> negative). */
4958 static int decode_cabac_mb_dqp( H264Context *h) {
4962     if( h->last_qscale_diff != 0 )
4965     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4971         if(val > 102) //prevent infinite loop
4978         return -(val + 1)/2;
/* CABAC: decode sub_mb_type for a P-slice 8x8 partition via a 3-bin tree.
 * NOTE(review): the return statements of the leaves are not visible here. */
4980 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4981     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4983     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4985     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* CABAC: decode sub_mb_type for a B-slice 8x8 partition (0 = direct,
 * then a small bin tree over contexts 36..39 selecting the B sub types). */
4989 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4991     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4992         return 0;   /* B_Direct_8x8 */
4993     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4994         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4996     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4997         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4998             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5001     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5002     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC: decode transform_size_8x8_flag; context selected by how many
 * neighbours already use the 8x8 transform. */
5006 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5007     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* CABAC: decode a reference index as a unary code; the initial context
 * depends on the left/top cached ref indices (direct-predicted neighbours
 * don't count in B slices). Caps at 32 to guard against corrupt streams. */
5010 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5011     int refa = h->ref_cache[list][scan8[n] - 1];
5012     int refb = h->ref_cache[list][scan8[n] - 8];
5016     if( h->slice_type_nos == FF_B_TYPE) {
5017         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5019         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5028     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5034     if(ref >= 32 /*h->ref_list[list]*/){
/* CABAC: decode one motion-vector-difference component 'l' for block 'n'.
 * Context derives from the summed neighbour MVD magnitudes; magnitudes
 * >= 9 use an exp-golomb-style bypass suffix, and the sign is a final
 * bypass bin. Overflow is clamped with an error log. */
5041 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5042     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5043                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5044     int ctxbase = (l == 0) ? 40 : 47;
5049     else if( amvd > 32 )
5054     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5059     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5067         while( get_cabac_bypass( &h->cabac ) ) {
5071                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5076             if( get_cabac_bypass( &h->cabac ) )
5080     return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Derives the coded_block_flag context (ctx + 4*cat) from whether the
 * left/top neighbouring blocks of the same category have nonzero coeffs.
 * DC categories read neighbour info from the packed cbp table bits;
 * AC/4x4 categories read the non_zero_count cache. */
5083 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5089             nza = h->left_cbp&0x100;
5090             nzb = h-> top_cbp&0x100;
5092             nza = (h->left_cbp>>(6+idx))&0x01;
5093             nzb = (h-> top_cbp>>(6+idx))&0x01;
5097             nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5098             nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5100             assert(cat == 1 || cat == 2);
5101             nza = h->non_zero_count_cache[scan8[idx] - 1];
5102             nzb = h->non_zero_count_cache[scan8[idx] - 8];
5112     return ctx + 4 * cat;
/* Maps an 8x8-block scan position (0..62) to the context increment used
 * for the last_significant_coeff flag (shared with the asm decoder,
 * hence DECLARE_ASM_CONST). */
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5116     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5117     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5118     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5119     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* CABAC residual decoding: coded_block_flag, significance map (with a
 * special 8x8 path and optional x86 asm helpers), then coefficient
 * magnitudes/signs in reverse scan order, optionally dequantized with
 * 'qmul'. 'is_dc' is a compile-time flag so the compiler can specialize.
 * NOTE(review): some original lines are missing from this chunk; comments
 * added only, code untouched. */
5122 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5123     static const int significant_coeff_flag_offset[2][6] = {
5124       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5125       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5127     static const int last_coeff_flag_offset[2][6] = {
5128       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5129       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5131     static const int coeff_abs_level_m1_offset[6] = {
5132         227+0, 227+10, 227+20, 227+30, 227+39, 426
5134     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5135       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5136         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5137         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5138        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5139       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5140         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5141         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5142         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5144     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5145      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5146      * map node ctx => cabac ctx for level=1 */
5147     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5148     /* map node ctx => cabac ctx for level>1 */
5149     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5150     static const uint8_t coeff_abs_level_transition[2][8] = {
5151     /* update node ctx after decoding a level=1 */
5152         { 1, 2, 3, 3, 4, 5, 6, 7 },
5153     /* update node ctx after decoding a level>1 */
5154         { 4, 4, 4, 4, 5, 6, 7, 7 }
5160     int coeff_count = 0;
5163     uint8_t *significant_coeff_ctx_base;
5164     uint8_t *last_coeff_ctx_base;
5165     uint8_t *abs_level_m1_ctx_base;
5168 #define CABAC_ON_STACK
5170 #ifdef CABAC_ON_STACK
     /* local copy of the CABAC state so it can live in registers */
5173     cc.range     = h->cabac.range;
5174     cc.low       = h->cabac.low;
5175     cc.bytestream= h->cabac.bytestream;
5177 #define CC &h->cabac
5181     /* cat: 0-> DC 16x16  n = 0
5182      *      1-> AC 16x16  n = luma4x4idx
5183      *      2-> Luma4x4   n = luma4x4idx
5184      *      3-> DC Chroma n = iCbCr
5185      *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5186      *      5-> Luma8x8   n = 4 * luma8x8idx
5189     /* read coded block flag */
5190     if( is_dc || cat != 5 ) {
5191         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5194                 h->non_zero_count_cache[scan8[16+n]] = 0;
5196                 h->non_zero_count_cache[scan8[n]] = 0;
5199 #ifdef CABAC_ON_STACK
5200             h->cabac.range     = cc.range     ;
5201             h->cabac.low       = cc.low       ;
5202             h->cabac.bytestream= cc.bytestream;
5208     significant_coeff_ctx_base = h->cabac_state
5209         + significant_coeff_flag_offset[MB_FIELD][cat];
5210     last_coeff_ctx_base = h->cabac_state
5211         + last_coeff_flag_offset[MB_FIELD][cat];
5212     abs_level_m1_ctx_base = h->cabac_state
5213         + coeff_abs_level_m1_offset[cat];
5215     if( !is_dc && cat == 5 ) {
5216 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5217         for(last= 0; last < coefs; last++) { \
5218             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5219             if( get_cabac( CC, sig_ctx )) { \
5220                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5221                 index[coeff_count++] = last; \
5222                 if( get_cabac( CC, last_ctx ) ) { \
5228         if( last == max_coeff -1 ) {\
5229             index[coeff_count++] = last;\
5231         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5232 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5233         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5235         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5237         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5239         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5242     assert(coeff_count > 0);
     /* record "block has coefficients" in the per-MB caches */
5246             h->cbp_table[h->mb_xy] |= 0x100;
5248             h->cbp_table[h->mb_xy] |= 0x40 << n;
5251             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5253             h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5255             assert( cat == 1 || cat == 2 );
5256             h->non_zero_count_cache[scan8[n]] = coeff_count;
     /* decode magnitudes/signs in reverse scan order */
5261         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5263         int j= scantable[index[--coeff_count]];
5265         if( get_cabac( CC, ctx ) == 0 ) {
5266             node_ctx = coeff_abs_level_transition[0][node_ctx];
5268                 block[j] = get_cabac_bypass_sign( CC, -1);
5270                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5274             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5275             node_ctx = coeff_abs_level_transition[1][node_ctx];
5277             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
             /* magnitudes >= 15 continue with an exp-golomb bypass suffix */
5281             if( coeff_abs >= 15 ) {
5283                 while( get_cabac_bypass( CC ) ) {
5289                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5295                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5297                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5300     } while( coeff_count );
5301 #ifdef CABAC_ON_STACK
5302             h->cabac.range     = cc.range     ;
5303             h->cabac.low       = cc.low       ;
5304             h->cabac.bytestream= cc.bytestream;
5309 #ifndef CONFIG_SMALL
/*
 * Specialized wrappers around decode_cabac_residual_internal(): is_dc is
 * passed as a compile-time constant (1 / 0) so the compiler can dead-strip
 * the branch not taken inside the (always_inline) worker.  Only built when
 * CONFIG_SMALL is not set, i.e. when speed is favored over code size.
 */
5310 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5311 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-DC variant: is_dc == 0 selects the AC/luma/chroma-AC code paths. */
5314 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5315 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/*
 * Dispatch a CABAC residual decode.  cat 0 (luma DC) and cat 3 (chroma DC)
 * are DC blocks; every other cat is an AC/4x4/8x8 block.  The two visible
 * bodies are the CONFIG_SMALL and non-CONFIG_SMALL variants of the same
 * dispatch (preprocessor lines are elided in this view): one calls the
 * internal worker with a runtime is_dc flag, the other picks the
 * pre-specialized _dc/_nondc wrapper.
 */
5319 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5321 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5323 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5324 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/*
 * Compute the macroblock addresses of the top and left neighbors of the
 * current MB and store them in h->top_mb_xy / h->left_mb_xy[0].
 *
 * The default (progressive) neighbors are simply mb_xy - mb_stride (above)
 * and mb_xy - 1 (left).  The MBAFF branch below corrects both for
 * frame/field macroblock pairs, where the effective neighbor depends on
 * whether the current, top and left MB pairs are coded as frame or field.
 * NOTE(review): several lines of the MBAFF branch are elided in this view;
 * the visible adjustments only cover part of the pair logic.
 */
5328 static inline void compute_mb_neighbors(H264Context *h)
5330 MpegEncContext * const s = &h->s;
5331 const int mb_xy = h->mb_xy;
5332 h->top_mb_xy = mb_xy - s->mb_stride;
5333 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: addresses are computed per MB *pair* (two vertically adjacent MBs). */
5335 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5336 const int top_pair_xy = pair_xy - s->mb_stride;
5337 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5338 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5339 const int curr_mb_frame_flag = !MB_FIELD;
5340 const int bottom = (s->mb_y & 1);
5342 ? !curr_mb_frame_flag // bottom macroblock
5343 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5345 h->top_mb_xy -= s->mb_stride;
/* Left neighbor of a frame MB next to a field pair (or vice versa) is the
 * top MB of the left pair, not simply mb_xy - 1. */
5347 if (left_mb_frame_flag != curr_mb_frame_flag) {
5348 h->left_mb_xy[0] = pair_xy - 1;
5350 } else if (FIELD_PICTURE) {
/* In a field picture rows are interleaved, so "above" is two rows up. */
5351 h->top_mb_xy -= s->mb_stride;
5357 * decodes a macroblock
5358 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * CABAC macroblock decode: skip flags, MBAFF field flags, mb_type,
 * intra prediction modes or inter sub-types/refs/MVs, CBP, qscale delta
 * and finally the residual coefficients.  Mirrors decode_mb_cavlc().
 * NOTE(review): many interior lines (closing braces, #if/#else arms) are
 * elided in this extraction; comments below only annotate visible lines.
 */
5360 static int decode_mb_cabac(H264Context *h) {
5361 MpegEncContext * const s = &h->s;
5363 int mb_type, partition_count, cbp = 0;
5364 int dct8x8_allowed= h->pps.transform_8x8_mode;
5366 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5368 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5370 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (P/B slices only; I slices cannot be skipped) --- */
5371 if( h->slice_type_nos != FF_I_TYPE ) {
5373 /* a skipped mb needs the aff flag from the following mb */
5374 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5375 predict_field_decoding_flag(h);
5376 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5377 skip = h->next_mb_skipped;
5379 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5380 /* read skip flags */
5382 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5383 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5384 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5385 if(h->next_mb_skipped)
5386 predict_field_decoding_flag(h);
5388 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Reset per-MB CABAC context state for a skipped MB. */
5393 h->cbp_table[mb_xy] = 0;
5394 h->chroma_pred_mode_table[mb_xy] = 0;
5395 h->last_qscale_diff = 0;
5402 if( (s->mb_y&1) == 0 )
5404 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5407 h->prev_mb_skipped = 0;
/* --- mb_type --- */
5409 compute_mb_neighbors(h);
5410 mb_type = decode_cabac_mb_type( h );
5411 assert(mb_type >= 0);
5413 if( h->slice_type_nos == FF_B_TYPE ) {
5415 partition_count= b_mb_type_info[mb_type].partition_count;
5416 mb_type= b_mb_type_info[mb_type].type;
5419 goto decode_intra_mb;
5421 } else if( h->slice_type_nos == FF_P_TYPE ) {
5423 partition_count= p_mb_type_info[mb_type].partition_count;
5424 mb_type= p_mb_type_info[mb_type].type;
5427 goto decode_intra_mb;
5430 if(h->slice_type == FF_SI_TYPE && mb_type)
5432 assert(h->slice_type_nos == FF_I_TYPE);
5434 partition_count = 0;
5435 cbp= i_mb_type_info[mb_type].cbp;
5436 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5437 mb_type= i_mb_type_info[mb_type].type;
5440 mb_type |= MB_TYPE_INTERLACED;
5442 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw samples follow, bypassing CABAC --- */
5444 if(IS_INTRA_PCM(mb_type)) {
5447 // We assume these blocks are very rare so we do not optimize it.
5448 // FIXME The two following lines get the bitstream position in the cabac
5449 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5450 ptr= h->cabac.bytestream;
5451 if(h->cabac.low&0x1) ptr--;
5453 if(h->cabac.low&0x1FF) ptr--;
5456 // The pixels are stored in the same order as levels in h->mb array.
5457 memcpy(h->mb, ptr, 256); ptr+=256;
5459 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Re-initialize the CABAC decoder right after the raw PCM bytes. */
5462 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5464 // All blocks are present
5465 h->cbp_table[mb_xy] = 0x1ef;
5466 h->chroma_pred_mode_table[mb_xy] = 0;
5467 // In deblocking, the quantizer is 0
5468 s->current_picture.qscale_table[mb_xy]= 0;
5469 // All coeffs are present
5470 memset(h->non_zero_count[mb_xy], 16, 16);
5471 s->current_picture.mb_type[mb_xy]= mb_type;
5472 h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled while decoding a field MB of a pair
 * (undone near the end of this function). */
5477 h->ref_count[0] <<= 1;
5478 h->ref_count[1] <<= 1;
5481 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5483 if( IS_INTRA( mb_type ) ) {
5485 if( IS_INTRA4x4( mb_type ) ) {
5486 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5487 mb_type |= MB_TYPE_8x8DCT;
5488 for( i = 0; i < 16; i+=4 ) {
5489 int pred = pred_intra_mode( h, i );
5490 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5491 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5494 for( i = 0; i < 16; i++ ) {
5495 int pred = pred_intra_mode( h, i );
5496 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5498 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5501 write_back_intra_pred_mode(h);
5502 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5504 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5505 if( h->intra16x16_pred_mode < 0 ) return -1;
5508 h->chroma_pred_mode_table[mb_xy] =
5509 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5511 pred_mode= check_intra_pred_mode( h, pred_mode );
5512 if( pred_mode < 0 ) return -1;
5513 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions: sub-types, refs and MVs per sub-block --- */
5515 } else if( partition_count == 4 ) {
5516 int i, j, sub_partition_count[4], list, ref[2][4];
5518 if( h->slice_type_nos == FF_B_TYPE ) {
5519 for( i = 0; i < 4; i++ ) {
5520 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5521 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5522 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5524 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5525 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5526 pred_direct_motion(h, &mb_type);
5527 h->ref_cache[0][scan8[4]] =
5528 h->ref_cache[1][scan8[4]] =
5529 h->ref_cache[0][scan8[12]] =
5530 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5531 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5532 for( i = 0; i < 4; i++ )
5533 if( IS_DIRECT(h->sub_mb_type[i]) )
5534 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5538 for( i = 0; i < 4; i++ ) {
5539 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5540 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5541 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices for each 8x8 sub-block, validated against ref_count. */
5545 for( list = 0; list < h->list_count; list++ ) {
5546 for( i = 0; i < 4; i++ ) {
5547 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5548 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5549 if( h->ref_count[list] > 1 ){
5550 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5551 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5552 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5560 h->ref_cache[list][ scan8[4*i]+1 ]=
5561 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5566 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors: one MVD per sub-partition, spread into mv/mvd caches. */
5568 for(list=0; list<h->list_count; list++){
5570 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5571 if(IS_DIRECT(h->sub_mb_type[i])){
5572 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5576 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5577 const int sub_mb_type= h->sub_mb_type[i];
5578 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5579 for(j=0; j<sub_partition_count[i]; j++){
5582 const int index= 4*i + block_width*j;
5583 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5584 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5585 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5587 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5588 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5589 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5591 if(IS_SUB_8X8(sub_mb_type)){
5593 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5595 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5598 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5600 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5601 }else if(IS_SUB_8X4(sub_mb_type)){
5602 mv_cache[ 1 ][0]= mx;
5603 mv_cache[ 1 ][1]= my;
5605 mvd_cache[ 1 ][0]= mx - mpx;
5606 mvd_cache[ 1 ][1]= my - mpy;
5607 }else if(IS_SUB_4X8(sub_mb_type)){
5608 mv_cache[ 8 ][0]= mx;
5609 mv_cache[ 8 ][1]= my;
5611 mvd_cache[ 8 ][0]= mx - mpx;
5612 mvd_cache[ 8 ][1]= my - mpy;
5614 mv_cache[ 0 ][0]= mx;
5615 mv_cache[ 0 ][1]= my;
5617 mvd_cache[ 0 ][0]= mx - mpx;
5618 mvd_cache[ 0 ][1]= my - mpy;
5621 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5622 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5623 p[0] = p[1] = p[8] = p[9] = 0;
5624 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- inter, direct 16x16 --- */
5628 } else if( IS_DIRECT(mb_type) ) {
5629 pred_direct_motion(h, &mb_type);
5630 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5631 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5632 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter, 16x16 / 16x8 / 8x16 partitions --- */
5634 int list, mx, my, i, mpx, mpy;
5635 if(IS_16X16(mb_type)){
5636 for(list=0; list<h->list_count; list++){
5637 if(IS_DIR(mb_type, 0, list)){
5639 if(h->ref_count[list] > 1){
5640 ref= decode_cabac_mb_ref(h, list, 0);
5641 if(ref >= (unsigned)h->ref_count[list]){
5642 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5647 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5649 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5651 for(list=0; list<h->list_count; list++){
5652 if(IS_DIR(mb_type, 0, list)){
5653 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5655 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5656 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5657 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5659 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5660 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5662 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5665 else if(IS_16X8(mb_type)){
5666 for(list=0; list<h->list_count; list++){
5668 if(IS_DIR(mb_type, i, list)){
5670 if(h->ref_count[list] > 1){
5671 ref= decode_cabac_mb_ref( h, list, 8*i );
5672 if(ref >= (unsigned)h->ref_count[list]){
5673 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5678 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5680 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5683 for(list=0; list<h->list_count; list++){
5685 if(IS_DIR(mb_type, i, list)){
5686 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5687 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5688 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5689 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5691 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5692 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5694 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5695 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5700 assert(IS_8X16(mb_type));
5701 for(list=0; list<h->list_count; list++){
5703 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5705 if(h->ref_count[list] > 1){
5706 ref= decode_cabac_mb_ref( h, list, 4*i );
5707 if(ref >= (unsigned)h->ref_count[list]){
5708 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5713 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5715 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5718 for(list=0; list<h->list_count; list++){
5720 if(IS_DIR(mb_type, i, list)){
5721 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5722 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5723 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5725 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5726 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5727 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5729 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5730 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5737 if( IS_INTER( mb_type ) ) {
5738 h->chroma_pred_mode_table[mb_xy] = 0;
5739 write_back_motion( h, mb_type );
/* --- coded block pattern --- */
5742 if( !IS_INTRA16x16( mb_type ) ) {
5743 cbp = decode_cabac_mb_cbp_luma( h );
5745 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5748 h->cbp_table[mb_xy] = h->cbp = cbp;
5750 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5751 if( decode_cabac_mb_transform_size( h ) )
5752 mb_type |= MB_TYPE_8x8DCT;
5754 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: qscale delta, then DC/AC coefficient blocks --- */
5756 if( cbp || IS_INTRA16x16( mb_type ) ) {
5757 const uint8_t *scan, *scan8x8, *dc_scan;
5758 const uint32_t *qmul;
5761 if(IS_INTERLACED(mb_type)){
5762 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5763 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5764 dc_scan= luma_dc_field_scan;
5766 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5767 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5768 dc_scan= luma_dc_zigzag_scan;
5771 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5772 if( dqp == INT_MIN ){
5773 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into [0,51] (delta arithmetic is modular per spec). */
5777 if(((unsigned)s->qscale) > 51){
5778 if(s->qscale<0) s->qscale+= 52;
5779 else s->qscale-= 52;
5781 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5782 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5784 if( IS_INTRA16x16( mb_type ) ) {
5786 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5787 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5790 qmul = h->dequant4_coeff[0][s->qscale];
5791 for( i = 0; i < 16; i++ ) {
5792 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5793 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5796 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5800 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5801 if( cbp & (1<<i8x8) ) {
5802 if( IS_8x8DCT(mb_type) ) {
5803 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5804 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5806 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5807 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5808 const int index = 4*i8x8 + i4x4;
5809 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5811 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5812 //STOP_TIMER("decode_residual")
5816 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5817 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (cat 3) then chroma AC (cat 4) for both planes. */
5824 for( c = 0; c < 2; c++ ) {
5825 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5826 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5832 for( c = 0; c < 2; c++ ) {
5833 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5834 for( i = 0; i < 4; i++ ) {
5835 const int index = 16 + 4 * c + i;
5836 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5837 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5841 uint8_t * const nnz= &h->non_zero_count_cache[0];
5842 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5843 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5846 uint8_t * const nnz= &h->non_zero_count_cache[0];
5847 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5848 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5849 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5850 h->last_qscale_diff = 0;
5853 s->current_picture.qscale_table[mb_xy]= s->qscale;
5854 write_back_non_zero_count(h);
/* Undo the MBAFF ref_count doubling done earlier. */
5857 h->ref_count[0] >>= 1;
5858 h->ref_count[1] >>= 1;
/*
 * Deblock one vertical luma edge (16 pixels tall).
 * For bS < 4 the work is delegated to the DSP loop filter with a tc0 table
 * lookup per 4-pixel group (-1 disables filtering of that group).
 * The open-coded path below is the bS == 4 (strong, intra-edge) filter of
 * the H.264 spec: p0..p2 / q0..q2 are replaced by the long-tap averages
 * when the aggressive-filter condition |p0-q0| < (alpha>>2)+2 holds.
 */
5865 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5867 const int index_a = qp + h->slice_alpha_c0_offset;
5868 const int alpha = (alpha_table+52)[index_a];
5869 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5874 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5875 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5877 /* 16px edge length, because bS=4 is triggered by being at
5878 * the edge of an intra MB, so all 4 bS are the same */
5879 for( d = 0; d < 16; d++ ) {
5880 const int p0 = pix[-1];
5881 const int p1 = pix[-2];
5882 const int p2 = pix[-3];
5884 const int q0 = pix[0];
5885 const int q1 = pix[1];
5886 const int q2 = pix[2];
5888 if( FFABS( p0 - q0 ) < alpha &&
5889 FFABS( p1 - p0 ) < beta &&
5890 FFABS( q1 - q0 ) < beta ) {
5892 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5893 if( FFABS( p2 - p0 ) < beta)
5895 const int p3 = pix[-4];
5897 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5898 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5899 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5902 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5904 if( FFABS( q2 - q0 ) < beta)
5906 const int q3 = pix[3];
5908 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5909 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5910 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5913 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Fallback when the strong condition fails: weak 3-tap smoothing only. */
5917 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5918 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5920 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/*
 * Deblock one vertical chroma edge.  bS < 4: DSP chroma filter with
 * tc = tc0 + 1 (0 disables a group); bS == 4: DSP intra chroma filter.
 */
5926 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5928 const int index_a = qp + h->slice_alpha_c0_offset;
5929 const int alpha = (alpha_table+52)[index_a];
5930 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5935 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5936 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5938 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Vertical luma deblocking for an MBAFF edge: the 16 rows may belong to two
 * different field/frame pairings, so bS (8 entries) and qp (2 entries) are
 * indexed per row instead of per 4-row group, and the filter is applied
 * one row at a time rather than via the DSP helper.
 */
5942 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5944 for( i = 0; i < 16; i++, pix += stride) {
5950 int bS_index = (i >> 1);
5953 bS_index |= (i & 1);
5956 if( bS[bS_index] == 0 ) {
/* qp selection differs for field MBs (top/bottom half) vs frame MBs
 * (alternating rows). */
5960 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5961 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5962 alpha = (alpha_table+52)[index_a];
5963 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS 1..3: normal (clipped-delta) filter. */
5965 if( bS[bS_index] < 4 ) {
5966 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5967 const int p0 = pix[-1];
5968 const int p1 = pix[-2];
5969 const int p2 = pix[-3];
5970 const int q0 = pix[0];
5971 const int q1 = pix[1];
5972 const int q2 = pix[2];
5974 if( FFABS( p0 - q0 ) < alpha &&
5975 FFABS( p1 - p0 ) < beta &&
5976 FFABS( q1 - q0 ) < beta ) {
5980 if( FFABS( p2 - p0 ) < beta ) {
5981 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5984 if( FFABS( q2 - q0 ) < beta ) {
5985 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5989 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5990 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5991 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5992 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra-edge) filter, same formulas as filter_mb_edgev. */
5995 const int p0 = pix[-1];
5996 const int p1 = pix[-2];
5997 const int p2 = pix[-3];
5999 const int q0 = pix[0];
6000 const int q1 = pix[1];
6001 const int q2 = pix[2];
6003 if( FFABS( p0 - q0 ) < alpha &&
6004 FFABS( p1 - p0 ) < beta &&
6005 FFABS( q1 - q0 ) < beta ) {
6007 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6008 if( FFABS( p2 - p0 ) < beta)
6010 const int p3 = pix[-4];
6012 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6013 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6014 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6017 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6019 if( FFABS( q2 - q0 ) < beta)
6021 const int q3 = pix[3];
6023 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6024 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6025 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6028 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6032 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6033 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6035 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/*
 * Vertical chroma deblocking for an MBAFF edge (8 rows), row-by-row like
 * filter_mb_mbaff_edgev.  Chroma only touches p1/p0/q0/q1 and always uses
 * tc = tc0 + 1 for the normal filter, or the 3-tap average for bS == 4.
 */
6040 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6042 for( i = 0; i < 8; i++, pix += stride) {
6050 if( bS[bS_index] == 0 ) {
6054 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6055 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6056 alpha = (alpha_table+52)[index_a];
6057 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6059 if( bS[bS_index] < 4 ) {
6060 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6061 const int p0 = pix[-1];
6062 const int p1 = pix[-2];
6063 const int q0 = pix[0];
6064 const int q1 = pix[1];
6066 if( FFABS( p0 - q0 ) < alpha &&
6067 FFABS( p1 - p0 ) < beta &&
6068 FFABS( q1 - q0 ) < beta ) {
6069 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6071 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6072 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6073 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (simple weighted average, no clipping). */
6076 const int p0 = pix[-1];
6077 const int p1 = pix[-2];
6078 const int q0 = pix[0];
6079 const int q1 = pix[1];
6081 if( FFABS( p0 - q0 ) < alpha &&
6082 FFABS( p1 - p0 ) < beta &&
6083 FFABS( q1 - q0 ) < beta ) {
6085 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6086 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6087 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/*
 * Deblock one horizontal luma edge — the transposed counterpart of
 * filter_mb_edgev: pixel offsets are multiples of pix_next (== stride)
 * instead of +-1.  bS < 4 uses the DSP vertical luma filter; the
 * open-coded path is the bS == 4 strong filter.
 */
6093 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6095 const int index_a = qp + h->slice_alpha_c0_offset;
6096 const int alpha = (alpha_table+52)[index_a];
6097 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6098 const int pix_next = stride;
6103 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6104 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6106 /* 16px edge length, see filter_mb_edgev */
6107 for( d = 0; d < 16; d++ ) {
6108 const int p0 = pix[-1*pix_next];
6109 const int p1 = pix[-2*pix_next];
6110 const int p2 = pix[-3*pix_next];
6111 const int q0 = pix[0];
6112 const int q1 = pix[1*pix_next];
6113 const int q2 = pix[2*pix_next];
6115 if( FFABS( p0 - q0 ) < alpha &&
6116 FFABS( p1 - p0 ) < beta &&
6117 FFABS( q1 - q0 ) < beta ) {
6119 const int p3 = pix[-4*pix_next];
6120 const int q3 = pix[ 3*pix_next];
6122 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6123 if( FFABS( p2 - p0 ) < beta) {
6125 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6126 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6127 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6130 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6132 if( FFABS( q2 - q0 ) < beta) {
6134 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6135 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6136 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6139 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6143 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6144 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6146 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/*
 * Deblock one horizontal chroma edge — transposed counterpart of
 * filter_mb_edgecv, delegating to the DSP vertical chroma filters.
 */
6153 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6155 const int index_a = qp + h->slice_alpha_c0_offset;
6156 const int alpha = (alpha_table+52)[index_a];
6157 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6162 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6163 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6165 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Fast-path deblocking for one macroblock.  Falls back to the full
 * filter_mb() whenever the fast assumptions do not hold (picture edge,
 * missing DSP strength helper, per-plane chroma QP offsets, slice-boundary
 * deblocking mode 2, or CODEC_FLAG2_FAST not set — see FIXME below).
 * Otherwise computes averaged edge QPs, early-exits when all QPs are below
 * the no-op threshold, and applies the edge filters with either fixed
 * intra strengths or DSP-computed bS values.
 */
6169 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6170 MpegEncContext * const s = &h->s;
6171 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6173 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6177 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6178 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6179 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6180 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6181 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6184 assert(!FRAME_MBAFF);
/* Edge QPs are the average of this MB's QP and each neighbor's QP. */
6186 mb_type = s->current_picture.mb_type[mb_xy];
6187 qp = s->current_picture.qscale_table[mb_xy];
6188 qp0 = s->current_picture.qscale_table[mb_xy-1];
6189 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6190 qpc = get_chroma_qp( h, 0, qp );
6191 qpc0 = get_chroma_qp( h, 0, qp0 );
6192 qpc1 = get_chroma_qp( h, 0, qp1 );
6193 qp0 = (qp + qp0 + 1) >> 1;
6194 qp1 = (qp + qp1 + 1) >> 1;
6195 qpc0 = (qpc + qpc0 + 1) >> 1;
6196 qpc1 = (qpc + qpc1 + 1) >> 1;
6197 qp_thresh = 15 - h->slice_alpha_c0_offset;
/* Below this threshold alpha/beta are 0, so the filter is a no-op. */
6198 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6199 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: fixed strengths (4 on the MB boundary, 3 inside; 3 on the top
 * boundary in field pictures). */
6202 if( IS_INTRA(mb_type) ) {
6203 int16_t bS4[4] = {4,4,4,4};
6204 int16_t bS3[4] = {3,3,3,3};
6205 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6206 if( IS_8x8DCT(mb_type) ) {
6207 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6208 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6209 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6210 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6212 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6213 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6214 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6215 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6216 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6217 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6218 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6219 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6221 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6222 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6223 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6224 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6225 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6226 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6227 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6228 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per edge with the DSP helper (viewed as packed
 * 64-bit rows via bSv), then force strength 4/3 on boundaries that touch
 * an intra neighbor. */
6231 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6232 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6234 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6236 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6238 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6239 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6240 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6241 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6243 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6244 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6245 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6246 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6248 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6249 bSv[0][0] = 0x0004000400040004ULL;
6250 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6251 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): apply luma + chroma filters on one edge when any
 * of its bS values is non-zero; edge 0 uses the neighbor-averaged QP. */
6253 #define FILTER(hv,dir,edge)\
6254 if(bSv[dir][edge]) {\
6255 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6257 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6258 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6264 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Computes boundary strengths (bS) and applies the H.264 in-loop deblocking
 * filter to the edges of one macroblock in one direction.
 *
 * @param dir 0 = vertical edges (neighbour is the left MB),
 *            1 = horizontal edges (neighbour is the top MB).
 * Other parameters are per-MB state precomputed by the caller (filter_mb).
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
6284 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6285 MpegEncContext * const s = &h->s;
/* neighbouring MB across edge 0: left MB for dir==0, top MB for dir==1 */
6287 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6288 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm / ref2frmm: presumably map cached reference indices to frame
 * numbers for this MB's slice and the neighbour's slice respectively, so bS
 * compares actual reference pictures across slices -- TODO confirm */
6289 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6290 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* skip edge 0 entirely when the neighbour lies outside any slice
 * (slice_table entry 0xFFFF = unset) */
6291 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* a skipped 16x16 MB can only need its outer edge filtered */
6293 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6294 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6295 // how often to recheck mv-based bS when iterating between edges
6296 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6297 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6298 // how often to recheck mv-based bS when iterating along each edge
6299 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* edge 0 may already have been handled by the MBAFF special case in filter_mb() */
6301 if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries */
6305 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6308 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6309 && !IS_INTERLACED(mb_type)
6310 && IS_INTERLACED(mbm_type)
6312 // This is a special case in the norm where the filtering must
6313 // be done twice (one each of the field) even if we are in a
6314 // frame macroblock.
6316 static const int nnz_idx[4] = {4,5,6,3};
6317 unsigned int tmp_linesize = 2 * linesize;
6318 unsigned int tmp_uvlinesize = 2 * uvlinesize;
/* start from the MB pair two rows up; one field MB per iteration below */
6319 int mbn_xy = mb_xy - 2 * s->mb_stride;
6324 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6325 if( IS_INTRA(mb_type) ||
6326 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6327 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6329 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6330 for( i = 0; i < 4; i++ ) {
6331 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6332 mbn_nnz[nnz_idx[i]] != 0 )
6338 // Do not use s->qscale as luma quantizer because it has not the same
6339 // value in IPCM macroblocks.
6340 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6341 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6342 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6343 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6344 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6345 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6346 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6347 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main per-edge loop: edge 0 borders the neighbour MB, edges 1..3 are internal */
6354 for( edge = start; edge < edges; edge++ ) {
6355 /* mbn_xy: neighbor macroblock */
6356 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6357 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6358 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* with 8x8 DCT only even (8-pixel-aligned) edges are filtered */
6362 if( (edge&1) && IS_8x8DCT(mb_type) )
6365 if( IS_INTRA(mb_type) ||
6366 IS_INTRA(mbn_type) ) {
6369 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6370 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6379 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inter MBs: mv-based bS, skipped when this edge lies inside one partition */
6384 if( edge & mask_edge ) {
6385 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6388 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6389 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6392 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
/* single ref/mv comparison suffices: both sides are one partition here */
6393 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6394 int bn_idx= b_idx - (dir ? 8:1);
6397 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6398 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6399 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6400 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the opposite list (crossed references) */
6403 if(h->slice_type_nos == FF_B_TYPE && v){
6405 for( l = 0; !v && l < 2; l++ ) {
6407 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6408 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6409 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6413 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: compute bS per 4x4 block along the edge */
6419 for( i = 0; i < 4; i++ ) {
6420 int x = dir == 0 ? edge : i;
6421 int y = dir == 0 ? i : edge;
6422 int b_idx= 8 + 4 + x + 8*y;
6423 int bn_idx= b_idx - (dir ? 8:1);
/* non-zero coefficients on either side force bS (value set in missing lines) */
6425 if( h->non_zero_count_cache[b_idx] |
6426 h->non_zero_count_cache[bn_idx] ) {
6432 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6433 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6434 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6435 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6441 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6443 for( l = 0; l < 2; l++ ) {
6445 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6446 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6447 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all four strengths zero: nothing to filter on this edge */
6456 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6461 // Do not use s->qscale as luma quantizer because it has not the same
6462 // value in IPCM macroblocks.
6463 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6464 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6465 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6466 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* apply the filter: vertical edges (chroma only on even edges, 4:2:0) */
6468 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6469 if( (edge&1) == 0 ) {
6470 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6471 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6472 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6473 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* horizontal edges */
6476 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6477 if( (edge&1) == 0 ) {
6478 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6479 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6480 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6481 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Full (non-fast-path) deblocking of one macroblock: optional low-QP skip,
 * CAVLC 8x8-DCT non-zero-count fixups, the MBAFF first-vertical-edge special
 * case, then filter_mb_dir() for the vertical (dir 0) and horizontal (dir 1)
 * edge sets.
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
6487 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6488 MpegEncContext * const s = &h->s;
6489 const int mb_xy= mb_x + mb_y*s->mb_stride;
6490 const int mb_type = s->current_picture.mb_type[mb_xy];
/* field MBs use a halved vertical mv threshold (units are field lines) */
6491 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6492 int first_vertical_edge_done = 0;
6495 //for sufficiently low qp, filtering wouldn't do anything
6496 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6498 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6499 int qp = s->current_picture.qscale_table[mb_xy];
/* averaged QP with each neighbour must also be below the threshold to skip */
6501 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6502 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6507 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6508 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6509 int top_type, left_type[2];
6510 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6511 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6512 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild neighbour NNZ entries from the CBP bits of 8x8-DCT neighbours */
6514 if(IS_8x8DCT(top_type)){
6515 h->non_zero_count_cache[4+8*0]=
6516 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6517 h->non_zero_count_cache[6+8*0]=
6518 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6520 if(IS_8x8DCT(left_type[0])){
6521 h->non_zero_count_cache[3+8*1]=
6522 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6524 if(IS_8x8DCT(left_type[1])){
6525 h->non_zero_count_cache[3+8*3]=
6526 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* and the current MB's own NNZ entries, one CBP bit per 8x8 block */
6529 if(IS_8x8DCT(mb_type)){
6530 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6531 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6533 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6534 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6536 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6537 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6539 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6540 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6545 // left mb is in picture
6546 && h->slice_table[mb_xy-1] != 0xFFFF
6547 // and current and left pair do not have the same interlaced type
6548 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6549 // and left mb is in the same slice if deblocking_filter == 2
6550 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6551 /* First vertical edge is different in MBAFF frames
6552 * There are 8 different bS to compute and 2 different Qp
6554 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6555 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6560 int mb_qp, mbn0_qp, mbn1_qp;
6562 first_vertical_edge_done = 1;
6564 if( IS_INTRA(mb_type) )
6565 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6567 for( i = 0; i < 8; i++ ) {
/* pick the field/frame neighbour of the left MB pair for this 4-pel segment */
6568 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6570 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6572 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6573 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6574 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6576 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average this MB's QP with each left-pair neighbour for luma and chroma */
6583 mb_qp = s->current_picture.qscale_table[mb_xy];
6584 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6585 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6586 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6587 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6588 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6589 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6590 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6591 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6592 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6593 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6594 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6595 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6598 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6599 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6600 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6601 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6602 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* filter both directions; the first vertical edge may already be done above */
6606 for( dir = 0; dir < 2; dir++ )
6607 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6609 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6610 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decodes the macroblocks of one slice: initializes the CABAC engine and its
 * context states when the PPS selects CABAC, otherwise decodes CAVLC MBs;
 * reports decoded/errored MB ranges to the error-resilience layer via
 * ff_er_add_slice().  Runs as an avctx->execute() worker (arg is H264Context**).
 *
 * Fix(review): the condition at original line 6776 contained garbled
 * characters ("s->?gb" / "s->gb?.size_in_bits"); restored to
 * get_bits_count(s->gb) >= s->gb.size_in_bits, matching the sibling checks
 * at original lines 6764/6777.
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); apart from the fix above, code tokens are
 * left exactly as found.
 */
6614 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6615 H264Context *h = *(void**)arg;
6616 MpegEncContext * const s = &h->s;
6617 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6621 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6622 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6624 if( h->pps.cabac ) {
/* CABAC payload starts byte-aligned after the slice header */
6628 align_get_bits( &s->gb );
6631 ff_init_cabac_states( &h->cabac);
6632 ff_init_cabac_decoder( &h->cabac,
6633 s->gb.buffer + get_bits_count(&s->gb)/8,
6634 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6635 /* calculate pre-state */
6636 for( i= 0; i < 460; i++ ) {
6638 if( h->slice_type_nos == FF_I_TYPE )
6639 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6641 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: states 1..63 -> MPS 0, 64..126 -> MPS 1 */
6644 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6646 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6651 int ret = decode_mb_cabac(h);
6653 //STOP_TIMER("decode_mb_cabac")
6655 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well */
6657 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6660 if(ret>=0) ret = decode_mb_cabac(h);
6662 if(ret>=0) hl_decode_mb(h);
6665 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond the 2-byte slack means corrupt input */
6667 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6668 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6669 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6673 if( ++s->mb_x >= s->mb_width ) {
6675 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6677 if(FIELD_OR_MBAFF_PICTURE) {
6682 if( eos || s->mb_y >= s->mb_height ) {
6683 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6684 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6691 int ret = decode_mb_cavlc(h);
6693 if(ret>=0) hl_decode_mb(h);
6695 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6697 ret = decode_mb_cavlc(h);
6699 if(ret>=0) hl_decode_mb(h);
6704 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6705 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6710 if(++s->mb_x >= s->mb_width){
6712 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6714 if(FIELD_OR_MBAFF_PICTURE) {
6717 if(s->mb_y >= s->mb_height){
6718 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* ending exactly at the bitstream end is a clean slice end; otherwise error */
6720 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6721 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6725 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6732 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6733 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6734 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6735 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6739 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6748 for(;s->mb_y < s->mb_height; s->mb_y++){
6749 for(;s->mb_x < s->mb_width; s->mb_x++){
6750 int ret= decode_mb(h);
6755 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6756 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6761 if(++s->mb_x >= s->mb_width){
6763 if(++s->mb_y >= s->mb_height){
6764 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6765 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6769 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6776 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6777 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6778 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6782 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6789 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6792 return -1; //not reached
/**
 * Decodes a picture timing SEI message: skips the HRD cpb/dpb delay fields
 * when HRD parameters are present, then reads pic_struct and the optional
 * per-clock-timestamp fields (rejecting out-of-range pic_struct values --
 * the return statement is not visible in this extract).
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
6795 static int decode_picture_timing(H264Context *h){
6796 MpegEncContext * const s = &h->s;
6797 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6798 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6799 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6801 if(h->sps.pic_struct_present_flag){
6802 unsigned int i, num_clock_ts;
6803 h->sei_pic_struct = get_bits(&s->gb, 4);
6805 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
/* each pic_struct value implies a fixed number of clock timestamps */
6808 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6810 for (i = 0 ; i < num_clock_ts ; i++){
6811 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6812 unsigned int full_timestamp_flag;
6813 skip_bits(&s->gb, 2); /* ct_type */
6814 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6815 skip_bits(&s->gb, 5); /* counting_type */
6816 full_timestamp_flag = get_bits(&s->gb, 1);
6817 skip_bits(&s->gb, 1); /* discontinuity_flag */
6818 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6819 skip_bits(&s->gb, 8); /* n_frames */
6820 if(full_timestamp_flag){
6821 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6822 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6823 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* partial timestamp: each component is individually flagged */
6825 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6826 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6827 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6828 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6829 if(get_bits(&s->gb, 1)) /* hours_flag */
6830 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6834 if(h->sps.time_offset_length > 0)
6835 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Decodes an unregistered user-data SEI message: reads up to
 * sizeof(user_data)-1 bytes, detects an x264 version banner to record
 * h->x264_build (used elsewhere for bug workarounds), and skips any
 * remaining payload bytes.
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
6842 static int decode_unregistered_user_data(H264Context *h, int size){
6843 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text */
6844 uint8_t user_data[16+256];
6850 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6851 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID, hence user_data+16 */
6855 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6856 if(e==1 && build>=0)
6857 h->x264_build= build;
6859 if(s->avctx->debug & FF_DEBUG_BUGS)
6860 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip payload bytes beyond what was buffered above */
6863 skip_bits(&s->gb, 8);
/**
 * Decodes all SEI messages in the current NAL unit.  Each message's type and
 * size are coded as sequences of 0xFF continuation bytes plus a final byte
 * (accumulated below); dispatches picture-timing and unregistered-user-data
 * payloads and skips everything else.
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
6868 static int decode_sei(H264Context *h){
6869 MpegEncContext * const s = &h->s;
/* stop when fewer than 16 bits remain (not enough for type+size bytes) */
6871 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6876 type+= show_bits(&s->gb, 8);
6877 }while(get_bits(&s->gb, 8) == 255);
6881 size+= show_bits(&s->gb, 8);
6882 }while(get_bits(&s->gb, 8) == 255);
6885 case 1: // Picture timing SEI
6886 if(decode_picture_timing(h) < 0)
6890 if(decode_unregistered_user_data(h, size) < 0)
/* unknown SEI type: skip its payload wholesale */
6894 skip_bits(&s->gb, 8*size);
6897 //FIXME check bits here
6898 align_get_bits(&s->gb);
/**
 * Decodes VUI HRD (hypothetical reference decoder) parameters into the SPS:
 * validates cpb_cnt (must be <= 32), skips the per-CPB rate/size fields, and
 * stores the delay-field bit lengths later needed by the picture timing SEI
 * parser.
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
6904 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6905 MpegEncContext * const s = &h->s;
6907 cpb_count = get_ue_golomb(&s->gb) + 1;
/* spec allows cpb_cnt_minus1 in 0..31; reject anything larger */
6909 if(cpb_count > 32U){
6910 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6914 get_bits(&s->gb, 4); /* bit_rate_scale */
6915 get_bits(&s->gb, 4); /* cpb_size_scale */
6916 for(i=0; i<cpb_count; i++){
6917 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6918 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6919 get_bits1(&s->gb); /* cbr_flag */
6921 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* these lengths are consumed later by decode_picture_timing() */
6922 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6923 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6924 sps->time_offset_length = get_bits(&s->gb, 5);
/**
 * Decodes the VUI (video usability information) appended to an SPS: sample
 * aspect ratio, overscan/video-signal/chroma-location info (skipped), timing
 * info, NAL/VCL HRD parameters, pic_struct flag, and bitstream restrictions
 * (of which only num_reorder_frames is kept and validated).
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
6928 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6929 MpegEncContext * const s = &h->s;
6930 int aspect_ratio_info_present_flag;
6931 unsigned int aspect_ratio_idc;
6933 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6935 if( aspect_ratio_info_present_flag ) {
6936 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator; else table lookup */
6937 if( aspect_ratio_idc == EXTENDED_SAR ) {
6938 sps->sar.num= get_bits(&s->gb, 16);
6939 sps->sar.den= get_bits(&s->gb, 16);
6940 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6941 sps->sar= pixel_aspect[aspect_ratio_idc];
6943 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6950 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6952 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6953 get_bits1(&s->gb); /* overscan_appropriate_flag */
6956 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6957 get_bits(&s->gb, 3); /* video_format */
6958 get_bits1(&s->gb); /* video_full_range_flag */
6959 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6960 get_bits(&s->gb, 8); /* colour_primaries */
6961 get_bits(&s->gb, 8); /* transfer_characteristics */
6962 get_bits(&s->gb, 8); /* matrix_coefficients */
6966 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6967 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6968 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6971 sps->timing_info_present_flag = get_bits1(&s->gb);
6972 if(sps->timing_info_present_flag){
6973 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6974 sps->time_scale = get_bits_long(&s->gb, 32);
6975 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6978 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6979 if(sps->nal_hrd_parameters_present_flag)
6980 if(decode_hrd_parameters(h, sps) < 0)
6982 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6983 if(sps->vcl_hrd_parameters_present_flag)
6984 if(decode_hrd_parameters(h, sps) < 0)
6986 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6987 get_bits1(&s->gb); /* low_delay_hrd_flag */
6988 sps->pic_struct_present_flag = get_bits1(&s->gb);
6990 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6991 if(sps->bitstream_restriction_flag){
6992 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6993 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6994 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6995 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6996 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6997 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6998 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* bound the reorder depth; the DPB cannot exceed 16 frames */
7000 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7001 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Decodes one quantization scaling list of `size` entries (16 or 64) into
 * `factors` in raster order via the appropriate zigzag scan.  If the list is
 * absent, `fallback_list` is copied; if the first delta yields 0, the preset
 * `jvt_list` is used; otherwise deltas are accumulated mod 256, repeating the
 * last value once a delta of -last is seen (next==0 keeps `last`).
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
7009 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7010 const uint8_t *jvt_list, const uint8_t *fallback_list){
7011 MpegEncContext * const s = &h->s;
7012 int i, last = 8, next = 8;
/* 4x4 lists use the 16-entry zigzag, 8x8 lists the 64-entry one */
7013 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7014 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7015 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7017 for(i=0;i<size;i++){
7019 next = (last + get_se_golomb(&s->gb)) & 0xff;
7020 if(!i && !next){ /* matrix not written, we use the preset one */
7021 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7024 last = factors[scan[i]] = next ? next : last;
/**
 * Decodes the full set of scaling matrices for an SPS or PPS: six 4x4 lists
 * and (for SPS or when 8x8 transform is enabled) two 8x8 lists.  Fallback
 * chain per the spec: a PPS falls back to the SPS matrices when the SPS sent
 * any, otherwise to the JVT defaults; within the set, each intra/inter chroma
 * list falls back to the previously decoded list of the same class.
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
7028 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7029 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7030 MpegEncContext * const s = &h->s;
7031 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7032 const uint8_t *fallback[4] = {
7033 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7034 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7035 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7036 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7038 if(get_bits1(&s->gb)){
/* remember (for SPS) that explicit matrices were transmitted */
7039 sps->scaling_matrix_present |= is_sps;
7040 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7041 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7042 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7043 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7044 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7045 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7046 if(is_sps || pps->transform_8x8_mode){
7047 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7048 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parses a sequence parameter set NAL unit into a freshly allocated SPS,
 * validating sps_id, POC configuration, reference frame count, and picture
 * dimensions, then stores it in h->sps_buffers[sps_id] (replacing any
 * previous SPS with that id).
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
7053 static inline int decode_seq_parameter_set(H264Context *h){
7054 MpegEncContext * const s = &h->s;
7055 int profile_idc, level_idc;
7056 unsigned int sps_id;
7060 profile_idc= get_bits(&s->gb, 8);
7061 get_bits1(&s->gb); //constraint_set0_flag
7062 get_bits1(&s->gb); //constraint_set1_flag
7063 get_bits1(&s->gb); //constraint_set2_flag
7064 get_bits1(&s->gb); //constraint_set3_flag
7065 get_bits(&s->gb, 4); // reserved
7066 level_idc= get_bits(&s->gb, 8);
7067 sps_id= get_ue_golomb(&s->gb);
7069 if(sps_id >= MAX_SPS_COUNT) {
7070 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7073 sps= av_mallocz(sizeof(SPS));
7077 sps->profile_idc= profile_idc;
7078 sps->level_idc= level_idc;
/* default to flat scaling matrices (all entries 16) until decoded below */
7080 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7081 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7082 sps->scaling_matrix_present = 0;
7084 if(sps->profile_idc >= 100){ //high profile
7085 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7086 if(sps->chroma_format_idc == 3)
7087 get_bits1(&s->gb); //residual_color_transform_flag
7088 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7089 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7090 sps->transform_bypass = get_bits1(&s->gb);
7091 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles are implicitly 4:2:0 */
7093 sps->chroma_format_idc= 1;
7096 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7097 sps->poc_type= get_ue_golomb(&s->gb);
7099 if(sps->poc_type == 0){ //FIXME #define
7100 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7101 } else if(sps->poc_type == 1){//FIXME #define
7102 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7103 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7104 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7105 sps->poc_cycle_length = get_ue_golomb(&s->gb);
/* unsigned cast also rejects negative values read into the int field */
7107 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7108 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7112 for(i=0; i<sps->poc_cycle_length; i++)
7113 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7114 }else if(sps->poc_type != 2){
7115 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7119 sps->ref_frame_count= get_ue_golomb(&s->gb);
7120 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7121 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7124 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7125 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7126 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_{width,height} against integer overflow before the size check */
7127 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7128 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7129 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7133 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7134 if(!sps->frame_mbs_only_flag)
7135 sps->mb_aff= get_bits1(&s->gb);
7139 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7141 #ifndef ALLOW_INTERLACE
7143 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7145 sps->crop= get_bits1(&s->gb);
7147 sps->crop_left = get_ue_golomb(&s->gb);
7148 sps->crop_right = get_ue_golomb(&s->gb);
7149 sps->crop_top = get_ue_golomb(&s->gb);
7150 sps->crop_bottom= get_ue_golomb(&s->gb);
7151 if(sps->crop_left || sps->crop_top){
7152 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7154 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7155 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7161 sps->crop_bottom= 0;
7164 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7165 if( sps->vui_parameters_present_flag )
7166 decode_vui_parameters(h, sps);
7168 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7169 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7170 sps_id, sps->profile_idc, sps->level_idc,
7172 sps->ref_frame_count,
7173 sps->mb_width, sps->mb_height,
7174 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7175 sps->direct_8x8_inference_flag ? "8B8" : "",
7176 sps->crop_left, sps->crop_right,
7177 sps->crop_top, sps->crop_bottom,
7178 sps->vui_parameters_present_flag ? "VUI" : "",
7179 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* replace any previously stored SPS with this id */
7182 av_free(h->sps_buffers[sps_id]);
7183 h->sps_buffers[sps_id]= sps;
/*
 * Fills chroma QP lookup table `t` of the PPS: for each luma QP 0..51 the
 * entry is chroma_qp[clip(qp + index, 0, 51)], where `index` is the PPS
 * chroma_qp_index_offset for that table.
 * NOTE(review): the `static void` line, opening brace and `int i;` of this
 * function are missing from this extract; code tokens left exactly as found.
 */
build_qp_table(PPS *pps, int t, int index)
7194 for(i = 0; i < 52; i++)
7195 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parses a picture parameter set NAL unit into a freshly allocated PPS,
 * validating pps_id / sps_id / ref counts, reporting FMO (slice groups > 1)
 * as unsupported, inheriting scaling matrices from the referenced SPS, then
 * reading the optional trailing fields (8x8 transform mode, PPS scaling
 * matrices, second chroma QP offset) and building the chroma QP tables.
 * Stores the result in h->pps_buffers[pps_id].
 *
 * NOTE(review): this extract is missing some original source lines (the
 * embedded line numbers skip); code tokens are left exactly as found.
 */
7198 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7199 MpegEncContext * const s = &h->s;
7200 unsigned int pps_id= get_ue_golomb(&s->gb);
7203 if(pps_id >= MAX_PPS_COUNT) {
7204 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7208 pps= av_mallocz(sizeof(PPS));
7211 pps->sps_id= get_ue_golomb(&s->gb);
7212 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7213 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7217 pps->cabac= get_bits1(&s->gb);
7218 pps->pic_order_present= get_bits1(&s->gb);
7219 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7220 if(pps->slice_group_count > 1 ){
7221 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7222 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7223 switch(pps->mb_slice_group_map_type){
/* NOTE(review): the following "|"-prefixed syntax-table lines were part of a
 * block comment in the original source quoting the FMO syntax from the spec;
 * the comment delimiters fall outside this extract. */
7226 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7227 | run_length[ i ] |1 |ue(v) |
7232 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7234 | top_left_mb[ i ] |1 |ue(v) |
7235 | bottom_right_mb[ i ] |1 |ue(v) |
7243 | slice_group_change_direction_flag |1 |u(1) |
7244 | slice_group_change_rate_minus1 |1 |ue(v) |
7249 | slice_group_id_cnt_minus1 |1 |ue(v) |
7250 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7252 | slice_group_id[ i ] |1 |u(v) |
7257 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7258 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7259 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7260 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7264 pps->weighted_pred= get_bits1(&s->gb);
7265 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7266 pps->init_qp= get_se_golomb(&s->gb) + 26;
7267 pps->init_qs= get_se_golomb(&s->gb) + 26;
7268 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7269 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7270 pps->constrained_intra_pred= get_bits1(&s->gb);
7271 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7273 pps->transform_8x8_mode= 0;
7274 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* start from the SPS scaling matrices; PPS may override them below */
7275 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7276 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* more data left: the optional PPS extension fields are present */
7278 if(get_bits_count(&s->gb) < bit_length){
7279 pps->transform_8x8_mode= get_bits1(&s->gb);
7280 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7281 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7283 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7286 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7287 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7288 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7289 h->pps.chroma_qp_diff= 1;
7291 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7292 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7293 pps_id, pps->sps_id,
7294 pps->cabac ? "CABAC" : "CAVLC",
7295 pps->slice_group_count,
7296 pps->ref_count[0], pps->ref_count[1],
7297 pps->weighted_pred ? "weighted" : "",
7298 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7299 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7300 pps->constrained_intra_pred ? "CONSTR" : "",
7301 pps->redundant_pic_cnt_present ? "REDU" : "",
7302 pps->transform_8x8_mode ? "8x8DCT" : ""
/* replace any previously stored PPS with this id */
7306 av_free(h->pps_buffers[pps_id]);
7307 h->pps_buffers[pps_id]= pps;
7315 * Call decode_slice() for each context.
7317 * @param h h264 master context
7318 * @param context_count number of contexts to execute
/**
 * Run decode_slice() for every active slice context.
 *
 * With a single context the slice is decoded directly in the master
 * context; with several contexts each per-thread context is primed
 * (error state reset) and all are dispatched through avctx->execute(),
 * after which macroblock position and error bookkeeping are pulled back
 * from the last slice context into the master context.
 *
 * NOTE(review): several lines of this function (declarations of i/hx,
 * the else branch, closing braces) are not visible in this excerpt;
 * comments describe only the statements shown.
 */
7320 static void execute_decode_slices(H264Context *h, int context_count){
7321 MpegEncContext * const s = &h->s;
7322 AVCodecContext * const avctx= s->avctx;
/* Single-context path: decode inline, no thread dispatch. */
7326 if(context_count == 1) {
7327 decode_slice(avctx, &h);
/* Multi-context path: propagate error settings and clear per-thread
 * error counters before handing the contexts to avctx->execute(). */
7329 for(i = 1; i < context_count; i++) {
7330 hx = h->thread_context[i];
7331 hx->s.error_recognition = avctx->error_recognition;
7332 hx->s.error_count = 0;
7335 avctx->execute(avctx, (void *)decode_slice,
7336 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7338 /* pull back stuff from slices to master context */
7339 hx = h->thread_context[context_count - 1];
7340 s->mb_x = hx->s.mb_x;
7341 s->mb_y = hx->s.mb_y;
7342 s->dropable = hx->s.dropable;
7343 s->picture_structure = hx->s.picture_structure;
/* Accumulate worker error counts into the master context. */
7344 for(i = 1; i < context_count; i++)
7345 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 *
 * Handles both Annex-B streams (start-code prefix search) and AVC/avcC
 * streams (length-prefixed NAL units, h->is_avc).  Slice NAL units are
 * distributed across thread contexts and flushed through
 * execute_decode_slices() once h->max_contexts are queued; SPS/PPS/SEI
 * units are parsed immediately in the master context.
 *
 * Fix (review): the trailing-zero strip loop previously read
 * ptr[dst_length - 1] BEFORE checking dst_length > 0, causing an
 * out-of-bounds read of ptr[-1] when a NAL unit decoded to zero bytes.
 * The short-circuit operands are now ordered so the bound is checked
 * first; behavior is otherwise unchanged.
 *
 * NOTE(review): many lines of this function are not visible in this
 * excerpt (declarations of i/buf_index/nalsize/etc., several braces and
 * case labels); comments describe only the statements shown.
 *
 * @return number of bytes consumed, or a negative value on error
 *         (presumably — the return statements are outside this excerpt)
 */
7350 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7351 MpegEncContext * const s = &h->s;
7352 AVCodecContext * const avctx= s->avctx;
7354 H264Context *hx; ///< thread context
7355 int context_count = 0;
7357 h->max_contexts = avctx->thread_count;
/* Debug hex dump of the buffer head (guarded elsewhere; assumes at
 * least 50 readable bytes — TODO confirm the surrounding #if/guard). */
7360 for(i=0; i<50; i++){
7361 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7364 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7365 h->current_slice = 0;
7366 if (!s->first_field)
7367 s->current_picture_ptr= NULL;
/* --- per-NAL loop: AVC branch reads a big-endian length prefix --- */
7379 if(buf_index >= buf_size) break;
7381 for(i = 0; i < h->nal_length_size; i++)
7382 nalsize = (nalsize << 8) | buf[buf_index++];
7383 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7388 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7393 // start code prefix search
7394 for(; buf_index + 3 < buf_size; buf_index++){
7395 // This should always succeed in the first iteration.
7396 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7400 if(buf_index+3 >= buf_size) break;
7405 hx = h->thread_context[context_count];
7407 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7408 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes; check dst_length BEFORE indexing so a
 * zero-length RBSP cannot read ptr[-1]. */
7411 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7413 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7415 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7416 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7419 if (h->is_avc && (nalsize != consumed)){
7420 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7424 buf_index += consumed;
/* Skip non-reference NAL units when the caller asked to hurry/skip. */
7426 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7427 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7432 switch(hx->nal_unit_type){
7434 if (h->nal_unit_type != NAL_IDR_SLICE) {
7435 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7438 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7440 init_get_bits(&hx->s.gb, ptr, bit_length);
7442 hx->inter_gb_ptr= &hx->s.gb;
7443 hx->s.data_partitioning = 0;
7445 if((err = decode_slice_header(hx, h)))
7448 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Queue the slice for decoding only if no skip/discard option applies. */
7449 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7450 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7451 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7452 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7453 && avctx->skip_frame < AVDISCARD_ALL)
/* --- data-partitioned slices: partitions A/B/C get separate
 * bitstream readers (intra_gb / inter_gb) --- */
7457 init_get_bits(&hx->s.gb, ptr, bit_length);
7459 hx->inter_gb_ptr= NULL;
7460 hx->s.data_partitioning = 1;
7462 err = decode_slice_header(hx, h);
7465 init_get_bits(&hx->intra_gb, ptr, bit_length);
7466 hx->intra_gb_ptr= &hx->intra_gb;
7469 init_get_bits(&hx->inter_gb, ptr, bit_length);
7470 hx->inter_gb_ptr= &hx->inter_gb;
7472 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7473 && s->context_initialized
7475 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7476 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7477 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7478 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI / parameter-set NAL units are parsed in the master context. */
7482 init_get_bits(&s->gb, ptr, bit_length);
7486 init_get_bits(&s->gb, ptr, bit_length);
7487 decode_seq_parameter_set(h);
7489 if(s->flags& CODEC_FLAG_LOW_DELAY)
7492 if(avctx->has_b_frames < 2)
7493 avctx->has_b_frames= !s->low_delay;
7496 init_get_bits(&s->gb, ptr, bit_length);
7498 decode_picture_parameter_set(h, bit_length);
7502 case NAL_END_SEQUENCE:
7503 case NAL_END_STREAM:
7504 case NAL_FILLER_DATA:
7506 case NAL_AUXILIARY_SLICE:
7509 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush queued slices once all available contexts are filled. */
7512 if(context_count == h->max_contexts) {
7513 execute_decode_slices(h, context_count);
7518 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7520 /* Slice could not be decoded in parallel mode, copy down
7521 * NAL unit stuff to context 0 and restart. Note that
7522 * rbsp_buffer is not transferred, but since we no longer
7523 * run in parallel mode this should not be an issue. */
7524 h->nal_unit_type = hx->nal_unit_type;
7525 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any slices still queued when the NAL loop ends. */
7531 execute_decode_slices(h, context_count);
7536 * returns the number of bytes consumed for building the current frame
/**
 * Sanitize the consumed-byte position reported to the caller of
 * decode_frame(): never report 0 (which could stall a caller that
 * advances by the return value), and snap positions within 10 bytes of
 * the end to buf_size so trailing padding is swallowed.
 *
 * NOTE(review): the return statement and closing brace are outside this
 * excerpt.
 */
7538 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7539 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7540 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level frame decoding entry point (AVCodec.decode).
 *
 * Responsibilities visible in this excerpt:
 *  - on buf_size == 0 (end of stream), flush the lowest-POC picture
 *    from the delayed-output queue;
 *  - on the first call for an AVC (avcC extradata) stream, parse the
 *    SPS/PPS sets embedded in extradata and derive nal_length_size;
 *  - decode the buffer's NAL units via decode_nal_units();
 *  - once a full picture is decoded, derive interlacing/top-field-first
 *    flags (preferring picture-timing SEI), insert the picture into the
 *    B-frame reordering queue, and output the next picture in display
 *    order if enough pictures are buffered.
 *
 * NOTE(review): many lines are missing from this excerpt (declarations,
 * returns, braces); comments describe only the statements shown.
 *
 * @param data       output AVFrame
 * @param data_size  set to sizeof(AVFrame) when a picture is returned
 * @return bytes consumed (via get_consumed_bytes) — the error-return
 *         paths are outside this excerpt
 */
7545 static int decode_frame(AVCodecContext *avctx,
7546 void *data, int *data_size,
7547 const uint8_t *buf, int buf_size)
7549 H264Context *h = avctx->priv_data;
7550 MpegEncContext *s = &h->s;
7551 AVFrame *pict = data;
7554 s->flags= avctx->flags;
7555 s->flags2= avctx->flags2;
7557 /* end of stream, output what is still in the buffers */
7558 if (buf_size == 0) {
7562 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC for output. */
7563 out = h->delayed_pic[0];
7565 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7566 if(h->delayed_pic[i]->poc < out->poc){
7567 out = h->delayed_pic[i];
/* Compact the queue over the slot being output. */
7571 for(i=out_idx; h->delayed_pic[i]; i++)
7572 h->delayed_pic[i] = h->delayed_pic[i+1];
7575 *data_size = sizeof(AVFrame);
7576 *pict= *(AVFrame*)out;
/* One-time parse of avcC extradata (ISO/IEC 14496-15 layout). */
7582 if(h->is_avc && !h->got_avcC) {
7583 int i, cnt, nalsize;
7584 unsigned char *p = avctx->extradata;
7585 if(avctx->extradata_size < 7) {
7586 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7590 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7593 /* sps and pps in the avcC always have length coded with 2 bytes,
7594 so put a fake nal_length_size = 2 while parsing them */
7595 h->nal_length_size = 2;
7596 // Decode sps from avcC
7597 cnt = *(p+5) & 0x1f; // Number of sps
7599 for (i = 0; i < cnt; i++) {
7600 nalsize = AV_RB16(p) + 2;
7601 if(decode_nal_units(h, p, nalsize) < 0) {
7602 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7607 // Decode pps from avcC
7608 cnt = *(p++); // Number of pps
7609 for (i = 0; i < cnt; i++) {
7610 nalsize = AV_RB16(p) + 2;
7611 if(decode_nal_units(h, p, nalsize) != nalsize) {
7612 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7617 // Now store right nal length size, that will be use to parse all other nals
7618 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7619 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in plain extradata instead. */
7623 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7624 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7629 buf_index=decode_nal_units(h, buf, buf_size);
7633 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7634 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7635 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A complete picture is available: finish it and handle output. */
7639 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7640 Picture *out = s->current_picture_ptr;
7641 Picture *cur = s->current_picture_ptr;
7642 int i, pics, cross_idr, out_of_order, out_idx;
7646 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7647 s->current_picture_ptr->pict_type= s->pict_type;
/* Apply memory-management control operations and roll POC state. */
7650 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7651 h->prev_poc_msb= h->poc_msb;
7652 h->prev_poc_lsb= h->poc_lsb;
7654 h->prev_frame_num_offset= h->frame_num_offset;
7655 h->prev_frame_num= h->frame_num;
7658 * FIXME: Error handling code does not seem to support interlaced
7659 * when slices span multiple rows
7660 * The ff_er_add_slice calls don't work right for bottom
7661 * fields; they cause massive erroneous error concealing
7662 * Error marking covers both fields (top and bottom).
7663 * This causes a mismatched s->error_count
7664 * and a bad error table. Further, the error count goes to
7665 * INT_MAX when called for bottom field, because mb_y is
7666 * past end by one (callers fault) and resync_mb_y != 0
7667 * causes problems for the first MB line, too.
7674 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7675 /* Wait for second field. */
7679 cur->repeat_pict = 0;
7681 /* Signal interlacing information externally. */
7682 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7683 if(h->sps.pic_struct_present_flag){
7684 switch (h->sei_pic_struct)
7686 case SEI_PIC_STRUCT_FRAME:
7687 cur->interlaced_frame = 0;
7689 case SEI_PIC_STRUCT_TOP_FIELD:
7690 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7691 case SEI_PIC_STRUCT_TOP_BOTTOM:
7692 case SEI_PIC_STRUCT_BOTTOM_TOP:
7693 cur->interlaced_frame = 1;
7695 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7696 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7697 // Signal the possibility of telecined film externally (pic_struct 5,6)
7698 // From these hints, let the applications decide if they apply deinterlacing.
7699 cur->repeat_pict = 1;
7700 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7702 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7703 // Force progressive here, as doubling interlaced frame is a bad idea.
7704 cur->interlaced_frame = 0;
7705 cur->repeat_pict = 2;
7707 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7708 cur->interlaced_frame = 0;
7709 cur->repeat_pict = 4;
7713 /* Derive interlacing flag from used decoding process. */
7714 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7717 if (cur->field_poc[0] != cur->field_poc[1]){
7718 /* Derive top_field_first from field pocs. */
7719 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7721 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7722 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7723 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7724 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7725 cur->top_field_first = 1;
7727 cur->top_field_first = 0;
7729 /* Most likely progressive */
7730 cur->top_field_first = 0;
7734 //FIXME do something with unavailable reference frames
7736 /* Sort B-frames into display order */
7738 if(h->sps.bitstream_restriction_flag
7739 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7740 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7744 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7745 && !h->sps.bitstream_restriction_flag){
7746 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* Append the current picture to the delayed-output queue; mark it so
 * its buffers survive even when it is not a reference picture. */
7751 while(h->delayed_pic[pics]) pics++;
7753 assert(pics <= MAX_DELAYED_PIC_COUNT);
7755 h->delayed_pic[pics++] = cur;
7756 if(cur->reference == 0)
7757 cur->reference = DELAYED_PIC_REF;
/* Pick the smallest-POC queued picture as output candidate. */
7759 out = h->delayed_pic[0];
7761 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7762 if(h->delayed_pic[i]->poc < out->poc){
7763 out = h->delayed_pic[i];
7766 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7768 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow the reorder delay if output order violations are detected. */
7770 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7772 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7774 ((!cross_idr && out->poc > h->outputed_poc + 2)
7775 || cur->pict_type == FF_B_TYPE)))
7778 s->avctx->has_b_frames++;
7781 if(out_of_order || pics > s->avctx->has_b_frames){
7782 out->reference &= ~DELAYED_PIC_REF;
7783 for(i=out_idx; h->delayed_pic[i]; i++)
7784 h->delayed_pic[i] = h->delayed_pic[i+1];
7786 if(!out_of_order && pics > s->avctx->has_b_frames){
7787 *data_size = sizeof(AVFrame);
7789 h->outputed_poc = out->poc;
7790 *pict= *(AVFrame*)out;
7792 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7797 assert(pict->data[0] || !*data_size);
7798 ff_print_debug_info(s, pict);
7799 //printf("out %d\n", (int)pict->data[0]);
7802 /* Return the Picture timestamp as the frame number */
7803 /* we subtract 1 because it is added on utils.c */
7804 avctx->frame_number = s->picture_number - 1;
7806 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbours: a neighbour counts as available only if it lies inside
 * the picture AND belongs to the same slice (same slice_table entry).
 *
 * Index layout visible here: [0] top-left, [1] top, [2] top-right,
 * [3] left; [4]/[5] are hard-coded placeholders (see FIXMEs).
 *
 * NOTE(review): the closing brace and any lines between indices 2 and 3
 * are outside this excerpt.
 */
7809 static inline void fill_mb_avail(H264Context *h){
7810 MpegEncContext * const s = &h->s;
7811 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Top row of neighbours: s->mb_x guards the left picture edge,
 * s->mb_x+1 < s->mb_width guards the right edge. */
7814 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7815 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7816 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7822 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7823 h->mb_avail[4]= 1; //FIXME move out
7824 h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness body (presumably the #ifdef TEST main() of this
 * file — the enclosing function header is not visible in this excerpt).
 * It exercises, in order: unsigned/signed Exp-Golomb coding round
 * trips, the 4x4 (I)DCT with a rough quantization model, the
 * quantizer, and NAL-layer escape encoding/decoding round trips.
 * NOTE(review): many declarations and braces are missing from this
 * excerpt; comments describe only the visible statements. */
7832 #define SIZE (COUNT*40)
7838 // int int_temp[10000];
7840 AVCodecContext avctx;
7842 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: write i for i in [0, COUNT), read back --- */
7844 init_put_bits(&pb, temp, SIZE);
7845 printf("testing unsigned exp golomb\n");
7846 for(i=0; i<COUNT; i++){
7848 set_ue_golomb(&pb, i);
7849 STOP_TIMER("set_ue_golomb");
7851 flush_put_bits(&pb);
7853 init_get_bits(&gb, temp, 8*SIZE);
7854 for(i=0; i<COUNT; i++){
7857 s= show_bits(&gb, 24);
7860 j= get_ue_golomb(&gb);
7862 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7865 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb: same round trip centered on zero --- */
7869 init_put_bits(&pb, temp, SIZE);
7870 printf("testing signed exp golomb\n");
7871 for(i=0; i<COUNT; i++){
7873 set_se_golomb(&pb, i - COUNT/2);
7874 STOP_TIMER("set_se_golomb");
7876 flush_put_bits(&pb);
7878 init_get_bits(&gb, temp, 8*SIZE);
7879 for(i=0; i<COUNT; i++){
7882 s= show_bits(&gb, 24);
7885 j= get_se_golomb(&gb);
7886 if(j != i - COUNT/2){
7887 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7890 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round trip on random blocks, accumulating the
 * reconstruction error --- */
7894 printf("testing 4x4 (I)DCT\n");
7897 uint8_t src[16], ref[16];
7898 uint64_t error= 0, max_error=0;
7900 for(i=0; i<COUNT; i++){
7902 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7903 for(j=0; j<16; j++){
7904 ref[j]= random()%255;
7905 src[j]= random()%255;
7908 h264_diff_dct_c(block, src, ref, 4);
/* crude normalization of coefficients before the inverse transform */
7911 for(j=0; j<16; j++){
7912 // printf("%d ", block[j]);
7913 block[j]= block[j]*4;
7914 if(j&1) block[j]= (block[j]*4 + 2)/5;
7915 if(j&4) block[j]= (block[j]*4 + 2)/5;
7919 s->dsp.h264_idct_add(ref, block, 4);
7920 /* for(j=0; j<16; j++){
7921 printf("%d ", ref[j]);
7925 for(j=0; j<16; j++){
7926 int diff= FFABS(src[j] - ref[j]);
7929 max_error= FFMAX(max_error, diff);
7932 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7933 printf("testing quantizer\n");
7934 for(qp=0; qp<52; qp++){
7936 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: random bitstreams with injected zero bytes must
 * survive encode_nal()/decode_nal() round trips --- */
7939 printf("Testing NAL layer\n");
7941 uint8_t bitstream[COUNT];
7942 uint8_t nal[COUNT*2];
7944 memset(&h, 0, sizeof(H264Context));
7946 for(i=0; i<COUNT; i++){
7954 for(j=0; j<COUNT; j++){
7955 bitstream[j]= (random() % 255) + 1;
7958 for(j=0; j<zeros; j++){
7959 int pos= random() % COUNT;
7960 while(bitstream[pos] == 0){
7969 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7971 printf("encoding failed\n");
7975 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7979 if(out_length != COUNT){
7980 printf("incorrect length %d %d\n", out_length, COUNT);
7984 if(consumed != nal_length){
7985 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7989 if(memcmp(bitstream, out, COUNT)){
7990 printf("mismatch\n");
7996 printf("Testing RBSP\n");
/**
 * Codec close callback (AVCodec.close): release everything the decoder
 * allocated — the two RBSP unescape buffers, the per-picture tables,
 * and every cached SPS/PPS parameter set.
 *
 * NOTE(review): the return statement and closing brace are outside this
 * excerpt.
 */
8004 static av_cold int decode_end(AVCodecContext *avctx)
8006 H264Context *h = avctx->priv_data;
8007 MpegEncContext *s = &h->s;
8010 av_freep(&h->rbsp_buffer[0]);
8011 av_freep(&h->rbsp_buffer[1]);
8012 free_tables(h); //FIXME cleanup init stuff perhaps
/* av_freep takes the ADDRESS of each slot so the cached pointers are
 * also NULLed, preventing double frees on a later close. */
8014 for(i = 0; i < MAX_SPS_COUNT; i++)
8015 av_freep(h->sps_buffers + i);
8017 for(i = 0; i < MAX_PPS_COUNT; i++)
8018 av_freep(h->pps_buffers + i);
8022 // memset(h, 0, sizeof(H264Context));
8028 AVCodec h264_decoder = {
8032 sizeof(H264Context),
8037 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8039 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),