2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC code tables: one VLC per context, backed by statically sized
 * storage so init_vlc() does not have to allocate. The *_size constants
 * give the number of entries in the matching table. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations for helpers defined later in this file. */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit halfwords into one 32-bit word so that the pair can be
 * written with a single 32-bit store; the operand order is swapped on
 * big-endian hosts to keep the in-memory layout identical.
 * NOTE(review): the #else/#endif and closing brace of this function are not
 * visible in this excerpt. */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* rem6[q] == q % 6 and div6[q] == q / 6 for quantizer values 0..51,
 * precomputed to avoid division in dequant code. */
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Per-neighbour 4x4 sub-block index remappings used by fill_caches() when
 * selecting left-neighbour blocks; the four option rows are chosen based on
 * the MBAFF frame/field pairing. NOTE(review): the initializer rows are not
 * visible in this excerpt. */
97 static const int left_block_options[4][8]={
/* Fills the per-macroblock neighbour caches (intra pred modes, non-zero
 * counts, cbp, motion vectors, reference indices, mvd, direct flags) from the
 * already-decoded neighbouring macroblocks, handling PAFF/MBAFF neighbour
 * remapping. When for_deblock is set, only the data needed by the loop
 * filter is gathered and neighbours from other slices are allowed.
 * NOTE(review): this excerpt is missing many original lines (closing braces,
 * else-branches, early-return paths); the visible statements are kept
 * byte-identical. */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
/* Derive neighbour macroblock indices; in PAFF the top neighbour is two
 * rows up, hence the shift by FIELD_PICTURE. */
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF: neighbours are addressed per macroblock pair; whether the actual
 * neighbour is the top or bottom MB of its pair depends on the frame/field
 * coding of the current MB versus each neighbour pair. */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: neighbours from any slice are usable, so only reject
 * indices marked unused (0xFFFF) in the slice table. */
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: only neighbours belonging to the same slice may be used
 * for prediction. */
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra sample availability bitmasks; with constrained_intra_pred only
 * intra-coded neighbours count as available. */
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 prediction-mode cache from top and left neighbours. */
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero coefficient count cache from top and left neighbours; missing
 * neighbours default to 0 (CABAC inter) or 64 otherwise. */
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Coded block pattern of the top/left neighbours (CABAC context). */
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector and reference index caches for inter/direct macroblocks. */
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B-slices: cache the per-8x8 direct-mode flags of the neighbours. */
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of cached neighbour mvs/refs: MAP_F2F is
 * redefined for each direction and applied to every cached neighbour slot. */
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame -> field: double the ref index, halve the vertical mv/mvd. */
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
/* field -> frame: halve the ref index, double the vertical mv/mvd. */
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copies the current macroblock's intra4x4 prediction modes from the
 * prediction cache back into the per-macroblock table (bottom row and right
 * column only, which is what neighbours will read).
 * NOTE(review): the [6] entry's source index and the closing brace are the
 * last visible lines of this function in this excerpt. */
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
/* Remap tables: given a cached pred mode, yield the replacement mode when
 * the top (resp. left) samples are unavailable; negative entries flag an
 * invalid mode for that situation. */
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
/* Replacement modes when top/left samples are unavailable; -1 marks modes
 * that are illegal in that situation. */
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 if((h->left_samples_available&0x8080) != 0x8080){
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
/* Prediction is the minimum of the left and top neighbour modes; a negative
 * value means a neighbour is unavailable, in which case DC is used. */
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
/* Copies the non-zero coefficient counts of the current macroblock from the
 * cache back to the per-macroblock table (bottom row / right column entries
 * read by future neighbours, for luma and both chroma planes). */
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
/* i (computed on a line not visible here) combines left and top counts;
 * values < 64 are averaged with rounding, 64 marks "unavailable". */
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" (top-right diagonal) motion vector predictor for block i,
 * falling back to the top-left neighbour when top-right is unavailable, and
 * applying the MBAFF frame/field mv and ref rescaling where the neighbour's
 * coding mode differs from the current macroblock's.
 * @return the reference index associated with the returned mv *C. */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
/* Reads the mv/ref at 4x4 coordinates (X4,Y4) in the current picture,
 * stores a (possibly rescaled) copy in the scratch cache slot and returns
 * the correspondingly rescaled reference index. */
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF / fallthrough path: use top-right if available, else top-left. */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
/* A = left neighbour, B = top neighbour, C = diagonal (via
 * fetch_diagonal_mv); the median of the three is the default predictor. */
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
/* No neighbour matched: use the left mv alone if it is the only available
 * neighbour, otherwise fall back to the median. */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Top partition (n==0 path): predict from the top neighbour B when its
 * reference matches. */
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Bottom partition: predict from the left neighbour A when its reference
 * matches. */
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Directional shortcut not applicable: fall back to the median predictor. */
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Left partition: predict from the left neighbour A when its reference
 * matches. */
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Right partition: predict from the diagonal neighbour C when its reference
 * matches. */
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
/* Directional shortcut not applicable: fall back to the median predictor. */
875 pred_motion(h, n, 2, list, ref, mx, my);
/* Predicts the motion vector of a P-skip macroblock: zero mv when either
 * neighbour is unavailable or is a zero-mv ref-0 block, otherwise the normal
 * 16x16 median prediction with reference 0. */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct distance scale factor for list-0 reference i:
 * clip(((poc - poc0) * (16384 + |td|/2) / td + 32) >> 6) per the H.264
 * temporal direct derivation, with td/tb clipped to [-128,127].
 * NOTE(review): the branch taken when td==0 or the reference is long-term is
 * not visible in this excerpt. */
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Fills h->dist_scale_factor (and, per visible code, the per-field variant)
 * with temporal-direct scale factors for every list-0 reference, using the
 * POC distances to the first list-1 reference. */
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
/* Per-field factors; field references live at indices 16.. in ref_list. */
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* Builds map[list]: for each reference of the co-located (list-1) picture,
 * the index of the matching entry in the current slice's reference list,
 * matched via 4*frame_num + reference parity ("poc" here). mbafi selects the
 * MBAFF field-pair variant, which uses entries 16..16+2*ref_count. */
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference metadata (counts and packed
 * ref_poc keys) so future B-frames can use it as a colocated picture,
 * then builds the col->list0 maps used by temporal direct prediction.
 * Skips the map building for non-B slices and spatial direct mode.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
// sidx selects which half of the per-field tables the current parity writes
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
// key = 4*frame_num + low reference bits; same key fill_colmap() matches on
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
// progressive frames: both field slots share the same tables
973 if(s->picture_structure == PICT_FRAME){
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
// only temporal direct B slices need the colocated-ref maps
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Predicts motion vectors and reference indices for a B_DIRECT macroblock
 * (or the direct 8x8 partitions of a B_8x8 one), writing the results into
 * h->mv_cache / h->ref_cache and updating *mb_type / h->sub_mb_type.
 *
 * Two modes:
 *  - spatial direct (h->direct_spatial_mv_pred): refs taken as the minimum
 *    of the spatial neighbours, MVs from pred_motion(), zeroed where the
 *    colocated block is a near-static list-0/list-1 zero-ref block;
 *  - temporal direct: colocated list-1 MVs scaled by dist_scale_factor,
 *    refs remapped through map_col_to_list0.
 * Both modes carry extra cross-interlacing (AFR/AFL/FR/FL) fixups for the
 * colocated macroblock position and vertical MV scaling.
 *
 * NOTE(review): many lines (declarations of ref/mv/mb_type_col/i8/i4 etc.,
 * some else-branches and closing braces) are elided from this view.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// --- locate the colocated macroblock, compensating for interlacing mismatch ---
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
// pick the colocated field with the closer poc
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
// current MB pair covers two colocated frame MBs
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
// choose direct partition size from the colocated MB's partitioning
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
// --- pointers into the colocated picture's MVs and reference indices ---
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// NOTE(review): the condition guarding this advance to the bottom half is elided
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
// ======================= spatial direct prediction =======================
1065 if(h->direct_spatial_mv_pred){
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned min makes negative (unavailable) values compare largest
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1084 if(ref[0] < 0 && ref[1] < 0){
// no neighbour refs at all -> ref 0 with zero MVs in both lists
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1089 for(list=0; list<2; list++){
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093 mv[list][0] = mv[list][1] = 0;
// drop the unused list when only one direction has a valid ref
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
// per-8x8 path when current and colocated interlacing differ
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// colocated block is near-static (|mv|<=1, ref 0) -> force zero MV per spec
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125 a= pack16to32(mv[0][0],mv[0][1]);
1127 b= pack16to32(mv[1][0],mv[1][1]);
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// whole-MB 16x16 path
1135 }else if(IS_16X16(*mb_type)){
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// workaround gated on x264 encoder version (build > 33 or unknown)
1143 && (h->x264_build>33 || !h->x264_build)))){
1145 a= pack16to32(mv[0][0],mv[0][1]);
1147 b= pack16to32(mv[1][0],mv[1][1]);
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// per-8x8 (and per-4x4) spatial path
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182 for(i4=0; i4<4; i4++){
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ======================= temporal direct prediction ======================
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
// MBAFF field MBs use the per-field maps/scale tables built earlier
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
// NOTE(review): the ref_offset assignment guarded here is elided from this view
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
// mixed interlacing: scale vertical MVs by y_shift while remapping refs
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
// temporal direct always uses list-1 ref 0 as the backward reference
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
// adjust vertical component for field<->frame unit mismatch, then scale
1241 int my_col = (mv_col[1]<<y_shift)/2;
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
// list-1 MV = scaled MV minus colocated MV (standard temporal direct)
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// per-8x8 temporal path
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
// per-4x4 temporal path
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion caches (mv_cache / ref_cache, and
 * mvd_cache when CABAC is in use) back into the frame-wide tables of
 * s->current_picture, and records the direct flags for B_8x8 MBs.
 * NOTE(review): the per-row loops surrounding the 64-bit copies are
 * elided from this view.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
// b_xy: 4x4 block index; b8_xy: 8x8 block index of this MB in frame tables
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
// copy 4 MVs per row as two 64-bit stores
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
// skipped MBs carry zero MV deltas
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// one reference index per 8x8 block
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// direct flags are only needed by the CABAC context of future B slices
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
// NAL header byte: bit 7 forbidden, bits 6-5 ref idc, bits 4-0 unit type
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
// scan two bytes at a time for the 00 00 0x (x<=3) escape/startcode pattern
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
// fast path: no emulation-prevention bytes, return the input directly
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
// DPC partitions get their own buffer so they don't clobber other NALs
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
// NOTE(review): the allocation-failure check after av_fast_realloc is elided
// from this view — confirm it exists in the full file.
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
// 00 00 03: drop the emulation-prevention 03 byte
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
// NOTE(review): body largely elided in this view; only the trace print of
// the trailing byte v (rbsp_stop_one_bit search input) is visible.
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * IDCT transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
// offsets map the 4x4 DC grid onto the scattered DC positions in the MB block array
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
// pass 1: 4-point butterfly over rows into temp[]
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
// pass 2: butterfly over columns, then dequantize with rounding (+128 >> 8)
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * DCT transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
// same scattered-DC offset layout as the dequant/IDCT counterpart above
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// forward transform: row butterflies into temp[]
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
// column butterflies; >>1 is the forward-transform normalization
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 Hadamard inverse transform + dequantization of the chroma DC
 * coefficients, in place. The four DCs live at stride/xStride spacing
 * inside the macroblock coefficient array.
 * NOTE(review): the intermediate e/f assignments between the loads and
 * stores are elided from this view.
 */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
// 2x2 butterfly combined with dequant; >>7 normalizes the transform + qmul scale
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients, in place.
 * Mirror of chroma_dc_dequant_idct_c() without the dequant scaling.
 * NOTE(review): the intermediate e/f assignments are elided from this view.
 */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
// t selects Cb vs Cr (the PPS keeps one mapping table per chroma plane);
// qscale is the luma QP being translated.
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition in one direction (one reference list):
 * reads the MV from mv_cache, fetches luma via the quarter-pel function and
 * chroma via the eighth-pel chroma function, with edge emulation when the
 * (padded) source area leaves the reference picture.
 * @param square    nonzero when one qpix_op call covers the whole partition;
 *                  otherwise a second call at +delta is made
 * @param delta     byte offset between the two halves of a non-square part
 * @param qpix_op/chroma_op put or avg variants, chosen by the caller
 */
1590 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1591 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1592 int src_x_offset, int src_y_offset,
1593 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1594 MpegEncContext * const s = &h->s;
// mv in quarter-pel units, offset to absolute picture coordinates
1595 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1596 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1597 const int luma_xy= (mx&3) + ((my&3)<<2);
1598 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1599 uint8_t * src_cb, * src_cr;
1600 int extra_width= h->emu_edge_width;
1601 int extra_height= h->emu_edge_height;
1603 const int full_mx= mx>>2;
1604 const int full_my= my>>2;
1605 const int pic_width = 16*s->mb_width;
1606 const int pic_height = 16*s->mb_height >> MB_FIELD;
1608 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
// subpel interpolation reads 3 extra rows/cols; shrink the allowed margin
1611 if(mx&7) extra_width -= 3;
1612 if(my&7) extra_height -= 3;
1614 if( full_mx < 0-extra_width
1615 || full_my < 0-extra_height
1616 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1617 || full_my + 16/*FIXME*/ > pic_height + extra_height){
// copy the needed area (plus the 2-pixel filter apron) into the emu buffer
1618 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1619 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1623 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1625 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1628 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1631 // chroma offset when predicting from a field of opposite parity
1632 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1633 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1635 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1636 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1639 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1640 src_cb= s->edge_emu_buffer;
1642 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1645 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1646 src_cr= s->edge_emu_buffer;
1648 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted prediction for one partition: performs list-0 MC with the
 * "put" functions, then (for bidirectional partitions) averages in list-1
 * with the "avg" functions.
 * NOTE(review): the switch from put to avg after the list-0 call and the
 * list0/list1 conditionals are partially elided from this view.
 */
1651 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1652 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1653 int x_offset, int y_offset,
1654 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1655 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1656 int list0, int list1){
1657 MpegEncContext * const s = &h->s;
1658 qpel_mc_func *qpix_op= qpix_put;
1659 h264_chroma_mc_func chroma_op= chroma_put;
// advance destinations to this partition and convert offsets to picture coords
1661 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1662 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1663 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1664 x_offset += 8*s->mb_x;
1665 y_offset += 8*(s->mb_y >> MB_FIELD);
1668 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1669 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1670 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1671 qpix_op, chroma_op);
// second direction averages into the already-written prediction
1674 chroma_op= chroma_avg;
1678 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1679 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1680 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1681 qpix_op, chroma_op);
/**
 * Weighted prediction for one partition.
 * Bidirectional: both references are motion-compensated (list 1 into a
 * scratch buffer), then blended with either implicit weights
 * (use_weight == 2, weight pairs summing to 64) or explicit per-ref
 * luma/chroma weights and offsets. Unidirectional: single MC followed by
 * an in-place weight op.
 */
1685 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1686 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1687 int x_offset, int y_offset,
1688 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1689 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1690 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1691 int list0, int list1){
1692 MpegEncContext * const s = &h->s;
1694 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1695 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1696 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1697 x_offset += 8*s->mb_x;
1698 y_offset += 8*(s->mb_y >> MB_FIELD);
1701 /* don't optimize for luma-only case, since B-frames usually
1702 * use implicit weights => chroma too. */
// list-1 prediction goes into the scratchpad; layout is cb, cr, then y
1703 uint8_t *tmp_cb = s->obmc_scratchpad;
1704 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1705 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1706 int refn0 = h->ref_cache[0][ scan8[n] ];
1707 int refn1 = h->ref_cache[1][ scan8[n] ];
1709 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1710 dest_y, dest_cb, dest_cr,
1711 x_offset, y_offset, qpix_put, chroma_put);
1712 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1713 tmp_y, tmp_cb, tmp_cr,
1714 x_offset, y_offset, qpix_put, chroma_put);
1716 if(h->use_weight == 2){
// implicit weighting: table lookup, complementary weights, no offset
1717 int weight0 = h->implicit_weight[refn0][refn1];
1718 int weight1 = 64 - weight0;
1719 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1720 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1721 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit weighting: per-list weights and summed offsets from the slice header
1723 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1724 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1725 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1726 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1727 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1728 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1729 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1730 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1731 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// unidirectional: predict then weight in place
1734 int list = list1 ? 1 : 0;
1735 int refn = h->ref_cache[list][ scan8[n] ];
1736 Picture *ref= &h->ref_list[list][refn];
1737 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1738 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1739 qpix_put, chroma_put);
1741 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1742 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1743 if(h->use_weight_chroma){
1744 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1745 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1746 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1747 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatcher for one partition: routes to the weighted path when explicit
 * weighting is on (use_weight==1), or when implicit weighting is on and the
 * weight pair is not the trivial 32/32 average; otherwise uses the
 * standard put/avg path.
 */
1752 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1753 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1754 int x_offset, int y_offset,
1755 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1756 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1757 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1758 int list0, int list1){
1759 if((h->use_weight==2 && list0 && list1
// implicit weight 32 means a plain average, which the std path already does
1760 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1761 || h->use_weight==1)
1762 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1763 x_offset, y_offset, qpix_put, chroma_put,
1764 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1766 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1767 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1770 static inline void prefetch_motion(H264Context *h, int list){
1771 /* fetch pixels for estimated mv 4 macroblocks ahead
1772 * optimized for 64byte cache lines */
1773 MpegEncContext * const s = &h->s;
// ref index for this MB; guard on its validity is elided in this view
1774 const int refn = h->ref_cache[list][scan8[0]];
// integer-pel position the next MBs will likely read (current mv + 4 MBs right)
1776 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1777 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1778 uint8_t **src= h->ref_list[list][refn].data;
1779 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1780 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// chroma planes are contiguous: src[2]-src[1] is the cb->cr stride
1781 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1782 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs full motion compensation for one inter macroblock: dispatches
 * to mc_part() per partition according to mb_type (16x16, 16x8, 8x16) or,
 * for 8x8, per sub-partition according to sub_mb_type (8x8, 8x4, 4x8, 4x4).
 * The qpix/chroma/weight function arrays are indexed by partition size.
 * Issues reference prefetches for list 0 before and list 1 after.
 */
1786 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1788 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1789 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1790 MpegEncContext * const s = &h->s;
1791 const int mb_xy= h->mb_xy;
1792 const int mb_type= s->current_picture.mb_type[mb_xy];
1794 assert(IS_INTER(mb_type));
1796 prefetch_motion(h, 0);
1798 if(IS_16X16(mb_type)){
1799 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1800 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1801 &weight_op[0], &weight_avg[0],
1802 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1803 }else if(IS_16X8(mb_type)){
// two 16x8 halves; delta=8 selects the second luma row block
1804 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1805 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1806 &weight_op[1], &weight_avg[1],
1807 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1809 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1810 &weight_op[1], &weight_avg[1],
1811 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1812 }else if(IS_8X16(mb_type)){
// two 8x16 halves; delta is a vertical byte offset of 8 luma rows
1813 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1815 &weight_op[2], &weight_avg[2],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1818 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1819 &weight_op[2], &weight_avg[2],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1824 assert(IS_8X8(mb_type));
// 8x8 mode: each quadrant has its own sub partitioning and directions
1827 const int sub_mb_type= h->sub_mb_type[i];
1829 int x_offset= (i&1)<<2;
1830 int y_offset= (i&2)<<1;
1832 if(IS_SUB_8X8(sub_mb_type)){
1833 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1834 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1835 &weight_op[3], &weight_avg[3],
1836 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1837 }else if(IS_SUB_8X4(sub_mb_type)){
1838 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1840 &weight_op[4], &weight_avg[4],
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1843 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1844 &weight_op[4], &weight_avg[4],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_4X8(sub_mb_type)){
1847 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1849 &weight_op[5], &weight_avg[5],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1852 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1853 &weight_op[5], &weight_avg[5],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1857 assert(IS_SUB_4X4(sub_mb_type));
1859 int sub_x_offset= x_offset + 2*(j&1);
1860 int sub_y_offset= y_offset + (j&2);
1861 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1862 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1863 &weight_op[6], &weight_avg[6],
1864 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1870 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, total_zeros,
 * run, and their chroma-DC variants) into the statically allocated
 * VLC_TYPE arrays declared at file scope, using INIT_VLC_USE_NEW_STATIC.
 * Guarded by the static 'done' flag (its check/set lines are elided here).
 */
1873 static av_cold void decode_init_vlc(void){
1874 static int done = 0;
1881 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1882 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1883 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1884 &chroma_dc_coeff_token_len [0], 1, 1,
1885 &chroma_dc_coeff_token_bits[0], 1, 1,
1886 INIT_VLC_USE_NEW_STATIC);
// the four coeff_token tables share one packed static array, sliced by offset
1890 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1891 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1892 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1893 &coeff_token_len [i][0], 1, 1,
1894 &coeff_token_bits[i][0], 1, 1,
1895 INIT_VLC_USE_NEW_STATIC);
1896 offset += coeff_token_vlc_tables_size[i];
1899 * This is a one time safety check to make sure that
1900 * the packed static coeff_token_vlc table sizes
1901 * were initialized correctly.
1903 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1906 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1907 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1908 init_vlc(&chroma_dc_total_zeros_vlc[i],
1909 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1910 &chroma_dc_total_zeros_len [i][0], 1, 1,
1911 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1912 INIT_VLC_USE_NEW_STATIC);
1914 for(i=0; i<15; i++){
1915 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1916 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1917 init_vlc(&total_zeros_vlc[i],
1918 TOTAL_ZEROS_VLC_BITS, 16,
1919 &total_zeros_len [i][0], 1, 1,
1920 &total_zeros_bits[i][0], 1, 1,
1921 INIT_VLC_USE_NEW_STATIC);
1925 run_vlc[i].table = run_vlc_tables[i];
1926 run_vlc[i].table_allocated = run_vlc_tables_size;
1927 init_vlc(&run_vlc[i],
1929 &run_len [i][0], 1, 1,
1930 &run_bits[i][0], 1, 1,
1931 INIT_VLC_USE_NEW_STATIC);
// run7 covers run_before for zeros_left > 6 (the open-ended table)
1933 run7_vlc.table = run7_vlc_table,
1934 run7_vlc.table_allocated = run7_vlc_table_size;
1935 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1936 &run_len [6][0], 1, 1,
1937 &run_bits[6][0], 1, 1,
1938 INIT_VLC_USE_NEW_STATIC);
/* Release all per-context tables allocated by alloc_tables()/context_init().
 * av_freep() frees and NULLs each pointer, so calling this on a partially
 * allocated context is safe. Per-thread buffers (top_borders,
 * obmc_scratchpad) are freed for every slice-thread context.
 * NOTE(review): listing elided — declarations of i/hx and closing braces
 * are not visible here. */
1942 static void free_tables(H264Context *h){
1945 av_freep(&h->intra4x4_pred_mode);
1946 av_freep(&h->chroma_pred_mode_table);
1947 av_freep(&h->cbp_table);
1948 av_freep(&h->mvd_table[0]);
1949 av_freep(&h->mvd_table[1]);
1950 av_freep(&h->direct_table);
1951 av_freep(&h->non_zero_count);
1952 av_freep(&h->slice_table_base);
     /* slice_table points into slice_table_base (see alloc_tables),
      * so it must not be freed itself — just cleared */
1953 h->slice_table= NULL;
1955 av_freep(&h->mb2b_xy);
1956 av_freep(&h->mb2b8_xy);
     /* per-thread buffers */
1958 for(i = 0; i < h->s.avctx->thread_count; i++) {
1959 hx = h->thread_context[i];
1961 av_freep(&hx->top_borders[1]);
1962 av_freep(&hx->top_borders[0]);
1963 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables (one per qp 0..51) from the PPS
 * scaling matrices. If both 8x8 scaling matrices are identical, the
 * second table aliases the first instead of being recomputed.
 * The `transpose` flag reorders coefficients when a non-C IDCT with a
 * different scan layout is in use.
 * NOTE(review): listing elided — the inner x loop header, `idx`
 * derivation and closing braces are not visible. */
1967 static void init_dequant8_coeff_table(H264Context *h){
1969 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1970 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1971 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1973 for(i=0; i<2; i++ ){
     /* identical matrices => share table 0, skip recomputation */
1974 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1975 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1979 for(q=0; q<52; q++){
1980 int shift = div6[q];
     /* transpose swaps row/column within the 8x8 block when needed */
1983 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1984 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1985 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables (6 matrices x qp 0..51) from the
 * PPS scaling matrices; matrices that compare equal to an earlier one
 * alias that earlier buffer instead of being recomputed.
 * NOTE(review): listing elided — the j search loop, `idx`, the x loop
 * and closing braces are not visible. */
1990 static void init_dequant4_coeff_table(H264Context *h){
1992 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1993 for(i=0; i<6; i++ ){
1994 h->dequant4_coeff[i] = h->dequant4_buffer[i];
     /* reuse buffer j if scaling matrix i duplicates matrix j */
1996 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1997 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2004 for(q=0; q<52; q++){
2005 int shift = div6[q] + 2;
     /* transpose swaps row/column within the 4x4 block when needed */
2008 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2009 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2010 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables: always the 4x4 set, the 8x8 set only
 * when the PPS enables 8x8 transforms. In lossless (transform bypass)
 * mode, qp==0 entries are forced to the neutral scale 1<<6.
 * NOTE(review): listing elided — the i/x loop headers over the qp==0
 * entries are not visible. */
2015 static void init_dequant_tables(H264Context *h){
2017 init_dequant4_coeff_table(h);
2018 if(h->pps.transform_8x8_mode)
2019 init_dequant8_coeff_table(h);
2020 if(h->sps.transform_bypass){
     /* neutral dequant factor so bypass blocks pass through unscaled */
2023 h->dequant4_coeff[i][0][x] = 1<<6;
2024 if(h->pps.transform_8x8_mode)
2027 h->dequant8_coeff[i][0][x] = 1<<6;
2034 * needs width/height
/* Allocate the per-picture decoding tables sized from mb_stride/mb_height
 * (so s->width/height must already be set). Also builds the
 * macroblock-index -> b/b8 motion-vector-index lookup tables and
 * triggers dequant-table init on first use.
 * Returns 0 on success; CHECKED_ALLOCZ presumably jumps to a fail path
 * returning negative — TODO confirm, the fail label is elided here. */
2036 static int alloc_tables(H264Context *h){
2037 MpegEncContext * const s = &h->s;
2038 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2041 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2043 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2044 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2045 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2047 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2048 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2049 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2050 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
     /* slice_table gets an offset view into slice_table_base; the -1 fill
      * marks every entry as "no slice" so out-of-picture neighbours never
      * match a real slice number */
2052 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2053 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2055 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2056 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
     /* precompute mb index -> 4x4 (b) and 8x8 (b8) grid indices */
2057 for(y=0; y<s->mb_height; y++){
2058 for(x=0; x<s->mb_width; x++){
2059 const int mb_xy= x + y*s->mb_stride;
2060 const int b_xy = 4*x + 4*y*h->b_stride;
2061 const int b8_xy= 2*x + 2*y*h->b8_stride;
2063 h->mb2b_xy [mb_xy]= b_xy;
2064 h->mb2b8_xy[mb_xy]= b8_xy;
     /* scratchpad is sized from linesize, which isn't known yet;
      * allocated later in frame_start() */
2068 s->obmc_scratchpad = NULL;
2070 if(!h->dequant4_coeff[0])
2071 init_dequant_tables(h);
2080 * Mimic alloc_tables(), but for every context thread.
/* Share the tables allocated by alloc_tables() into a slice-thread
 * context: plain pointer copies, no ownership transfer (free_tables()
 * on the owner frees them once). Per-thread state (obmc_scratchpad,
 * prediction function table) is set up fresh instead. */
2082 static void clone_tables(H264Context *dst, H264Context *src){
2083 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2084 dst->non_zero_count = src->non_zero_count;
2085 dst->slice_table = src->slice_table;
2086 dst->cbp_table = src->cbp_table;
2087 dst->mb2b_xy = src->mb2b_xy;
2088 dst->mb2b8_xy = src->mb2b8_xy;
2089 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2090 dst->mvd_table[0] = src->mvd_table[0];
2091 dst->mvd_table[1] = src->mvd_table[1];
2092 dst->direct_table = src->direct_table;
     /* per-thread: allocated lazily in frame_start() */
2094 dst->s.obmc_scratchpad = NULL;
2095 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2100 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the buffers that are private to one thread context: the
 * saved top border rows (16 luma + 8+8 chroma bytes per MB column).
 * Returns 0 on success (elided here), -1 on allocation failure;
 * free_tables() cleans up whatever was allocated. */
2102 static int context_init(H264Context *h){
2103 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2108 return -1; // free_tables will clean up for us
/* Context setup shared by the decoder (and, presumably, encoder) init
 * paths: copies dimensions/codec id from avctx, installs the intra
 * prediction functions, and seeds the PPS scaling matrices with the
 * flat default value 16 (no scaling). */
2111 static av_cold void common_init(H264Context *h){
2112 MpegEncContext * const s = &h->s;
2114 s->width = s->avctx->width;
2115 s->height = s->avctx->height;
2116 s->codec_id= s->avctx->codec->id;
2118 ff_h264_pred_init(&h->hpc, s->codec_id);
     /* -1 marks the cached dequant tables as not matching any PPS yet */
2120 h->dequant_coeff_pps= -1;
2121 s->unrestricted_mv=1;
2122 s->decode=1; //FIXME
     /* flat default scaling lists until an SPS/PPS overrides them */
2124 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2125 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: sets up the MpegEncContext defaults, output
 * format, pixel format (full-range YUVJ420P for SVQ3, YUV420P
 * otherwise) and detects AVC-style ("mp4-ish", length-prefixed)
 * extradata by its leading 0x01 version byte.
 * NOTE(review): listing elided — the common_init() call, the
 * is_avc/NAL-length handling and the return are not visible. */
2128 static av_cold int decode_init(AVCodecContext *avctx){
2129 H264Context *h= avctx->priv_data;
2130 MpegEncContext * const s = &h->s;
2132 MPV_decode_defaults(s);
2137 s->out_format = FMT_H264;
2138 s->workaround_bugs= avctx->workaround_bugs;
2141 // s->decode_mb= ff_h263_decode_mb;
2142 s->quarter_sample = 1;
2145 if(avctx->codec_id == CODEC_ID_SVQ3)
2146 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2148 avctx->pix_fmt= PIX_FMT_YUV420P;
     /* AVCDecoderConfigurationRecord starts with configurationVersion==1 */
2152 if(avctx->extradata_size > 0 && avctx->extradata &&
2153 *(char *)avctx->extradata == 1){
2160 h->thread_context[0] = h;
     /* sentinel: nothing has been output yet */
2161 h->outputed_poc = INT_MIN;
2162 h->prev_poc_msb= 1<<16;
/* Begin decoding a new frame: runs MPV_frame_start()/error-resilience
 * setup, precomputes the per-block destination offsets for both
 * progressive (stride) and field (2*stride) access, allocates the
 * per-thread scratchpad now that linesize is known, and resets
 * picture reference/POC bookkeeping.
 * Returns negative on MPV_frame_start() failure (elided), 0 otherwise. */
2166 static int frame_start(H264Context *h){
2167 MpegEncContext * const s = &h->s;
2170 if(MPV_frame_start(s, s->avctx) < 0)
2172 ff_er_frame_start(s);
2174 * MPV_frame_start uses pict_type to derive key_frame.
2175 * This is incorrect for H.264; IDR markings must be used.
2176 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2177 * See decode_nal_units().
2179 s->current_picture_ptr->key_frame= 0;
2181 assert(s->linesize && s->uvlinesize);
     /* block_offset[0..15]: luma 4x4 offsets at frame stride;
      * block_offset[24..39]: same blocks at doubled (field) stride */
2183 for(i=0; i<16; i++){
2184 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2185 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     /* chroma offsets: [16..23] for cb/cr at frame stride, +24 variants
      * for field stride */
2188 h->block_offset[16+i]=
2189 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2190 h->block_offset[24+16+i]=
2191 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2194 /* can't be in alloc_tables because linesize isn't known there.
2195 * FIXME: redo bipred weight to not require extra buffer? */
2196 for(i = 0; i < s->avctx->thread_count; i++)
2197 if(!h->thread_context[i]->s.obmc_scratchpad)
2198 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2200 /* some macroblocks will be accessed before they're available */
2201 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2202 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2204 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2206 // We mark the current picture as non-reference after allocating it, so
2207 // that if we break out due to an error it can be released automatically
2208 // in the next MPV_frame_start().
2209 // SVQ3 as well as most other codecs have only last/next/current and thus
2210 // get released even with set reference, besides SVQ3 and others do not
2211 // mark frames as reference later "naturally".
2212 if(s->codec_id != CODEC_ID_SVQ3)
2213 s->current_picture_ptr->reference= 0;
2215 s->current_picture_ptr->field_poc[0]=
2216 s->current_picture_ptr->field_poc[1]= INT_MAX;
2217 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row (into top_borders, for the MB row below) and the
 * right column (into left_border, for the next MB) of the just-decoded
 * macroblock, before the deblocking filter overwrites those pixels.
 * MBAFF mode uses different offsets/strides and keeps two saved lines
 * per MB pair. The `simple` flag compiles out the MBAFF path.
 * NOTE(review): listing elided — variable declarations, the MBAFF
 * early-return/else structure and closing braces are not visible. */
2222 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2223 MpegEncContext * const s = &h->s;
2232 src_cb -= uvlinesize;
2233 src_cr -= uvlinesize;
2235 if(!simple && FRAME_MBAFF){
     /* MBAFF: top/bottom MB of a pair store into different halves */
2237 offset = MB_MBAFF ? 1 : 17;
2238 uvoffset= MB_MBAFF ? 1 : 9;
2240 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2242 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2243 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2249 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2250 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2251 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2252 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2258 top_idx = MB_MBAFF ? 0 : 1;
2260 step= MB_MBAFF ? 2 : 1;
2263 // There are two lines saved, the line above the top macroblock of a pair,
2264 // and the line above the bottom macroblock
2265 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2266 for(i=1; i<17 - skiplast; i++){
2267 h->left_border[offset+i*step]= src_y[15+i* linesize];
2270 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
     /* chroma borders, skipped in gray-only decoding */
2273 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2274 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2275 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2276 for(i=1; i<9 - skiplast; i++){
2277 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2278 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2280 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0, presumably — only the swap macro is
 * visible here, TODO confirm) the saved left/top border pixels with the
 * current macroblock edge, so intra prediction sees unfiltered
 * neighbours while deblocking has already run on them. deblock_left/top
 * decide whether a neighbour exists; with deblocking_filter==2 the
 * neighbour must also be in the same slice.
 * NOTE(review): listing elided — declarations, the XCHG macro body and
 * several branch/brace lines are not visible. */
2285 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2286 MpegEncContext * const s = &h->s;
2297 if(!simple && FRAME_MBAFF){
2299 offset = MB_MBAFF ? 1 : 17;
2300 uvoffset= MB_MBAFF ? 1 : 9;
2304 top_idx = MB_MBAFF ? 0 : 1;
2306 step= MB_MBAFF ? 2 : 1;
     /* deblocking_filter==2: filter only within the current slice */
2309 if(h->deblocking_filter == 2) {
2311 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2312 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2314 deblock_left = (s->mb_x > 0);
2315 deblock_top = (s->mb_y > !!MB_FIELD);
     /* step back to the top-left neighbour pixel */
2318 src_y -= linesize + 1;
2319 src_cb -= uvlinesize + 1;
2320 src_cr -= uvlinesize + 1;
2322 #define XCHG(a,b,t,xchg)\
     /* left luma column */
2329 for(i = !deblock_top; i<16; i++){
2330 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2332 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
     /* top luma row, plus the top-right corner of the next MB */
2336 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2338 if(s->mb_x+1 < s->mb_width){
2339 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
     /* chroma borders, skipped in gray-only decoding */
2343 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2345 for(i = !deblock_top; i<8; i++){
2346 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2347 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2349 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2350 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2353 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: intra PCM copy,
 * intra prediction (4x4/8x8/16x16 luma plus 8x8 chroma), or inter
 * motion compensation via hl_motion(), followed by residual IDCT-add
 * and finally the deblocking filter. `simple` is a compile-time flag
 * (av_always_inline) that removes the MBAFF/gray/SVQ3/bypass special
 * cases for the common progressive-H.264 path.
 * NOTE(review): listing heavily elided — many branch/brace lines,
 * declarations and else-arms are missing; comments below describe only
 * what the visible lines establish. */
2359 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2360 MpegEncContext * const s = &h->s;
2361 const int mb_x= s->mb_x;
2362 const int mb_y= s->mb_y;
2363 const int mb_xy= h->mb_xy;
2364 const int mb_type= s->current_picture.mb_type[mb_xy];
2365 uint8_t *dest_y, *dest_cb, *dest_cr;
2366 int linesize, uvlinesize /*dct_offset*/;
2368 int *block_offset = &h->block_offset[0];
2369 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2370 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2371 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2372 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2374 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2375 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2376 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2378 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2379 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
     /* field macroblock: double strides, use the field block offsets,
      * and for the bottom field start one line up within the pair */
2381 if (!simple && MB_FIELD) {
2382 linesize = h->mb_linesize = s->linesize * 2;
2383 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2384 block_offset = &h->block_offset[24];
2385 if(mb_y&1){ //FIXME move out of this function?
2386 dest_y -= s->linesize*15;
2387 dest_cb-= s->uvlinesize*7;
2388 dest_cr-= s->uvlinesize*7;
     /* remap ref_cache entries for field parity (MBAFF) */
2392 for(list=0; list<h->list_count; list++){
2393 if(!USES_LIST(mb_type, list))
2395 if(IS_16X16(mb_type)){
2396 int8_t *ref = &h->ref_cache[list][scan8[0]];
2397 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2399 for(i=0; i<16; i+=4){
2400 int ref = h->ref_cache[list][scan8[i]];
2402 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2408 linesize = h->mb_linesize = s->linesize;
2409 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2410 // dct_offset = s->linesize * 16;
     /* choose the residual add functions once, per transform size */
2413 if(transform_bypass){
2415 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2416 }else if(IS_8x8DCT(mb_type)){
2417 idct_dc_add = s->dsp.h264_idct8_dc_add;
2418 idct_add = s->dsp.h264_idct8_add;
2420 idct_dc_add = s->dsp.h264_idct_dc_add;
2421 idct_add = s->dsp.h264_idct_add;
     /* PCM macroblock: raw samples stored in h->mb, copied verbatim */
2424 if (!simple && IS_INTRA_PCM(mb_type)) {
2425 for (i=0; i<16; i++) {
2426 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2428 for (i=0; i<8; i++) {
2429 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2430 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2433 if(IS_INTRA(mb_type)){
     /* intra needs unfiltered neighbours: swap deblocked borders out */
2434 if(h->deblocking_filter)
2435 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2437 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2438 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2439 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2442 if(IS_INTRA4x4(mb_type)){
2443 if(simple || !s->encoding){
2444 if(IS_8x8DCT(mb_type)){
2445 for(i=0; i<16; i+=4){
2446 uint8_t * const ptr= dest_y + block_offset[i];
2447 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
     /* profile 244 (High 4:4:4) lossless: fused predict+add */
2448 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2449 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2451 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2452 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2453 (h->topright_samples_available<<i)&0x4000, linesize);
     /* nnz==1 with only the DC coeff set: cheap DC-only add */
2455 if(nnz == 1 && h->mb[i*16])
2456 idct_dc_add(ptr, h->mb + i*16, linesize);
2458 idct_add (ptr, h->mb + i*16, linesize);
2463 for(i=0; i<16; i++){
2464 uint8_t * const ptr= dest_y + block_offset[i];
2465 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2467 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2468 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
     /* modes reading top-right samples: synthesize them by
      * replicating the last available top pixel when absent */
2472 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2473 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2474 assert(mb_y || linesize <= block_offset[i]);
2475 if(!topright_avail){
2476 tr= ptr[3 - linesize]*0x01010101;
2477 topright= (uint8_t*) &tr;
2479 topright= ptr + 4 - linesize;
2483 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2484 nnz = h->non_zero_count_cache[ scan8[i] ];
2487 if(nnz == 1 && h->mb[i*16])
2488 idct_dc_add(ptr, h->mb + i*16, linesize);
2490 idct_add (ptr, h->mb + i*16, linesize);
     /* non-H.264 (SVQ3) residual path */
2492 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2498 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2500 if(!transform_bypass)
2501 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2503 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2505 if(h->deblocking_filter)
2506 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
     /* inter macroblock: motion compensation */
2508 hl_motion(h, dest_y, dest_cb, dest_cr,
2509 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2510 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2511 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
     /* luma residuals (skipped for intra4x4, already added above) */
2515 if(!IS_INTRA4x4(mb_type)){
2517 if(IS_INTRA16x16(mb_type)){
2518 if(transform_bypass){
2519 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2520 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2522 for(i=0; i<16; i++){
2523 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2524 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2528 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2530 }else if(h->cbp&15){
2531 if(transform_bypass){
2532 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2533 for(i=0; i<16; i+=di){
2534 int nnz = h->non_zero_count_cache[ scan8[i] ];
2536 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2540 if(IS_8x8DCT(mb_type)){
2541 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2543 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2548 for(i=0; i<16; i++){
2549 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2550 uint8_t * const ptr= dest_y + block_offset[i];
2551 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
     /* chroma residuals, gated on cbp bits 4-5 */
2557 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2558 uint8_t *dest[2] = {dest_cb, dest_cr};
2559 if(transform_bypass){
2560 idct_add = idct_dc_add = s->dsp.add_pixels4;
2562 idct_add = s->dsp.h264_idct_add;
2563 idct_dc_add = s->dsp.h264_idct_dc_add;
     /* separate chroma qp/dequant per plane (cb uses 1/4, cr 2/5) */
2564 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2565 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2568 if(transform_bypass && IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2569 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2570 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2572 for(i=16; i<16+8; i++){
2573 if(h->non_zero_count_cache[ scan8[i] ])
2574 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2575 else if(h->mb[i*16])
2576 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2580 for(i=16; i<16+8; i++){
2581 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2582 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2583 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
     /* deblock: save borders first, then refresh caches/qp and filter */
2589 if(h->deblocking_filter) {
2590 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2591 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2592 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2593 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2594 if (!simple && FRAME_MBAFF) {
2595 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2597 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2603 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal() with simple=1, so the always-inline
 * body is specialized without MBAFF/gray/SVQ3/bypass checks. */
2605 static void hl_decode_mb_simple(H264Context *h){
2606 hl_decode_mb_internal(h, 1);
2610 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: simple=0 specialization keeping every edge case; av_noinline
 * keeps this rarely-taken code out of the caller. */
2612 static void av_noinline hl_decode_mb_complex(H264Context *h){
2613 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex
 * specialization, based on per-stream complexity, PCM macroblocks and
 * lossless (qscale==0) mode. ENABLE_SMALL builds always take the
 * complex path to avoid duplicating the inlined body. */
2616 static void hl_decode_mb(H264Context *h){
2617 MpegEncContext * const s = &h->s;
2618 const int mb_xy= h->mb_xy;
2619 const int mb_type= s->current_picture.mb_type[mb_xy];
2620 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
     /* encoder-only contexts skip reconstruction entirely */
2622 if(ENABLE_H264_ENCODER && !s->decode)
2626 hl_decode_mb_complex(h);
2627 else hl_decode_mb_simple(h);
/* Convert a frame Picture into a single-field view in place: doubles
 * the plane linesizes, offsets the data pointers by one line for the
 * bottom field, restricts `reference` to the given parity and picks the
 * matching field POC. */
2630 static void pic_as_field(Picture *pic, const int parity){
2632 for (i = 0; i < 4; ++i) {
2634 pic->data[i] += pic->linesize[i];
2635 pic->reference = parity;
2636 pic->linesize[i] *= 2;
2638 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy `src` into `dest` if its reference marking matches `parity`
 * (bitwise test), converting it to a field picture when parity is a
 * single field and bumping pic_id by id_add. Returns non-zero on match.
 * NOTE(review): the `if(match)` guard and copy line are elided here. */
2641 static int split_field_copy(Picture *dest, Picture *src,
2642 int parity, int id_add){
2643 int match = !!(src->reference & parity);
2647 if(parity != PICT_FRAME){
2648 pic_as_field(dest, parity);
2650 dest->pic_id += id_add;
/* Build part of a default reference list from `in`, interleaving
 * pictures whose reference marking matches `sel` with those matching
 * the opposite parity (sel^3), per the H.264 field-list construction.
 * pic_id becomes the long-term index (is_long) or frame_num.
 * Returns the number of entries written (return elided here). */
2657 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2661 while(i[0]<len || i[1]<len){
     /* advance each cursor past entries of the wrong parity */
2662 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2664 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2667 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2668 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2671 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2672 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Append pictures from `src` to `sorted`, ordered by POC relative to
 * `limit`: descending POCs below the limit when dir==0, ascending POCs
 * above it when dir==1 (selection sort, one best pick per pass).
 * Returns the number of entries appended (return line elided). */
2679 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2684 best_poc= dir ? INT_MIN : INT_MAX;
2686 for(i=0; i<len; i++){
2687 const int poc= src[i]->poc;
     /* the ^dir trick flips both comparisons for ascending order */
2688 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2690 sorted[out_i]= src[i];
2693 if(best_poc == (dir ? INT_MIN : INT_MAX))
2695 limit= sorted[out_i++]->poc - dir;
2701 * fills the default_ref_list.
/* Build the default (pre-reordering) reference lists. For B slices,
 * short-term refs are split around the current POC (past-first for
 * list0, future-first for list1) and followed by long-term refs; if the
 * two lists come out identical their first two entries are swapped in
 * list1, as the spec requires. P slices get a single list. Unused
 * slots are zeroed. Return value elided here.
 * NOTE(review): declarations (lens, cur_poc, list, len) and several
 * brace lines are missing from this listing. */
2703 static int fill_default_ref_list(H264Context *h){
2704 MpegEncContext * const s = &h->s;
2707 if(h->slice_type_nos==FF_B_TYPE){
2708 Picture *sorted[32];
     /* field pictures compare against the current field's POC */
2713 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2715 cur_poc= s->current_picture_ptr->poc;
2717 for(list= 0; list<2; list++){
2718 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2719 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2721 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2722 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2725 if(len < h->ref_count[list])
2726 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
     /* spec: identical lists => swap the first two entries of list1 */
2730 if(lens[0] == lens[1] && lens[1] > 1){
2731 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2733 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
     /* P/SP slices: short-term refs then long-term refs, list0 only */
2736 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2737 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2739 if(len < h->ref_count[0])
2740 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2743 for (i=0; i<h->ref_count[0]; i++) {
2744 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2746 if(h->slice_type_nos==FF_B_TYPE){
2747 for (i=0; i<h->ref_count[1]; i++) {
2748 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2755 static void print_short_term(H264Context *h);
2756 static void print_long_term(H264Context *h);
2759 * Extract structure information about the picture described by pic_num in
2760 * the current decoding context (frame or field). Note that pic_num is
2761 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2762 * @param pic_num picture number for which to extract structure information
2763 * @param structure one of PICT_XXX describing structure of picture
2765 * @return frame number (short term) or long term index of picture
2766 * described by pic_num
/* Split a pic_num into its picture structure and frame number / long
 * term index. Defaults to the current picture structure; the elided
 * condition (presumably the pic_num parity in field coding — TODO
 * confirm) flips it to the opposite field. Return line elided. */
2768 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2769 MpegEncContext * const s = &h->s;
2771 *structure = s->picture_structure;
2774 /* opposite field */
2775 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * starts from the default lists, then per reordering_of_pic_nums_idc
 * locates the requested short-term (idc 0/1, by abs_diff_pic_num) or
 * long-term (idc 2, by long_term_pic_idx) picture, rotates it to the
 * current index, and finally replaces still-missing entries with the
 * current picture as a last resort. Returns 0 on success, -1 on
 * bitstream errors (several return/brace lines elided). */
2782 static int decode_ref_pic_list_reordering(H264Context *h){
2783 MpegEncContext * const s = &h->s;
2784 int list, index, pic_structure;
2786 print_short_term(h);
2789 for(list=0; list<h->list_count; list++){
2790 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
     /* ref_pic_list_reordering_flag */
2792 if(get_bits1(&s->gb)){
2793 int pred= h->curr_pic_num;
2795 for(index=0; ; index++){
2796 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2797 unsigned int pic_id;
2799 Picture *ref = NULL;
     /* idc 3 terminates the reordering loop */
2801 if(reordering_of_pic_nums_idc==3)
2804 if(index >= h->ref_count[list]){
2805 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2809 if(reordering_of_pic_nums_idc<3){
2810 if(reordering_of_pic_nums_idc<2){
     /* short-term: pred walks by +-abs_diff_pic_num, modulo max_pic_num */
2811 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2814 if(abs_diff_pic_num > h->max_pic_num){
2815 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2819 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2820 else pred+= abs_diff_pic_num;
2821 pred &= h->max_pic_num - 1;
2823 frame_num = pic_num_extract(h, pred, &pic_structure);
2825 for(i= h->short_ref_count-1; i>=0; i--){
2826 ref = h->short_ref[i];
2827 assert(ref->reference);
2828 assert(!ref->long_ref);
2830 ref->frame_num == frame_num &&
2831 (ref->reference & pic_structure)
     /* long-term (idc==2): look up by long-term index */
2839 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2841 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2844 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2847 ref = h->long_ref[long_idx];
2848 assert(!(ref && !ref->reference));
2849 if(ref && (ref->reference & pic_structure)){
2850 ref->pic_id= pic_id;
2851 assert(ref->long_ref);
2859 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2860 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
     /* rotate: shift entries down to `index`, insert ref there */
2862 for(i=index; i+1<h->ref_count[list]; i++){
2863 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2866 for(; i > index; i--){
2867 h->ref_list[list][i]= h->ref_list[list][i-1];
2869 h->ref_list[list][index]= *ref;
2871 pic_as_field(&h->ref_list[list][index], pic_structure);
2875 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
     /* final sanity pass: no list entry may be left without data */
2881 for(list=0; list<h->list_count; list++){
2882 for(index= 0; index < h->ref_count[list]; index++){
2883 if(!h->ref_list[list][index].data[0]){
2884 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2885 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF decoding, derive per-field reference entries from each
 * frame reference: entries [16+2*i] (top) and [16+2*i+1] (bottom) get
 * halved height via doubled linesize, parity-specific data pointers,
 * reference marking and POC. Weight/offset tables are duplicated for
 * the field entries as well.
 * NOTE(review): loop/brace lines and the j loops' headers are elided. */
2893 static void fill_mbaff_ref_list(H264Context *h){
2895 for(list=0; list<2; list++){ //FIXME try list_count
2896 for(i=0; i<h->ref_count[list]; i++){
2897 Picture *frame = &h->ref_list[list][i];
2898 Picture *field = &h->ref_list[list][16+2*i];
2901 field[0].linesize[j] <<= 1;
2902 field[0].reference = PICT_TOP_FIELD;
2903 field[0].poc= field[0].field_poc[0];
2904 field[1] = field[0];
     /* bottom field starts one (frame) line below the top field */
2906 field[1].data[j] += frame->linesize[j];
2907 field[1].reference = PICT_BOTTOM_FIELD;
2908 field[1].poc= field[1].field_poc[1];
2910 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2911 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2913 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2914 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
     /* implicit-weight table expanded the same way for both axes */
2918 for(j=0; j<h->ref_count[1]; j++){
2919 for(i=0; i<h->ref_count[0]; i++)
2920 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2921 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2922 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header: explicit luma and
 * chroma weights/offsets per reference, falling back to the default
 * weight (1<<log2_denom) and offset 0 when a per-ref flag is absent.
 * use_weight/use_weight_chroma are set as soon as any non-default value
 * appears. Only list 0 is read for non-B slices.
 * NOTE(review): declarations, the use_weight init and j-loop headers
 * are elided from this listing. */
2926 static int pred_weight_table(H264Context *h){
2927 MpegEncContext * const s = &h->s;
2929 int luma_def, chroma_def;
2932 h->use_weight_chroma= 0;
2933 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2934 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2935 luma_def = 1<<h->luma_log2_weight_denom;
2936 chroma_def = 1<<h->chroma_log2_weight_denom;
2938 for(list=0; list<2; list++){
2939 for(i=0; i<h->ref_count[list]; i++){
2940 int luma_weight_flag, chroma_weight_flag;
2942 luma_weight_flag= get_bits1(&s->gb);
2943 if(luma_weight_flag){
2944 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2945 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2946 if( h->luma_weight[list][i] != luma_def
2947 || h->luma_offset[list][i] != 0)
2950 h->luma_weight[list][i]= luma_def;
2951 h->luma_offset[list][i]= 0;
2955 chroma_weight_flag= get_bits1(&s->gb);
2956 if(chroma_weight_flag){
2959 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2960 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2961 if( h->chroma_weight[list][i][j] != chroma_def
2962 || h->chroma_offset[list][i][j] != 0)
2963 h->use_weight_chroma= 1;
2968 h->chroma_weight[list][i][j]= chroma_def;
2969 h->chroma_offset[list][i][j]= 0;
     /* only B slices have a second weighted list */
2974 if(h->slice_type_nos != FF_B_TYPE) break;
2976 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute implicit bi-prediction weights from POC distances
 * (H.264 8.4.2.3.2): w1 = dist_scale_factor, w0 = 64 - w1, clamped to
 * 32/32 when the scale factor leaves [-64,128] or td==0 (elided
 * branch). The single-ref symmetric case short-circuits to unweighted
 * prediction. NOTE(review): several branch/brace lines are elided. */
2980 static void implicit_weight_table(H264Context *h){
2981 MpegEncContext * const s = &h->s;
2983 int cur_poc = s->current_picture_ptr->poc;
     /* refs equidistant around current POC => weights are pointless */
2985 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2986 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2988 h->use_weight_chroma= 0;
2993 h->use_weight_chroma= 2;
2994 h->luma_log2_weight_denom= 5;
2995 h->chroma_log2_weight_denom= 5;
2997 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
2998 int poc0 = h->ref_list[0][ref0].poc;
2999 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3000 int poc1 = h->ref_list[1][ref1].poc;
3001 int td = av_clip(poc1 - poc0, -128, 127);
3003 int tb = av_clip(cur_poc - poc0, -128, 127);
3004 int tx = (16384 + (FFABS(td) >> 1)) / td;
3005 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3006 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3007 h->implicit_weight[ref0][ref1] = 32;
3009 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3011 h->implicit_weight[ref0][ref1] = 32;
3017 * Mark a picture as no longer needed for reference. The refmask
3018 * argument allows unreferencing of individual fields or the whole frame.
3019 * If the picture becomes entirely unreferenced, but is being held for
3020 * display purposes, it is marked as such.
3021 * @param refmask mask of fields to unreference; the mask is bitwise
3022 * anded with the reference marking of pic
3023 * @return non-zero if pic becomes entirely unreferenced (except possibly
3024 * for display purposes) zero if one of the fields remains in
/* Clear reference flags on pic per refmask; if the picture is still queued
 * for display (present in delayed_pic[]), keep it alive by marking it
 * DELAYED_PIC_REF instead of fully releasing it.
 * NOTE(review): return paths are missing from this extracted view. */
3027 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* mask out the unreferenced field(s); non-zero means a field remains */
3029 if (pic->reference &= refmask) {
3032 for(i = 0; h->delayed_pic[i]; i++)
3033 if(pic == h->delayed_pic[i]){
3034 pic->reference=DELAYED_PIC_REF;
3042 * instantaneous decoder refresh.
/* Handle an IDR: drop all long-term and short-term references and reset
 * the frame-number prediction state. */
3044 static void idr(H264Context *h){
/* release every long-term slot (mask 0 = unreference both fields) */
3047 for(i=0; i<16; i++){
3048 remove_long(h, i, 0);
3050 assert(h->long_ref_count==0);
3052 for(i=0; i<h->short_ref_count; i++){
3053 unreference_pic(h, h->short_ref[i], 0);
3054 h->short_ref[i]= NULL;
3056 h->short_ref_count=0;
3057 h->prev_frame_num= 0;
3058 h->prev_frame_num_offset= 0;
3063 /* forget old pics after a seek */
/* Drop all buffered/delayed pictures after a seek and reset output state,
 * then delegate the generic buffer flush to ff_mpeg_flush(). */
3064 static void flush_dpb(AVCodecContext *avctx){
3065 H264Context *h= avctx->priv_data;
3067 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3068 if(h->delayed_pic[i])
3069 h->delayed_pic[i]->reference= 0;
3070 h->delayed_pic[i]= NULL;
/* INT_MIN marks "nothing output yet" for the POC ordering logic */
3072 h->outputed_poc= INT_MIN;
3074 if(h->s.current_picture_ptr)
3075 h->s.current_picture_ptr->reference= 0;
3076 h->s.first_field= 0;
3077 ff_mpeg_flush(avctx);
3081 * Find a Picture in the short term reference list by frame number.
3082 * @param frame_num frame number to search for
3083 * @param idx the index into h->short_ref where returned picture is found
3084 * undefined if no picture found.
3085 * @return pointer to the found picture, or NULL if no pic with the provided
3086 * frame number is found
3088 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3089 MpegEncContext * const s = &h->s;
3092 for(i=0; i<h->short_ref_count; i++){
3093 Picture *pic= h->short_ref[i];
3094 if(s->avctx->debug&FF_DEBUG_MMCO)
3095 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3096 if(pic->frame_num == frame_num) {
3105 * Remove a picture from the short term reference list by its index in
3106 * that list. This does no checking on the provided index; it is assumed
3107 * to be valid. Other list entries are shifted down.
3108 * @param i index into h->short_ref of picture to remove.
/* Remove short_ref[i] (caller guarantees i is valid) and close the gap by
 * shifting the remaining entries down; the count is decremented first so
 * (short_ref_count - i) is the number of trailing entries to move. */
3110 static void remove_short_at_index(H264Context *h, int i){
3111 assert(i >= 0 && i < h->short_ref_count);
3112 h->short_ref[i]= NULL;
3113 if (--h->short_ref_count)
3114 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3119 * @return the removed picture or NULL if an error occurs
/* Locate a short-term reference by frame_num, unreference it per ref_mask,
 * and drop its list slot if it became fully unreferenced.
 * NOTE(review): the NULL-check/return tail is missing from this view. */
3121 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3122 MpegEncContext * const s = &h->s;
3126 if(s->avctx->debug&FF_DEBUG_MMCO)
3127 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3129 pic = find_short(h, frame_num, &i);
/* only shrink the list if no field of the picture stays referenced */
3131 if(unreference_pic(h, pic, ref_mask))
3132 remove_short_at_index(h, i);
3139 * Remove a picture from the long term reference list by its index in
3141 * @return the removed picture or NULL if an error occurs
/* Unreference long_ref[i] per ref_mask; if it becomes fully unreferenced,
 * clear its long_ref flag, free the slot and update the count.
 * NOTE(review): the return tail is missing from this extracted view. */
3143 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3146 pic= h->long_ref[i];
3148 if(unreference_pic(h, pic, ref_mask)){
3149 assert(h->long_ref[i]->long_ref == 1);
3150 h->long_ref[i]->long_ref= 0;
3151 h->long_ref[i]= NULL;
3152 h->long_ref_count--;
3160 * print short term list
/* Debug helper: dump the short-term reference list when FF_DEBUG_MMCO
 * logging is enabled; no effect otherwise. */
3162 static void print_short_term(H264Context *h) {
3164 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3165 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3166 for(i=0; i<h->short_ref_count; i++){
3167 Picture *pic= h->short_ref[i];
3168 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3174 * print long term list
/* Debug helper: dump all 16 long-term reference slots when FF_DEBUG_MMCO
 * logging is enabled (the NULL-slot guard line is outside this view). */
3176 static void print_long_term(H264Context *h) {
3178 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3179 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3180 for(i = 0; i < 16; i++){
3181 Picture *pic= h->long_ref[i];
3183 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3190 * Executes the reference picture marking (memory management control operations).
/*
 * Apply the decoded MMCO (memory management control operation) list to the
 * short-term and long-term reference sets, per H.264 spec section 8.2.5.
 * With mmco_count==0 the sliding-window default marking applies.
 * NOTE(review): interior lines (break statements, else-branches, closing
 * braces) are missing from this extracted view — see the full file.
 */
3192 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3193 MpegEncContext * const s = &h->s;
3195 int current_ref_assigned=0;
3198 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3199 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3201 for(i=0; i<mmco_count; i++){
3202 int structure, frame_num;
3203 if(s->avctx->debug&FF_DEBUG_MMCO)
3204 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* ops targeting a short-term picture: resolve pic_num to a list entry */
3206 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3207 || mmco[i].opcode == MMCO_SHORT2LONG){
3208 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3209 pic = find_short(h, frame_num, &j);
3211 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3212 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3213 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3218 switch(mmco[i].opcode){
3219 case MMCO_SHORT2UNUSED:
3220 if(s->avctx->debug&FF_DEBUG_MMCO)
3221 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME == mask of the field(s) to unreference */
3222 remove_short(h, frame_num, structure ^ PICT_FRAME);
3224 case MMCO_SHORT2LONG:
/* evict whatever currently occupies the target long-term slot */
3225 if (h->long_ref[mmco[i].long_arg] != pic)
3226 remove_long(h, mmco[i].long_arg, 0);
3228 remove_short_at_index(h, j);
3229 h->long_ref[ mmco[i].long_arg ]= pic;
3230 if (h->long_ref[ mmco[i].long_arg ]){
3231 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3232 h->long_ref_count++;
3235 case MMCO_LONG2UNUSED:
3236 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3237 pic = h->long_ref[j];
3239 remove_long(h, j, structure ^ PICT_FRAME);
3240 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3241 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3244 // Comment below left from previous code as it is an interesting note.
3245 /* First field in pair is in short term list or
3246 * at a different long term index.
3247 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3248 * Report the problem and keep the pair where it is,
3249 * and mark this field valid.
/* MMCO_LONG: mark the current picture long-term at long_arg */
3252 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3253 remove_long(h, mmco[i].long_arg, 0);
3255 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3256 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3257 h->long_ref_count++;
3260 s->current_picture_ptr->reference |= s->picture_structure;
3261 current_ref_assigned=1;
3263 case MMCO_SET_MAX_LONG:
3264 assert(mmco[i].long_arg <= 16);
3265 // just remove the long term which index is greater than new max
3266 for(j = mmco[i].long_arg; j<16; j++){
3267 remove_long(h, j, 0);
/* MMCO_RESET path: drop every reference and rebase POC/frame_num */
3271 while(h->short_ref_count){
3272 remove_short(h, h->short_ref[0]->frame_num, 0);
3274 for(j = 0; j < 16; j++) {
3275 remove_long(h, j, 0);
3277 s->current_picture_ptr->poc=
3278 s->current_picture_ptr->field_poc[0]=
3279 s->current_picture_ptr->field_poc[1]=
3283 s->current_picture_ptr->frame_num= 0;
3289 if (!current_ref_assigned) {
3290 /* Second field of complementary field pair; the first field of
3291 * which is already referenced. If short referenced, it
3292 * should be first entry in short_ref. If not, it must exist
3293 * in long_ref; trying to put it on the short list here is an
3294 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3296 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3297 /* Just mark the second field valid */
3298 s->current_picture_ptr->reference = PICT_FRAME;
3299 } else if (s->current_picture_ptr->long_ref) {
3300 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3301 "assignment for second field "
3302 "in complementary field pair "
3303 "(first field is long term)\n");
3305 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3307 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* default sliding-window insertion at the head of short_ref */
3310 if(h->short_ref_count)
3311 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3313 h->short_ref[0]= s->current_picture_ptr;
3314 h->short_ref_count++;
3315 s->current_picture_ptr->reference |= s->picture_structure;
3319 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3321 /* We have too many reference frames, probably due to corrupted
3322 * stream. Need to discard one frame. Prevents overrun of the
3323 * short_ref and long_ref buffers.
3325 av_log(h->s.avctx, AV_LOG_ERROR,
3326 "number of reference frames exceeds max (probably "
3327 "corrupt input), discarding one\n");
3329 if (h->long_ref_count && !h->short_ref_count) {
3330 for (i = 0; i < 16; ++i)
3335 remove_long(h, i, 0);
3337 pic = h->short_ref[h->short_ref_count - 1];
3338 remove_short(h, pic->frame_num, 0);
3342 print_short_term(h);
/*
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[]:
 * IDR handling, the adaptive MMCO op list, or the implicit sliding-window
 * operation synthesized when the reference buffer is full.
 * NOTE(review): interior lines (returns, mmco_count bookkeeping, closing
 * braces) are missing from this extracted view.
 */
3347 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3348 MpegEncContext * const s = &h->s;
3352 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; broken_link becomes 0 or -1 */
3353 s->broken_link= get_bits1(gb) -1;
3355 h->mmco[0].opcode= MMCO_LONG;
3356 h->mmco[0].long_arg= 0;
3360 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3361 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3362 MMCOOpcode opcode= get_ue_golomb(gb);
3364 h->mmco[i].opcode= opcode;
3365 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num (mod max_pic_num) */
3366 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3367 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3368 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3372 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3373 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures allow indices up to 31 only for LONG2UNUSED */
3374 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3375 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3378 h->mmco[i].long_arg= long_arg;
3381 if(opcode > (unsigned)MMCO_LONG){
3382 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3385 if(opcode == MMCO_END)
3390 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* sliding window: synthesize removal of the oldest short-term ref */
3392 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3393 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3394 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3395 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3397 if (FIELD_PICTURE) {
/* field coding: remove both fields (pic nums 2n and 2n+1) */
3398 h->mmco[0].short_pic_num *= 2;
3399 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3400 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/*
 * Compute the picture order count (POC) for the current picture using the
 * SPS poc_type (0: MSB/LSB wrap tracking, 1: expected-delta cycles,
 * 2: derived from frame_num), per H.264 spec section 8.2.1.
 * NOTE(review): interior lines (field_poc[0] assignment for type 0, parts
 * of the type-2 branch, return) are missing from this extracted view.
 */
3410 static int init_poc(H264Context *h){
3411 MpegEncContext * const s = &h->s;
3412 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3414 Picture *cur = s->current_picture_ptr;
3416 h->frame_num_offset= h->prev_frame_num_offset;
/* frame_num wrapped around -> bump the offset by one period */
3417 if(h->frame_num < h->prev_frame_num)
3418 h->frame_num_offset += max_frame_num;
3420 if(h->sps.poc_type==0){
3421 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect LSB wrap in either direction and adjust the MSB */
3423 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3424 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3425 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3426 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3428 h->poc_msb = h->prev_poc_msb;
3429 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3431 field_poc[1] = h->poc_msb + h->poc_lsb;
3432 if(s->picture_structure == PICT_FRAME)
3433 field_poc[1] += h->delta_poc_bottom;
3434 }else if(h->sps.poc_type==1){
3435 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3438 if(h->sps.poc_cycle_length != 0)
3439 abs_frame_num = h->frame_num_offset + h->frame_num;
3443 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3446 expected_delta_per_poc_cycle = 0;
3447 for(i=0; i < h->sps.poc_cycle_length; i++)
3448 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3450 if(abs_frame_num > 0){
3451 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3452 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3454 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3455 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3456 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3460 if(h->nal_ref_idc == 0)
3461 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3463 field_poc[0] = expectedpoc + h->delta_poc[0];
3464 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3466 if(s->picture_structure == PICT_FRAME)
3467 field_poc[1] += h->delta_poc[1];
/* poc_type==2: POC follows directly from (offset + frame_num) */
3469 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of the field(s) actually present */
3478 if(s->picture_structure != PICT_BOTTOM_FIELD)
3479 s->current_picture_ptr->field_poc[0]= field_poc[0];
3480 if(s->picture_structure != PICT_TOP_FIELD)
3481 s->current_picture_ptr->field_poc[1]= field_poc[1];
3482 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3489 * initialize scan tables
/*
 * Build the per-context zigzag/field scan tables. When an optimized (non-C)
 * IDCT is in use the scan order is permuted (the T() macros) to match the
 * IDCT's coefficient layout; otherwise the canonical tables are copied.
 * The *_q0 pointers select unpermuted tables for lossless (qpprime_y==0)
 * transform-bypass macroblocks.
 * NOTE(review): else-branch/closing-brace lines are missing from this view.
 */
3491 static void init_scan_tables(H264Context *h){
3492 MpegEncContext * const s = &h->s;
3494 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3495 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3496 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3498 for(i=0; i<16; i++){
/* swap row/column nibbles of the 4x4 scan position */
3499 #define T(x) (x>>2) | ((x<<2) & 0xF)
3500 h->zigzag_scan[i] = T(zigzag_scan[i]);
3501 h-> field_scan[i] = T( field_scan[i]);
3505 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3506 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3507 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3508 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3509 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3511 for(i=0; i<64; i++){
/* swap row/column halves of the 8x8 scan position */
3512 #define T(x) (x>>3) | ((x&7)<<3)
3513 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3514 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3515 h->field_scan8x8[i] = T(field_scan8x8[i]);
3516 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3520 if(h->sps.transform_bypass){ //FIXME same ugly
3521 h->zigzag_scan_q0 = zigzag_scan;
3522 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3523 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3524 h->field_scan_q0 = field_scan;
3525 h->field_scan8x8_q0 = field_scan8x8;
3526 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3528 h->zigzag_scan_q0 = h->zigzag_scan;
3529 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3530 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3531 h->field_scan_q0 = h->field_scan;
3532 h->field_scan8x8_q0 = h->field_scan8x8;
3533 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3538 * Replicates H264 "master" context to thread contexts.
/*
 * Copy per-picture decoding state from the "master" H264 context to a
 * worker-thread context so slice threads see consistent reference lists,
 * POC prediction state and dequant tables. Shallow copies only — the
 * Picture pointers inside the copied arrays still alias src's pictures.
 */
3540 static void clone_slice(H264Context *dst, H264Context *src)
3542 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3543 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3544 dst->s.current_picture = src->s.current_picture;
3545 dst->s.linesize = src->s.linesize;
3546 dst->s.uvlinesize = src->s.uvlinesize;
3547 dst->s.first_field = src->s.first_field;
3549 dst->prev_poc_msb = src->prev_poc_msb;
3550 dst->prev_poc_lsb = src->prev_poc_lsb;
3551 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3552 dst->prev_frame_num = src->prev_frame_num;
3553 dst->short_ref_count = src->short_ref_count;
3555 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3556 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3557 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3558 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3560 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3561 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3565 * decodes a slice header.
3566 * This will also call MPV_common_init() and frame_start() as needed.
3568 * @param h h264context
3569 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3571 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/*
 * Parse one slice header and set up all per-slice state: slice type,
 * PPS/SPS selection, dimensions (with possible re-init of the codec
 * context), frame_num/POC, field-pair matching, reference lists,
 * weighted prediction, ref pic marking, CABAC init, QP and deblocking
 * parameters. Returns 0 on success, <0 on error, 1 when the frame must
 * be decoded single-threaded (see the deblocking-type-1 path).
 * NOTE(review): this extracted view is missing many interior lines
 * (returns, else-branches, closing braces) — consult the full file
 * before editing anything here.
 */
3573 static int decode_slice_header(H264Context *h, H264Context *h0){
3574 MpegEncContext * const s = &h->s;
3575 MpegEncContext * const s0 = &h0->s;
3576 unsigned int first_mb_in_slice;
3577 unsigned int pps_id;
3578 int num_ref_idx_active_override_flag;
3579 unsigned int slice_type, tmp, i, j;
3580 int default_ref_list_done = 0;
3581 int last_pic_structure;
/* non-reference slices may be dropped without harming later pictures */
3583 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: use cheaper 2-tap qpel interpolation for droppable frames */
3585 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3586 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3587 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3589 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3590 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3593 first_mb_in_slice= get_ue_golomb(&s->gb);
3595 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3596 h0->current_slice = 0;
3597 if (!s0->first_field)
3598 s->current_picture_ptr= NULL;
3601 slice_type= get_ue_golomb(&s->gb);
3603 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type > 4 means "fixed": all slices of the picture share it */
3608 h->slice_type_fixed=1;
3610 h->slice_type_fixed=0;
3612 slice_type= golomb_to_pict_type[ slice_type ];
/* the default ref list only needs rebuilding when the type changes */
3613 if (slice_type == FF_I_TYPE
3614 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3615 default_ref_list_done = 1;
3617 h->slice_type= slice_type;
3618 h->slice_type_nos= slice_type & 3;
3620 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3621 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3622 av_log(h->s.avctx, AV_LOG_ERROR,
3623 "B picture before any references, skipping\n");
/* resolve and copy the active PPS, then the SPS it refers to */
3627 pps_id= get_ue_golomb(&s->gb);
3628 if(pps_id>=MAX_PPS_COUNT){
3629 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3632 if(!h0->pps_buffers[pps_id]) {
3633 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3636 h->pps= *h0->pps_buffers[pps_id];
3638 if(!h0->sps_buffers[h->pps.sps_id]) {
3639 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3642 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* only the master context rebuilds dequant tables, and only on change */
3644 if(h == h0 && h->dequant_coeff_pps != pps_id){
3645 h->dequant_coeff_pps = pps_id;
3646 init_dequant_tables(h);
3649 s->mb_width= h->sps.mb_width;
3650 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3652 h->b_stride= s->mb_width*4;
3653 h->b8_stride= s->mb_width*2;
/* apply frame cropping (limited range; full crop handled elsewhere) */
3655 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3656 if(h->sps.frame_mbs_only_flag)
3657 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3659 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3661 if (s->context_initialized
3662 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3664 return -1; // width / height changed during parallelized decoding
3666 flush_dpb(s->avctx);
3669 if (!s->context_initialized) {
3671 return -1; // we cant (re-)initialize context during parallel decoding
3672 if (MPV_common_init(s) < 0)
3676 init_scan_tables(h);
/* allocate and initialize per-thread H264 contexts (slice threading) */
3679 for(i = 1; i < s->avctx->thread_count; i++) {
3681 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3682 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3683 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3686 init_scan_tables(c);
3690 for(i = 0; i < s->avctx->thread_count; i++)
3691 if(context_init(h->thread_context[i]) < 0)
3694 s->avctx->width = s->width;
3695 s->avctx->height = s->height;
3696 s->avctx->sample_aspect_ratio= h->sps.sar;
3697 if(!s->avctx->sample_aspect_ratio.den)
3698 s->avctx->sample_aspect_ratio.den = 1;
3700 if(h->sps.timing_info_present_flag){
3701 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* old x264 builds wrote half the correct time_scale; compensate */
3702 if(h->x264_build > 0 && h->x264_build < 44)
3703 s->avctx->time_base.den *= 2;
3704 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3705 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3709 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3712 h->mb_aff_frame = 0;
3713 last_pic_structure = s0->picture_structure;
3714 if(h->sps.frame_mbs_only_flag){
3715 s->picture_structure= PICT_FRAME;
3717 if(get_bits1(&s->gb)) { //field_pic_flag
3718 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3720 s->picture_structure= PICT_FRAME;
3721 h->mb_aff_frame = h->sps.mb_aff;
3724 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3726 if(h0->current_slice == 0){
/* fill in frames for gaps in frame_num (lost pictures) */
3727 while(h->frame_num != h->prev_frame_num &&
3728 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3729 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3731 h->prev_frame_num++;
3732 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3733 s->current_picture_ptr->frame_num= h->prev_frame_num;
3734 execute_ref_pic_marking(h, NULL, 0);
3737 /* See if we have a decoded first field looking for a pair... */
3738 if (s0->first_field) {
3739 assert(s0->current_picture_ptr);
3740 assert(s0->current_picture_ptr->data[0]);
3741 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3743 /* figure out if we have a complementary field pair */
3744 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3746 * Previous field is unmatched. Don't display it, but let it
3747 * remain for reference if marked as such.
3749 s0->current_picture_ptr = NULL;
3750 s0->first_field = FIELD_PICTURE;
3753 if (h->nal_ref_idc &&
3754 s0->current_picture_ptr->reference &&
3755 s0->current_picture_ptr->frame_num != h->frame_num) {
3757 * This and previous field were reference, but had
3758 * different frame_nums. Consider this field first in
3759 * pair. Throw away previous field except for reference
3762 s0->first_field = 1;
3763 s0->current_picture_ptr = NULL;
3766 /* Second field in complementary pair */
3767 s0->first_field = 0;
3772 /* Frame or first field in a potentially complementary pair */
3773 assert(!s0->current_picture_ptr);
3774 s0->first_field = FIELD_PICTURE;
3777 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3778 s0->first_field = 0;
3785 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3787 assert(s->mb_num == s->mb_width * s->mb_height);
3788 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3789 first_mb_in_slice >= s->mb_num){
3790 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3793 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3794 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3795 if (s->picture_structure == PICT_BOTTOM_FIELD)
3796 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3797 assert(s->mb_y < s->mb_height);
/* pic num range doubles for field pictures (two pic nums per frame) */
3799 if(s->picture_structure==PICT_FRAME){
3800 h->curr_pic_num= h->frame_num;
3801 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3803 h->curr_pic_num= 2*h->frame_num + 1;
3804 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3807 if(h->nal_unit_type == NAL_IDR_SLICE){
3808 get_ue_golomb(&s->gb); /* idr_pic_id */
3811 if(h->sps.poc_type==0){
3812 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3814 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3815 h->delta_poc_bottom= get_se_golomb(&s->gb);
3819 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3820 h->delta_poc[0]= get_se_golomb(&s->gb);
3822 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3823 h->delta_poc[1]= get_se_golomb(&s->gb);
3828 if(h->pps.redundant_pic_cnt_present){
3829 h->redundant_pic_count= get_ue_golomb(&s->gb);
3832 //set defaults, might be overridden a few lines later
3833 h->ref_count[0]= h->pps.ref_count[0];
3834 h->ref_count[1]= h->pps.ref_count[1];
3836 if(h->slice_type_nos != FF_I_TYPE){
3837 if(h->slice_type_nos == FF_B_TYPE){
3838 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3840 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3842 if(num_ref_idx_active_override_flag){
3843 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3844 if(h->slice_type_nos==FF_B_TYPE)
3845 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned-wrap trick also catches ref_count 0 after the -1 */
3847 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3848 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3849 h->ref_count[0]= h->ref_count[1]= 1;
3853 if(h->slice_type_nos == FF_B_TYPE)
3860 if(!default_ref_list_done){
3861 fill_default_ref_list(h);
3864 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
/* keep legacy last/next picture pointers in sync with ref lists */
3867 if(h->slice_type_nos!=FF_I_TYPE){
3868 s->last_picture_ptr= &h->ref_list[0][0];
3869 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3871 if(h->slice_type_nos==FF_B_TYPE){
3872 s->next_picture_ptr= &h->ref_list[1][0];
3873 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3876 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3877 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3878 pred_weight_table(h);
3879 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3880 implicit_weight_table(h);
/* ref pic marking state lives on the master context h0 */
3885 decode_ref_pic_marking(h0, &s->gb);
3888 fill_mbaff_ref_list(h);
3890 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3891 direct_dist_scale_factor(h);
3892 direct_ref_list_init(h);
3894 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3895 tmp = get_ue_golomb(&s->gb);
3897 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3900 h->cabac_init_idc= tmp;
3903 h->last_qscale_diff = 0;
3904 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3906 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3910 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3911 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3912 //FIXME qscale / qp ... stuff
3913 if(h->slice_type == FF_SP_TYPE){
3914 get_bits1(&s->gb); /* sp_for_switch_flag */
3916 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3917 get_se_golomb(&s->gb); /* slice_qs_delta */
3920 h->deblocking_filter = 1;
3921 h->slice_alpha_c0_offset = 0;
3922 h->slice_beta_offset = 0;
3923 if( h->pps.deblocking_filter_parameters_present ) {
3924 tmp= get_ue_golomb(&s->gb);
3926 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3929 h->deblocking_filter= tmp;
/* bitstream idc: 0=on 1=off; internal flag: 1=on 0=off — swap them */
3930 if(h->deblocking_filter < 2)
3931 h->deblocking_filter^= 1; // 1<->0
3933 if( h->deblocking_filter ) {
3934 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3935 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3939 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3940 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3941 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3942 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3943 h->deblocking_filter= 0;
/* cross-slice deblocking cannot run with parallel slice contexts */
3945 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3946 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3947 /* Cheat slightly for speed:
3948 Do not bother to deblock across slices. */
3949 h->deblocking_filter = 2;
3951 h0->max_contexts = 1;
3952 if(!h0->single_decode_warning) {
3953 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3954 h0->single_decode_warning = 1;
3957 return 1; // deblocking switched inside frame
3962 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3963 slice_group_change_cycle= get_bits(&s->gb, ?);
3966 h0->last_slice_type = slice_type;
3967 h->slice_num = ++h0->current_slice;
3968 if(h->slice_num >= MAX_SLICES){
3969 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* precompute ref_index -> frame mapping used by the deblocker */
3973 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3977 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3978 +(h->ref_list[j][i].reference&3);
3981 for(i=16; i<48; i++)
3982 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3983 +(h->ref_list[j][i].reference&3);
3986 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3987 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3989 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3990 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3992 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3994 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3995 pps_id, h->frame_num,
3996 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3997 h->ref_count[0], h->ref_count[1],
3999 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4001 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4002 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Read a CAVLC level_prefix: count leading zero bits before the first 1,
 * using the raw bitstream-reader macros for speed (the return expression
 * is outside this extracted view). */
4012 static inline int get_level_prefix(GetBitContext *gb){
4016 OPEN_READER(re, gb);
4017 UPDATE_CACHE(re, gb);
4018 buf=GET_CACHE(re, gb);
/* position of the leading 1 bit in the 32-bit cache */
4020 log= 32 - av_log2(buf);
4022 print_bin(buf>>(32-log), log);
4023 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4026 LAST_SKIP_BITS(re, gb, log);
4027 CLOSE_READER(re, gb);
/* Check whether the 8x8 transform may be used for the current macroblock:
 * every 8x8 sub-partition must be a full 8x8 block, and direct sub-blocks
 * require direct_8x8_inference_flag (loop/return lines are outside this
 * extracted view). */
4032 static inline int get_dct8x8_allowed(H264Context *h){
4035 if(!IS_SUB_8X8(h->sub_mb_type[i])
4036 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4043 * decodes a residual block.
4044 * @param n block index
4045 * @param scantable scantable
4046 * @param max_coeff number of coefficients in the block
4047 * @return <0 if an error occurred
// NOTE(review): CAVLC residual decoding (coeff_token -> trailing ones ->
// level codes -> total_zeros -> run_before). This extract is sampled; several
// original lines (closing braces, else branches, early returns, the level[]
// declaration) are missing between the numbered lines. Code left byte-identical.
4049 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4050 MpegEncContext * const s = &h->s;
// Maps total_coeff of the predicted non-zero count to one of 4 coeff_token VLC tables.
4051 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4053 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4055 //FIXME put trailing_onex into the context
// coeff_token selection: chroma DC uses its own VLC; luma DC and AC blocks
// pick a table based on the predicted non-zero count of neighbors.
4057 if(n == CHROMA_DC_BLOCK_INDEX){
4058 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4059 total_coeff= coeff_token>>2;
4061 if(n == LUMA_DC_BLOCK_INDEX){
4062 total_coeff= pred_non_zero_count(h, 0);
4063 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4064 total_coeff= coeff_token>>2;
4066 total_coeff= pred_non_zero_count(h, n);
4067 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4068 total_coeff= coeff_token>>2;
4069 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4073 //FIXME set last_non_zero?
// Bitstream sanity check: a corrupted stream can signal more coefficients
// than the block holds.
4077 if(total_coeff > (unsigned)max_coeff) {
4078 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// Low two bits of coeff_token carry the number of trailing +/-1 coefficients.
4082 trailing_ones= coeff_token&3;
4083 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4084 assert(total_coeff<=16);
// Trailing ones: one sign bit each (0 -> +1, 1 -> -1).
4086 for(i=0; i<trailing_ones; i++){
4087 level[i]= 1 - 2*get_bits1(gb);
4091 int level_code, mask;
4092 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4093 int prefix= get_level_prefix(gb);
4095 //first coefficient has suffix_length equal to 0 or 1
4096 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4098 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4100 level_code= (prefix<<suffix_length); //part
4101 }else if(prefix==14){
4103 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4105 level_code= prefix + get_bits(gb, 4); //part
4107 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4108 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4110 level_code += (1<<(prefix-3))-4096;
4113 if(trailing_ones < 3) level_code += 2;
// Zigzag de-mapping of level_code to a signed level: even -> positive, odd -> negative.
4118 mask= -(level_code&1);
4119 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4122 //remaining coefficients have suffix_length > 0
4123 for(;i<total_coeff;i++) {
// Thresholds at which suffix_length is bumped for the next coefficient.
4124 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4125 prefix = get_level_prefix(gb);
4127 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4129 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4131 level_code += (1<<(prefix-3))-4096;
4133 mask= -(level_code&1);
4134 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4135 if(level_code > suffix_limit[suffix_length])
// total_zeros is only coded when fewer than max_coeff coefficients are present.
4140 if(total_coeff == max_coeff)
4143 if(n == CHROMA_DC_BLOCK_INDEX)
4144 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4146 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Place levels from highest scan position downward; run_before VLCs give the
// gap to the next coded coefficient.
4149 coeff_num = zeros_left + total_coeff - 1;
4150 j = scantable[coeff_num];
// First path (presumably qmul == NULL — DC blocks): store raw levels; TODO confirm,
// the branch condition lies in a sampled-out line.
4152 block[j] = level[0];
4153 for(i=1;i<total_coeff;i++) {
4156 else if(zeros_left < 7){
4157 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4159 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4161 zeros_left -= run_before;
4162 coeff_num -= 1 + run_before;
4163 j= scantable[ coeff_num ];
// Second path: dequantize while placing ((level*qmul[j]+32)>>6).
4168 block[j] = (level[0] * qmul[j] + 32)>>6;
4169 for(i=1;i<total_coeff;i++) {
4172 else if(zeros_left < 7){
4173 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4175 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4177 zeros_left -= run_before;
4178 coeff_num -= 1 + run_before;
4179 j= scantable[ coeff_num ];
4181 block[j]= (level[i] * qmul[j] + 32)>>6;
// Error path for corrupt run_before data driving zeros_left negative.
4186 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Predicts the field/frame decoding flag of a skipped MBAFF macroblock pair
// from the left neighbor, falling back to the top neighbor, provided they
// belong to the same slice. (The final fallback line of the conditional chain
// is missing from this sampled extract — presumably `: 0;`, TODO confirm.)
4193 static void predict_field_decoding_flag(H264Context *h){
4194 MpegEncContext * const s = &h->s;
4195 const int mb_xy= h->mb_xy;
4196 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4197 ? s->current_picture.mb_type[mb_xy-1]
4198 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4199 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4201 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4205 * decodes a P_SKIP or B_SKIP macroblock
// NOTE(review): sampled extract — the mb_type initialisation and some braces
// are in the gaps. Code left byte-identical; only comments added.
4207 static void decode_mb_skip(H264Context *h){
4208 MpegEncContext * const s = &h->s;
4209 const int mb_xy= h->mb_xy;
// A skipped MB has no residual: clear the non-zero-count bookkeeping.
4212 memset(h->non_zero_count[mb_xy], 0, 16);
4213 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4216 mb_type|= MB_TYPE_INTERLACED;
4218 if( h->slice_type_nos == FF_B_TYPE )
4220 // just for fill_caches. pred_direct_motion will set the real mb_type
4221 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4223 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4224 pred_direct_motion(h, &mb_type);
4225 mb_type|= MB_TYPE_SKIP;
// P_SKIP path: 16x16 L0 prediction with the P-skip motion vector predictor.
4230 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4232 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4233 pred_pskip_motion(h, &mx, &my);
4234 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4235 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit motion info and per-MB state for the skipped macroblock.
4238 write_back_motion(h, mb_type);
4239 s->current_picture.mb_type[mb_xy]= mb_type;
4240 s->current_picture.qscale_table[mb_xy]= s->qscale;
4241 h->slice_table[ mb_xy ]= h->slice_num;
4242 h->prev_mb_skipped= 1;
4246 * decodes a macroblock
4247 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// NOTE(review): main CAVLC macroblock decoder. This extract is sampled — many
// original lines (declarations, else branches, returns, closing braces) are
// missing between the numbered lines. Code left byte-identical throughout.
4249 static int decode_mb_cavlc(H264Context *h){
4250 MpegEncContext * const s = &h->s;
4252 int partition_count;
4253 unsigned int mb_type, cbp;
4254 int dct8x8_allowed= h->pps.transform_8x8_mode;
4256 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4258 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4260 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4261 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run handling (P/B slices only) ---
4263 if(h->slice_type_nos != FF_I_TYPE){
4264 if(s->mb_skip_run==-1)
4265 s->mb_skip_run= get_ue_golomb(&s->gb);
4267 if (s->mb_skip_run--) {
// In MBAFF the field flag for a skipped top MB is either read (last skip)
// or predicted from neighbors.
4268 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4269 if(s->mb_skip_run==0)
4270 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4272 predict_field_decoding_flag(h);
4279 if( (s->mb_y&1) == 0 )
4280 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4283 h->prev_mb_skipped= 0;
// --- mb_type: slice-type specific remap through the *_mb_type_info tables ---
4285 mb_type= get_ue_golomb(&s->gb);
4286 if(h->slice_type_nos == FF_B_TYPE){
4288 partition_count= b_mb_type_info[mb_type].partition_count;
4289 mb_type= b_mb_type_info[mb_type].type;
4292 goto decode_intra_mb;
4294 }else if(h->slice_type_nos == FF_P_TYPE){
4296 partition_count= p_mb_type_info[mb_type].partition_count;
4297 mb_type= p_mb_type_info[mb_type].type;
4300 goto decode_intra_mb;
4303 assert(h->slice_type_nos == FF_I_TYPE);
4304 if(h->slice_type == FF_SI_TYPE && mb_type)
4308 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4312 cbp= i_mb_type_info[mb_type].cbp;
4313 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4314 mb_type= i_mb_type_info[mb_type].type;
4318 mb_type |= MB_TYPE_INTERLACED;
4320 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: byte-aligned raw samples copied straight into h->mb ---
4322 if(IS_INTRA_PCM(mb_type)){
4325 // We assume these blocks are very rare so we do not optimize it.
4326 align_get_bits(&s->gb);
4328 // The pixels are stored in the same order as levels in h->mb array.
4329 for(x=0; x < (CHROMA ? 384 : 256); x++){
4330 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4333 // In deblocking, the quantizer is 0
4334 s->current_picture.qscale_table[mb_xy]= 0;
4335 // All coeffs are present
4336 memset(h->non_zero_count[mb_xy], 16, 16);
4338 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF: temporarily double the ref counts while decoding a field MB pair
// (undone near the end of the function).
4343 h->ref_count[0] <<= 1;
4344 h->ref_count[1] <<= 1;
4347 fill_caches(h, mb_type, 0);
// --- Intra prediction modes ---
4350 if(IS_INTRA(mb_type)){
4352 // init_top_left_availability(h);
4353 if(IS_INTRA4x4(mb_type)){
4356 if(dct8x8_allowed && get_bits1(&s->gb)){
4357 mb_type |= MB_TYPE_8x8DCT;
4361 // fill_intra4x4_pred_table(h);
4362 for(i=0; i<16; i+=di){
4363 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag == 0: read rem_mode and skip over the predicted one.
4365 if(!get_bits1(&s->gb)){
4366 const int rem_mode= get_bits(&s->gb, 3);
4367 mode = rem_mode + (rem_mode >= mode);
4371 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4373 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4375 write_back_intra_pred_mode(h);
4376 if( check_intra4x4_pred_mode(h) < 0)
4379 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4380 if(h->intra16x16_pred_mode < 0)
4384 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4387 h->chroma_pred_mode= pred_mode;
// --- Inter, 8x8 partitions (sub_mb_types, refs, then MVs) ---
4389 }else if(partition_count==4){
4390 int i, j, sub_partition_count[4], list, ref[2][4];
4392 if(h->slice_type_nos == FF_B_TYPE){
4394 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4395 if(h->sub_mb_type[i] >=13){
4396 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4399 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4400 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4402 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4403 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4404 pred_direct_motion(h, &mb_type);
4405 h->ref_cache[0][scan8[4]] =
4406 h->ref_cache[1][scan8[4]] =
4407 h->ref_cache[0][scan8[12]] =
4408 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4411 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4413 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4414 if(h->sub_mb_type[i] >=4){
4415 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4418 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4419 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 partition (te0 golomb, bounded by ref_count).
4423 for(list=0; list<h->list_count; list++){
4424 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4426 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4427 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4428 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4430 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4442 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per sub-partition; the vector spread pattern depends on
// the sub-partition shape (8x8 / 8x4 / 4x8 / 4x4).
4444 for(list=0; list<h->list_count; list++){
4446 if(IS_DIRECT(h->sub_mb_type[i])) {
4447 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4450 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4451 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4453 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4454 const int sub_mb_type= h->sub_mb_type[i];
4455 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4456 for(j=0; j<sub_partition_count[i]; j++){
4458 const int index= 4*i + block_width*j;
4459 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4460 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4461 mx += get_se_golomb(&s->gb);
4462 my += get_se_golomb(&s->gb);
4463 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4465 if(IS_SUB_8X8(sub_mb_type)){
4467 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4469 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4470 }else if(IS_SUB_8X4(sub_mb_type)){
4471 mv_cache[ 1 ][0]= mx;
4472 mv_cache[ 1 ][1]= my;
4473 }else if(IS_SUB_4X8(sub_mb_type)){
4474 mv_cache[ 8 ][0]= mx;
4475 mv_cache[ 8 ][1]= my;
4477 mv_cache[ 0 ][0]= mx;
4478 mv_cache[ 0 ][1]= my;
4481 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- Inter, direct 16x16 ---
4487 }else if(IS_DIRECT(mb_type)){
4488 pred_direct_motion(h, &mb_type);
4489 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- Inter, 16x16 / 16x8 / 8x16 partitions ---
4491 int list, mx, my, i;
4492 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4493 if(IS_16X16(mb_type)){
4494 for(list=0; list<h->list_count; list++){
4496 if(IS_DIR(mb_type, 0, list)){
4497 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4498 if(val >= h->ref_count[list]){
4499 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4503 val= LIST_NOT_USED&0xFF;
4504 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4506 for(list=0; list<h->list_count; list++){
4508 if(IS_DIR(mb_type, 0, list)){
4509 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4510 mx += get_se_golomb(&s->gb);
4511 my += get_se_golomb(&s->gb);
4512 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4514 val= pack16to32(mx,my);
4517 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4520 else if(IS_16X8(mb_type)){
4521 for(list=0; list<h->list_count; list++){
4524 if(IS_DIR(mb_type, i, list)){
4525 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4526 if(val >= h->ref_count[list]){
4527 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4531 val= LIST_NOT_USED&0xFF;
4532 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4535 for(list=0; list<h->list_count; list++){
4538 if(IS_DIR(mb_type, i, list)){
4539 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4540 mx += get_se_golomb(&s->gb);
4541 my += get_se_golomb(&s->gb);
4542 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4544 val= pack16to32(mx,my);
4547 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4551 assert(IS_8X16(mb_type));
4552 for(list=0; list<h->list_count; list++){
4555 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4556 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4557 if(val >= h->ref_count[list]){
4558 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4562 val= LIST_NOT_USED&0xFF;
4563 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4566 for(list=0; list<h->list_count; list++){
4569 if(IS_DIR(mb_type, i, list)){
4570 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4571 mx += get_se_golomb(&s->gb);
4572 my += get_se_golomb(&s->gb);
4573 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4575 val= pack16to32(mx,my);
4578 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4584 if(IS_INTER(mb_type))
4585 write_back_motion(h, mb_type);
// --- coded_block_pattern (not present for Intra16x16, where it is implied) ---
4587 if(!IS_INTRA16x16(mb_type)){
4588 cbp= get_ue_golomb(&s->gb);
4590 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4595 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4596 else cbp= golomb_to_inter_cbp [cbp];
4598 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4599 else cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag for inter MBs with coded luma.
4604 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4605 if(get_bits1(&s->gb)){
4606 mb_type |= MB_TYPE_8x8DCT;
4607 h->cbp_table[mb_xy]= cbp;
4610 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residuals ---
4612 if(cbp || IS_INTRA16x16(mb_type)){
4613 int i8x8, i4x4, chroma_idx;
4615 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4616 const uint8_t *scan, *scan8x8, *dc_scan;
4618 // fill_non_zero_count_cache(h);
// Field vs frame scan orders (separate qscale==0 variants).
4620 if(IS_INTERLACED(mb_type)){
4621 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4622 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4623 dc_scan= luma_dc_field_scan;
4625 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4626 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4627 dc_scan= luma_dc_zigzag_scan;
// mb_qp_delta with range check and wrap-around of qscale into [0,51].
4630 dquant= get_se_golomb(&s->gb);
4632 if( dquant > 25 || dquant < -26 ){
4633 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4637 s->qscale += dquant;
4638 if(((unsigned)s->qscale) > 51){
4639 if(s->qscale<0) s->qscale+= 52;
4640 else s->qscale-= 52;
4643 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4644 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra16x16: luma DC block first, then 15-coeff AC blocks if cbp says so.
4645 if(IS_INTRA16x16(mb_type)){
4646 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4647 return -1; //FIXME continue if partitioned and other return -1 too
4650 assert((cbp&15) == 0 || (cbp&15) == 15);
4653 for(i8x8=0; i8x8<4; i8x8++){
4654 for(i4x4=0; i4x4<4; i4x4++){
4655 const int index= i4x4 + 4*i8x8;
4656 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4662 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-Intra16x16: per-8x8 luma residuals, 4x4 or 8x8 transform.
4665 for(i8x8=0; i8x8<4; i8x8++){
4666 if(cbp & (1<<i8x8)){
4667 if(IS_8x8DCT(mb_type)){
4668 DCTELEM *buf = &h->mb[64*i8x8];
4670 for(i4x4=0; i4x4<4; i4x4++){
4671 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4672 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4675 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4676 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4678 for(i4x4=0; i4x4<4; i4x4++){
4679 const int index= i4x4 + 4*i8x8;
4681 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4687 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4688 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (4 coeffs, no dequant here) then chroma AC blocks.
4694 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4695 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4701 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4702 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4703 for(i4x4=0; i4x4<4; i4x4++){
4704 const int index= 16 + 4*chroma_idx + i4x4;
4705 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4711 uint8_t * const nnz= &h->non_zero_count_cache[0];
4712 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4713 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// cbp == 0 and not Intra16x16: clear all non-zero counts.
4716 uint8_t * const nnz= &h->non_zero_count_cache[0];
4717 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4718 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4719 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4721 s->current_picture.qscale_table[mb_xy]= s->qscale;
4722 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling done earlier.
4725 h->ref_count[0] >>= 1;
4726 h->ref_count[1] >>= 1;
// CABAC mb_field_decoding_flag: context (0..2) counts how many of the left
// and above MB pairs in the same slice are field-coded; state offset 70.
4732 static int decode_cabac_field_decoding_flag(H264Context *h) {
4733 MpegEncContext * const s = &h->s;
4734 const int mb_x = s->mb_x;
4735 const int mb_y = s->mb_y & ~1;
4736 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4737 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4739 unsigned int ctx = 0;
4741 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4744 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4748 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes a CABAC intra mb_type: 0 = I4x4, 25 = I_PCM, otherwise an
// I16x16 code 1..24 built from cbp_luma/cbp_chroma/pred-mode bins.
// intra_slice selects the neighbor-derived context variant.
4751 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4752 uint8_t *state= &h->cabac_state[ctx_base];
4756 MpegEncContext * const s = &h->s;
4757 const int mba_xy = h->left_mb_xy[0];
4758 const int mbb_xy = h->top_mb_xy;
4760 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4762 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4764 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4765 return 0; /* I4x4 */
4768 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4769 return 0; /* I4x4 */
4772 if( get_cabac_terminate( &h->cabac ) )
4773 return 25; /* PCM */
4775 mb_type = 1; /* I16x16 */
4776 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4777 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4778 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4779 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4780 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// Decodes the CABAC mb_type for I, P and B slices. P/B intra types are
// offset past the inter types (+5 for P, +23 for B).
4784 static int decode_cabac_mb_type( H264Context *h ) {
4785 MpegEncContext * const s = &h->s;
4787 if( h->slice_type_nos == FF_I_TYPE ) {
4788 return decode_cabac_intra_mb_type(h, 3, 1);
4789 } else if( h->slice_type_nos == FF_P_TYPE ) {
4790 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4792 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4793 /* P_L0_D16x16, P_8x8 */
4794 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4796 /* P_L0_D8x16, P_L0_D16x8 */
4797 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4800 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4802 } else if( h->slice_type_nos == FF_B_TYPE ) {
// B slice: context from whether left/top neighbors are non-direct.
4803 const int mba_xy = h->left_mb_xy[0];
4804 const int mbb_xy = h->top_mb_xy;
4808 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4810 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4813 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4814 return 0; /* B_Direct_16x16 */
4816 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4817 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bin suffix distinguishing the remaining B types.
4820 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4821 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4822 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4823 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4825 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4826 else if( bits == 13 ) {
4827 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4828 } else if( bits == 14 )
4829 return 11; /* B_L1_L0_8x16 */
4830 else if( bits == 15 )
4831 return 22; /* B_8x8 */
4833 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4834 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4836 /* TODO SI/SP frames? */
// CABAC mb_skip_flag: context (0..2) from non-skipped same-slice neighbors;
// state base 11 (P) — the B-slice ctx adjustment is visible at line 4872.
// MBAFF needs special neighbor selection across field/frame MB pairs.
4841 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4842 MpegEncContext * const s = &h->s;
4846 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4847 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4850 && h->slice_table[mba_xy] == h->slice_num
4851 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4852 mba_xy += s->mb_stride;
4854 mbb_xy = mb_xy - s->mb_stride;
4856 && h->slice_table[mbb_xy] == h->slice_num
4857 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4858 mbb_xy -= s->mb_stride;
4860 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4862 int mb_xy = h->mb_xy;
4864 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4867 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4869 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4872 if( h->slice_type_nos == FF_B_TYPE )
4874 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// CABAC intra4x4 pred mode: one "use predicted mode" bin (state 68), else a
// 3-bit rem mode (state 69) skipping over the predicted mode.
4877 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4880 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4883 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4884 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4885 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4887 if( mode >= pred_mode )
// CABAC chroma pred mode (0..3): context from neighbors with a non-zero
// chroma pred mode; truncated-unary suffix on state 64+3.
4893 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4894 const int mba_xy = h->left_mb_xy[0];
4895 const int mbb_xy = h->top_mb_xy;
4899 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4900 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4903 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4906 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4909 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4911 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// CABAC luma CBP: 4 bins (one per 8x8 block), each context built from the
// left/top neighbor CBP bits or already-decoded bits of this MB; state base 73.
4917 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4918 int cbp_b, cbp_a, ctx, cbp = 0;
4920 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4921 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4923 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4924 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4925 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4926 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4927 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4928 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4929 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4930 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// CABAC chroma CBP (0 = none, 1 = DC only, 2 = DC+AC): two bins on state
// base 77, contexts from the neighbors' chroma CBP (cbp bits 4..5).
4933 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4937 cbp_a = (h->left_cbp>>4)&0x03;
4938 cbp_b = (h-> top_cbp>>4)&0x03;
4941 if( cbp_a > 0 ) ctx++;
4942 if( cbp_b > 0 ) ctx += 2;
4943 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4947 if( cbp_a == 2 ) ctx++;
4948 if( cbp_b == 2 ) ctx += 2;
4949 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// CABAC mb_qp_delta: unary-coded magnitude on states 60+ctx, capped at 102
// to avoid an unbounded loop on corrupt data; value is de-zigzagged to a
// signed delta on return.
4951 static int decode_cabac_mb_dqp( H264Context *h) {
4955 if( h->last_qscale_diff != 0 )
4958 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4964 if(val > 102) //prevent infinite loop
4971 return -(val + 1)/2;
// CABAC P sub_mb_type (0..3) via a small binary tree on states 21..23.
// NOTE(review): return statements between the bins fall in sampled-out lines.
4973 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4974 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4976 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4978 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// CABAC B sub_mb_type (0..12) via a binary tree on states 36..39.
4982 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4984 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4985 return 0; /* B_Direct_8x8 */
4986 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4987 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4989 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4990 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4991 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4994 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4995 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// CABAC transform_size_8x8_flag; context = number of neighbors using 8x8 DCT.
4999 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5000 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// CABAC ref_idx: context from left/above cached ref indices (direct-predicted
// neighbors are treated as 0 in B slices); unary-coded on states 54+ctx,
// clamped at 32 to stop runaway decoding on corrupt input.
5003 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5004 int refa = h->ref_cache[list][scan8[n] - 1];
5005 int refb = h->ref_cache[list][scan8[n] - 8];
5009 if( h->slice_type_nos == FF_B_TYPE) {
5010 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5012 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5021 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5027 if(ref >= 32 /*h->ref_list[list]*/){
5028 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5029 return 0; //FIXME we should return -1 and check the return everywhere
// CABAC motion vector difference for component l (0=x, 1=y): context from
// the summed neighbor |mvd|, unary prefix up to 9, then Exp-Golomb suffix in
// bypass mode, then a bypass sign bit. Overflow-guarded for corrupt input.
5035 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5036 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5037 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5038 int ctxbase = (l == 0) ? 40 : 47;
5043 else if( amvd > 32 )
5048 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5053 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5061 while( get_cabac_bypass( &h->cabac ) ) {
5065 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5070 if( get_cabac_bypass( &h->cabac ) )
5074 return get_cabac_bypass_sign( &h->cabac, -mvd );
// Computes the coded_block_flag context for block category `cat`: neighbor
// non-zero info comes from the cached left/top CBP bits (DC cases) or the
// non_zero_count_cache; final ctx is offset by 4 per category.
5077 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5083 nza = h->left_cbp&0x100;
5084 nzb = h-> top_cbp&0x100;
5086 nza = (h->left_cbp>>(6+idx))&0x01;
5087 nzb = (h-> top_cbp>>(6+idx))&0x01;
5091 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5092 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5094 assert(cat == 1 || cat == 2);
5095 nza = h->non_zero_count_cache[scan8[idx] - 1];
5096 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5106 return ctx + 4 * cat;
// Per-scan-position context offsets for the 8x8 last_significant_coeff_flag
// (63 entries; also referenced from the x86 asm path, hence DECLARE_ASM_CONST).
5109 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5110 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5111 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5112 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5113 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
// CABAC residual decoding core: coded_block_flag, significance map, then
// coefficient levels/signs, with optional dequantization via qmul.
// is_dc is a compile-time switch so the _dc/_nondc wrappers specialize.
// NOTE(review): sampled extract — several lines (declarations, branch heads,
// closing braces) are missing between the numbered lines; code untouched.
5116 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
// Context table offsets per category, frame [0] vs field [1] scanning.
5117 static const int significant_coeff_flag_offset[2][6] = {
5118 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5119 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5121 static const int last_coeff_flag_offset[2][6] = {
5122 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5123 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5125 static const int coeff_abs_level_m1_offset[6] = {
5126 227+0, 227+10, 227+20, 227+30, 227+39, 426
5128 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5129 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5130 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5131 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5132 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5133 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5134 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5135 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5136 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5138 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5139 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5140 * map node ctx => cabac ctx for level=1 */
5141 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5142 /* map node ctx => cabac ctx for level>1 */
5143 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5144 static const uint8_t coeff_abs_level_transition[2][8] = {
5145 /* update node ctx after decoding a level=1 */
5146 { 1, 2, 3, 3, 4, 5, 6, 7 },
5147 /* update node ctx after decoding a level>1 */
5148 { 4, 4, 4, 4, 5, 6, 7, 7 }
5154 int coeff_count = 0;
5157 uint8_t *significant_coeff_ctx_base;
5158 uint8_t *last_coeff_ctx_base;
5159 uint8_t *abs_level_m1_ctx_base;
// Copy the CABAC state into a stack-local context so the hot loops work on
// registers, written back before every exit point.
5162 #define CABAC_ON_STACK
5164 #ifdef CABAC_ON_STACK
5167 cc.range = h->cabac.range;
5168 cc.low = h->cabac.low;
5169 cc.bytestream= h->cabac.bytestream;
5171 #define CC &h->cabac
5175 /* cat: 0-> DC 16x16 n = 0
5176 * 1-> AC 16x16 n = luma4x4idx
5177 * 2-> Luma4x4 n = luma4x4idx
5178 * 3-> DC Chroma n = iCbCr
5179 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5180 * 5-> Luma8x8 n = 4 * luma8x8idx
5183 /* read coded block flag */
5184 if( is_dc || cat != 5 ) {
5185 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5188 h->non_zero_count_cache[scan8[16+n]] = 0;
5190 h->non_zero_count_cache[scan8[n]] = 0;
5193 #ifdef CABAC_ON_STACK
5194 h->cabac.range = cc.range ;
5195 h->cabac.low = cc.low ;
5196 h->cabac.bytestream= cc.bytestream;
5202 significant_coeff_ctx_base = h->cabac_state
5203 + significant_coeff_flag_offset[MB_FIELD][cat];
5204 last_coeff_ctx_base = h->cabac_state
5205 + last_coeff_flag_offset[MB_FIELD][cat];
5206 abs_level_m1_ctx_base = h->cabac_state
5207 + coeff_abs_level_m1_offset[cat];
// Significance map: positions of non-zero coefficients into index[].
5209 if( !is_dc && cat == 5 ) {
5210 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5211 for(last= 0; last < coefs; last++) { \
5212 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5213 if( get_cabac( CC, sig_ctx )) { \
5214 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5215 index[coeff_count++] = last; \
5216 if( get_cabac( CC, last_ctx ) ) { \
5222 if( last == max_coeff -1 ) {\
5223 index[coeff_count++] = last;\
5225 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5226 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5227 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5229 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5231 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5233 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5236 assert(coeff_count > 0);
// Record non-zero status: cbp_table flags for DC cats, nnz cache otherwise.
5240 h->cbp_table[h->mb_xy] |= 0x100;
5242 h->cbp_table[h->mb_xy] |= 0x40 << n;
5245 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5247 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5249 assert( cat == 1 || cat == 2 );
5250 h->non_zero_count_cache[scan8[n]] = coeff_count;
// Level decoding, highest scan position first; node_ctx drives the
// level1/levelgt1 context selection (tables above).
5255 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5257 int j= scantable[index[--coeff_count]];
5259 if( get_cabac( CC, ctx ) == 0 ) {
5260 node_ctx = coeff_abs_level_transition[0][node_ctx];
5262 block[j] = get_cabac_bypass_sign( CC, -1);
5264 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5268 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5269 node_ctx = coeff_abs_level_transition[1][node_ctx];
// Magnitude: truncated unary up to 15, then bypass Exp-Golomb escape.
5271 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5275 if( coeff_abs >= 15 ) {
5277 while( get_cabac_bypass( CC ) ) {
5283 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5289 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5291 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5294 } while( coeff_count );
5295 #ifdef CABAC_ON_STACK
5296 h->cabac.range = cc.range ;
5297 h->cabac.low = cc.low ;
5298 h->cabac.bytestream= cc.bytestream;
5303 #ifndef CONFIG_SMALL
/* Specialized wrapper for DC residual blocks (luma DC cat 0 / chroma DC cat 3).
 * Passing the constant is_dc=1 lets the (presumably always_inline) internal
 * decoder drop the non-DC code paths in this translation of the function. */
5304 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5305 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Specialized wrapper for non-DC residual blocks (AC / 4x4 / 8x8 categories);
 * the constant is_dc=0 mirrors decode_cabac_residual_dc() above. */
5308 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5309 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Entry point for CABAC residual decoding: dispatches on the block category.
 * Categories 0 (Intra16x16 luma DC) and 3 (chroma DC) take the DC path.
 * NOTE(review): the two alternatives below belong to the CONFIG_SMALL /
 * !CONFIG_SMALL preprocessor branches; the #ifdef/#else lines are elided
 * from this excerpt (gap in the embedded line numbers). */
5313 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5315 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5317 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5318 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes h->top_mb_xy and h->left_mb_xy[0], the macroblock indices of the
 * top and left neighbors of the current MB, starting from the simple
 * progressive-frame layout and then correcting for MBAFF frame/field pairing
 * and field pictures. */
5322 static inline void compute_mb_neighbors(H264Context *h)
5324 MpegEncContext * const s = &h->s;
5325 const int mb_xy = h->mb_xy;
/* Default (non-MBAFF) neighbors: directly above and directly to the left. */
5326 h->top_mb_xy = mb_xy - s->mb_stride;
5327 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF case: neighbors are addressed via the top MB of each frame/field
 * pair; pair_xy is the top MB of the current pair. */
5329 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5330 const int top_pair_xy = pair_xy - s->mb_stride;
/* *_frame_flag: 1 if the neighbor pair is coded as a frame pair,
 * 0 if it is a field pair. */
5331 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5332 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5333 const int curr_mb_frame_flag = !MB_FIELD;
5334 const int bottom = (s->mb_y & 1);
/* The frame/field-flag combinations below decide whether the top neighbor
 * must be moved one more row up (condition header elided in this excerpt). */
5336 ? !curr_mb_frame_flag // bottom macroblock
5337 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5339 h->top_mb_xy -= s->mb_stride;
/* Mismatched frame/field coding between current and left pair: the left
 * neighbor is the top MB of the left pair. */
5341 if (left_mb_frame_flag != curr_mb_frame_flag) {
5342 h->left_mb_xy[0] = pair_xy - 1;
5344 } else if (FIELD_PICTURE) {
/* Field pictures: rows of the same parity are 2*mb_stride apart. */
5345 h->top_mb_xy -= s->mb_stride;
5351 * decodes a macroblock
5352 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is detected
/* Decodes one complete macroblock of a CABAC-coded slice: skip flags, MBAFF
 * field-decoding flag, mb_type, intra prediction modes or inter refs/motion
 * vectors, coded block pattern, qscale delta, and all residual blocks.
 * Returns 0 on success, negative on bitstream error. */
5354 static int decode_mb_cabac(H264Context *h) {
5355 MpegEncContext * const s = &h->s;
5357 int mb_type, partition_count, cbp = 0;
5358 int dct8x8_allowed= h->pps.transform_8x8_mode;
5360 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5362 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5364 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- Skip-flag handling (P/B slices only; I slices have no mb_skip_flag) --- */
5365 if( h->slice_type_nos != FF_I_TYPE ) {
5367 /* a skipped mb needs the aff flag from the following mb */
5368 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5369 predict_field_decoding_flag(h);
/* Top MB of an MBAFF pair was skipped: its skip flag was already read
 * together with the top MB's, so reuse the stored value. */
5370 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5371 skip = h->next_mb_skipped;
5373 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5374 /* read skip flags */
5376 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5377 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* Pre-read the bottom MB's skip flag; needed to know whether the
 * field flag must be predicted rather than decoded. */
5378 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5379 if(h->next_mb_skipped)
5380 predict_field_decoding_flag(h);
5382 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB state used by deblocking/prediction. */
5387 h->cbp_table[mb_xy] = 0;
5388 h->chroma_pred_mode_table[mb_xy] = 0;
5389 h->last_qscale_diff = 0;
5396 if( (s->mb_y&1) == 0 )
5398 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5401 h->prev_mb_skipped = 0;
/* --- mb_type --- */
5403 compute_mb_neighbors(h);
5404 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5405 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Map the raw decoded index to (partition_count, canonical mb_type); large
 * indices in P/B slices denote intra MBs and fall through to the intra path. */
5409 if( h->slice_type_nos == FF_B_TYPE ) {
5411 partition_count= b_mb_type_info[mb_type].partition_count;
5412 mb_type= b_mb_type_info[mb_type].type;
5415 goto decode_intra_mb;
5417 } else if( h->slice_type_nos == FF_P_TYPE ) {
5419 partition_count= p_mb_type_info[mb_type].partition_count;
5420 mb_type= p_mb_type_info[mb_type].type;
5423 goto decode_intra_mb;
5426 if(h->slice_type == FF_SI_TYPE && mb_type)
5428 assert(h->slice_type_nos == FF_I_TYPE);
5430 partition_count = 0;
5431 cbp= i_mb_type_info[mb_type].cbp;
5432 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5433 mb_type= i_mb_type_info[mb_type].type;
5436 mb_type |= MB_TYPE_INTERLACED;
5438 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow in the bitstream, bypassing CABAC --- */
5440 if(IS_INTRA_PCM(mb_type)) {
5443 // We assume these blocks are very rare so we do not optimize it.
5444 // FIXME The two following lines get the bitstream position in the cabac
5445 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5446 ptr= h->cabac.bytestream;
5447 if(h->cabac.low&0x1) ptr--;
5449 if(h->cabac.low&0x1FF) ptr--;
5452 // The pixels are stored in the same order as levels in h->mb array.
5453 memcpy(h->mb, ptr, 256); ptr+=256;
5455 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Restart the CABAC decoder after the raw PCM payload. */
5458 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5460 // All blocks are present
5461 h->cbp_table[mb_xy] = 0x1ef;
5462 h->chroma_pred_mode_table[mb_xy] = 0;
5463 // In deblocking, the quantizer is 0
5464 s->current_picture.qscale_table[mb_xy]= 0;
5465 // All coeffs are present
5466 memset(h->non_zero_count[mb_xy], 16, 16);
5467 s->current_picture.mb_type[mb_xy]= mb_type;
5468 h->last_qscale_diff = 0;
/* MBAFF field MB: temporarily double ref counts so that per-field reference
 * indices are in range; undone (>>= 1) at the end of this function. */
5473 h->ref_count[0] <<= 1;
5474 h->ref_count[1] <<= 1;
5477 fill_caches(h, mb_type, 0);
/* --- Intra prediction modes --- */
5479 if( IS_INTRA( mb_type ) ) {
5481 if( IS_INTRA4x4( mb_type ) ) {
5482 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5483 mb_type |= MB_TYPE_8x8DCT;
/* 8x8 transform: one prediction mode per 8x8 block, replicated into
 * the four 4x4 cache positions it covers. */
5484 for( i = 0; i < 16; i+=4 ) {
5485 int pred = pred_intra_mode( h, i );
5486 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5487 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5490 for( i = 0; i < 16; i++ ) {
5491 int pred = pred_intra_mode( h, i );
5492 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5494 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5497 write_back_intra_pred_mode(h);
5498 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5500 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5501 if( h->intra16x16_pred_mode < 0 ) return -1;
5504 h->chroma_pred_mode_table[mb_xy] =
5505 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5507 pred_mode= check_intra_pred_mode( h, pred_mode );
5508 if( pred_mode < 0 ) return -1;
5509 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-macroblock partitions (P_8x8 / B_8x8) --- */
5511 } else if( partition_count == 4 ) {
5512 int i, j, sub_partition_count[4], list, ref[2][4];
5514 if( h->slice_type_nos == FF_B_TYPE ) {
5515 for( i = 0; i < 4; i++ ) {
5516 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5517 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5518 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5520 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5521 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5522 pred_direct_motion(h, &mb_type);
5523 h->ref_cache[0][scan8[4]] =
5524 h->ref_cache[1][scan8[4]] =
5525 h->ref_cache[0][scan8[12]] =
5526 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5527 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5528 for( i = 0; i < 4; i++ )
5529 if( IS_DIRECT(h->sub_mb_type[i]) )
5530 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5534 for( i = 0; i < 4; i++ ) {
5535 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5536 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5537 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices for each 8x8 partition (direct partitions skipped). */
5541 for( list = 0; list < h->list_count; list++ ) {
5542 for( i = 0; i < 4; i++ ) {
5543 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5544 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5545 if( h->ref_count[list] > 1 )
5546 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5552 h->ref_cache[list][ scan8[4*i]+1 ]=
5553 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5558 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors for each sub-partition, written into mv/mvd caches
 * according to the sub-partition shape (8x8 / 8x4 / 4x8 / 4x4). */
5560 for(list=0; list<h->list_count; list++){
5562 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5563 if(IS_DIRECT(h->sub_mb_type[i])){
5564 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5568 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5569 const int sub_mb_type= h->sub_mb_type[i];
5570 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5571 for(j=0; j<sub_partition_count[i]; j++){
5574 const int index= 4*i + block_width*j;
5575 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5576 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5577 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5579 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5580 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5581 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5583 if(IS_SUB_8X8(sub_mb_type)){
5585 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5587 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5590 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5592 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5593 }else if(IS_SUB_8X4(sub_mb_type)){
5594 mv_cache[ 1 ][0]= mx;
5595 mv_cache[ 1 ][1]= my;
5597 mvd_cache[ 1 ][0]= mx - mpx;
5598 mvd_cache[ 1 ][1]= my - mpy;
5599 }else if(IS_SUB_4X8(sub_mb_type)){
5600 mv_cache[ 8 ][0]= mx;
5601 mv_cache[ 8 ][1]= my;
5603 mvd_cache[ 8 ][0]= mx - mpx;
5604 mvd_cache[ 8 ][1]= my - mpy;
5606 mv_cache[ 0 ][0]= mx;
5607 mv_cache[ 0 ][1]= my;
5609 mvd_cache[ 0 ][0]= mx - mpx;
5610 mvd_cache[ 0 ][1]= my - mpy;
/* Partition not predicted from this list: zero the 2x2 cache area. */
5613 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5614 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5615 p[0] = p[1] = p[8] = p[9] = 0;
5616 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B_Direct_16x16 --- */
5620 } else if( IS_DIRECT(mb_type) ) {
5621 pred_direct_motion(h, &mb_type);
5622 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5623 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5624 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5626 int list, mx, my, i, mpx, mpy;
5627 if(IS_16X16(mb_type)){
5628 for(list=0; list<h->list_count; list++){
5629 if(IS_DIR(mb_type, 0, list)){
5630 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5631 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5633 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5635 for(list=0; list<h->list_count; list++){
5636 if(IS_DIR(mb_type, 0, list)){
5637 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5639 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5640 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5641 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5643 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5644 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5646 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5649 else if(IS_16X8(mb_type)){
5650 for(list=0; list<h->list_count; list++){
5652 if(IS_DIR(mb_type, i, list)){
5653 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5654 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5656 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5659 for(list=0; list<h->list_count; list++){
5661 if(IS_DIR(mb_type, i, list)){
5662 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5663 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5664 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5665 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5667 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5668 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5670 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5671 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5676 assert(IS_8X16(mb_type));
5677 for(list=0; list<h->list_count; list++){
5679 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5680 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5681 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5683 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5686 for(list=0; list<h->list_count; list++){
5688 if(IS_DIR(mb_type, i, list)){
5689 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5690 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5691 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5693 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5694 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5695 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5697 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5698 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5705 if( IS_INTER( mb_type ) ) {
5706 h->chroma_pred_mode_table[mb_xy] = 0;
5707 write_back_motion( h, mb_type );
/* --- Coded block pattern (Intra16x16 derives cbp from its mb_type) --- */
5710 if( !IS_INTRA16x16( mb_type ) ) {
5711 cbp = decode_cabac_mb_cbp_luma( h );
5713 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5716 h->cbp_table[mb_xy] = h->cbp = cbp;
5718 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5719 if( decode_cabac_mb_transform_size( h ) )
5720 mb_type |= MB_TYPE_8x8DCT;
5722 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- Residuals: dqp, scan-table selection, then per-block decoding --- */
5724 if( cbp || IS_INTRA16x16( mb_type ) ) {
5725 const uint8_t *scan, *scan8x8, *dc_scan;
5726 const uint32_t *qmul;
/* Field MBs use the field scan orders; the *_q0 variants apply at qscale 0. */
5729 if(IS_INTERLACED(mb_type)){
5730 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5731 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5732 dc_scan= luma_dc_field_scan;
5734 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5735 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5736 dc_scan= luma_dc_zigzag_scan;
5739 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5740 if( dqp == INT_MIN ){
5741 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into [0,51] (mod-52 arithmetic per the spec). */
5745 if(((unsigned)s->qscale) > 51){
5746 if(s->qscale<0) s->qscale+= 52;
5747 else s->qscale-= 52;
5749 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5750 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Intra16x16: DC block (cat 0) plus, if cbp says so, 15-coeff AC blocks. */
5752 if( IS_INTRA16x16( mb_type ) ) {
5754 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5755 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5758 qmul = h->dequant4_coeff[0][s->qscale];
5759 for( i = 0; i < 16; i++ ) {
5760 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5761 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5764 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Other MB types: one 8x8 (cat 5) or four 4x4 (cat 2) luma blocks per
 * coded 8x8 group. */
5768 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5769 if( cbp & (1<<i8x8) ) {
5770 if( IS_8x8DCT(mb_type) ) {
5771 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5772 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5774 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5775 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5776 const int index = 4*i8x8 + i4x4;
5777 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5779 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5780 //STOP_TIMER("decode_residual")
5784 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5785 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma: 2x2 DC blocks (cat 3), then 15-coeff AC blocks (cat 4). */
5792 for( c = 0; c < 2; c++ ) {
5793 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5794 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5800 for( c = 0; c < 2; c++ ) {
5801 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5802 for( i = 0; i < 4; i++ ) {
5803 const int index = 16 + 4 * c + i;
5804 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5805 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5809 uint8_t * const nnz= &h->non_zero_count_cache[0];
5810 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5811 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp == 0 and not Intra16x16: no residuals; clear all nnz caches. */
5814 uint8_t * const nnz= &h->non_zero_count_cache[0];
5815 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5816 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5817 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5818 h->last_qscale_diff = 0;
5821 s->current_picture.qscale_table[mb_xy]= s->qscale;
5822 write_back_non_zero_count(h);
/* Undo the MBAFF ref_count doubling done before fill_caches(). */
5825 h->ref_count[0] >>= 1;
5826 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (16 pixels tall).  For bS < 4 it defers to
 * the DSP loop filter with per-4-pixel-group tc0 clipping values; bS == 4
 * (intra MB edge) uses the strong-filter C fallback below. */
5833 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* alpha/beta tables are biased by 52 so negative qp+offset indices work. */
5835 const int index_a = qp + h->slice_alpha_c0_offset;
5836 const int alpha = (alpha_table+52)[index_a];
5837 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 marks "no filtering" for that 4-pixel group. */
5842 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5843 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5845 /* 16px edge length, because bS=4 is triggered by being at
5846 * the edge of an intra MB, so all 4 bS are the same */
5847 for( d = 0; d < 16; d++ ) {
5848 const int p0 = pix[-1];
5849 const int p1 = pix[-2];
5850 const int p2 = pix[-3];
5852 const int q0 = pix[0];
5853 const int q1 = pix[1];
5854 const int q2 = pix[2];
/* Edge is filtered only when the gradient across it is below the
 * alpha/beta activity thresholds. */
5856 if( FFABS( p0 - q0 ) < alpha &&
5857 FFABS( p1 - p0 ) < beta &&
5858 FFABS( q1 - q0 ) < beta ) {
/* Strong filter: modifies up to 3 pixels on each side. */
5860 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5861 if( FFABS( p2 - p0 ) < beta)
5863 const int p3 = pix[-4];
5865 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5866 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5867 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5870 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5872 if( FFABS( q2 - q0 ) < beta)
5874 const int q3 = pix[3];
5876 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5877 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5878 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5881 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback when the inner-sample condition fails: p0/q0 only. */
5885 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5886 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5888 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge; both strength classes are handled by
 * DSP routines (chroma uses tc0+1 clipping, bS==4 the intra variant). */
5894 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5896 const int index_a = qp + h->slice_alpha_c0_offset;
5897 const int alpha = (alpha_table+52)[index_a];
5898 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma convention: tc = tc0 + 1; 0 marks "no filtering" here. */
5903 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5904 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5906 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Vertical luma edge filter for the MBAFF case: each of the 16 rows may take
 * a different bS index and qp (field vs frame pairing), so the filtering is
 * done per-row in C rather than via the DSP routine. */
5910 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5912 for( i = 0; i < 16; i++, pix += stride) {
5918 int bS_index = (i >> 1);
5921 bS_index |= (i & 1);
5924 if( bS[bS_index] == 0 ) {
/* Row-to-qp mapping differs between field (top/bottom halves) and
 * frame (alternating rows) macroblocks. */
5928 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5929 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5930 alpha = (alpha_table+52)[index_a];
5931 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal (bS < 4) filtering with tc clipping. */
5933 if( bS[bS_index] < 4 ) {
5934 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5935 const int p0 = pix[-1];
5936 const int p1 = pix[-2];
5937 const int p2 = pix[-3];
5938 const int q0 = pix[0];
5939 const int q1 = pix[1];
5940 const int q2 = pix[2];
5942 if( FFABS( p0 - q0 ) < alpha &&
5943 FFABS( p1 - p0 ) < beta &&
5944 FFABS( q1 - q0 ) < beta ) {
/* p1/q1 are additionally adjusted when their side is smooth. */
5948 if( FFABS( p2 - p0 ) < beta ) {
5949 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5952 if( FFABS( q2 - q0 ) < beta ) {
5953 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5957 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5958 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5959 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5960 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong (bS == 4) filtering branch — same weights as filter_mb_edgev. */
5963 const int p0 = pix[-1];
5964 const int p1 = pix[-2];
5965 const int p2 = pix[-3];
5967 const int q0 = pix[0];
5968 const int q1 = pix[1];
5969 const int q2 = pix[2];
5971 if( FFABS( p0 - q0 ) < alpha &&
5972 FFABS( p1 - p0 ) < beta &&
5973 FFABS( q1 - q0 ) < beta ) {
5975 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5976 if( FFABS( p2 - p0 ) < beta)
5978 const int p3 = pix[-4];
5980 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5981 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5982 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5985 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5987 if( FFABS( q2 - q0 ) < beta)
5989 const int q3 = pix[3];
5991 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5992 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5993 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5996 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6000 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6001 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6003 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Vertical chroma edge filter for the MBAFF case: per-row bS/qp selection,
 * filtered in C (chroma edge is 8 rows; only p0/q0 are modified). */
6008 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6010 for( i = 0; i < 8; i++, pix += stride) {
6018 if( bS[bS_index] == 0 ) {
/* Chroma rows map to qp halves by i>>2 (field) or row parity (frame). */
6022 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6023 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6024 alpha = (alpha_table+52)[index_a];
6025 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6027 if( bS[bS_index] < 4 ) {
/* Chroma clipping uses tc0 + 1. */
6028 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6029 const int p0 = pix[-1];
6030 const int p1 = pix[-2];
6031 const int q0 = pix[0];
6032 const int q1 = pix[1];
6034 if( FFABS( p0 - q0 ) < alpha &&
6035 FFABS( p1 - p0 ) < beta &&
6036 FFABS( q1 - q0 ) < beta ) {
6037 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6039 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6040 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6041 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (still only p0/q0). */
6044 const int p0 = pix[-1];
6045 const int p1 = pix[-2];
6046 const int q0 = pix[0];
6047 const int q1 = pix[1];
6049 if( FFABS( p0 - q0 ) < alpha &&
6050 FFABS( p1 - p0 ) < beta &&
6051 FFABS( q1 - q0 ) < beta ) {
6053 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6054 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6055 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; the horizontal counterpart of
 * filter_mb_edgev — neighbors are addressed via multiples of the stride. */
6061 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6063 const int index_a = qp + h->slice_alpha_c0_offset;
6064 const int alpha = (alpha_table+52)[index_a];
6065 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6066 const int pix_next = stride;
/* bS < 4: DSP loop filter with tc0 clipping (-1 disables a group). */
6071 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6072 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6074 /* 16px edge length, see filter_mb_edgev */
6075 for( d = 0; d < 16; d++ ) {
6076 const int p0 = pix[-1*pix_next];
6077 const int p1 = pix[-2*pix_next];
6078 const int p2 = pix[-3*pix_next];
6079 const int q0 = pix[0];
6080 const int q1 = pix[1*pix_next];
6081 const int q2 = pix[2*pix_next];
6083 if( FFABS( p0 - q0 ) < alpha &&
6084 FFABS( p1 - p0 ) < beta &&
6085 FFABS( q1 - q0 ) < beta ) {
6087 const int p3 = pix[-4*pix_next];
6088 const int q3 = pix[ 3*pix_next];
/* Strong (bS == 4) filter, same weights as the vertical variant. */
6090 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6091 if( FFABS( p2 - p0 ) < beta) {
6093 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6094 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6095 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6098 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6100 if( FFABS( q2 - q0 ) < beta) {
6102 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6103 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6104 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6107 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: p0/q0 only. */
6111 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6112 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6114 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblocks one horizontal chroma edge via the DSP routines; the horizontal
 * counterpart of filter_mb_edgecv (chroma tc = tc0 + 1, 0 disables). */
6121 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6123 const int index_a = qp + h->slice_alpha_c0_offset;
6124 const int alpha = (alpha_table+52)[index_a];
6125 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6130 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6131 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6133 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock: computes boundary strengths with
 * the DSP helper and batches the edge filters.  Falls back to the full
 * filter_mb() for the cases it cannot handle (first row/column, MBAFF,
 * per-plane chroma qp offsets, cross-slice disable). */
6137 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6138 MpegEncContext * const s = &h->s;
6139 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6141 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Bail out to the generic filter for unsupported configurations. */
6145 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6146 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6147 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6148 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6149 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6152 assert(!FRAME_MBAFF);
/* Average current/left/top qp for the MB-boundary edges (spec behavior). */
6154 mb_type = s->current_picture.mb_type[mb_xy];
6155 qp = s->current_picture.qscale_table[mb_xy];
6156 qp0 = s->current_picture.qscale_table[mb_xy-1];
6157 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6158 qpc = get_chroma_qp( h, 0, qp );
6159 qpc0 = get_chroma_qp( h, 0, qp0 );
6160 qpc1 = get_chroma_qp( h, 0, qp1 );
6161 qp0 = (qp + qp0 + 1) >> 1;
6162 qp1 = (qp + qp1 + 1) >> 1;
6163 qpc0 = (qpc + qpc0 + 1) >> 1;
6164 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this threshold alpha is 0 for every edge, so nothing is filtered. */
6165 qp_thresh = 15 - h->slice_alpha_c0_offset;
6166 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6167 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: fixed strengths — 4 on MB boundaries (3 on the horizontal
 * boundary in field pictures), 3 on internal edges. */
6170 if( IS_INTRA(mb_type) ) {
6171 int16_t bS4[4] = {4,4,4,4};
6172 int16_t bS3[4] = {3,3,3,3};
6173 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6174 if( IS_8x8DCT(mb_type) ) {
6175 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6176 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6177 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6178 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6180 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6181 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6182 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6183 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6184 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6185 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6186 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6187 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6189 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6190 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6191 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6192 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6193 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6194 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6195 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6196 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per 4-pixel group (via SIMD helper), then filter
 * the edges selected by the FILTER macro.  bSv aliases bS as uint64_t so
 * a whole edge's four strengths can be set/tested at once. */
6199 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6200 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6202 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6204 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the strength routine which internal edges can be
 * skipped because the partitioning guarantees equal mv/ref there. */
6206 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6207 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6208 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6209 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6211 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6212 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6213 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6214 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* MB-boundary edges against an intra neighbor always get full strength. */
6216 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6217 bSv[0][0] = 0x0004000400040004ULL;
6218 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6219 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6221 #define FILTER(hv,dir,edge)\
6222 if(bSv[dir][edge]) {\
6223 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6225 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6226 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6232 } else if( IS_8x8DCT(mb_type) ) {
6252 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6253 MpegEncContext * const s = &h->s;
6255 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6256 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6257 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6258 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6259 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6261 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6262 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6263 // how often to recheck mv-based bS when iterating between edges
6264 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6265 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6266 // how often to recheck mv-based bS when iterating along each edge
6267 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6269 if (first_vertical_edge_done) {
6273 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6276 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6277 && !IS_INTERLACED(mb_type)
6278 && IS_INTERLACED(mbm_type)
6280 // This is a special case in the norm where the filtering must
6281 // be done twice (one each of the field) even if we are in a
6282 // frame macroblock.
6284 static const int nnz_idx[4] = {4,5,6,3};
6285 unsigned int tmp_linesize = 2 * linesize;
6286 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6287 int mbn_xy = mb_xy - 2 * s->mb_stride;
6292 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6293 if( IS_INTRA(mb_type) ||
6294 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6295 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6297 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6298 for( i = 0; i < 4; i++ ) {
6299 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6300 mbn_nnz[nnz_idx[i]] != 0 )
6306 // Do not use s->qscale as luma quantizer because it has not the same
6307 // value in IPCM macroblocks.
6308 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6309 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6310 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6311 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6312 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6313 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6314 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6315 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6322 for( edge = start; edge < edges; edge++ ) {
6323 /* mbn_xy: neighbor macroblock */
6324 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6325 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6326 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6330 if( (edge&1) && IS_8x8DCT(mb_type) )
6333 if( IS_INTRA(mb_type) ||
6334 IS_INTRA(mbn_type) ) {
6337 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6338 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6347 bS[0] = bS[1] = bS[2] = bS[3] = value;
6352 if( edge & mask_edge ) {
6353 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6356 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6357 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6360 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6361 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6362 int bn_idx= b_idx - (dir ? 8:1);
6365 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6366 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6367 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6368 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6371 if(h->slice_type_nos == FF_B_TYPE && v){
6373 for( l = 0; !v && l < 2; l++ ) {
6375 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6376 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6377 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6381 bS[0] = bS[1] = bS[2] = bS[3] = v;
6387 for( i = 0; i < 4; i++ ) {
6388 int x = dir == 0 ? edge : i;
6389 int y = dir == 0 ? i : edge;
6390 int b_idx= 8 + 4 + x + 8*y;
6391 int bn_idx= b_idx - (dir ? 8:1);
6393 if( h->non_zero_count_cache[b_idx] |
6394 h->non_zero_count_cache[bn_idx] ) {
6400 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6401 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6402 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6403 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6409 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6411 for( l = 0; l < 2; l++ ) {
6413 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6414 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6415 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6424 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6429 // Do not use s->qscale as luma quantizer because it has not the same
6430 // value in IPCM macroblocks.
6431 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6432 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6433 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6434 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6436 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6437 if( (edge&1) == 0 ) {
6438 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6439 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6440 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6441 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6444 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6445 if( (edge&1) == 0 ) {
6446 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6447 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6448 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6449 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Full (non-fast) deblocking entry point for one macroblock: performs the
// low-QP early exit, fixes up non_zero_count_cache for CAVLC 8x8 transform,
// handles the MBAFF special first vertical edge, then filters both
// directions via filter_mb_dir().
// NOTE(review): the embedded source-line numbers jump in this listing, so
// several lines are elided; comments describe only the visible code.
6455 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6456     MpegEncContext * const s = &h->s;
6457     const int mb_xy= mb_x + mb_y*s->mb_stride;
6458     const int mb_type = s->current_picture.mb_type[mb_xy];
// Field-coded MBs use a tighter vertical-mv threshold (2 = half luma sample).
6459     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6460     int first_vertical_edge_done = 0;
6463     //for sufficiently low qp, filtering wouldn't do anything
6464     //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6466         int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6467         int qp = s->current_picture.qscale_table[mb_xy];
// Skip the whole MB when the averaged QP with each neighbour is below the
// threshold (the filter would be a no-op).
6469            && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6470            && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6475     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6476     if(!h->pps.cabac && h->pps.transform_8x8_mode){
6477         int top_type, left_type[2];
6478         top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
6479         left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6480         left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// Rebuild the top/left border NNZ entries from the coded-block-pattern
// when the neighbour used the 8x8 transform.
6482         if(IS_8x8DCT(top_type)){
6483             h->non_zero_count_cache[4+8*0]=
6484             h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6485             h->non_zero_count_cache[6+8*0]=
6486             h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6488         if(IS_8x8DCT(left_type[0])){
6489             h->non_zero_count_cache[3+8*1]=
6490             h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6492         if(IS_8x8DCT(left_type[1])){
6493             h->non_zero_count_cache[3+8*3]=
6494             h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// And the current MB's own 4x4 NNZ entries, one CBP bit per 8x8 quadrant.
6497         if(IS_8x8DCT(mb_type)){
6498             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
6499             h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp_table[mb_xy] & 1;
6501             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6502             h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6504             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6505             h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6507             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6508             h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6513         // left mb is in picture
6514         && h->slice_table[mb_xy-1] != 0xFFFF
6515         // and current and left pair do not have the same interlaced type
6516         && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6517         // and left mb is in the same slice if deblocking_filter == 2
6518         && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6519         /* First vertical edge is different in MBAFF frames
6520          * There are 8 different bS to compute and 2 different Qp
6522         const int pair_xy = mb_x     + (mb_y&~1)*s->mb_stride;
6523         const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6528         int mb_qp, mbn0_qp, mbn1_qp;
6530         first_vertical_edge_done = 1;
6532         if( IS_INTRA(mb_type) )
6533             bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
// 8 sub-edges: each maps to one of the two left field/frame MBs.
6535             for( i = 0; i < 8; i++ ) {
6536                 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6538                 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6540                 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6541                          ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6542                             (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6544                             h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// Two QP sets: one per left neighbour, averaged with the current MB's QP,
// for luma (qp), Cb (bqp) and Cr (rqp).
6551         mb_qp = s->current_picture.qscale_table[mb_xy];
6552         mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6553         mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6554         qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6555         bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6556                    get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6557         rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6558                    get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6559         qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6560         bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6561                    get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6562         rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6563                    get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6566         tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6567         { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6568         filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
6569         filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6570         filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// Filter both directions; the MBAFF edge above replaces dir-0 edge 0.
6574         for( dir = 0; dir < 2; dir++ )
6575             filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6577         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6578         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
// Slice decoding worker (also used as an avctx->execute() callback; arg is a
// pointer to the H264Context pointer).  Selects the CABAC or CAVLC
// macroblock loop, decodes MBs until end-of-slice, and reports decoded /
// errored regions to the error concealment via ff_er_add_slice().
// NOTE(review): the embedded source-line numbers jump in this listing, so
// several lines are elided; comments describe only the visible code.
6582 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6583     H264Context *h = *(void**)arg;
6584     MpegEncContext * const s = &h->s;
6585     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
// is_complex selects the slower, fully general hl_decode_mb path.
6589     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6590                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6592     if( h->pps.cabac ) {
// CABAC data starts byte-aligned after the slice header.
6596         align_get_bits( &s->gb );
6599         ff_init_cabac_states( &h->cabac);
6600         ff_init_cabac_decoder( &h->cabac,
6601                                s->gb.buffer + get_bits_count(&s->gb)/8,
6602                                ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6603         /* calculate pre-state */
// Initialize the 460 CABAC context states from the QP-dependent init
// tables (I-slice table or one of the P/B tables per cabac_init_idc).
6604         for( i= 0; i < 460; i++ ) {
6606             if( h->slice_type_nos == FF_I_TYPE )
6607                 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6609                 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6612                 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6614                 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
// CABAC macroblock loop.
6619             int ret = decode_mb_cabac(h);
6621             //STOP_TIMER("decode_mb_cabac")
6623             if(ret>=0) hl_decode_mb(h);
6625             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6628                 if(ret>=0) ret = decode_mb_cabac(h);
6630                 if(ret>=0) hl_decode_mb(h);
6633             eos = get_cabac_terminate( &h->cabac );
// Reading more than 2 bytes past the end of the bytestream means corrupt
// input.
6635             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6636                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6637                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6641             if( ++s->mb_x >= s->mb_width ) {
6643                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6645                 if(FIELD_OR_MBAFF_PICTURE) {
6650             if( eos || s->mb_y >= s->mb_height ) {
6651                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6652                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC macroblock loop.
6659             int ret = decode_mb_cavlc(h);
6661             if(ret>=0) hl_decode_mb(h);
6663             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6665                 ret = decode_mb_cavlc(h);
6667                 if(ret>=0) hl_decode_mb(h);
6672                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6673                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6678             if(++s->mb_x >= s->mb_width){
6680                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6682                 if(FIELD_OR_MBAFF_PICTURE) {
6685                 if(s->mb_y >= s->mb_height){
6686                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// Exactly consumed bitstream => clean slice end; otherwise report error.
6688                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6689                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6693                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// mb_skip_run>0 may legitimately extend past the last coded bit.
6700             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6701                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6702                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6703                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6707                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// Partitioned-frame path (decode_mb); note it uses s->gb without '&' here.
6716         for(;s->mb_y < s->mb_height; s->mb_y++){
6717             for(;s->mb_x < s->mb_width; s->mb_x++){
6718                 int ret= decode_mb(h);
6723                     av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6724                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6729                 if(++s->mb_x >= s->mb_width){
6731                     if(++s->mb_y >= s->mb_height){
6732                         if(get_bits_count(s->gb) == s->gb.size_in_bits){
6733                             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6737                             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// NOTE(review): the next line is garbled ("s->?gb", "s->gb?."); judging by
// line 6745 below it presumably read
// "if(get_bits_count(s->gb) >= s->gb.size_in_bits){" — confirm against the
// upstream FFmpeg h264.c before relying on it.
6744                 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6745                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
6746                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6750                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6757             ff_draw_horiz_band(s, 16*s->mb_y, 16);
6760     return -1; //not reached
// Parse a Picture Timing SEI message (H.264 Annex D): CPB/DPB delays,
// pic_struct and the optional per-timestamp clock fields.  All values that
// the decoder does not need are skipped.
// NOTE(review): source-line numbers jump in this listing, so some lines
// (e.g. the final return) are elided here.
6763 static int decode_picture_timing(H264Context *h){
6764     MpegEncContext * const s = &h->s;
// Delay fields are only present when HRD parameters were signalled in the
// SPS; their bit widths come from those HRD parameters.
6765     if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6766         skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6767         skip_bits(&s->gb, h->sps.dpb_output_delay_length);  /* dpb_output_delay */
6769     if(h->sps.pic_struct_present_flag){
6770         unsigned int i, num_clock_ts;
6771         h->sei_pic_struct = get_bits(&s->gb, 4);
// Values above FRAME_TRIPLING are reserved / invalid.
6773         if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6776         num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6778         for (i = 0 ; i < num_clock_ts ; i++){
6779             if(get_bits(&s->gb, 1)){                      /* clock_timestamp_flag */
6780                 unsigned int full_timestamp_flag;
6781                 skip_bits(&s->gb, 2);                 /* ct_type */
6782                 skip_bits(&s->gb, 1);                 /* nuit_field_based_flag */
6783                 skip_bits(&s->gb, 5);                 /* counting_type */
6784                 full_timestamp_flag = get_bits(&s->gb, 1);
6785                 skip_bits(&s->gb, 1);                 /* discontinuity_flag */
6786                 skip_bits(&s->gb, 1);                 /* cnt_dropped_flag */
6787                 skip_bits(&s->gb, 8);                 /* n_frames */
// With full_timestamp_flag the h/m/s fields are unconditional; otherwise
// each is gated by its own presence flag.
6788                 if(full_timestamp_flag){
6789                     skip_bits(&s->gb, 6);             /* seconds_value 0..59 */
6790                     skip_bits(&s->gb, 6);             /* minutes_value 0..59 */
6791                     skip_bits(&s->gb, 5);             /* hours_value 0..23 */
6793                     if(get_bits(&s->gb, 1)){          /* seconds_flag */
6794                         skip_bits(&s->gb, 6);         /* seconds_value range 0..59 */
6795                         if(get_bits(&s->gb, 1)){      /* minutes_flag */
6796                             skip_bits(&s->gb, 6);     /* minutes_value 0..59 */
6797                             if(get_bits(&s->gb, 1))   /* hours_flag */
6798                                 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6802                 if(h->sps.time_offset_length > 0)
6803                     skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
// Parse an "unregistered user data" SEI payload of the given size.  Reads up
// to sizeof(user_data)-1 bytes, looks for an x264 version banner (to record
// h->x264_build for bug workarounds), then skips any remaining bytes.
// NOTE(review): source-line numbers jump; some lines are elided here.
6810 static int decode_unregistered_user_data(H264Context *h, int size){
6811     MpegEncContext * const s = &h->s;
// 16 bytes of UUID followed by up to 256 payload bytes.
6812     uint8_t user_data[16+256];
6818     for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6819         user_data[i]= get_bits(&s->gb, 8);
// Payload text starts after the 16-byte UUID; extract the x264 core build
// number if present.
6823     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6824     if(e==1 && build>=0)
6825         h->x264_build= build;
6827     if(s->avctx->debug & FF_DEBUG_BUGS)
6828         av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Skip whatever part of the payload did not fit in the local buffer.
6831         skip_bits(&s->gb, 8);
// Parse all SEI messages in the current NAL unit.  Each message is a
// (type, size) pair where both fields use the 0xFF-extension coding: bytes
// of 255 accumulate until a terminating byte < 255.
// NOTE(review): source-line numbers jump; some lines are elided here.
6836 static int decode_sei(H264Context *h){
6837     MpegEncContext * const s = &h->s;
// Need at least 16 more bits for another (type, size) header.
6839     while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// Accumulate the ff-coded payload type.
6844             type+= show_bits(&s->gb, 8);
6845         }while(get_bits(&s->gb, 8) == 255);
// Accumulate the ff-coded payload size (in bytes).
6849             size+= show_bits(&s->gb, 8);
6850         }while(get_bits(&s->gb, 8) == 255);
6853         case 1: // Picture timing SEI
6854             if(decode_picture_timing(h) < 0)
6858             if(decode_unregistered_user_data(h, size) < 0)
// Unknown payload types are skipped wholesale.
6862             skip_bits(&s->gb, 8*size);
6865         //FIXME check bits here
6866         align_get_bits(&s->gb);
// Parse HRD (hypothetical reference decoder) parameters from the VUI
// (H.264 Annex E).  Only the delay-field lengths are kept in the SPS; the
// per-CPB rate/size values are read and discarded.
// NOTE(review): source-line numbers jump; the final return is elided here.
6872 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6873     MpegEncContext * const s = &h->s;
6875     cpb_count = get_ue_golomb(&s->gb) + 1;
// Spec limit: cpb_cnt_minus1 is 0..31.
6877     if(cpb_count > 32U){
6878         av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6882     get_bits(&s->gb, 4); /* bit_rate_scale */
6883     get_bits(&s->gb, 4); /* cpb_size_scale */
6884     for(i=0; i<cpb_count; i++){
6885         get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6886         get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6887         get_bits1(&s->gb);     /* cbr_flag */
6889     get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
// These lengths size the delay fields in Picture Timing SEI messages.
6890     sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6891     sps->dpb_output_delay_length  = get_bits(&s->gb, 5) + 1;
6892     sps->time_offset_length = get_bits(&s->gb, 5);
// Parse the VUI (video usability information) block of an SPS (H.264
// Annex E): sample aspect ratio, video signal description, timing info,
// HRD parameters and bitstream restrictions.  Fields the decoder does not
// use are read and discarded.
// NOTE(review): source-line numbers jump; some lines are elided here.
6896 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6897     MpegEncContext * const s = &h->s;
6898     int aspect_ratio_info_present_flag;
6899     unsigned int aspect_ratio_idc;
6901     aspect_ratio_info_present_flag= get_bits1(&s->gb);
6903     if( aspect_ratio_info_present_flag ) {
6904         aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR: the SAR is coded explicitly as num/den; otherwise it is
// looked up from the fixed pixel_aspect table.
6905         if( aspect_ratio_idc == EXTENDED_SAR ) {
6906             sps->sar.num= get_bits(&s->gb, 16);
6907             sps->sar.den= get_bits(&s->gb, 16);
6908         }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6909             sps->sar=  pixel_aspect[aspect_ratio_idc];
6911             av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6918 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6920     if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
6921         get_bits1(&s->gb);      /* overscan_appropriate_flag */
6924     if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
6925         get_bits(&s->gb, 3);    /* video_format */
6926         get_bits1(&s->gb);      /* video_full_range_flag */
6927         if(get_bits1(&s->gb)){  /* colour_description_present_flag */
6928             get_bits(&s->gb, 8); /* colour_primaries */
6929             get_bits(&s->gb, 8); /* transfer_characteristics */
6930             get_bits(&s->gb, 8); /* matrix_coefficients */
6934     if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
6935         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
6936         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
// Timing info gives the stream's fixed frame rate, when present.
6939     sps->timing_info_present_flag = get_bits1(&s->gb);
6940     if(sps->timing_info_present_flag){
6941         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6942         sps->time_scale = get_bits_long(&s->gb, 32);
6943         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// HRD parameters may appear twice: once for NAL and once for VCL.
6946     sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6947     if(sps->nal_hrd_parameters_present_flag)
6948         if(decode_hrd_parameters(h, sps) < 0)
6950     sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6951     if(sps->vcl_hrd_parameters_present_flag)
6952         if(decode_hrd_parameters(h, sps) < 0)
6954     if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6955         get_bits1(&s->gb);     /* low_delay_hrd_flag */
6956     sps->pic_struct_present_flag = get_bits1(&s->gb);
6958     sps->bitstream_restriction_flag = get_bits1(&s->gb);
6959     if(sps->bitstream_restriction_flag){
6960         get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
6961         get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6962         get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6963         get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6964         get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
// num_reorder_frames bounds how many frames the output may lag behind.
6965         sps->num_reorder_frames= get_ue_golomb(&s->gb);
6966         get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6968         if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6969             av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
// Parse one quantization scaling list of `size` (16 or 64) entries.  If the
// list is absent, `fallback_list` is copied; if the first delta makes the
// first entry zero, the JVT default list is used; otherwise entries are
// delta-coded in zigzag order with run-on of the last value.
// NOTE(review): source-line numbers jump; some lines are elided here.
6977 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6978                                 const uint8_t *jvt_list, const uint8_t *fallback_list){
6979     MpegEncContext * const s = &h->s;
6980     int i, last = 8, next = 8;
6981     const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6982     if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6983         memcpy(factors, fallback_list, size*sizeof(uint8_t));
6985         for(i=0;i<size;i++){
// Each entry is a signed delta from the previous value, modulo 256.
6987                 next = (last + get_se_golomb(&s->gb)) & 0xff;
6988             if(!i && !next){ /* matrix not written, we use the preset one */
6989                 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// A delta of 0 (next==0 handled above) repeats the last value.
6992             last = factors[scan[i]] = next ? next : last;
// Parse the full set of scaling matrices for an SPS or PPS: six 4x4 lists
// and (when the 8x8 transform is possible) two 8x8 lists.  Fallbacks follow
// the spec: SPS matrices fall back to the JVT defaults, PPS matrices fall
// back to the SPS ones; within a set, later lists fall back to the previous
// list of the same class.
// NOTE(review): source-line numbers jump; some lines are elided here.
6996 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6997                                     uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6998     MpegEncContext * const s = &h->s;
// A PPS may fall back to SPS matrices only if the SPS actually carried any.
6999     int fallback_sps = !is_sps && sps->scaling_matrix_present;
7000     const uint8_t *fallback[4] = {
7001         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7002         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7003         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7004         fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7006     if(get_bits1(&s->gb)){
7007         sps->scaling_matrix_present |= is_sps;
7008         decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7009         decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7010         decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7011         decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7012         decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7013         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7014         if(is_sps || pps->transform_8x8_mode){
7015             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
7016             decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
// Parse a sequence parameter set (SPS) NAL unit into a freshly allocated
// SPS, validate its fields and store it in h->sps_buffers[sps_id]
// (replacing any previous SPS with the same id).
// NOTE(review): source-line numbers jump; several lines (error returns,
// closing braces) are elided here.
7021 static inline int decode_seq_parameter_set(H264Context *h){
7022     MpegEncContext * const s = &h->s;
7023     int profile_idc, level_idc;
7024     unsigned int sps_id;
7028     profile_idc= get_bits(&s->gb, 8);
7029     get_bits1(&s->gb);   //constraint_set0_flag
7030     get_bits1(&s->gb);   //constraint_set1_flag
7031     get_bits1(&s->gb);   //constraint_set2_flag
7032     get_bits1(&s->gb);   //constraint_set3_flag
7033     get_bits(&s->gb, 4); // reserved
7034     level_idc= get_bits(&s->gb, 8);
7035     sps_id= get_ue_golomb(&s->gb);
7037     if(sps_id >= MAX_SPS_COUNT) {
7038         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7041     sps= av_mallocz(sizeof(SPS));
7045     sps->profile_idc= profile_idc;
7046     sps->level_idc= level_idc;
// Default flat scaling matrices (all 16) until/unless overridden below.
7048     memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7049     memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7050     sps->scaling_matrix_present = 0;
// High profile adds chroma format, bit depth and SPS scaling matrices.
7052     if(sps->profile_idc >= 100){ //high profile
7053         sps->chroma_format_idc= get_ue_golomb(&s->gb);
7054         if(sps->chroma_format_idc == 3)
7055             get_bits1(&s->gb);  //residual_color_transform_flag
7056         get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
7057         get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
7058         sps->transform_bypass = get_bits1(&s->gb);
7059         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// Non-high profiles are always 4:2:0.
7061         sps->chroma_format_idc= 1;
7064     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7065     sps->poc_type= get_ue_golomb(&s->gb);
7067     if(sps->poc_type == 0){ //FIXME #define
7068         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7069     } else if(sps->poc_type == 1){//FIXME #define
7070         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7071         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7072         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7073         sps->poc_cycle_length                = get_ue_golomb(&s->gb);
// Bound the cycle length by the fixed offset_for_ref_frame array size.
7075         if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7076             av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7080         for(i=0; i<sps->poc_cycle_length; i++)
7081             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7082     }else if(sps->poc_type != 2){
7083         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7087     sps->ref_frame_count= get_ue_golomb(&s->gb);
7088     if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7089         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7092     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7093     sps->mb_width = get_ue_golomb(&s->gb) + 1;
7094     sps->mb_height= get_ue_golomb(&s->gb) + 1;
// Guard 16*mb dimension arithmetic against overflow before the size check.
7095     if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7096        avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7097         av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7101     sps->frame_mbs_only_flag= get_bits1(&s->gb);
7102     if(!sps->frame_mbs_only_flag)
7103         sps->mb_aff= get_bits1(&s->gb);
7107     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7109 #ifndef ALLOW_INTERLACE
7111         av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7113     sps->crop= get_bits1(&s->gb);
7115         sps->crop_left  = get_ue_golomb(&s->gb);
7116         sps->crop_right = get_ue_golomb(&s->gb);
7117         sps->crop_top   = get_ue_golomb(&s->gb);
7118         sps->crop_bottom= get_ue_golomb(&s->gb);
7119         if(sps->crop_left || sps->crop_top){
7120             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
// Bottom crop limit halves for interlaced content (field rows).
7122         if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7123             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7129         sps->crop_bottom= 0;
7132     sps->vui_parameters_present_flag= get_bits1(&s->gb);
7133     if( sps->vui_parameters_present_flag )
7134         decode_vui_parameters(h, sps);
7136     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7137         av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7138                sps_id, sps->profile_idc, sps->level_idc,
7140                sps->ref_frame_count,
7141                sps->mb_width, sps->mb_height,
7142                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7143                sps->direct_8x8_inference_flag ? "8B8" : "",
7144                sps->crop_left, sps->crop_right,
7145                sps->crop_top, sps->crop_bottom,
7146                sps->vui_parameters_present_flag ? "VUI" : "",
7147                ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// Replace any previously stored SPS with the same id.
7150     av_free(h->sps_buffers[sps_id]);
7151     h->sps_buffers[sps_id]= sps;
// Build the per-PPS luma-QP -> chroma-QP lookup table for chroma plane `t`
// (0=Cb, 1=Cr), applying the plane's chroma_qp_index_offset and clipping
// the result into the valid 0..51 QP range.
// NOTE(review): the return-type line (7158, presumably "static void") is
// elided from this listing.
7159 build_qp_table(PPS *pps, int t, int index)
7162     for(i = 0; i < 52; i++)
7163         pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
// Parse a picture parameter set (PPS) NAL unit into a freshly allocated
// PPS, validate it against the referenced SPS, derive the chroma-QP tables
// and store it in h->pps_buffers[pps_id].
// NOTE(review): source-line numbers jump; several lines are elided.  The
// FMO table text further down appears to be inside a comment whose opening
// delimiter is elided.
7166 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7167     MpegEncContext * const s = &h->s;
7168     unsigned int pps_id= get_ue_golomb(&s->gb);
7171     if(pps_id >= MAX_PPS_COUNT) {
7172         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7176     pps= av_mallocz(sizeof(PPS));
// The referenced SPS must already have been decoded.
7179     pps->sps_id= get_ue_golomb(&s->gb);
7180     if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7181         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7185     pps->cabac= get_bits1(&s->gb);
7186     pps->pic_order_present= get_bits1(&s->gb);
7187     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// Slice groups (FMO) are signalled but not supported by this decoder.
7188     if(pps->slice_group_count > 1 ){
7189         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7190         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7191         switch(pps->mb_slice_group_map_type){
7194 |               for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
7195 |                   run_length[ i ]                             |1  |ue(v) |
7200 |               for( i = 0; i < num_slice_groups_minus1; i++ ) |   |      |
7202 |                   top_left_mb[ i ]                           |1  |ue(v) |
7203 |                   bottom_right_mb[ i ]                       |1  |ue(v) |
7211 |               slice_group_change_direction_flag              |1  |u(1)  |
7212 |               slice_group_change_rate_minus1                 |1  |ue(v) |
7217 |               slice_group_id_cnt_minus1                      |1  |ue(v) |
7218 |               for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |     |
7220 |                   slice_group_id[ i ]                        |1  |u(v)  |
7225     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7226     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7227     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7228         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7232     pps->weighted_pred= get_bits1(&s->gb);
7233     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7234     pps->init_qp= get_se_golomb(&s->gb) + 26;
7235     pps->init_qs= get_se_golomb(&s->gb) + 26;
7236     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7237     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7238     pps->constrained_intra_pred= get_bits1(&s->gb);
7239     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7241     pps->transform_8x8_mode= 0;
7242     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7243     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7244     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// Optional trailing PPS extension (more_rbsp_data): 8x8 transform flag,
// PPS scaling matrices and the second chroma QP offset.
7246     if(get_bits_count(&s->gb) < bit_length){
7247         pps->transform_8x8_mode= get_bits1(&s->gb);
7248         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7249         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7251         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7254     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7255     build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7256     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7257         h->pps.chroma_qp_diff= 1;
7259     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7260         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7261                pps_id, pps->sps_id,
7262                pps->cabac ? "CABAC" : "CAVLC",
7263                pps->slice_group_count,
7264                pps->ref_count[0], pps->ref_count[1],
7265                pps->weighted_pred ? "weighted" : "",
7266                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7267                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7268                pps->constrained_intra_pred ? "CONSTR" : "",
7269                pps->redundant_pic_cnt_present ? "REDU" : "",
7270                pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previously stored PPS with the same id.
7274     av_free(h->pps_buffers[pps_id]);
7275     h->pps_buffers[pps_id]= pps;
7283 * Call decode_slice() for each context.
7285 * @param h h264 master context
7286 * @param context_count number of contexts to execute
7288 static void execute_decode_slices(H264Context *h, int context_count){
7289 MpegEncContext * const s = &h->s;
7290 AVCodecContext * const avctx= s->avctx;
     /* Single context: decode directly on the master context, no threading. */
7294     if(context_count == 1) {
7295         decode_slice(avctx, &h);
     /* Multiple contexts: reset per-thread error state before dispatching. */
7297         for(i = 1; i < context_count; i++) {
7298             hx = h->thread_context[i];
7299             hx->s.error_recognition = avctx->error_recognition;
7300             hx->s.error_count = 0;
     /* Run decode_slice() on every thread context in parallel via the
      * codec's execute() callback. */
7303         avctx->execute(avctx, (void *)decode_slice,
7304                        (void **)h->thread_context, NULL, context_count, sizeof(void*));
7306         /* pull back stuff from slices to master context */
7307         hx = h->thread_context[context_count - 1];
7308         s->mb_x = hx->s.mb_x;
7309         s->mb_y = hx->s.mb_y;
7310         s->dropable = hx->s.dropable;
7311         s->picture_structure = hx->s.picture_structure;
     /* Accumulate per-thread error counts into the master context. */
7312         for(i = 1; i < context_count; i++)
7313             h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each one to the
 * appropriate parser (slice, data partition, SEI, SPS, PPS).
 * Slice NALs are batched across up to h->max_contexts thread contexts and
 * executed together via execute_decode_slices().
 * NOTE(review): this view of the function is missing some lines; comments
 * describe only what is visible here.
 */
7318 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7319 MpegEncContext * const s = &h->s;
7320 AVCodecContext * const avctx= s->avctx;
7322 H264Context *hx; ///< thread context
7323 int context_count = 0;
7325 h->max_contexts = avctx->thread_count;
     /* Debug dump of the first input bytes (error level, presumably gated
      * by a debug condition not visible here — TODO confirm). */
7328     for(i=0; i<50; i++){
7329         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* Unless decoding in chunks, a new call starts a new picture. */
7332     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7333         h->current_slice = 0;
7334         if (!s->first_field)
7335             s->current_picture_ptr= NULL;
7347         if(buf_index >= buf_size) break;
         /* AVC (ISO/MP4) mode: NAL units are length-prefixed with
          * h->nal_length_size bytes, big-endian. */
7349             for(i = 0; i < h->nal_length_size; i++)
7350                 nalsize = (nalsize << 8) | buf[buf_index++];
7351             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7356                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7361             // start code prefix search
7362             for(; buf_index + 3 < buf_size; buf_index++){
7363                 // This should always succeed in the first iteration.
7364                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7368             if(buf_index+3 >= buf_size) break;
         /* Pick the next free slice context to parse this NAL into. */
7373         hx = h->thread_context[context_count];
         /* Strip emulation-prevention bytes; ptr is the unescaped RBSP. */
7375         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7376         if (ptr==NULL || dst_length < 0){
         /* Drop trailing zero bytes, then compute the exact RBSP bit length
          * (excluding the rbsp_stop_one_bit and padding). */
7379         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7381         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7383         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7384             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7387         if (h->is_avc && (nalsize != consumed)){
7388             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7392         buf_index += consumed;
         /* Skip non-reference NALs when hurry_up / skip_frame ask for it. */
7394         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7395            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7400         switch(hx->nal_unit_type){
         /* IDR slice: an IDR must not be mixed with non-IDR slices. */
7402             if (h->nal_unit_type != NAL_IDR_SLICE) {
7403                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7406             idr(h); //FIXME ensure we don't lose some frames if there is reordering
         /* Regular slice: full slice data in one NAL, no data partitioning. */
7408             init_get_bits(&hx->s.gb, ptr, bit_length);
7410             hx->inter_gb_ptr= &hx->s.gb;
7411             hx->s.data_partitioning = 0;
7413             if((err = decode_slice_header(hx, h)))
7416             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
         /* Only queue the slice for decoding if it passes the redundancy,
          * hurry_up and skip_frame filters. */
7417             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7418                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7419                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7420                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7421                && avctx->skip_frame < AVDISCARD_ALL)
         /* Data partition A: carries the slice header; B/C follow. */
7425             init_get_bits(&hx->s.gb, ptr, bit_length);
7427             hx->inter_gb_ptr= NULL;
7428             hx->s.data_partitioning = 1;
7430             err = decode_slice_header(hx, h);
         /* Data partition B: intra residual data. */
7433             init_get_bits(&hx->intra_gb, ptr, bit_length);
7434             hx->intra_gb_ptr= &hx->intra_gb;
         /* Data partition C: inter residual data; once available the
          * partitioned slice can be decoded (subject to the same filters). */
7437             init_get_bits(&hx->inter_gb, ptr, bit_length);
7438             hx->inter_gb_ptr= &hx->inter_gb;
7440             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7441                && s->context_initialized
7443                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7444                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7445                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7446                && avctx->skip_frame < AVDISCARD_ALL)
         /* SEI message (parse call not visible in this view). */
7450             init_get_bits(&s->gb, ptr, bit_length);
         /* Sequence parameter set. */
7454             init_get_bits(&s->gb, ptr, bit_length);
7455             decode_seq_parameter_set(h);
7457             if(s->flags& CODEC_FLAG_LOW_DELAY)
7460             if(avctx->has_b_frames < 2)
7461                 avctx->has_b_frames= !s->low_delay;
         /* Picture parameter set. */
7464             init_get_bits(&s->gb, ptr, bit_length);
7466             decode_picture_parameter_set(h, bit_length);
7470         case NAL_END_SEQUENCE:
7471         case NAL_END_STREAM:
7472         case NAL_FILLER_DATA:
7474         case NAL_AUXILIARY_SLICE:
7477             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
         /* Flush the batch once every available slice context is filled. */
7480         if(context_count == h->max_contexts) {
7481             execute_decode_slices(h, context_count);
7486             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7488             /* Slice could not be decoded in parallel mode, copy down
7489              * NAL unit stuff to context 0 and restart. Note that
7490              * rbsp_buffer is not transferred, but since we no longer
7491              * run in parallel mode this should not be an issue. */
7492             h->nal_unit_type = hx->nal_unit_type;
7493             h->nal_ref_idc   = hx->nal_ref_idc;
     /* Decode any slices still pending in the batch. */
7499         execute_decode_slices(h, context_count);
7504 * returns the number of bytes consumed for building the current frame
7506 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
     /* Report at least 1 byte consumed so callers always make progress. */
7507         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
     /* If we are within 10 bytes of the end, claim the whole buffer was
      * consumed (trailing padding / stop bits). */
7508         if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Main frame-level decode entry point (AVCodec.decode callback).
 * Parses extradata (avcC or Annex-B) on first use, decodes the NAL units of
 * the input packet, then reorders decoded pictures into display order and
 * outputs at most one frame per call.
 * NOTE(review): this view of the function is missing some lines; comments
 * describe only what is visible here.
 */
7513 static int decode_frame(AVCodecContext *avctx,
7514                         void *data, int *data_size,
7515                         const uint8_t *buf, int buf_size)
7517     H264Context *h = avctx->priv_data;
7518     MpegEncContext *s = &h->s;
7519     AVFrame *pict = data;
7522     s->flags= avctx->flags;
7523     s->flags2= avctx->flags2;
7525    /* end of stream, output what is still in the buffers */
7526     if (buf_size == 0) {
7530 //FIXME factorize this with the output code below
7531         out = h->delayed_pic[0];
         /* Find the delayed picture with the smallest POC (next in display
          * order), stopping at key frames / zero-POC boundaries. */
7533         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7534             if(h->delayed_pic[i]->poc < out->poc){
7535                 out = h->delayed_pic[i];
         /* Remove the output picture from the delayed list. */
7539         for(i=out_idx; h->delayed_pic[i]; i++)
7540             h->delayed_pic[i] = h->delayed_pic[i+1];
7543             *data_size = sizeof(AVFrame);
7544             *pict= *(AVFrame*)out;
     /* First call with ISO/MP4-style extradata: parse the avcC record. */
7550     if(h->is_avc && !h->got_avcC) {
7551         int i, cnt, nalsize;
7552         unsigned char *p = avctx->extradata;
7553         if(avctx->extradata_size < 7) {
7554             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7558             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7561         /* sps and pps in the avcC always have length coded with 2 bytes,
7562            so put a fake nal_length_size = 2 while parsing them */
7563         h->nal_length_size = 2;
7564         // Decode sps from avcC
7565         cnt = *(p+5) & 0x1f; // Number of sps
7567         for (i = 0; i < cnt; i++) {
7568             nalsize = AV_RB16(p) + 2;
7569             if(decode_nal_units(h, p, nalsize) < 0) {
7570                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7575         // Decode pps from avcC
7576         cnt = *(p++); // Number of pps
7577         for (i = 0; i < cnt; i++) {
7578             nalsize = AV_RB16(p) + 2;
7579             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7580                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7585         // Now store right nal length size, that will be used to parse all other nals
7586         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7587         // Do not reparse avcC
     /* Annex-B extradata: feed it through the normal NAL parser once. */
7591     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7592         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
     /* Decode the actual packet payload. */
7597     buf_index=decode_nal_units(h, buf, buf_size);
7601     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7602         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7603         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* A picture has been fully decoded (or chunked decoding reached the
      * last MB row): finish it and run the output/reordering logic. */
7607     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7608         Picture *out = s->current_picture_ptr;
7609         Picture *cur = s->current_picture_ptr;
7610         int i, pics, cross_idr, out_of_order, out_idx;
7614         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7615         s->current_picture_ptr->pict_type= s->pict_type;
         /* Apply memory-management control operations and roll the POC /
          * frame_num state forward for the next picture. */
7618             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7619         h->prev_poc_msb= h->poc_msb;
7620         h->prev_poc_lsb= h->poc_lsb;
7622         h->prev_frame_num_offset= h->frame_num_offset;
7623         h->prev_frame_num= h->frame_num;
7626          * FIXME: Error handling code does not seem to support interlaced
7627          * when slices span multiple rows
7628          * The ff_er_add_slice calls don't work right for bottom
7629          * fields; they cause massive erroneous error concealing
7630          * Error marking covers both fields (top and bottom).
7631          * This causes a mismatched s->error_count
7632          * and a bad error table. Further, the error count goes to
7633          * INT_MAX when called for bottom field, because mb_y is
7634          * past end by one (callers fault) and resync_mb_y != 0
7635          * causes problems for the first MB line, too.
         /* Only half of a field pair decoded so far: output nothing yet. */
7642         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7643             /* Wait for second field. */
7647         cur->repeat_pict = 0;
7649         /* Signal interlacing information externally. */
7650         /* Prioritize picture timing SEI information over used decoding process if it exists. */
7651         if(h->sps.pic_struct_present_flag){
7652             switch (h->sei_pic_struct)
7654             case SEI_PIC_STRUCT_FRAME:
7655                 cur->interlaced_frame = 0;
7657             case SEI_PIC_STRUCT_TOP_FIELD:
7658             case SEI_PIC_STRUCT_BOTTOM_FIELD:
7659             case SEI_PIC_STRUCT_TOP_BOTTOM:
7660             case SEI_PIC_STRUCT_BOTTOM_TOP:
7661                 cur->interlaced_frame = 1;
7663             case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7664             case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7665                 // Signal the possibility of telecined film externally (pic_struct 5,6)
7666                 // From these hints, let the applications decide if they apply deinterlacing.
7667                 cur->repeat_pict = 1;
7668                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7670             case SEI_PIC_STRUCT_FRAME_DOUBLING:
7671                 // Force progressive here, as doubling interlaced frame is a bad idea.
7672                 cur->interlaced_frame = 0;
7673                 cur->repeat_pict = 2;
7675             case SEI_PIC_STRUCT_FRAME_TRIPLING:
7676                 cur->interlaced_frame = 0;
7677                 cur->repeat_pict = 4;
7681             /* Derive interlacing flag from used decoding process. */
7682             cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7685         if (cur->field_poc[0] != cur->field_poc[1]){
7686             /* Derive top_field_first from field pocs. */
7687             cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7689             if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7690                 /* Use picture timing SEI information. Even if it is information from a past frame, better than nothing. */
7691                 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7692                   || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7693                     cur->top_field_first = 1;
7695                     cur->top_field_first = 0;
7697                 /* Most likely progressive */
7698                 cur->top_field_first = 0;
7702     //FIXME do something with unavailable reference frames
7704         /* Sort B-frames into display order */
         /* Grow the output delay to what the stream declares it needs. */
7706         if(h->sps.bitstream_restriction_flag
7707            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7708             s->avctx->has_b_frames = h->sps.num_reorder_frames;
         /* Without a bitstream restriction, strict compliance forces the
          * maximum possible reorder delay. */
7712         if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7713            && !h->sps.bitstream_restriction_flag){
7714             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7719         while(h->delayed_pic[pics]) pics++;
7721         assert(pics <= MAX_DELAYED_PIC_COUNT);
         /* Append the just-decoded picture to the delayed-output list and
          * protect it from being recycled. */
7723         h->delayed_pic[pics++] = cur;
7724         if(cur->reference == 0)
7725             cur->reference = DELAYED_PIC_REF;
7727         out = h->delayed_pic[0];
         /* Select the lowest-POC delayed picture as the output candidate. */
7729         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7730             if(h->delayed_pic[i]->poc < out->poc){
7731                 out = h->delayed_pic[i];
7734         cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7736         out_of_order = !cross_idr && out->poc < h->outputed_poc;
         /* If output would be out of order, grow has_b_frames (up to the
          * limit) instead of emitting a picture too early. */
7738         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7740         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7742                  ((!cross_idr && out->poc > h->outputed_poc + 2)
7743                   || cur->pict_type == FF_B_TYPE)))
7746             s->avctx->has_b_frames++;
         /* Pop the output picture from the delayed list. */
7749         if(out_of_order || pics > s->avctx->has_b_frames){
7750             out->reference &= ~DELAYED_PIC_REF;
7751             for(i=out_idx; h->delayed_pic[i]; i++)
7752                 h->delayed_pic[i] = h->delayed_pic[i+1];
7754         if(!out_of_order && pics > s->avctx->has_b_frames){
7755             *data_size = sizeof(AVFrame);
7757             h->outputed_poc = out->poc;
7758             *pict= *(AVFrame*)out;
7760             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7765     assert(pict->data[0] || !*data_size);
7766     ff_print_debug_info(s, pict);
7767 //printf("out %d\n", (int)pict->data[0]);
7770 #if 0 //?
7771     /* we subtract 1 because it is added on utils.c */
7772     avctx->frame_number = s->picture_number - 1;
7774     return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with availability flags for the macroblocks
 * neighboring the current one; a neighbor is available only if it lies
 * inside the picture and belongs to the same slice (slice_table match).
 * Layout (from the indices used below): 0=top-left, 1=top, 2=top-right,
 * 3=left; 4 and 5 are fixed constants — TODO confirm their meaning.
 * NOTE(review): lines handling the top picture edge (mb_y == 0 case)
 * appear to be missing from this view. */
7777 static inline void fill_mb_avail(H264Context *h){
7778     MpegEncContext * const s = &h->s;
7779     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7782         h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7783         h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7784         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7790     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7791     h->mb_avail[4]= 1; //FIXME move out
7792     h->mb_avail[5]= 0; //FIXME move out
7800 #define SIZE (COUNT*40)
/* Stand-alone self-test harness (the enclosing main() definition and its
 * #ifdef TEST guard are not visible in this view). It exercises, in order:
 * Exp-Golomb coding, the 4x4 (I)DCT round trip, the quantizer, and the NAL
 * escaping/unescaping layer. */
7806 //    int int_temp[10000];
7808     AVCodecContext avctx;
7810     dsputil_init(&dsp, &avctx);
7812     init_put_bits(&pb, temp, SIZE);
7813     printf("testing unsigned exp golomb\n");
     /* Encode 0..COUNT-1 as unsigned Exp-Golomb codes... */
7814     for(i=0; i<COUNT; i++){
7816         set_ue_golomb(&pb, i);
7817         STOP_TIMER("set_ue_golomb");
7819     flush_put_bits(&pb);
     /* ...then decode them back and verify each value round-trips. */
7821     init_get_bits(&gb, temp, 8*SIZE);
7822     for(i=0; i<COUNT; i++){
7825         s= show_bits(&gb, 24);
7828         j= get_ue_golomb(&gb);
7830             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7833         STOP_TIMER("get_ue_golomb");
     /* Same round-trip test for signed Exp-Golomb codes, centered on 0. */
7837     init_put_bits(&pb, temp, SIZE);
7838     printf("testing signed exp golomb\n");
7839     for(i=0; i<COUNT; i++){
7841         set_se_golomb(&pb, i - COUNT/2);
7842         STOP_TIMER("set_se_golomb");
7844     flush_put_bits(&pb);
7846     init_get_bits(&gb, temp, 8*SIZE);
7847     for(i=0; i<COUNT; i++){
7850         s= show_bits(&gb, 24);
7853         j= get_se_golomb(&gb);
7854         if(j != i - COUNT/2){
7855             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7858         STOP_TIMER("get_se_golomb");
     /* Forward DCT on random blocks, rough dequant, then IDCT; accumulate
      * the reconstruction error against the source. */
7862     printf("testing 4x4 (I)DCT\n");
7865         uint8_t src[16], ref[16];
7866         uint64_t error= 0, max_error=0;
7868         for(i=0; i<COUNT; i++){
7870 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7871             for(j=0; j<16; j++){
7872                 ref[j]= random()%255;
7873                 src[j]= random()%255;
7876             h264_diff_dct_c(block, src, ref, 4);
7879             for(j=0; j<16; j++){
7880 //                printf("%d ", block[j]);
7881                 block[j]= block[j]*4;
7882                 if(j&1) block[j]= (block[j]*4 + 2)/5;
7883                 if(j&4) block[j]= (block[j]*4 + 2)/5;
7887             s->dsp.h264_idct_add(ref, block, 4);
7888 /*            for(j=0; j<16; j++){
7889                 printf("%d ", ref[j]);
7893             for(j=0; j<16; j++){
7894                 int diff= FFABS(src[j] - ref[j]);
7897                 max_error= FFMAX(max_error, diff);
7900         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7901     printf("testing quantizer\n");
7902     for(qp=0; qp<52; qp++){
7904             src1_block[i]= src2_block[i]= random()%255;
     /* NAL layer test: build a random bitstream with forced zero runs,
      * escape it with encode_nal(), unescape with decode_nal(), and check
      * length, consumed bytes and payload all match. */
7907     printf("Testing NAL layer\n");
7909     uint8_t bitstream[COUNT];
7910     uint8_t nal[COUNT*2];
7912     memset(&h, 0, sizeof(H264Context));
7914     for(i=0; i<COUNT; i++){
7922         for(j=0; j<COUNT; j++){
7923             bitstream[j]= (random() % 255) + 1;
7926         for(j=0; j<zeros; j++){
7927             int pos= random() % COUNT;
7928             while(bitstream[pos] == 0){
7937         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7939             printf("encoding failed\n");
7943         out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7947         if(out_length != COUNT){
7948             printf("incorrect length %d %d\n", out_length, COUNT);
7952         if(consumed != nal_length){
7953             printf("incorrect consumed length %d %d\n", nal_length, consumed);
7957         if(memcmp(bitstream, out, COUNT)){
7958             printf("mismatch\n");
7964     printf("Testing RBSP\n");
/* Codec close callback: release the per-context RBSP buffers, the decoder
 * tables, and every cached SPS/PPS. av_freep() tolerates NULL entries, so
 * the loops are safe on slots that were never filled. */
7972 static av_cold int decode_end(AVCodecContext *avctx)
7974     H264Context *h = avctx->priv_data;
7975     MpegEncContext *s = &h->s;
7978     av_freep(&h->rbsp_buffer[0]);
7979     av_freep(&h->rbsp_buffer[1]);
7980     free_tables(h); //FIXME cleanup init stuff perhaps
7982     for(i = 0; i < MAX_SPS_COUNT; i++)
7983         av_freep(h->sps_buffers + i);
7985     for(i = 0; i < MAX_PPS_COUNT; i++)
7986         av_freep(h->pps_buffers + i);
7990 //    memset(h, 0, sizeof(H264Context));
/* AVCodec registration for the H.264 decoder.
 * NOTE(review): several initializer fields (name, type, id, callbacks) are
 * missing from this view of the struct. */
7996 AVCodec h264_decoder = {
7997     sizeof(H264Context),
     /* Capabilities: direct rendering and delayed (reordered) output. */
8005     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8007     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),