2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
// CAVLC coefficient-token VLCs: four sub-tables packed into one shared
// storage array; the *_size constants record each sub-table's length.
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
// Separate coeff_token VLC for chroma DC blocks.
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
// total_zeros VLCs: 15 luma contexts plus 3 chroma-DC contexts
// (presumably one per total_coeff value — see the CAVLC spec).
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
// run_before VLCs: 6 small contexts, plus the larger run7 table below.
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
// Forward declarations for static functions defined further down:
// SVQ3 dequant/idct variants, the two deblocking-filter entry points,
// and long-term reference removal.
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* rem6[qp] == qp % 6 for qp in 0..51 — lookup table used for dequant
 * scaling (avoids a runtime modulo). Restored the closing brace that was
 * elided from the listing. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* div6[qp] == qp / 6 for qp in 0..51 — companion lookup table to rem6
 * (avoids a runtime division). Restored the closing brace that was elided
 * from the listing. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
// Left-neighbour 4x4-block index remap tables; option [0] is the default,
// options [1..3] are selected in fill_caches for the MBAFF frame/field
// left-edge cases. NOTE(review): the four initializer rows appear to be
// elided from this listing — confirm against the full source.
97 static const int left_block_options[4][8]={
/**
 * Loads the neighbour context of the current macroblock from the
 * frame-wide tables into the small per-MB caches (h->*_cache): intra4x4
 * prediction modes, non-zero-coefficient counts, motion vectors,
 * reference indices, mvd, direct flags and cbp.
 * @param mb_type    type of the current macroblock
 * @param for_deblock nonzero selects the reduced variant used by the
 *                    loop filter (skips/loosens some of the work).
 * NOTE(review): the original numbering embedded in this listing jumps,
 * i.e. lines have been elided; the code below is kept byte-identical.
 */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
// Locate the neighbouring macroblocks; in field pictures the stride is
// doubled (FIELD_PICTURE shift).
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
// MBAFF: neighbour positions depend on whether the current and
// neighbouring MB pairs are frame- or field-coded, and on whether this
// is the top or bottom MB of its pair.
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
// (deblock variant) neighbour mb types, usable whenever the slice_table
// entry holds a real slice number (< 0xFFFF sentinel).
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// (normal variant) neighbours are usable only if they belong to the same
// slice as the current macroblock.
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra: compute the sample-availability bitmasks used by the intra
// predictors; with constrained_intra_pred only intra neighbours count
// (type_mask filters inter neighbours out).
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
// 4x4 intra prediction mode cache: load the bottom row of the top
// neighbour and the relevant column of the left neighbour(s).
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Non-zero-coefficient-count cache: top row from the top neighbour,
// left column from the left neighbour(s); unavailable entries become 0
// (CABAC inter) or the 64 "unavailable" marker.
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Cached coded-block-pattern of the top/left neighbours (CABAC context).
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter: fill the motion-vector and reference-index caches from the top,
// left, top-left and top-right neighbours; missing neighbours get 0 MVs
// and LIST_NOT_USED/PART_NOT_AVAILABLE refs.
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Mark the cache cells to the right of each partition row unavailable.
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: load the direct-mode flags of the neighbours.
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF frame<->field neighbour remap, applied to each cached neighbour
// cell via MAP_F2F: refs are doubled/halved and vertical MVs halved/
// doubled to convert between frame and field coordinate spaces.
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// frame->field direction: ref index doubles, vertical MV halves.
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// NOTE(review): the loop headers, error-return paths and closing braces
// are elided from this listing; code kept byte-identical.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
// Remap tables indexed by the requested mode: a negative entry means the
// mode is illegal without that neighbour (triggers the av_log error),
// otherwise it gives the substitute mode to use.
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Top row: bit 0x8000 of top_samples_available flags the top neighbour.
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Left column: one availability bit per 4x4 row (mask table below).
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// 16x16/chroma variant of check_intra4x4_pred_mode: validates 'mode' and
// substitutes a DC variant when neighbour samples are missing.
// NOTE(review): range check, returns and closing braces are elided from
// this listing; code kept byte-identical.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
// -1 entries mark modes that are illegal without that neighbour.
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 if((h->left_samples_available&0x8080) != 0x8080){
// MBAFF + constrained_intra_pred can leave only half of the left edge
// available; special ALZHEIMER_DC_* modes handle that case.
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetches the "diagonal" neighbour C used by MV prediction: normally the
// top-right cache cell (i - 8 + part_width); if that is unavailable, the
// top-left cell (i - 8 - 1) is used instead. Points *C at the chosen MV
// and returns its reference index. The MBAFF special cases rescale
// field/frame vertical MVs and refs via SET_DIAG_MV (MV_OP applied to
// mv[1], REF_OP to the ref index). NOTE(review): several guard lines and
// closing braces are elided from this listing; code kept byte-identical.
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// scan8[0]-2 is used as scratch space for the synthesized MV.
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF fallback: use the top-right cell directly, or the top-left
// cell when the top-right is unavailable.
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
// Median prediction over A (left), B (top) and C (diagonal, fetched via
// fetch_diagonal_mv); when exactly one neighbour shares 'ref' that
// neighbour is used directly, per the H.264 MV prediction rules.
// NOTE(review): several branch bodies and braces are elided from this
// listing; code kept byte-identical.
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
// Only the left neighbour available -> take A verbatim; otherwise median.
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
// 16x8 partitions prefer a single directional neighbour (top neighbour
// for the upper partition, left neighbour for the lower, when the refs
// match) and otherwise fall back to the median pred_motion.
// NOTE(review): the partition-selection branches are elided from this
// listing; code kept byte-identical.
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
// 8x16 partitions prefer the left neighbour for the left partition and
// the diagonal neighbour for the right one (when the refs match),
// falling back to the median pred_motion otherwise.
// NOTE(review): the partition-selection branches are elided from this
// listing; code kept byte-identical.
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
875 pred_motion(h, n, 2, list, ref, mx, my);
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
// Precomputes the temporal-direct distance scale factors for every list-0
// reference into h->dist_scale_factor[], plus the per-field variants in
// dist_scale_factor_field[][] (field refs start at index 16, matching the
// i+16 offset). NOTE(review): the guard around the field loop and the
// loop/brace lines are elided from this listing; code kept byte-identical.
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
// poc of the current picture/field and of the first list-1 reference.
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
// Builds map[list][old_ref] (and, for the MBAFF/field variant, entries at
// offset 16 with field parity folded in): for each reference of the
// colocated picture ref1, the index of the matching picture in the
// current ref_list, matched by a packed poc key (4*frame_num +
// reference&3). Used by temporal direct mode — TODO confirm against the
// callers, which are outside this view. NOTE(review): several loop/brace
// and condition lines are elided from this listing; code kept
// byte-identical.
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
// poc&3 == 3 marks a frame reference; split it into the wanted field.
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference POCs/counts and, for B-slices
 * using temporal direct prediction, builds the col-to-list0 maps.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
/* sidx selects the structure slot for the current field parity */
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
/* store 4*frame_num + reference parity as the lookup key used by fill_colmap() */
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* a frame covers both field slots */
973 if(s->picture_structure == PICT_FRAME){
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
/* maps are only needed for temporal direct in B-slices */
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Predicts motion for a B-slice direct-mode macroblock (spatial or
 * temporal), filling mv_cache/ref_cache and updating *mb_type /
 * h->sub_mb_type with the derived partition types.
 *
 * First selects the colocated MB in ref_list[1][0] (handling every
 * frame/field/MBAFF combination), then branches on
 * h->direct_spatial_mv_pred.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* ---- locate the colocated macroblock and derive sub_mb_type ---- */
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
/* frame MB, field colocated: pick the colocated field whose POC is closest */
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
/* fieldoff is +1/-1 depending on the colocated field's parity */
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
/* field MB, frame colocated: one field MB maps onto a vertical MB pair */
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* pointers into the colocated picture's motion vectors / reference indices */
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* advance to the second (bottom) row of the colocated data */
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
/* ---- spatial direct prediction ---- */
1065 if(h->direct_spatial_mv_pred){
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare makes negative (unavailable) refs sort last */
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* no valid neighbor reference in either list: zero refs and MVs */
1084 if(ref[0] < 0 && ref[1] < 0){
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1089 for(list=0; list<2; list++){
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction direction from the mb/sub-mb types */
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
/* current and colocated MB differ in frame/field coding: per-8x8 handling */
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated is a "stationary" block (|mv| <= 1 against ref 0): force zero MV */
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125 a= pack16to32(mv[0][0],mv[0][1]);
1127 b= pack16to32(mv[1][0],mv[1][1]);
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1135 }else if(IS_16X16(*mb_type)){
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* x264_build check works around a known old-x264 deviation from the spec */
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1143 && (h->x264_build>33 || !h->x264_build)))){
1145 a= pack16to32(mv[0][0],mv[0][1]);
1147 b= pack16to32(mv[1][0],mv[1][1]);
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* general (8x8 partition) spatial path */
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero out MVs over sub-blocks whose colocated MV is near-zero */
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182 for(i4=0; i4<4; i4++){
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction ---- */
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
/* field macroblocks in MBAFF use the per-field maps and scale factors */
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* frame/field mismatch between current and colocated MB */
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
/* y_shift doubles the vertical MV when going field -> frame */
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* intra colocated block: zero MVs and references */
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
/* scale is Q8: mv = (scale*mv_col + 128) >> 8, list1 MV is the difference */
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* per-8x8 temporal path */
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
/* 4x4 sub-blocks: scale each colocated MV independently */
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion data from the decode caches (mv_cache,
 * ref_cache, mvd_cache) back into the frame-wide arrays of
 * s->current_picture, plus the CABAC mvd and direct tables.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
/* b_xy/b8_xy: this MB's origin in the 4x4 / 8x8 block grids */
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
/* copy MVs row by row, two 4x4 blocks (one uint64) at a time */
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct flags are only needed for CABAC context modelling in B-slices */
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
/* NAL header byte: 1-bit forbidden_zero, 2-bit nal_ref_idc, 5-bit nal_unit_type */
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
/* scan for 0x000003 emulation-prevention sequences (checking even offsets first) */
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
/* no escape bytes found: return the input buffer unmodified (zero-copy) */
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
/* partition C gets its own buffer so partitions A/B stay valid concurrently */
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
/* See the doxygen comment above: finds the rbsp_stop_one_bit to locate
 * the exact end of the bitstream. */
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * IDCT transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
/* offsets map the 4x4 DC grid onto the DC positions of the 16 luma blocks */
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
/* horizontal pass of the 4x4 Hadamard transform, results go to temp[] */
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass + dequantization: qmul is Q8, hence the +128 and >>8 */
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * DCT transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
/* Encoder-side counterpart of h264_luma_dc_dequant_idct_c: forward
 * 4x4 Hadamard transform of the 16 luma DC values (no quantization). */
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first butterfly pass into temp[] */
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass; >>1 keeps the forward transform's scaling in range */
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients; qmul is Q7 (>>7 after multiply). */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Encoder-side forward 2x2 Hadamard transform of the chroma DC
 * coefficients (no quantization). */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
/* t selects Cb/Cr (separate chroma_qp_index_offset values per plane). */
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one partition in one prediction direction:
 * quarter-pel luma via qpix_op, eighth-pel chroma via chroma_op, with
 * edge emulation when the MV points (partially) outside the picture.
 */
1590 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1591 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1592 int src_x_offset, int src_y_offset,
1593 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1594 MpegEncContext * const s = &h->s;
/* mx/my are in quarter-pel units; low 2 bits select the qpel interpolation */
1595 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1596 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1597 const int luma_xy= (mx&3) + ((my&3)<<2);
1598 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1599 uint8_t * src_cb, * src_cr;
1600 int extra_width= h->emu_edge_width;
1601 int extra_height= h->emu_edge_height;
1603 const int full_mx= mx>>2;
1604 const int full_my= my>>2;
1605 const int pic_width = 16*s->mb_width;
1606 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* subpel interpolation reads 3 extra pixels; shrink the usable edge margin */
1608 if(mx&7) extra_width -= 3;
1609 if(my&7) extra_height -= 3;
/* MV reaches outside the padded picture: interpolate from an emulated edge */
1611 if( full_mx < 0-extra_width
1612 || full_my < 0-extra_height
1613 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1614 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1615 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1616 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1620 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1622 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1625 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1628 // chroma offset when predicting from a field of opposite parity
1629 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1630 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1632 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1633 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1636 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1637 src_cb= s->edge_emu_buffer;
1639 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1642 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1643 src_cr= s->edge_emu_buffer;
1645 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: list0 prediction
 * with put ops, then (for bi-prediction) list1 averaged in with avg ops.
 */
1648 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1649 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1650 int x_offset, int y_offset,
1651 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1652 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1653 int list0, int list1){
1654 MpegEncContext * const s = &h->s;
1655 qpel_mc_func *qpix_op= qpix_put;
1656 h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to this partition; offsets are in chroma-sample units */
1658 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1659 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1660 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1661 x_offset += 8*s->mb_x;
1662 y_offset += 8*(s->mb_y >> MB_FIELD);
1665 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1666 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1667 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1668 qpix_op, chroma_op);
/* switch to averaging ops so the list1 prediction blends with list0's */
1671 chroma_op= chroma_avg;
1675 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1676 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1677 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition: explicit or implicit
 * weighting. Bi-prediction renders list1 into a scratchpad and combines
 * with *_weight_avg; uni-prediction applies *_weight_op in place.
 */
1682 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1683 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1684 int x_offset, int y_offset,
1685 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1686 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1687 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1688 int list0, int list1){
1689 MpegEncContext * const s = &h->s;
1691 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1692 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1693 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1694 x_offset += 8*s->mb_x;
1695 y_offset += 8*(s->mb_y >> MB_FIELD);
1698 /* don't optimize for luma-only case, since B-frames usually
1699 * use implicit weights => chroma too. */
1700 uint8_t *tmp_cb = s->obmc_scratchpad;
1701 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1702 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1703 int refn0 = h->ref_cache[0][ scan8[n] ];
1704 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 goes straight to the destination, list1 into the scratchpad */
1706 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1707 dest_y, dest_cb, dest_cr,
1708 x_offset, y_offset, qpix_put, chroma_put);
1709 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1710 tmp_y, tmp_cb, tmp_cr,
1711 x_offset, y_offset, qpix_put, chroma_put);
/* use_weight==2: implicit weighting, weights sum to 64 and depend on POC */
1713 if(h->use_weight == 2){
1714 int weight0 = h->implicit_weight[refn0][refn1];
1715 int weight1 = 64 - weight0;
1716 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1717 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1718 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-list, per-reference weights and offsets */
1720 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1721 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1722 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1723 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1724 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1725 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1726 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1727 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1728 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional weighted prediction */
1731 int list = list1 ? 1 : 0;
1732 int refn = h->ref_cache[list][ scan8[n] ];
1733 Picture *ref= &h->ref_list[list][refn];
1734 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1735 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1736 qpix_put, chroma_put);
1738 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1739 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1740 if(h->use_weight_chroma){
1741 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1742 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1743 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation.
 * Weighted MC is skipped for implicit bi-prediction when both weights
 * are 32 (equal), since plain averaging then gives the same result.
 */
1749 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1750 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1751 int x_offset, int y_offset,
1752 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1753 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1754 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1755 int list0, int list1){
1756 if((h->use_weight==2 && list0 && list1
1757 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1758 || h->use_weight==1)
1759 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1760 x_offset, y_offset, qpix_put, chroma_put,
1761 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1763 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1764 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1767 static inline void prefetch_motion(H264Context *h, int list){
1768 /* fetch pixels for estimated mv 4 macroblocks ahead
1769 * optimized for 64byte cache lines */
1770 MpegEncContext * const s = &h->s;
1771 const int refn = h->ref_cache[list][scan8[0]];
/* predicted source position: this MB's MV applied 4 MBs (64 pixels) ahead */
1773 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1774 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1775 uint8_t **src= h->ref_list[list][refn].data;
1776 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1777 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous, so one prefetch covers Cb and Cr */
1778 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1779 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for one inter macroblock, dispatching
 * by partition layout (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions)
 * to mc_part() with the matching function-table entries.
 */
1783 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1784 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1785 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1786 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1787 MpegEncContext * const s = &h->s;
1788 const int mb_xy= h->mb_xy;
1789 const int mb_type= s->current_picture.mb_type[mb_xy];
1791 assert(IS_INTER(mb_type));
/* kick off list0 prefetch before doing any real work */
1793 prefetch_motion(h, 0);
1795 if(IS_16X16(mb_type)){
1796 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1797 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1798 &weight_op[0], &weight_avg[0],
1799 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1800 }else if(IS_16X8(mb_type)){
1801 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1802 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1803 &weight_op[1], &weight_avg[1],
1804 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1805 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1806 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1807 &weight_op[1], &weight_avg[1],
1808 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1809 }else if(IS_8X16(mb_type)){
1810 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1811 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1812 &weight_op[2], &weight_avg[2],
1813 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1814 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1815 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1816 &weight_op[2], &weight_avg[2],
1817 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 partitions: each quadrant carries its own sub-partitioning */
1821 assert(IS_8X8(mb_type));
1824 const int sub_mb_type= h->sub_mb_type[i];
1826 int x_offset= (i&1)<<2;
1827 int y_offset= (i&2)<<1;
1829 if(IS_SUB_8X8(sub_mb_type)){
1830 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1831 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832 &weight_op[3], &weight_avg[3],
1833 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1834 }else if(IS_SUB_8X4(sub_mb_type)){
1835 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1836 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1837 &weight_op[4], &weight_avg[4],
1838 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1839 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1840 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1841 &weight_op[4], &weight_avg[4],
1842 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1843 }else if(IS_SUB_4X8(sub_mb_type)){
1844 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1845 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1846 &weight_op[5], &weight_avg[5],
1847 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1848 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1849 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1850 &weight_op[5], &weight_avg[5],
1851 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 assert(IS_SUB_4X4(sub_mb_type));
1856 int sub_x_offset= x_offset + 2*(j&1);
1857 int sub_y_offset= y_offset + (j&2);
1858 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1859 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1860 &weight_op[6], &weight_avg[6],
1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1867 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants) into the static buffers
 * declared at the top of the file, using INIT_VLC_USE_NEW_STATIC.
 */
1870 static av_cold void decode_init_vlc(void){
1871 static int done = 0;
1878 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1879 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1880 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1881 &chroma_dc_coeff_token_len [0], 1, 1,
1882 &chroma_dc_coeff_token_bits[0], 1, 1,
1883 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token tables share one packed static buffer */
1887 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1888 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1889 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1890 &coeff_token_len [i][0], 1, 1,
1891 &coeff_token_bits[i][0], 1, 1,
1892 INIT_VLC_USE_NEW_STATIC);
1893 offset += coeff_token_vlc_tables_size[i];
1896 * This is a one time safety check to make sure that
1897 * the packed static coeff_token_vlc table sizes
1898 * were initialized correctly.
1900 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1903 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1904 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1905 init_vlc(&chroma_dc_total_zeros_vlc[i],
1906 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1907 &chroma_dc_total_zeros_len [i][0], 1, 1,
1908 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1909 INIT_VLC_USE_NEW_STATIC);
1911 for(i=0; i<15; i++){
1912 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1913 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1914 init_vlc(&total_zeros_vlc[i],
1915 TOTAL_ZEROS_VLC_BITS, 16,
1916 &total_zeros_len [i][0], 1, 1,
1917 &total_zeros_bits[i][0], 1, 1,
1918 INIT_VLC_USE_NEW_STATIC);
1922 run_vlc[i].table = run_vlc_tables[i];
1923 run_vlc[i].table_allocated = run_vlc_tables_size;
1924 init_vlc(&run_vlc[i],
1926 &run_len [i][0], 1, 1,
1927 &run_bits[i][0], 1, 1,
1928 INIT_VLC_USE_NEW_STATIC);
/* run_before for zeros_left > 6 uses its own, larger table */
1930 run7_vlc.table = run7_vlc_table,
1931 run7_vlc.table_allocated = run7_vlc_table_size;
1932 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1933 &run_len [6][0], 1, 1,
1934 &run_bits[6][0], 1, 1,
1935 INIT_VLC_USE_NEW_STATIC);
/* Free every per-context table allocated by alloc_tables()/context_init(),
 * including the per-thread top border and scratchpad buffers.
 * slice_table points into slice_table_base, so only the base is freed and
 * the alias is NULLed. */
1939 static void free_tables(H264Context *h){
1942 av_freep(&h->intra4x4_pred_mode);
1943 av_freep(&h->chroma_pred_mode_table);
1944 av_freep(&h->cbp_table);
1945 av_freep(&h->mvd_table[0]);
1946 av_freep(&h->mvd_table[1]);
1947 av_freep(&h->direct_table);
1948 av_freep(&h->non_zero_count);
1949 av_freep(&h->slice_table_base);
1950 h->slice_table= NULL;
1952 av_freep(&h->mb2b_xy);
1953 av_freep(&h->mb2b8_xy);
/* Per-thread contexts own their own borders/scratchpad; free each one. */
1955 for(i = 0; i < h->s.avctx->thread_count; i++) {
1956 hx = h->thread_context[i];
1958 av_freep(&hx->top_borders[1]);
1959 av_freep(&hx->top_borders[0]);
1960 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequant tables for all 52 QP values from the PPS scaling
 * matrices.  If both 8x8 scaling matrices are identical, table 1 aliases
 * table 0 to save work.  `transpose` compensates for IDCT implementations
 * that expect transposed coefficient order. */
1964 static void init_dequant8_coeff_table(H264Context *h){
1966 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1967 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1968 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1970 for(i=0; i<2; i++ ){
1971 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1972 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1976 for(q=0; q<52; q++){
1977 int shift = div6[q];
/* Scale the normative init value by the scaling-list entry, shifted by qp/6. */
1980 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1981 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1982 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequant tables (6 scaling lists) for all 52 QP values.
 * Lists with identical scaling matrices share one buffer via the
 * memcmp/alias check.  `transpose` matches the active IDCT's layout. */
1987 static void init_dequant4_coeff_table(H264Context *h){
1989 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1990 for(i=0; i<6; i++ ){
1991 h->dequant4_coeff[i] = h->dequant4_buffer[i];
1993 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1994 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2001 for(q=0; q<52; q++){
2002 int shift = div6[q] + 2;
2005 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2006 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2007 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS; for lossless
 * (transform_bypass) streams the QP==0 entries are forced to the identity
 * scale 1<<6 so dequantization is a no-op. */
2012 static void init_dequant_tables(H264Context *h){
2014 init_dequant4_coeff_table(h);
2015 if(h->pps.transform_8x8_mode)
2016 init_dequant8_coeff_table(h);
2017 if(h->sps.transform_bypass){
2020 h->dequant4_coeff[i][0][x] = 1<<6;
2021 if(h->pps.transform_8x8_mode)
2024 h->dequant8_coeff[i][0][x] = 1<<6;
2031 * needs width/height
/* Allocate all per-stream tables sized from mb_width/mb_height (must be
 * known before calling).  Returns 0 on success; CHECKED_ALLOCZ presumably
 * jumps to an error path that frees partial allocations — the fail label is
 * outside this view. */
2033 static int alloc_tables(H264Context *h){
2034 MpegEncContext * const s = &h->s;
/* One extra mb row so edge macroblocks can read a valid neighbour row. */
2035 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2038 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2040 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2041 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2042 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2044 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2045 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2046 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2047 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so index 0 maps to mb (0,0). */
2049 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2050 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2052 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2053 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* Precompute macroblock-index -> 4x4-block / 8x8-block index mappings. */
2054 for(y=0; y<s->mb_height; y++){
2055 for(x=0; x<s->mb_width; x++){
2056 const int mb_xy= x + y*s->mb_stride;
2057 const int b_xy = 4*x + 4*y*h->b_stride;
2058 const int b8_xy= 2*x + 2*y*h->b8_stride;
2060 h->mb2b_xy [mb_xy]= b_xy;
2061 h->mb2b8_xy[mb_xy]= b8_xy;
/* Scratchpad is allocated lazily in frame_start() once linesize is known. */
2065 s->obmc_scratchpad = NULL;
2067 if(!h->dequant4_coeff[0])
2068 init_dequant_tables(h);
2077 * Mimic alloc_tables(), but for every context thread.
/* Share the big per-stream tables from `src` into a per-thread context
 * `dst` (pointers are copied, not duplicated — free_tables() only frees the
 * per-thread members).  Prediction function pointers are re-initialized. */
2079 static void clone_tables(H264Context *dst, H264Context *src){
2080 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2081 dst->non_zero_count = src->non_zero_count;
2082 dst->slice_table = src->slice_table;
2083 dst->cbp_table = src->cbp_table;
2084 dst->mb2b_xy = src->mb2b_xy;
2085 dst->mb2b8_xy = src->mb2b8_xy;
2086 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2087 dst->mvd_table[0] = src->mvd_table[0];
2088 dst->mvd_table[1] = src->mvd_table[1];
2089 dst->direct_table = src->direct_table;
/* Each thread gets its own lazily-allocated scratchpad (see frame_start). */
2091 dst->s.obmc_scratchpad = NULL;
2092 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2097 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the per-thread top-border buffers (16 luma + 8+8 chroma bytes
 * per mb column).  On allocation failure returns -1; free_tables() is
 * expected to release whatever was allocated. */
2099 static int context_init(H264Context *h){
2100 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2105 return -1; // free_tables will clean up for us
/* Initialization shared by decoder (and, historically, encoder) setup:
 * copies dimensions from the AVCodecContext, sets up intra prediction, and
 * seeds the PPS scaling matrices with the flat default (all 16). */
2108 static av_cold void common_init(H264Context *h){
2109 MpegEncContext * const s = &h->s;
2111 s->width = s->avctx->width;
2112 s->height = s->avctx->height;
2113 s->codec_id= s->avctx->codec->id;
2115 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1: no PPS-derived dequant tables built yet. */
2117 h->dequant_coeff_pps= -1;
2118 s->unrestricted_mv=1;
2119 s->decode=1; //FIXME
/* Flat (16) scaling matrices == no scaling until a PPS overrides them. */
2121 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2122 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up the MpegEncContext defaults, pick the
 * output pixel format (SVQ3 uses full-range YUVJ420P), and detect
 * AVC-style (length-prefixed, extradata starting with 1) bitstreams.
 * NOTE(review): the is_avc/nal_length handling and return are elided here. */
2125 static av_cold int decode_init(AVCodecContext *avctx){
2126 H264Context *h= avctx->priv_data;
2127 MpegEncContext * const s = &h->s;
2129 MPV_decode_defaults(s);
2134 s->out_format = FMT_H264;
2135 s->workaround_bugs= avctx->workaround_bugs;
2138 // s->decode_mb= ff_h263_decode_mb;
2139 s->quarter_sample = 1;
2142 if(avctx->codec_id == CODEC_ID_SVQ3)
2143 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2145 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata beginning with 1 marks an avcC (MP4-style) configuration. */
2149 if(avctx->extradata_size > 0 && avctx->extradata &&
2150 *(char *)avctx->extradata == 1){
2157 h->thread_context[0] = h;
/* INT_MIN: nothing output yet; large prev_poc_msb forces POC resync. */
2158 h->outputed_poc = INT_MIN;
2159 h->prev_poc_msb= 1<<16;
/* Per-frame setup: start the MPV frame and error resilience, precompute
 * the per-block destination offsets (frame and field variants), allocate
 * per-thread bipred scratchpads now that linesize is known, and reset
 * reference/POC bookkeeping on the new picture. */
2163 static int frame_start(H264Context *h){
2164 MpegEncContext * const s = &h->s;
2167 if(MPV_frame_start(s, s->avctx) < 0)
2169 ff_er_frame_start(s);
2171 * MPV_frame_start uses pict_type to derive key_frame.
2172 * This is incorrect for H.264; IDR markings must be used.
2173 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2174 * See decode_nal_units().
2176 s->current_picture_ptr->key_frame= 0;
2178 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-coded offsets; [24..47]: MBAFF/field variants
 * with doubled row stride. */
2180 for(i=0; i<16; i++){
2181 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2182 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2185 h->block_offset[16+i]=
2186 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2187 h->block_offset[24+16+i]=
2188 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2191 /* can't be in alloc_tables because linesize isn't known there.
2192 * FIXME: redo bipred weight to not require extra buffer? */
2193 for(i = 0; i < s->avctx->thread_count; i++)
2194 if(!h->thread_context[i]->s.obmc_scratchpad)
2195 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2197 /* some macroblocks will be accessed before they're available */
2198 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2199 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2201 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2203 // We mark the current picture as non-reference after allocating it, so
2204 // that if we break out due to an error it can be released automatically
2205 // in the next MPV_frame_start().
2206 // SVQ3 as well as most other codecs have only last/next/current and thus
2207 // get released even with set reference, besides SVQ3 and others do not
2208 // mark frames as reference later "naturally".
2209 if(s->codec_id != CODEC_ID_SVQ3)
2210 s->current_picture_ptr->reference= 0;
/* Field POCs filled in later; INT_MAX marks them as not yet decoded. */
2212 s->current_picture_ptr->field_poc[0]=
2213 s->current_picture_ptr->field_poc[1]= INT_MAX;
2214 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row (into top_borders, for the mb row below) and right
 * column (into left_border, for the next mb) of the just-decoded
 * macroblock, before the deblocking filter overwrites them.  The MBAFF
 * path keeps two saved lines per mb pair and uses `step` to interleave
 * field rows.  Chroma is skipped when decoding gray-only. */
2219 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2220 MpegEncContext * const s = &h->s;
2229 src_cb -= uvlinesize;
2230 src_cr -= uvlinesize;
2232 if(!simple && FRAME_MBAFF){
2234 offset = MB_MBAFF ? 1 : 17;
2235 uvoffset= MB_MBAFF ? 1 : 9;
/* Top field of a pair: stash row 15 into top_borders[0] for the bottom mb. */
2237 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2238 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2239 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2240 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2246 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2247 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2248 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2249 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2255 top_idx = MB_MBAFF ? 0 : 1;
2257 step= MB_MBAFF ? 2 : 1;
2260 // There are two lines saved, the line above the the top macroblock of a pair,
2261 // and the line above the bottom macroblock
2262 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2263 for(i=1; i<17 - skiplast; i++){
2264 h->left_border[offset+i*step]= src_y[15+i* linesize];
2267 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2268 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2270 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* Chroma right columns at offsets 34 (Cb) and 34+18 (Cr) in left_border. */
2271 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2272 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2273 for(i=1; i<9 - skiplast; i++){
2274 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2275 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2277 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2278 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved unfiltered border pixels
 * with the current macroblock's edges, so intra prediction sees
 * pre-deblocking neighbours.  The XCHG macro (elided here) swaps via a
 * temp when its last arg is 1, copies one way otherwise.  deblock_left/top
 * gate which edges actually need the exchange (slice / picture edges). */
2282 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2283 MpegEncContext * const s = &h->s;
2294 if(!simple && FRAME_MBAFF){
2296 offset = MB_MBAFF ? 1 : 17;
2297 uvoffset= MB_MBAFF ? 1 : 9;
2301 top_idx = MB_MBAFF ? 0 : 1;
2303 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter does not cross slice boundaries. */
2306 if(h->deblocking_filter == 2) {
2308 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2309 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2311 deblock_left = (s->mb_x > 0);
2312 deblock_top = (s->mb_y > !!MB_FIELD);
/* Step back to the top-left neighbour pixel of the macroblock. */
2315 src_y -= linesize + 1;
2316 src_cb -= uvlinesize + 1;
2317 src_cr -= uvlinesize + 1;
2319 #define XCHG(a,b,t,xchg)\
2326 for(i = !deblock_top; i<16; i++){
2327 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2329 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2333 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2334 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2335 if(s->mb_x+1 < s->mb_width){
2336 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2340 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2342 for(i = !deblock_top; i<8; i++){
2343 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2344 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2346 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2347 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2350 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2351 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture:
 *   1. compute luma/chroma destination pointers (MBAFF adjusts stride and
 *      offsets for field macroblocks),
 *   2. for intra mbs, run spatial prediction (4x4 / 8x8 / 16x16) with the
 *      unfiltered borders swapped in via xchg_mb_border(),
 *   3. for inter mbs, run motion compensation (hl_motion),
 *   4. add the dequantized IDCT residual (H.264 or SVQ3 variants, with a
 *      transform-bypass path for lossless profile 244 streams),
 *   5. back up borders and run the in-loop deblocking filter.
 * `simple` is a compile-time flag that strips MBAFF/gray/PCM/SVQ3 handling
 * for the common fast path. */
2356 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2357 MpegEncContext * const s = &h->s;
2358 const int mb_x= s->mb_x;
2359 const int mb_y= s->mb_y;
2360 const int mb_xy= h->mb_xy;
2361 const int mb_type= s->current_picture.mb_type[mb_xy];
2362 uint8_t *dest_y, *dest_cb, *dest_cr;
2363 int linesize, uvlinesize /*dct_offset*/;
2365 int *block_offset = &h->block_offset[0];
2366 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2367 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2368 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2369 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2371 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2372 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2373 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2375 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2376 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* Field macroblock in an MBAFF frame: double strides, use the field
 * block_offset table, and for the bottom field rewind to the field start. */
2378 if (!simple && MB_FIELD) {
2379 linesize = h->mb_linesize = s->linesize * 2;
2380 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2381 block_offset = &h->block_offset[24];
2382 if(mb_y&1){ //FIXME move out of this function?
2383 dest_y -= s->linesize*15;
2384 dest_cb-= s->uvlinesize*7;
2385 dest_cr-= s->uvlinesize*7;
/* Remap ref indices to per-field references for the deblocking filter. */
2389 for(list=0; list<h->list_count; list++){
2390 if(!USES_LIST(mb_type, list))
2392 if(IS_16X16(mb_type)){
2393 int8_t *ref = &h->ref_cache[list][scan8[0]];
2394 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2396 for(i=0; i<16; i+=4){
2397 int ref = h->ref_cache[list][scan8[i]];
2399 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2405 linesize = h->mb_linesize = s->linesize;
2406 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2407 // dct_offset = s->linesize * 16;
/* PCM macroblock: raw samples are copied straight from h->mb. */
2410 if (!simple && IS_INTRA_PCM(mb_type)) {
2411 for (i=0; i<16; i++) {
2412 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2414 for (i=0; i<8; i++) {
2415 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2416 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2419 if(IS_INTRA(mb_type)){
/* Swap in the unfiltered borders so prediction uses pre-deblock pixels. */
2420 if(h->deblocking_filter)
2421 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2423 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2424 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2425 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2428 if(IS_INTRA4x4(mb_type)){
2429 if(simple || !s->encoding){
2430 if(IS_8x8DCT(mb_type)){
2431 if(transform_bypass){
2433 idct_add = s->dsp.add_pixels8;
2435 idct_dc_add = s->dsp.h264_idct8_dc_add;
2436 idct_add = s->dsp.h264_idct8_add;
2438 for(i=0; i<16; i+=4){
2439 uint8_t * const ptr= dest_y + block_offset[i];
2440 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* Lossless High 4:4:4 (profile 244): prediction fused with residual add. */
2441 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2442 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2444 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2445 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2446 (h->topright_samples_available<<i)&0x4000, linesize);
2448 if(nnz == 1 && h->mb[i*16])
2449 idct_dc_add(ptr, h->mb + i*16, linesize);
2451 idct_add (ptr, h->mb + i*16, linesize);
2456 if(transform_bypass){
2458 idct_add = s->dsp.add_pixels4;
2460 idct_dc_add = s->dsp.h264_idct_dc_add;
2461 idct_add = s->dsp.h264_idct_add;
2463 for(i=0; i<16; i++){
2464 uint8_t * const ptr= dest_y + block_offset[i];
2465 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2467 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2468 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* If top-right is unavailable, replicate the last available top pixel. */
2472 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2473 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2474 assert(mb_y || linesize <= block_offset[i]);
2475 if(!topright_avail){
2476 tr= ptr[3 - linesize]*0x01010101;
2477 topright= (uint8_t*) &tr;
2479 topright= ptr + 4 - linesize;
2483 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2484 nnz = h->non_zero_count_cache[ scan8[i] ];
2487 if(nnz == 1 && h->mb[i*16])
2488 idct_dc_add(ptr, h->mb + i*16, linesize);
2490 idct_add (ptr, h->mb + i*16, linesize);
2492 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* Intra 16x16: full-plane prediction plus separate luma DC transform. */
2499 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2501 if(!transform_bypass)
2502 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2504 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2506 if(h->deblocking_filter)
2507 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* Inter macroblock: motion compensation with optional weighted prediction. */
2509 hl_motion(h, dest_y, dest_cb, dest_cr,
2510 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2511 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2512 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* Add luma residual for non-intra4x4 mbs (intra4x4 already added above). */
2516 if(!IS_INTRA4x4(mb_type)){
2518 if(IS_INTRA16x16(mb_type)){
2519 if(transform_bypass){
2520 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2521 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2523 for(i=0; i<16; i++){
2524 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2525 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2529 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2531 }else if(h->cbp&15){
2532 if(transform_bypass){
2533 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2534 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2535 for(i=0; i<16; i+=di){
2536 if(h->non_zero_count_cache[ scan8[i] ]){
2537 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2541 if(IS_8x8DCT(mb_type)){
2542 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2544 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2549 for(i=0; i<16; i++){
2550 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2551 uint8_t * const ptr= dest_y + block_offset[i];
2552 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* Chroma residual (cbp bits 4-5); blocks 16-19 are Cb, 20-23 are Cr. */
2558 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2559 uint8_t *dest[2] = {dest_cb, dest_cr};
2560 if(transform_bypass){
2561 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2562 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2563 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2565 idct_add = s->dsp.add_pixels4;
2566 for(i=16; i<16+8; i++){
2567 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2568 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2572 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2573 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2575 idct_add = s->dsp.h264_idct_add;
2576 idct_dc_add = s->dsp.h264_idct_dc_add;
2577 for(i=16; i<16+8; i++){
2578 if(h->non_zero_count_cache[ scan8[i] ])
2579 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2580 else if(h->mb[i*16])
2581 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2584 for(i=16; i<16+8; i++){
2585 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2586 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2587 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* Finally: save unfiltered borders and run the in-loop deblocking filter. */
2594 if(h->deblocking_filter) {
2595 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2596 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2597 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2598 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2599 if (!simple && FRAME_MBAFF) {
2600 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2602 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2608 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal with simple=1, so the compiler strips
 * MBAFF/gray/PCM/SVQ3 branches at the always-inline expansion. */
2610 static void hl_decode_mb_simple(H264Context *h){
2611 hl_decode_mb_internal(h, 1);
2615 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: full hl_decode_mb_internal (simple=0); av_noinline keeps the
 * rarely-taken expansion out of the hot caller. */
2617 static void av_noinline hl_decode_mb_complex(H264Context *h){
2618 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant.
 * PCM mbs, lossless (qscale==0) and contexts flagged is_complex always take
 * the complex path; CONFIG_SMALL builds compile only the complex one. */
2621 static void hl_decode_mb(H264Context *h){
2622 MpegEncContext * const s = &h->s;
2623 const int mb_xy= h->mb_xy;
2624 const int mb_type= s->current_picture.mb_type[mb_xy];
2625 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
/* Encoder-only contexts with decode disabled skip reconstruction. */
2627 if(ENABLE_H264_ENCODER && !s->decode)
2631 hl_decode_mb_complex(h);
2632 else hl_decode_mb_simple(h);
/* Convert a frame Picture (in place) into a single-field view: double the
 * linesizes, offset the data pointers for the bottom field, and take the
 * matching field POC. */
2635 static void pic_as_field(Picture *pic, const int parity){
2637 for (i = 0; i < 4; ++i) {
2638 if (parity == PICT_BOTTOM_FIELD)
2639 pic->data[i] += pic->linesize[i];
2640 pic->reference = parity;
2641 pic->linesize[i] *= 2;
2643 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy `src` into `dest` as a field (or frame) reference when src holds a
 * reference of the requested parity; returns whether the parity matched.
 * NOTE(review): the copy of *src into *dest is on an elided line here. */
2646 static int split_field_copy(Picture *dest, Picture *src,
2647 int parity, int id_add){
2648 int match = !!(src->reference & parity);
2652 if(parity != PICT_FRAME){
2653 pic_as_field(dest, parity);
2655 dest->pic_id += id_add;
/* Append entries from `in` to the default reference list `def`,
 * alternating between the selected field parity (`sel`) and its opposite
 * (`sel^3`), per the H.264 field reference list construction.  pic_id is
 * the long-term index for long refs, frame_num otherwise.
 * NOTE(review): index init and return are on elided lines. */
2662 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2666 while(i[0]<len || i[1]<len){
/* Advance each cursor to the next picture referencing the wanted parity. */
2667 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2669 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2672 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2673 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2676 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2677 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort the short-term refs with POC on the `dir` side of `limit`
 * into `sorted` (dir=0: descending POCs below limit; dir=1: ascending POCs
 * above it), as needed for B-slice default list construction.  Returns the
 * number of pictures appended (return itself is on an elided line). */
2684 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2689 best_poc= dir ? INT_MIN : INT_MAX;
2691 for(i=0; i<len; i++){
2692 const int poc= src[i]->poc;
2693 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2695 sorted[out_i]= src[i];
/* Sentinel unchanged => nothing matched this round: done. */
2698 if(best_poc == (dir ? INT_MIN : INT_MAX))
2700 limit= sorted[out_i++]->poc - dir;
2706 * fills the default_ref_list.
/* Build the default reference picture lists for the current slice.
 * B slices: short-term refs sorted by POC distance around the current POC
 * (opposite order for list 1), then long-term refs; if both lists come out
 * identical, the first two entries of list 1 are swapped (spec 8.2.4.2.3).
 * P slices: short-term refs in decoding order, then long-term refs. */
2708 static int fill_default_ref_list(H264Context *h){
2709 MpegEncContext * const s = &h->s;
2712 if(h->slice_type_nos==FF_B_TYPE){
2713 Picture *sorted[32];
/* Field decoding uses the current field's POC as the pivot. */
2718 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2720 cur_poc= s->current_picture_ptr->poc;
2722 for(list= 0; list<2; list++){
2723 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2724 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2726 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2727 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2730 if(len < h->ref_count[list])
2731 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2735 if(lens[0] == lens[1] && lens[1] > 1){
2736 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2738 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2741 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2742 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2744 if(len < h->ref_count[0])
2745 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* Trace output only (tprintf compiles away without -DTRACE). */
2748 for (i=0; i<h->ref_count[0]; i++) {
2749 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2751 if(h->slice_type_nos==FF_B_TYPE){
2752 for (i=0; i<h->ref_count[1]; i++) {
2753 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2760 static void print_short_term(H264Context *h);
2761 static void print_long_term(H264Context *h);
2764 * Extract structure information about the picture described by pic_num in
2765 * the current decoding context (frame or field). Note that pic_num is
2766 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2767 * @param pic_num picture number for which to extract structure information
2768 * @param structure one of PICT_XXX describing structure of picture
2770 * @return frame number (short term) or long term index of picture
2771 * described by pic_num
/* Split a pic_num into (frame number / long-term index, field structure):
 * same-parity pictures have odd pic_num in field mode; an even one refers
 * to the opposite field, so the structure is flipped.
 * NOTE(review): the field-mode shift of pic_num and the return statement
 * are on elided lines. */
2773 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2774 MpegEncContext * const s = &h->s;
2776 *structure = s->picture_structure;
2779 /* opposite field */
2780 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * starting from the default lists, each reordering op moves the named
 * short-term (idc 0/1, by frame_num delta) or long-term (idc 2, by index)
 * picture to the current position, shifting the rest down.  Finally, any
 * hole left in a list is patched with the current picture (FIXME in code).
 * Returns 0 on success, -1 on bitstream errors (returns are elided here). */
2787 static int decode_ref_pic_list_reordering(H264Context *h){
2788 MpegEncContext * const s = &h->s;
2789 int list, index, pic_structure;
2791 print_short_term(h);
2794 for(list=0; list<h->list_count; list++){
2795 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
2797 if(get_bits1(&s->gb)){
2798 int pred= h->curr_pic_num;
2800 for(index=0; ; index++){
2801 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2802 unsigned int pic_id;
2804 Picture *ref = NULL;
/* idc 3 terminates the reordering loop for this list. */
2806 if(reordering_of_pic_nums_idc==3)
2809 if(index >= h->ref_count[list]){
2810 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2814 if(reordering_of_pic_nums_idc<3){
2815 if(reordering_of_pic_nums_idc<2){
/* Short-term: idc 0 subtracts, idc 1 adds abs_diff_pic_num (mod max). */
2816 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2819 if(abs_diff_pic_num > h->max_pic_num){
2820 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2824 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2825 else pred+= abs_diff_pic_num;
2826 pred &= h->max_pic_num - 1;
2828 frame_num = pic_num_extract(h, pred, &pic_structure);
2830 for(i= h->short_ref_count-1; i>=0; i--){
2831 ref = h->short_ref[i];
2832 assert(ref->reference);
2833 assert(!ref->long_ref);
2835 ref->frame_num == frame_num &&
2836 (ref->reference & pic_structure)
2844 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2846 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2849 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2852 ref = h->long_ref[long_idx];
2853 assert(!(ref && !ref->reference));
2854 if(ref && (ref->reference & pic_structure)){
2855 ref->pic_id= pic_id;
2856 assert(ref->long_ref);
2864 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2865 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Shift entries down and insert the found reference at `index`. */
2867 for(i=index; i+1<h->ref_count[list]; i++){
2868 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2871 for(; i > index; i--){
2872 h->ref_list[list][i]= h->ref_list[list][i-1];
2874 h->ref_list[list][index]= *ref;
2876 pic_as_field(&h->ref_list[list][index], pic_structure);
2880 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* Sanity pass: no list entry may be left without picture data. */
2886 for(list=0; list<h->list_count; list++){
2887 for(index= 0; index < h->ref_count[list]; index++){
2888 if(!h->ref_list[list][index].data[0]){
2889 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2890 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF frames, derive per-field reference entries: each frame ref i
 * gets a top-field copy at slot 16+2*i and a bottom-field copy at
 * 16+2*i+1 (field strides, field POCs), and the explicit/implicit
 * weighting tables are duplicated accordingly. */
2898 static void fill_mbaff_ref_list(H264Context *h){
2900 for(list=0; list<2; list++){ //FIXME try list_count
2901 for(i=0; i<h->ref_count[list]; i++){
2902 Picture *frame = &h->ref_list[list][i];
2903 Picture *field = &h->ref_list[list][16+2*i];
2906 field[0].linesize[j] <<= 1;
2907 field[0].reference = PICT_TOP_FIELD;
2908 field[0].poc= field[0].field_poc[0];
/* Bottom field: same as top but data offset by one frame line. */
2909 field[1] = field[0];
2911 field[1].data[j] += frame->linesize[j];
2912 field[1].reference = PICT_BOTTOM_FIELD;
2913 field[1].poc= field[1].field_poc[1];
2915 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2916 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2918 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2919 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2923 for(j=0; j<h->ref_count[1]; j++){
2924 for(i=0; i<h->ref_count[0]; i++)
2925 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2926 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2927 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse the explicit weighted-prediction table (pred_weight_table()) from
 * the slice header: log2 denominators, then per reference an optional
 * luma and chroma weight/offset pair, defaulting to (1<<denom, 0).
 * use_weight/use_weight_chroma are set when any entry deviates from the
 * default.  List 1 is parsed only for B slices. */
2931 static int pred_weight_table(H264Context *h){
2932 MpegEncContext * const s = &h->s;
2934 int luma_def, chroma_def;
2937 h->use_weight_chroma= 0;
2938 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2939 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2940 luma_def = 1<<h->luma_log2_weight_denom;
2941 chroma_def = 1<<h->chroma_log2_weight_denom;
2943 for(list=0; list<2; list++){
2944 for(i=0; i<h->ref_count[list]; i++){
2945 int luma_weight_flag, chroma_weight_flag;
2947 luma_weight_flag= get_bits1(&s->gb);
2948 if(luma_weight_flag){
2949 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2950 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2951 if( h->luma_weight[list][i] != luma_def
2952 || h->luma_offset[list][i] != 0)
2955 h->luma_weight[list][i]= luma_def;
2956 h->luma_offset[list][i]= 0;
2960 chroma_weight_flag= get_bits1(&s->gb);
2961 if(chroma_weight_flag){
2964 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2965 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2966 if( h->chroma_weight[list][i][j] != chroma_def
2967 || h->chroma_offset[list][i][j] != 0)
2968 h->use_weight_chroma= 1;
2973 h->chroma_weight[list][i][j]= chroma_def;
2974 h->chroma_offset[list][i][j]= 0;
2979 if(h->slice_type_nos != FF_B_TYPE) break;
2981 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Derive implicit bipred weights from POC distances (spec 8.4.2.3.2):
 * weight = 64 - dist_scale_factor, clamped to 32/32 when the scale factor
 * leaves [-64,128] or td is 0.  The 1-ref/1-ref symmetric case disables
 * weighting entirely. */
2985 static void implicit_weight_table(H264Context *h){
2986 MpegEncContext * const s = &h->s;
2988 int cur_poc = s->current_picture_ptr->poc;
2990 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2991 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2993 h->use_weight_chroma= 0;
/* use_weight==2 marks implicit mode; denominators are fixed at 5. */
2998 h->use_weight_chroma= 2;
2999 h->luma_log2_weight_denom= 5;
3000 h->chroma_log2_weight_denom= 5;
3002 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3003 int poc0 = h->ref_list[0][ref0].poc;
3004 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3005 int poc1 = h->ref_list[1][ref1].poc;
3006 int td = av_clip(poc1 - poc0, -128, 127);
3008 int tb = av_clip(cur_poc - poc0, -128, 127);
3009 int tx = (16384 + (FFABS(td) >> 1)) / td;
3010 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3011 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3012 h->implicit_weight[ref0][ref1] = 32;
3014 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3016 h->implicit_weight[ref0][ref1] = 32;
/**
3022 * Mark a picture as no longer needed for reference. The refmask
3023 * argument allows unreferencing of individual fields or the whole frame.
3024 * If the picture becomes entirely unreferenced, but is being held for
3025 * display purposes, it is marked as such.
3026 * @param refmask mask of fields to unreference; the mask is bitwise
3027 * anded with the reference marking of pic
3028 * @return non-zero if pic becomes entirely unreferenced (except possibly
3029 * for display purposes) zero if one of the fields remains in
 * NOTE(review): extract omits the comment opener/closer, the return
 * statements and closing braces of this function.
 */
3032 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* keep whatever field bits survive the mask; non-zero means still a ref */
3034 if (pic->reference &= refmask) {
/* fully unreferenced: if the pic is queued for output, keep it alive
 * with the special DELAYED_PIC_REF marking instead of freeing it */
3037 for(i = 0; h->delayed_pic[i]; i++)
3038 if(pic == h->delayed_pic[i]){
3039 pic->reference=DELAYED_PIC_REF;
/**
3047 * instantaneous decoder refresh.
 * Drops all long- and short-term references and resets frame_num state,
 * as required when an IDR slice is encountered.
 * NOTE(review): extract omits loop-variable declaration and closing
 * braces.
 */
3049 static void idr(H264Context *h){
/* clear the whole long-term reference array (16 slots) */
3052 for(i=0; i<16; i++){
3053 remove_long(h, i, 0);
3055 assert(h->long_ref_count==0);
/* unreference and drop every short-term reference */
3057 for(i=0; i<h->short_ref_count; i++){
3058 unreference_pic(h, h->short_ref[i], 0);
3059 h->short_ref[i]= NULL;
3061 h->short_ref_count=0;
/* restart frame-number bookkeeping from scratch */
3062 h->prev_frame_num= 0;
3063 h->prev_frame_num_offset= 0;
3068 /* forget old pics after a seek */
/* AVCodecContext.flush callback: empties the delayed-output queue,
 * performs an IDR-style reference reset, unreferences the picture being
 * decoded, and finally delegates to the generic MPEG flush.
 * NOTE(review): extract omits loop-variable declaration, the idr() call
 * and closing braces. */
3069 static void flush_dpb(AVCodecContext *avctx){
3070 H264Context *h= avctx->priv_data;
3072 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3073 if(h->delayed_pic[i])
/* drop DELAYED_PIC_REF marking so the buffer can be reused */
3074 h->delayed_pic[i]->reference= 0;
3075 h->delayed_pic[i]= NULL;
/* INT_MIN: no picture has been output yet after the seek */
3077 h->outputed_poc= INT_MIN;
3079 if(h->s.current_picture_ptr)
3080 h->s.current_picture_ptr->reference= 0;
3081 h->s.first_field= 0;
3082 ff_mpeg_flush(avctx);
/**
3086 * Find a Picture in the short term reference list by frame number.
3087 * @param frame_num frame number to search for
3088 * @param idx the index into h->short_ref where returned picture is found
3089 * undefined if no picture found.
3090 * @return pointer to the found picture, or NULL if no pic with the provided
3091 * frame number is found
 * NOTE(review): extract omits the comment delimiters, the body of the
 * found-branch (storing *idx, returning pic) and the final return NULL.
 */
3093 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3094 MpegEncContext * const s = &h->s;
/* linear scan — short_ref is small (<=16 entries) */
3097 for(i=0; i<h->short_ref_count; i++){
3098 Picture *pic= h->short_ref[i];
3099 if(s->avctx->debug&FF_DEBUG_MMCO)
3100 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3101 if(pic->frame_num == frame_num) {
/**
3110 * Remove a picture from the short term reference list by its index in
3111 * that list. This does no checking on the provided index; it is assumed
3112 * to be valid. Other list entries are shifted down.
3113 * @param i index into h->short_ref of picture to remove.
 */
3115 static void remove_short_at_index(H264Context *h, int i){
3116 assert(i >= 0 && i < h->short_ref_count);
3117 h->short_ref[i]= NULL;
/* compact the list: shift trailing entries one slot down.
 * memmove (not memcpy) because source and destination overlap */
3118 if (--h->short_ref_count)
3119 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/**
 * Remove (unreference) the short-term reference picture with the given
 * frame number, subject to ref_mask (field-level unreferencing).
3124 * @return the removed picture or NULL if an error occurs
 * NOTE(review): extract omits local declarations (i, pic) and the
 * return statement / closing brace.
 */
3126 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3127 MpegEncContext * const s = &h->s;
3131 if(s->avctx->debug&FF_DEBUG_MMCO)
3132 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3134 pic = find_short(h, frame_num, &i);
/* only drop the list entry once the pic is entirely unreferenced
 * (unreference_pic may leave one field referenced) */
3136 if(unreference_pic(h, pic, ref_mask))
3137 remove_short_at_index(h, i);
/**
3144 * Remove a picture from the long term reference list by its index in
 * that list, subject to ref_mask (field-level unreferencing).
3146 * @return the removed picture or NULL if an error occurs
 * NOTE(review): extract omits the pic declaration, return statement and
 * closing braces.
 */
3148 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3151 pic= h->long_ref[i];
/* clear the slot only when the picture is fully unreferenced */
3153 if(unreference_pic(h, pic, ref_mask)){
3154 assert(h->long_ref[i]->long_ref == 1);
3155 h->long_ref[i]->long_ref= 0;
3156 h->long_ref[i]= NULL;
3157 h->long_ref_count--;
/**
3165 * print short term list
 * Debug helper: dumps the short-term reference list, only active when
 * FF_DEBUG_MMCO is set on the AVCodecContext.
 */
3167 static void print_short_term(H264Context *h) {
3169 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3170 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3171 for(i=0; i<h->short_ref_count; i++){
3172 Picture *pic= h->short_ref[i];
3173 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/**
3179 * print long term list
 * Debug helper: dumps all 16 long-term reference slots, only active
 * when FF_DEBUG_MMCO is set.  (The NULL-slot guard is elided in this
 * extract.)
 */
3181 static void print_long_term(H264Context *h) {
3183 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3184 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3185 for(i = 0; i < 16; i++){
3186 Picture *pic= h->long_ref[i];
3188 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/**
3195 * Executes the reference picture marking (memory management control operations).
 * Applies each decoded MMCO opcode to the short/long reference lists,
 * then (for the sliding-window / second-field cases) inserts the current
 * picture into the appropriate list and enforces sps.ref_frame_count.
 * NOTE(review): this extract omits many lines (declarations, else
 * branches, break statements, closing braces, the MMCO_LONG and
 * MMCO_RESET case labels); not compilable as shown.
 */
3197 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3198 MpegEncContext * const s = &h->s;
3200 int current_ref_assigned=0;
3203 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3204 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3206 for(i=0; i<mmco_count; i++){
3207 int structure, frame_num;
3208 if(s->avctx->debug&FF_DEBUG_MMCO)
3209 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* pre-resolve the short-term pic these two opcodes both operate on */
3211 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3212 || mmco[i].opcode == MMCO_SHORT2LONG){
3213 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3214 pic = find_short(h, frame_num, &j);
3216 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3217 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3218 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3223 switch(mmco[i].opcode){
3224 case MMCO_SHORT2UNUSED:
3225 if(s->avctx->debug&FF_DEBUG_MMCO)
3226 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME: unreference the complementary part(s) */
3227 remove_short(h, frame_num, structure ^ PICT_FRAME);
3229 case MMCO_SHORT2LONG:
/* move a short-term ref to long-term slot long_arg, evicting any
 * different picture currently occupying that slot */
3230 if (h->long_ref[mmco[i].long_arg] != pic)
3231 remove_long(h, mmco[i].long_arg, 0);
3233 remove_short_at_index(h, j);
3234 h->long_ref[ mmco[i].long_arg ]= pic;
3235 if (h->long_ref[ mmco[i].long_arg ]){
3236 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3237 h->long_ref_count++;
3240 case MMCO_LONG2UNUSED:
3241 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3242 pic = h->long_ref[j];
3244 remove_long(h, j, structure ^ PICT_FRAME);
3245 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3246 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* (elided) case MMCO_LONG: assign current picture to a long-term slot */
3249 // Comment below left from previous code as it is an interresting note.
3250 /* First field in pair is in short term list or
3251 * at a different long term index.
3252 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3253 * Report the problem and keep the pair where it is,
3254 * and mark this field valid.
3257 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3258 remove_long(h, mmco[i].long_arg, 0);
3260 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3261 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3262 h->long_ref_count++;
3265 s->current_picture_ptr->reference |= s->picture_structure;
3266 current_ref_assigned=1;
3268 case MMCO_SET_MAX_LONG:
3269 assert(mmco[i].long_arg <= 16);
3270 // just remove the long term which index is greater than new max
3271 for(j = mmco[i].long_arg; j<16; j++){
3272 remove_long(h, j, 0);
/* (elided) case MMCO_RESET: full reference reset, like an IDR */
3276 while(h->short_ref_count){
3277 remove_short(h, h->short_ref[0]->frame_num, 0);
3279 for(j = 0; j < 16; j++) {
3280 remove_long(h, j, 0);
3282 s->current_picture_ptr->poc=
3283 s->current_picture_ptr->field_poc[0]=
3284 s->current_picture_ptr->field_poc[1]=
3288 s->current_picture_ptr->frame_num= 0;
3294 if (!current_ref_assigned) {
3295 /* Second field of complementary field pair; the first field of
3296 * which is already referenced. If short referenced, it
3297 * should be first entry in short_ref. If not, it must exist
3298 * in long_ref; trying to put it on the short list here is an
3299 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3301 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3302 /* Just mark the second field valid */
3303 s->current_picture_ptr->reference = PICT_FRAME;
3304 } else if (s->current_picture_ptr->long_ref) {
3305 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3306 "assignment for second field "
3307 "in complementary field pair "
3308 "(first field is long term)\n");
/* normal sliding-window path: push current pic onto short_ref[0],
 * first removing any stale entry with the same frame_num */
3310 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3312 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3315 if(h->short_ref_count)
3316 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3318 h->short_ref[0]= s->current_picture_ptr;
3319 h->short_ref_count++;
3320 s->current_picture_ptr->reference |= s->picture_structure;
3324 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3326 /* We have too many reference frames, probably due to corrupted
3327 * stream. Need to discard one frame. Prevents overrun of the
3328 * short_ref and long_ref buffers.
3330 av_log(h->s.avctx, AV_LOG_ERROR,
3331 "number of reference frames exceeds max (probably "
3332 "corrupt input), discarding one\n");
3334 if (h->long_ref_count && !h->short_ref_count) {
3335 for (i = 0; i < 16; ++i)
3340 remove_long(h, i, 0);
/* (elided else) otherwise drop the oldest short-term reference */
3342 pic = h->short_ref[h->short_ref_count - 1];
3343 remove_short(h, pic->frame_num, 0);
3347 print_short_term(h);
/**
 * Parse the dec_ref_pic_marking() syntax of the slice header into
 * h->mmco[] / h->mmco_index.  For IDR slices this reads
 * no_output_of_prior_pics / long_term_reference flags; otherwise it
 * reads explicit MMCO opcodes or synthesizes a sliding-window
 * MMCO_SHORT2UNUSED when the reference buffers are full.
 * NOTE(review): extract omits lines (IDR else-branch, mmco_index
 * assignments, error returns, closing braces); not compilable as shown.
 */
3352 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3353 MpegEncContext * const s = &h->s;
3357 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* get_bits1()-1 maps flag {0,1} to {-1,0}; non-zero => broken link */
3358 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: keep IDR pic as long-term ref in slot 0 */
3360 h->mmco[0].opcode= MMCO_LONG;
3361 h->mmco[0].long_arg= 0;
3365 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3366 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3367 MMCOOpcode opcode= get_ue_golomb(gb);
3369 h->mmco[i].opcode= opcode;
3370 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo wrap */
3371 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3372 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3373 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3377 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3378 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures may address up to 32 long-term indices for
 * MMCO_LONG2UNUSED; everything else is capped at 16 */
3379 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3380 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3383 h->mmco[i].long_arg= long_arg;
3386 if(opcode > (unsigned)MMCO_LONG){
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3390 if(opcode == MMCO_END)
/* (elided else branch): sliding-window marking mode */
3395 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* buffers full: synthesize removal of the oldest short-term ref,
 * unless this is the (already referenced) second field of a pair */
3397 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3398 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3399 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3400 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3402 if (FIELD_PICTURE) {
/* field coding: frame_num maps to two pic nums (top/bottom) */
3403 h->mmco[0].short_pic_num *= 2;
3404 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3405 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) of the current picture for all
 * three POC types of the spec (8.2.1): type 0 (poc_lsb/msb wraparound),
 * type 1 (expected-delta cycles) and type 2 (frame_num derived, elided
 * here).  Stores per-field POCs and their minimum into the current
 * Picture.
 * NOTE(review): extract omits lines (field_poc declaration, the
 * field_poc[0] assignment for type 0, the type-1 else branches, the
 * type-2 body, return statement, closing braces); not compilable as-is.
 */
3415 static int init_poc(H264Context *h){
3416 MpegEncContext * const s = &h->s;
3417 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3419 Picture *cur = s->current_picture_ptr;
3421 h->frame_num_offset= h->prev_frame_num_offset;
/* frame_num wrapped around since the previous picture */
3422 if(h->frame_num < h->prev_frame_num)
3423 h->frame_num_offset += max_frame_num;
3425 if(h->sps.poc_type==0){
3426 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* standard poc_msb derivation: detect lsb wrap in either direction */
3428 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3429 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3430 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3431 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3433 h->poc_msb = h->prev_poc_msb;
3434 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3436 field_poc[1] = h->poc_msb + h->poc_lsb;
3437 if(s->picture_structure == PICT_FRAME)
3438 field_poc[1] += h->delta_poc_bottom;
3439 }else if(h->sps.poc_type==1){
3440 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3443 if(h->sps.poc_cycle_length != 0)
3444 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures sit between the reference grid points */
3448 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3451 expected_delta_per_poc_cycle = 0;
3452 for(i=0; i < h->sps.poc_cycle_length; i++)
3453 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3455 if(abs_frame_num > 0){
3456 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3457 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3459 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3460 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3461 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3465 if(h->nal_ref_idc == 0)
3466 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3468 field_poc[0] = expectedpoc + h->delta_poc[0];
3469 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3471 if(s->picture_structure == PICT_FRAME)
3472 field_poc[1] += h->delta_poc[1];
/* (elided else) poc_type==2: POC derived directly from frame_num */
3474 int poc= 2*(h->frame_num_offset + h->frame_num);
/* store only the field(s) this picture actually codes */
3483 if(s->picture_structure != PICT_BOTTOM_FIELD)
3484 s->current_picture_ptr->field_poc[0]= field_poc[0];
3485 if(s->picture_structure != PICT_TOP_FIELD)
3486 s->current_picture_ptr->field_poc[1]= field_poc[1];
3487 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
/**
3494 * initialize scan tables
 * Copies or permutes the zigzag/field scan tables into the context.
 * When the IDCT in use is the plain C one the canonical order is kept;
 * otherwise the entries are transposed (T macros) to match the
 * SIMD IDCT's in-memory coefficient layout.  Also sets the *_q0
 * aliases used for lossless (transform-bypass) blocks.
 * NOTE(review): extract omits the else keywords, #undef lines and
 * closing braces around the T() loops.
 */
3496 static void init_scan_tables(H264Context *h){
3497 MpegEncContext * const s = &h->s;
3499 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3500 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3501 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3503 for(i=0; i<16; i++){
/* 4x4 transpose of the scan position */
3504 #define T(x) (x>>2) | ((x<<2) & 0xF)
3505 h->zigzag_scan[i] = T(zigzag_scan[i]);
3506 h-> field_scan[i] = T( field_scan[i]);
3510 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3511 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3512 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3513 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3514 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3516 for(i=0; i<64; i++){
/* 8x8 transpose of the scan position */
3517 #define T(x) (x>>3) | ((x&7)<<3)
3518 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3519 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3520 h->field_scan8x8[i] = T(field_scan8x8[i]);
3521 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3525 if(h->sps.transform_bypass){ //FIXME same ugly
/* bypass blocks must use the untransposed canonical order */
3526 h->zigzag_scan_q0 = zigzag_scan;
3527 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3528 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3529 h->field_scan_q0 = field_scan;
3530 h->field_scan8x8_q0 = field_scan8x8;
3531 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3533 h->zigzag_scan_q0 = h->zigzag_scan;
3534 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3535 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3536 h->field_scan_q0 = h->field_scan;
3537 h->field_scan8x8_q0 = h->field_scan8x8;
3538 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
/**
3543 * Replicates H264 "master" context to thread contexts.
 * Shallow-copies the per-frame decoding state a slice thread needs:
 * current picture pointers, POC/frame_num history, reference lists and
 * dequant tables.  Called once per slice for slice-parallel decoding.
 */
3545 static void clone_slice(H264Context *dst, H264Context *src)
3547 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3548 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3549 dst->s.current_picture = src->s.current_picture;
3550 dst->s.linesize = src->s.linesize;
3551 dst->s.uvlinesize = src->s.uvlinesize;
3552 dst->s.first_field = src->s.first_field;
3554 dst->prev_poc_msb = src->prev_poc_msb;
3555 dst->prev_poc_lsb = src->prev_poc_lsb;
3556 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3557 dst->prev_frame_num = src->prev_frame_num;
3558 dst->short_ref_count = src->short_ref_count;
3560 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3561 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3562 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3563 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
/* note: sizeof(src->...) here, equivalent since dst/src are same type */
3565 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3566 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
/**
3570 * decodes a slice header.
3571 * This will also call MPV_common_init() and frame_start() as needed.
3573 * @param h h264context
3574 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3576 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
 * NOTE(review): this extract omits a large number of source lines
 * (error returns after av_log calls, else keywords, closing braces,
 * range checks, #if 0 guards); the text below is NOT compilable as
 * shown and must be read against the full file.
 */
3578 static int decode_slice_header(H264Context *h, H264Context *h0){
3579 MpegEncContext * const s = &h->s;
3580 MpegEncContext * const s0 = &h0->s;
3581 unsigned int first_mb_in_slice;
3582 unsigned int pps_id;
3583 int num_ref_idx_active_override_flag;
3584 unsigned int slice_type, tmp, i, j;
3585 int default_ref_list_done = 0;
3586 int last_pic_structure;
/* non-reference slices may be dropped without harming the ref chain */
3588 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: cheaper 2-tap qpel interpolation for droppable frames */
3590 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3591 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3592 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3594 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3595 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3598 first_mb_in_slice= get_ue_golomb(&s->gb);
3600 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3601 h0->current_slice = 0;
3602 if (!s0->first_field)
3603 s->current_picture_ptr= NULL;
/* --- slice_type parsing; values >4 mean "fixed for whole picture" --- */
3606 slice_type= get_ue_golomb(&s->gb);
3608 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3613 h->slice_type_fixed=1;
3615 h->slice_type_fixed=0;
3617 slice_type= golomb_to_pict_type[ slice_type ];
3618 if (slice_type == FF_I_TYPE
3619 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3620 default_ref_list_done = 1;
3622 h->slice_type= slice_type;
/* slice_type_nos folds SI/SP onto I/P (drops the "switching" bit) */
3623 h->slice_type_nos= slice_type & 3;
3625 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3626 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3627 av_log(h->s.avctx, AV_LOG_ERROR,
3628 "B picture before any references, skipping\n");
/* --- PPS/SPS activation --- */
3632 pps_id= get_ue_golomb(&s->gb);
3633 if(pps_id>=MAX_PPS_COUNT){
3634 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3637 if(!h0->pps_buffers[pps_id]) {
3638 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3641 h->pps= *h0->pps_buffers[pps_id];
3643 if(!h0->sps_buffers[h->pps.sps_id]) {
3644 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3647 h->sps = *h0->sps_buffers[h->pps.sps_id];
3649 if(h == h0 && h->dequant_coeff_pps != pps_id){
3650 h->dequant_coeff_pps = pps_id;
3651 init_dequant_tables(h);
/* --- derive picture dimensions from the active SPS --- */
3654 s->mb_width= h->sps.mb_width;
3655 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3657 h->b_stride= s->mb_width*4;
3658 h->b8_stride= s->mb_width*2;
3660 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3661 if(h->sps.frame_mbs_only_flag)
3662 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3664 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3666 if (s->context_initialized
3667 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3669 return -1; // width / height changed during parallelized decoding
3671 flush_dpb(s->avctx);
/* --- (re)initialize the MPV context and per-thread clones --- */
3674 if (!s->context_initialized) {
3676 return -1; // we cant (re-)initialize context during parallel decoding
3677 if (MPV_common_init(s) < 0)
3681 init_scan_tables(h);
3684 for(i = 1; i < s->avctx->thread_count; i++) {
3686 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
/* seed the thread context with the MpegEncContext part, zero the rest */
3687 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3688 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3691 init_scan_tables(c);
3695 for(i = 0; i < s->avctx->thread_count; i++)
3696 if(context_init(h->thread_context[i]) < 0)
3699 s->avctx->width = s->width;
3700 s->avctx->height = s->height;
3701 s->avctx->sample_aspect_ratio= h->sps.sar;
3702 if(!s->avctx->sample_aspect_ratio.den)
3703 s->avctx->sample_aspect_ratio.den = 1;
3705 if(h->sps.timing_info_present_flag){
3706 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* workaround for old x264 builds that wrote half the timescale */
3707 if(h->x264_build > 0 && h->x264_build < 44)
3708 s->avctx->time_base.den *= 2;
3709 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3710 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3714 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* --- picture structure (frame / field / MBAFF) --- */
3717 h->mb_aff_frame = 0;
3718 last_pic_structure = s0->picture_structure;
3719 if(h->sps.frame_mbs_only_flag){
3720 s->picture_structure= PICT_FRAME;
3722 if(get_bits1(&s->gb)) { //field_pic_flag
3723 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3725 s->picture_structure= PICT_FRAME;
3726 h->mb_aff_frame = h->sps.mb_aff;
3729 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3731 if(h0->current_slice == 0){
/* conceal frame_num gaps by synthesizing the skipped reference frames */
3732 while(h->frame_num != h->prev_frame_num &&
3733 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3734 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3736 h->prev_frame_num++;
3737 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3738 s->current_picture_ptr->frame_num= h->prev_frame_num;
3739 execute_ref_pic_marking(h, NULL, 0);
3742 /* See if we have a decoded first field looking for a pair... */
3743 if (s0->first_field) {
3744 assert(s0->current_picture_ptr);
3745 assert(s0->current_picture_ptr->data[0]);
3746 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3748 /* figure out if we have a complementary field pair */
3749 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3751 * Previous field is unmatched. Don't display it, but let it
3752 * remain for reference if marked as such.
3754 s0->current_picture_ptr = NULL;
3755 s0->first_field = FIELD_PICTURE;
3758 if (h->nal_ref_idc &&
3759 s0->current_picture_ptr->reference &&
3760 s0->current_picture_ptr->frame_num != h->frame_num) {
3762 * This and previous field were reference, but had
3763 * different frame_nums. Consider this field first in
3764 * pair. Throw away previous field except for reference
3767 s0->first_field = 1;
3768 s0->current_picture_ptr = NULL;
3771 /* Second field in complementary pair */
3772 s0->first_field = 0;
3777 /* Frame or first field in a potentially complementary pair */
3778 assert(!s0->current_picture_ptr);
3779 s0->first_field = FIELD_PICTURE;
3782 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3783 s0->first_field = 0;
3790 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3792 assert(s->mb_num == s->mb_width * s->mb_height);
3793 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3794 first_mb_in_slice >= s->mb_num){
3795 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3798 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3799 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3800 if (s->picture_structure == PICT_BOTTOM_FIELD)
3801 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3802 assert(s->mb_y < s->mb_height);
/* frame coding: pic num == frame num; field coding: 2*frame_num+1 */
3804 if(s->picture_structure==PICT_FRAME){
3805 h->curr_pic_num= h->frame_num;
3806 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3808 h->curr_pic_num= 2*h->frame_num + 1;
3809 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3812 if(h->nal_unit_type == NAL_IDR_SLICE){
3813 get_ue_golomb(&s->gb); /* idr_pic_id */
/* --- POC related slice-header syntax --- */
3816 if(h->sps.poc_type==0){
3817 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3819 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3820 h->delta_poc_bottom= get_se_golomb(&s->gb);
3824 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3825 h->delta_poc[0]= get_se_golomb(&s->gb);
3827 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3828 h->delta_poc[1]= get_se_golomb(&s->gb);
3833 if(h->pps.redundant_pic_cnt_present){
3834 h->redundant_pic_count= get_ue_golomb(&s->gb);
3837 //set defaults, might be overridden a few lines later
3838 h->ref_count[0]= h->pps.ref_count[0];
3839 h->ref_count[1]= h->pps.ref_count[1];
3841 if(h->slice_type_nos != FF_I_TYPE){
3842 if(h->slice_type_nos == FF_B_TYPE){
3843 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3845 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3847 if(num_ref_idx_active_override_flag){
3848 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3849 if(h->slice_type_nos==FF_B_TYPE)
3850 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned-trick range check: catches both 0 and >32 */
3852 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3853 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3854 h->ref_count[0]= h->ref_count[1]= 1;
3858 if(h->slice_type_nos == FF_B_TYPE)
/* --- reference list construction and reordering --- */
3865 if(!default_ref_list_done){
3866 fill_default_ref_list(h);
3869 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3872 if(h->slice_type_nos!=FF_I_TYPE){
3873 s->last_picture_ptr= &h->ref_list[0][0];
3874 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3876 if(h->slice_type_nos==FF_B_TYPE){
3877 s->next_picture_ptr= &h->ref_list[1][0];
3878 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* --- weighted prediction --- */
3881 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3882 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3883 pred_weight_table(h);
3884 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3885 implicit_weight_table(h);
3890 decode_ref_pic_marking(h0, &s->gb);
3893 fill_mbaff_ref_list(h);
3895 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3896 direct_dist_scale_factor(h);
3897 direct_ref_list_init(h);
3899 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3900 tmp = get_ue_golomb(&s->gb);
3902 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3905 h->cabac_init_idc= tmp;
3908 h->last_qscale_diff = 0;
3909 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3911 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3915 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3916 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3917 //FIXME qscale / qp ... stuff
3918 if(h->slice_type == FF_SP_TYPE){
3919 get_bits1(&s->gb); /* sp_for_switch_flag */
3921 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3922 get_se_golomb(&s->gb); /* slice_qs_delta */
/* --- deblocking filter parameters --- */
3925 h->deblocking_filter = 1;
3926 h->slice_alpha_c0_offset = 0;
3927 h->slice_beta_offset = 0;
3928 if( h->pps.deblocking_filter_parameters_present ) {
3929 tmp= get_ue_golomb(&s->gb);
3931 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3934 h->deblocking_filter= tmp;
/* bitstream idc 0=on, 1=off; internal convention is the opposite */
3935 if(h->deblocking_filter < 2)
3936 h->deblocking_filter^= 1; // 1<->0
3938 if( h->deblocking_filter ) {
3939 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3940 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3944 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3945 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3946 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3947 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3948 h->deblocking_filter= 0;
3950 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3951 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3952 /* Cheat slightly for speed:
3953 Do not bother to deblock across slices. */
3954 h->deblocking_filter = 2;
3956 h0->max_contexts = 1;
3957 if(!h0->single_decode_warning) {
3958 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3959 h0->single_decode_warning = 1;
3962 return 1; // deblocking switched inside frame
/* NOTE(review): the '?' bit-count below is a literal placeholder —
 * upstream keeps this FMO slice_group_change_cycle read inside a
 * disabled #if 0 block (FMO is unsupported); confirm against full file */
3967 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3968 slice_group_change_cycle= get_bits(&s->gb, ?);
3971 h0->last_slice_type = slice_type;
3972 h->slice_num = ++h0->current_slice;
3973 if(h->slice_num >= MAX_SLICES){
3974 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* --- build ref2frm lookup: packs frame_num and reference-field bits --- */
3978 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3982 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3983 +(h->ref_list[j][i].reference&3);
3986 for(i=16; i<48; i++)
3987 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3988 +(h->ref_list[j][i].reference&3);
3991 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3992 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3994 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3995 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3997 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3999 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4000 pps_id, h->frame_num,
4001 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4002 h->ref_count[0], h->ref_count[1],
4004 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4006 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4007 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Read a CAVLC level_prefix: the number of leading zero bits before the
 * terminating 1, read via the raw bit-reader macros for speed.
 * NOTE(review): extract omits the declarations of buf/log, the TRACE
 * guard around the debug prints, and the return statement.
 */
4017 static inline int get_level_prefix(GetBitContext *gb){
4021 OPEN_READER(re, gb);
4022 UPDATE_CACHE(re, gb);
4023 buf=GET_CACHE(re, gb);
/* position of the first set bit; prefix length is log-1 */
4025 log= 32 - av_log2(buf);
4027 print_bin(buf>>(32-log), log);
4028 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4031 LAST_SKIP_BITS(re, gb, log);
4032 CLOSE_READER(re, gb);
/**
 * Check whether the current macroblock's sub-partitions permit the
 * 8x8 transform: every sub-block must be 8x8, and direct sub-blocks
 * only qualify when direct_8x8_inference is enabled.
 * NOTE(review): extract omits the surrounding loop and return values.
 */
4037 static inline int get_dct8x8_allowed(H264Context *h){
4040 if(!IS_SUB_8X8(h->sub_mb_type[i])
4041 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4048 * decodes a residual block.
4049 * @param n block index
4050 * @param scantable scantable
4051 * @param max_coeff number of coefficients in the block
4052 * @return <0 if an error occurred
/* CAVLC residual decoding: reads coeff_token, trailing ones, level codes,
 * total_zeros and run_before, then scatters levels into `block` via
 * `scantable` (optionally dequantizing with `qmul`).
 * NOTE(review): interior lines (several else-branches and closing braces)
 * are missing from this excerpt. */
4054 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4055 MpegEncContext * const s = &h->s;
4056 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4058 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4060 //FIXME put trailing_onex into the context
/* coeff_token VLC selection depends on the block type: chroma DC uses its
 * own table, luma blocks pick a table from the predicted non-zero count. */
4062 if(n == CHROMA_DC_BLOCK_INDEX){
4063 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4064 total_coeff= coeff_token>>2;
4066 if(n == LUMA_DC_BLOCK_INDEX){
4067 total_coeff= pred_non_zero_count(h, 0);
4068 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4069 total_coeff= coeff_token>>2;
4071 total_coeff= pred_non_zero_count(h, n);
4072 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4073 total_coeff= coeff_token>>2;
4074 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4078 //FIXME set last_non_zero?
/* Guard against a corrupted bitstream yielding more coefficients than the
 * block can hold. */
4082 if(total_coeff > (unsigned)max_coeff) {
4083 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* coeff_token packs trailing_ones in its two low bits. */
4087 trailing_ones= coeff_token&3;
4088 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4089 assert(total_coeff<=16);
/* Trailing ones are +/-1; three sign bits are peeked at once and mapped to
 * levels, then only `trailing_ones` of those bits are consumed. */
4091 i = show_bits(gb, 3);
4092 skip_bits(gb, trailing_ones);
4093 level[0] = 1-((i&4)>>1);
4094 level[1] = 1-((i&2) );
4095 level[2] = 1-((i&1)<<1);
4097 if(trailing_ones<total_coeff) {
4098 int level_code, mask;
4099 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4100 int prefix= get_level_prefix(gb);
4102 //first coefficient has suffix_length equal to 0 or 1
4103 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4105 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4107 level_code= (prefix<<suffix_length); //part
4108 }else if(prefix==14){
4110 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4112 level_code= prefix + get_bits(gb, 4); //part
4114 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4115 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4117 level_code += (1<<(prefix-3))-4096;
4120 if(trailing_ones < 3) level_code += 2;
/* Map unsigned level_code to a signed level: even -> positive, odd ->
 * negative, via the sign-mask trick. */
4125 mask= -(level_code&1);
4126 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4128 //remaining coefficients have suffix_length > 0
4129 for(i=trailing_ones+1;i<total_coeff;i++) {
4130 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4131 prefix = get_level_prefix(gb);
4133 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4135 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4137 level_code += (1<<(prefix-3))-4096;
4139 mask= -(level_code&1);
4140 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* suffix_length grows once the decoded magnitude exceeds its limit. */
4141 if(level_code > suffix_limit[suffix_length])
/* total_zeros: number of zeros interleaved before the last coefficient;
 * skipped when the block is already full. */
4146 if(total_coeff == max_coeff)
4149 if(n == CHROMA_DC_BLOCK_INDEX)
4150 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4152 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* Scatter levels back-to-front; first path stores raw levels (qmul==NULL
 * case, presumably — the branch condition itself is not visible here). */
4155 coeff_num = zeros_left + total_coeff - 1;
4156 j = scantable[coeff_num];
4158 block[j] = level[0];
4159 for(i=1;i<total_coeff;i++) {
4162 else if(zeros_left < 7){
4163 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4165 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4167 zeros_left -= run_before;
4168 coeff_num -= 1 + run_before;
4169 j= scantable[ coeff_num ];
/* Second path: identical run_before logic but with dequantization
 * (value * qmul[j] + 32) >> 6. */
4174 block[j] = (level[0] * qmul[j] + 32)>>6;
4175 for(i=1;i<total_coeff;i++) {
4178 else if(zeros_left < 7){
4179 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4181 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4183 zeros_left -= run_before;
4184 coeff_num -= 1 + run_before;
4185 j= scantable[ coeff_num ];
4187 block[j]= (level[i] * qmul[j] + 32)>>6;
4192 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predict mb_field_decoding_flag for a skipped MBAFF pair from the left
 * neighbour (preferred) or the top neighbour, falling back when neither
 * belongs to the current slice (fallback value not visible in excerpt). */
4199 static void predict_field_decoding_flag(H264Context *h){
4200 MpegEncContext * const s = &h->s;
4201 const int mb_xy= h->mb_xy;
4202 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4203 ? s->current_picture.mb_type[mb_xy-1]
4204 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4205 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4207 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4211 * decodes a P_SKIP or B_SKIP macroblock
/* Reconstructs motion for a skipped MB: direct prediction for B slices,
 * pred_pskip_motion for P slices, then writes back MB state. */
4213 static void decode_mb_skip(H264Context *h){
4214 MpegEncContext * const s = &h->s;
4215 const int mb_xy= h->mb_xy;
/* Skipped MBs carry no residual: clear the non-zero-count bookkeeping. */
4218 memset(h->non_zero_count[mb_xy], 0, 16);
4219 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4222 mb_type|= MB_TYPE_INTERLACED;
4224 if( h->slice_type_nos == FF_B_TYPE )
4226 // just for fill_caches. pred_direct_motion will set the real mb_type
4227 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4229 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4230 pred_direct_motion(h, &mb_type);
4231 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path: 16x16 L0 prediction with the P-skip motion vector. */
4236 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4238 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4239 pred_pskip_motion(h, &mx, &my);
4240 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4241 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* Commit per-MB state to the picture-level tables. */
4244 write_back_motion(h, mb_type);
4245 s->current_picture.mb_type[mb_xy]= mb_type;
4246 s->current_picture.qscale_table[mb_xy]= s->qscale;
4247 h->slice_table[ mb_xy ]= h->slice_num;
4248 h->prev_mb_skipped= 1;
4252 * decodes a macroblock
4253 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CAVLC macroblock layer: skip-run handling, mb_type, intra pred
 * modes or inter motion (8x8 sub-partitions / 16x16 / 16x8 / 8x16), CBP,
 * dquant and residual blocks.  NOTE(review): many interior lines are
 * missing from this excerpt. */
4255 static int decode_mb_cavlc(H264Context *h){
4256 MpegEncContext * const s = &h->s;
4258 int partition_count;
4259 unsigned int mb_type, cbp;
4260 int dct8x8_allowed= h->pps.transform_8x8_mode;
4262 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4264 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4266 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4267 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run handling (P/B slices only) --- */
4269 if(h->slice_type_nos != FF_I_TYPE){
4270 if(s->mb_skip_run==-1)
4271 s->mb_skip_run= get_ue_golomb(&s->gb);
4273 if (s->mb_skip_run--) {
4274 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4275 if(s->mb_skip_run==0)
4276 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4278 predict_field_decoding_flag(h);
4285 if( (s->mb_y&1) == 0 )
4286 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4289 h->prev_mb_skipped= 0;
/* --- mb_type: slice-type dependent lookup tables --- */
4291 mb_type= get_ue_golomb(&s->gb);
4292 if(h->slice_type_nos == FF_B_TYPE){
4294 partition_count= b_mb_type_info[mb_type].partition_count;
4295 mb_type= b_mb_type_info[mb_type].type;
4298 goto decode_intra_mb;
4300 }else if(h->slice_type_nos == FF_P_TYPE){
4302 partition_count= p_mb_type_info[mb_type].partition_count;
4303 mb_type= p_mb_type_info[mb_type].type;
4306 goto decode_intra_mb;
4309 assert(h->slice_type_nos == FF_I_TYPE);
4310 if(h->slice_type == FF_SI_TYPE && mb_type)
4314 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4318 cbp= i_mb_type_info[mb_type].cbp;
4319 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4320 mb_type= i_mb_type_info[mb_type].type;
4324 mb_type |= MB_TYPE_INTERLACED;
4326 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow, byte aligned --- */
4328 if(IS_INTRA_PCM(mb_type)){
4331 // We assume these blocks are very rare so we do not optimize it.
4332 align_get_bits(&s->gb);
4334 // The pixels are stored in the same order as levels in h->mb array.
4335 for(x=0; x < (CHROMA ? 384 : 256); x++){
4336 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4339 // In deblocking, the quantizer is 0
4340 s->current_picture.qscale_table[mb_xy]= 0;
4341 // All coeffs are present
4342 memset(h->non_zero_count[mb_xy], 16, 16);
4344 s->current_picture.mb_type[mb_xy]= mb_type;
/* ref counts are doubled here — presumably for MBAFF field pairing;
 * mirrored by the >>1 at the end of the function. */
4349 h->ref_count[0] <<= 1;
4350 h->ref_count[1] <<= 1;
4353 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
4356 if(IS_INTRA(mb_type)){
4358 // init_top_left_availability(h);
4359 if(IS_INTRA4x4(mb_type)){
4362 if(dct8x8_allowed && get_bits1(&s->gb)){
4363 mb_type |= MB_TYPE_8x8DCT;
4367 // fill_intra4x4_pred_table(h);
4368 for(i=0; i<16; i+=di){
4369 int mode= pred_intra_mode(h, i);
4371 if(!get_bits1(&s->gb)){
4372 const int rem_mode= get_bits(&s->gb, 3);
4373 mode = rem_mode + (rem_mode >= mode);
4377 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4379 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4381 write_back_intra_pred_mode(h);
4382 if( check_intra4x4_pred_mode(h) < 0)
4385 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4386 if(h->intra16x16_pred_mode < 0)
4390 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4393 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 sub-partitions --- */
4395 }else if(partition_count==4){
4396 int i, j, sub_partition_count[4], list, ref[2][4];
4398 if(h->slice_type_nos == FF_B_TYPE){
4400 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4401 if(h->sub_mb_type[i] >=13){
4402 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4405 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4406 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4408 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4409 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4410 pred_direct_motion(h, &mb_type);
4411 h->ref_cache[0][scan8[4]] =
4412 h->ref_cache[1][scan8[4]] =
4413 h->ref_cache[0][scan8[12]] =
4414 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4417 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4419 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4420 if(h->sub_mb_type[i] >=4){
4421 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4424 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4425 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each 8x8 partition, per list */
4429 for(list=0; list<h->list_count; list++){
4430 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4432 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4433 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4434 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4436 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4448 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors for each sub-partition */
4450 for(list=0; list<h->list_count; list++){
4452 if(IS_DIRECT(h->sub_mb_type[i])) {
4453 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4456 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4457 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4459 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4460 const int sub_mb_type= h->sub_mb_type[i];
4461 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4462 for(j=0; j<sub_partition_count[i]; j++){
4464 const int index= 4*i + block_width*j;
4465 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4466 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4467 mx += get_se_golomb(&s->gb);
4468 my += get_se_golomb(&s->gb);
4469 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV across the mv_cache cells the sub-block covers */
4471 if(IS_SUB_8X8(sub_mb_type)){
4473 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4475 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4476 }else if(IS_SUB_8X4(sub_mb_type)){
4477 mv_cache[ 1 ][0]= mx;
4478 mv_cache[ 1 ][1]= my;
4479 }else if(IS_SUB_4X8(sub_mb_type)){
4480 mv_cache[ 8 ][0]= mx;
4481 mv_cache[ 8 ][1]= my;
4483 mv_cache[ 0 ][0]= mx;
4484 mv_cache[ 0 ][1]= my;
4487 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4493 }else if(IS_DIRECT(mb_type)){
4494 pred_direct_motion(h, &mb_type);
4495 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter, whole-MB partitions (16x16 / 16x8 / 8x16) --- */
4497 int list, mx, my, i;
4498 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4499 if(IS_16X16(mb_type)){
4500 for(list=0; list<h->list_count; list++){
4502 if(IS_DIR(mb_type, 0, list)){
4503 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4504 if(val >= h->ref_count[list]){
4505 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4509 val= LIST_NOT_USED&0xFF;
4510 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4512 for(list=0; list<h->list_count; list++){
4514 if(IS_DIR(mb_type, 0, list)){
4515 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4516 mx += get_se_golomb(&s->gb);
4517 my += get_se_golomb(&s->gb);
4518 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4520 val= pack16to32(mx,my);
4523 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4526 else if(IS_16X8(mb_type)){
4527 for(list=0; list<h->list_count; list++){
4530 if(IS_DIR(mb_type, i, list)){
4531 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4532 if(val >= h->ref_count[list]){
4533 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4537 val= LIST_NOT_USED&0xFF;
4538 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4541 for(list=0; list<h->list_count; list++){
4544 if(IS_DIR(mb_type, i, list)){
4545 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4546 mx += get_se_golomb(&s->gb);
4547 my += get_se_golomb(&s->gb);
4548 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4550 val= pack16to32(mx,my);
4553 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4557 assert(IS_8X16(mb_type));
4558 for(list=0; list<h->list_count; list++){
4561 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4562 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4563 if(val >= h->ref_count[list]){
4564 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4568 val= LIST_NOT_USED&0xFF;
4569 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4572 for(list=0; list<h->list_count; list++){
4575 if(IS_DIR(mb_type, i, list)){
4576 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4577 mx += get_se_golomb(&s->gb);
4578 my += get_se_golomb(&s->gb);
4579 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4581 val= pack16to32(mx,my);
4584 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4590 if(IS_INTER(mb_type))
4591 write_back_motion(h, mb_type);
/* --- coded_block_pattern (not present for Intra16x16, which carries CBP
 * in its mb_type) --- */
4593 if(!IS_INTRA16x16(mb_type)){
4594 cbp= get_ue_golomb(&s->gb);
4596 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4601 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4602 else cbp= golomb_to_inter_cbp [cbp];
4604 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4605 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with luma residual */
4610 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4611 if(get_bits1(&s->gb)){
4612 mb_type |= MB_TYPE_8x8DCT;
4613 h->cbp_table[mb_xy]= cbp;
4616 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
4618 if(cbp || IS_INTRA16x16(mb_type)){
4619 int i8x8, i4x4, chroma_idx;
4621 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4622 const uint8_t *scan, *scan8x8, *dc_scan;
4624 // fill_non_zero_count_cache(h);
4626 if(IS_INTERLACED(mb_type)){
4627 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4628 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4629 dc_scan= luma_dc_field_scan;
4631 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4632 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4633 dc_scan= luma_dc_zigzag_scan;
4636 dquant= get_se_golomb(&s->gb);
4638 if( dquant > 25 || dquant < -26 ){
4639 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's mb_qp_delta accumulation */
4643 s->qscale += dquant;
4644 if(((unsigned)s->qscale) > 51){
4645 if(s->qscale<0) s->qscale+= 52;
4646 else s->qscale-= 52;
4649 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4650 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4651 if(IS_INTRA16x16(mb_type)){
4652 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4653 return -1; //FIXME continue if partitioned and other return -1 too
4656 assert((cbp&15) == 0 || (cbp&15) == 15);
4659 for(i8x8=0; i8x8<4; i8x8++){
4660 for(i4x4=0; i4x4<4; i4x4++){
4661 const int index= i4x4 + 4*i8x8;
4662 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4668 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4671 for(i8x8=0; i8x8<4; i8x8++){
4672 if(cbp & (1<<i8x8)){
4673 if(IS_8x8DCT(mb_type)){
4674 DCTELEM *buf = &h->mb[64*i8x8];
4676 for(i4x4=0; i4x4<4; i4x4++){
4677 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4678 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4681 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4682 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4684 for(i4x4=0; i4x4<4; i4x4++){
4685 const int index= i4x4 + 4*i8x8;
4687 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4693 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4694 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC then chroma AC blocks */
4700 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4701 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4707 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4708 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4709 for(i4x4=0; i4x4<4; i4x4++){
4710 const int index= 16 + 4*chroma_idx + i4x4;
4711 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4717 uint8_t * const nnz= &h->non_zero_count_cache[0];
4718 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4719 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4722 uint8_t * const nnz= &h->non_zero_count_cache[0];
4723 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4724 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4725 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4727 s->current_picture.qscale_table[mb_xy]= s->qscale;
4728 write_back_non_zero_count(h);
/* undo the earlier ref_count doubling */
4731 h->ref_count[0] >>= 1;
4732 h->ref_count[1] >>= 1;
/* Decode mb_field_decoding_flag with CABAC: context 70..72, incremented
 * per interlaced neighbour (left MB pair and the pair above) that belongs
 * to the current slice. */
4738 static int decode_cabac_field_decoding_flag(H264Context *h) {
4739 MpegEncContext * const s = &h->s;
4740 const int mb_x = s->mb_x;
4741 const int mb_y = s->mb_y & ~1;
/* neighbours are addressed at MB-pair granularity (mb_y rounded down) */
4742 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4743 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4745 unsigned int ctx = 0;
4747 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4750 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4754 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decode an intra mb_type with CABAC: 0 = I4x4, 25 = I_PCM, otherwise an
 * I16x16 variant (1..24) built from cbp_luma/cbp_chroma/pred-mode bits.
 * `ctx_base` selects the state group, `intra_slice` adjusts neighbour
 * context derivation and state offsets for I vs P/B slices. */
4757 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4758 uint8_t *state= &h->cabac_state[ctx_base];
4762 MpegEncContext * const s = &h->s;
4763 const int mba_xy = h->left_mb_xy[0];
4764 const int mbb_xy = h->top_mb_xy;
/* context increment per non-I4x4 neighbour in the same slice */
4766 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4768 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4770 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4771 return 0; /* I4x4 */
4774 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4775 return 0; /* I4x4 */
4778 if( get_cabac_terminate( &h->cabac ) )
4779 return 25; /* PCM */
4781 mb_type = 1; /* I16x16 */
4782 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4783 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4784 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4785 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4786 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decode mb_type with CABAC for the current slice type.  I slices defer
 * to decode_cabac_intra_mb_type; P slices use states 14..17; B slices use
 * states 27+ with a direct-neighbour context and a small bit tree. */
4790 static int decode_cabac_mb_type( H264Context *h ) {
4791 MpegEncContext * const s = &h->s;
4793 if( h->slice_type_nos == FF_I_TYPE ) {
4794 return decode_cabac_intra_mb_type(h, 3, 1);
4795 } else if( h->slice_type_nos == FF_P_TYPE ) {
4796 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4798 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4799 /* P_L0_D16x16, P_8x8 */
4800 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4802 /* P_L0_D8x16, P_L0_D16x8 */
4803 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra MB inside a P slice: intra types start at offset 5 */
4806 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4809 const int mba_xy = h->left_mb_xy[0];
4810 const int mbb_xy = h->top_mb_xy;
4813 assert(h->slice_type_nos == FF_B_TYPE);
/* context: count of same-slice neighbours that are not DIRECT */
4815 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4817 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4820 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4821 return 0; /* B_Direct_16x16 */
4823 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4824 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit code distinguishes the remaining B types */
4827 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4828 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4829 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4830 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4832 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4833 else if( bits == 13 ) {
4834 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4835 } else if( bits == 14 )
4836 return 11; /* B_L1_L0_8x16 */
4837 else if( bits == 15 )
4838 return 22; /* B_8x8 */
4840 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4841 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* Decode mb_skip_flag with CABAC.  Context depends on whether the left
 * and top neighbours (MBAFF-aware addressing in the FRAME_MBAFF branch)
 * are themselves non-skipped; B slices use a separate state group. */
4845 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4846 MpegEncContext * const s = &h->s;
4850 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4851 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4854 && h->slice_table[mba_xy] == h->slice_num
4855 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4856 mba_xy += s->mb_stride;
4858 mbb_xy = mb_xy - s->mb_stride;
4860 && h->slice_table[mbb_xy] == h->slice_num
4861 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4862 mbb_xy -= s->mb_stride;
4864 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4866 int mb_xy = h->mb_xy;
4868 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4871 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4873 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
/* B slices offset the context index (exact offset line not visible) */
4876 if( h->slice_type_nos == FF_B_TYPE )
4878 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decode a 4x4 intra prediction mode: one "use predicted mode" bit (state
 * 68); otherwise a 3-bit rem_intra4x4_pred_mode (state 69), adjusted so
 * the predicted mode is skipped in the numbering. */
4881 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4884 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4887 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4888 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4889 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4891 if( mode >= pred_mode )
/* Decode intra_chroma_pred_mode with CABAC (states 64..67): context from
 * neighbours with non-zero chroma pred mode, then a truncated unary code. */
4897 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4898 const int mba_xy = h->left_mb_xy[0];
4899 const int mbb_xy = h->top_mb_xy;
4903 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4904 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4907 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4910 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4913 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4915 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Decode the 4-bit luma CBP with CABAC (states 73..76).  Each 8x8 block's
 * context comes from the already-decoded bits of this MB and from the
 * left/top neighbours' cached CBP (-1 when outside the slice, which makes
 * the !(cbp_x & mask) tests treat it as "coded"). */
4921 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4922 int cbp_b, cbp_a, ctx, cbp = 0;
4924 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4925 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4927 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4928 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4929 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4930 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4931 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4932 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4933 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4934 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* Decode the chroma CBP (0 = none, 1 = DC only, 2 = DC+AC) with CABAC
 * states 77+, contexted on the neighbours' chroma CBP bits. */
4937 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4941 cbp_a = (h->left_cbp>>4)&0x03;
4942 cbp_b = (h-> top_cbp>>4)&0x03;
4945 if( cbp_a > 0 ) ctx++;
4946 if( cbp_b > 0 ) ctx += 2;
4947 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: DC-only vs DC+AC, contexted on neighbours having AC */
4951 if( cbp_a == 2 ) ctx++;
4952 if( cbp_b == 2 ) ctx += 2;
4953 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decode mb_qp_delta with CABAC (states 60+): unary magnitude, then map
 * the count to a signed delta (odd -> positive, even -> negative —
 * mapping partially visible).  Bounded at 102 iterations to avoid an
 * infinite loop on corrupt data. */
4955 static int decode_cabac_mb_dqp( H264Context *h) {
4959 if( h->last_qscale_diff != 0 )
4962 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4968 if(val > 102) //prevent infinite loop
4975 return -(val + 1)/2;
/* Decode a P-slice sub_mb_type (states 21..23); return values not all
 * visible in this excerpt. */
4977 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4978 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4980 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4982 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode a B-slice sub_mb_type (states 36..39): 0 = B_Direct_8x8, then a
 * small binary tree over the remaining 8x8/8x4/4x8/4x4 L0/L1/Bi types. */
4986 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4988 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4989 return 0; /* B_Direct_8x8 */
4990 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4991 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4993 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4994 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4995 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4998 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4999 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag (states 399..401), contexted on how many
 * neighbours use the 8x8 transform (h->neighbor_transform_size). */
5003 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5004 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for block n of `list` with CABAC (states 54+): context
 * from cached left/top ref indices (direct-predicted neighbours don't
 * count in B slices), then a unary code capped at 32 to stop runaway
 * loops on corrupt streams. */
5007 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5008 int refa = h->ref_cache[list][scan8[n] - 1];
5009 int refb = h->ref_cache[list][scan8[n] - 8];
5013 if( h->slice_type_nos == FF_B_TYPE) {
5014 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5016 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5025 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5031 if(ref >= 32 /*h->ref_list[list]*/){
/* Decode one motion-vector-difference component (l: 0=x, 1=y) with CABAC.
 * Context from the summed neighbour |mvd|; unary prefix up to 9 with
 * adaptive contexts, then exp-Golomb-style bypass suffix, then sign. */
5038 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5039 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5040 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5041 int ctxbase = (l == 0) ? 40 : 47;
5043 int ctx = (amvd>2) + (amvd>32);
5045 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5050 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exponential suffix for |mvd| >= 9 */
5058 while( get_cabac_bypass( &h->cabac ) ) {
5062 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5067 if( get_cabac_bypass( &h->cabac ) )
5071 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Context index for the coded_block_flag of category `cat`, block `idx`:
 * neighbour non-zero flags come from the cached CBP (DC / chroma-DC
 * cases) or from non_zero_count_cache (AC/4x4 cases), combined as
 * ctx + 4*cat. */
5074 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5080 nza = h->left_cbp&0x100;
5081 nzb = h-> top_cbp&0x100;
5083 nza = (h->left_cbp>>(6+idx))&0x01;
5084 nzb = (h-> top_cbp>>(6+idx))&0x01;
5087 assert(cat == 1 || cat == 2 || cat == 4);
5088 nza = h->non_zero_count_cache[scan8[idx] - 1];
5089 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5098 return ctx + 4 * cat;
/* Per-scan-position context offsets for last_significant_coeff_flag in
 * 8x8 blocks (63 entries; position 63 never queries last-flag). Declared
 * via DECLARE_ASM_CONST — presumably so asm helpers can reference it. */
5101 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5102 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5103 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5104 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5105 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* CABAC residual decoding core: coded_block_flag, significance map, then
 * coefficient levels/signs (optionally dequantized via qmul).  `is_dc`
 * is a compile-time flag so the DC and non-DC paths specialize.
 * NOTE(review): several interior lines (loop heads, some context math)
 * are missing from this excerpt. */
5108 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* context-state base offsets per category, [MB_FIELD][cat] */
5109 static const int significant_coeff_flag_offset[2][6] = {
5110 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5111 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5113 static const int last_coeff_flag_offset[2][6] = {
5114 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5115 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5117 static const int coeff_abs_level_m1_offset[6] = {
5118 227+0, 227+10, 227+20, 227+30, 227+39, 426
5120 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5121 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5122 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5123 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5124 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5125 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5126 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5127 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5128 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5130 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5131 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5132 * map node ctx => cabac ctx for level=1 */
5133 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5134 /* map node ctx => cabac ctx for level>1 */
5135 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5136 static const uint8_t coeff_abs_level_transition[2][8] = {
5137 /* update node ctx after decoding a level=1 */
5138 { 1, 2, 3, 3, 4, 5, 6, 7 },
5139 /* update node ctx after decoding a level>1 */
5140 { 4, 4, 4, 4, 5, 6, 7, 7 }
5146 int coeff_count = 0;
5149 uint8_t *significant_coeff_ctx_base;
5150 uint8_t *last_coeff_ctx_base;
5151 uint8_t *abs_level_m1_ctx_base;
/* A local CABACContext copy keeps the hot state in registers/stack. */
5154 #define CABAC_ON_STACK
5156 #ifdef CABAC_ON_STACK
5159 cc.range = h->cabac.range;
5160 cc.low = h->cabac.low;
5161 cc.bytestream= h->cabac.bytestream;
5163 #define CC &h->cabac
5167 /* cat: 0-> DC 16x16 n = 0
5168 * 1-> AC 16x16 n = luma4x4idx
5169 * 2-> Luma4x4 n = luma4x4idx
5170 * 3-> DC Chroma n = iCbCr
5171 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5172 * 5-> Luma8x8 n = 4 * luma8x8idx
5175 /* read coded block flag */
5176 if( is_dc || cat != 5 ) {
5177 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5179 h->non_zero_count_cache[scan8[n]] = 0;
5181 #ifdef CABAC_ON_STACK
5182 h->cabac.range = cc.range ;
5183 h->cabac.low = cc.low ;
5184 h->cabac.bytestream= cc.bytestream;
5190 significant_coeff_ctx_base = h->cabac_state
5191 + significant_coeff_flag_offset[MB_FIELD][cat];
5192 last_coeff_ctx_base = h->cabac_state
5193 + last_coeff_flag_offset[MB_FIELD][cat];
5194 abs_level_m1_ctx_base = h->cabac_state
5195 + coeff_abs_level_m1_offset[cat];
/* significance map: records indices of nonzero coefficients */
5197 if( !is_dc && cat == 5 ) {
5198 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5199 for(last= 0; last < coefs; last++) { \
5200 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5201 if( get_cabac( CC, sig_ctx )) { \
5202 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5203 index[coeff_count++] = last; \
5204 if( get_cabac( CC, last_ctx ) ) { \
5210 if( last == max_coeff -1 ) {\
5211 index[coeff_count++] = last;\
5213 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5214 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5215 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5217 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5219 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5221 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5224 assert(coeff_count > 0);
/* update CBP bookkeeping so later blocks derive correct contexts */
5228 h->cbp_table[h->mb_xy] |= 0x100;
5230 h->cbp_table[h->mb_xy] |= 0x40 << n;
5233 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5235 assert( cat == 1 || cat == 2 || cat == 4 );
5236 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode levels back-to-front, tracking a node context that adapts the
 * level contexts to the magnitudes seen so far */
5241 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5243 int j= scantable[index[--coeff_count]];
5245 if( get_cabac( CC, ctx ) == 0 ) {
5246 node_ctx = coeff_abs_level_transition[0][node_ctx];
5248 block[j] = get_cabac_bypass_sign( CC, -1);
5250 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5254 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5255 node_ctx = coeff_abs_level_transition[1][node_ctx];
5257 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* magnitudes >= 15 continue with a bypass-coded exp-Golomb suffix */
5261 if( coeff_abs >= 15 ) {
5263 while( get_cabac_bypass( CC ) ) {
5269 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5275 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5277 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5280 } while( coeff_count );
5281 #ifdef CABAC_ON_STACK
5282 h->cabac.range = cc.range ;
5283 h->cabac.low = cc.low ;
5284 h->cabac.bytestream= cc.bytestream;
5289 #ifndef CONFIG_SMALL
/** Thin wrapper: decode a DC residual block (is_dc=1) via the shared CABAC
 *  residual decoder; compiled only when CONFIG_SMALL is not set.
 *  NOTE(review): the closing brace is elided in this excerpt. */
5290 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5291 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/** Thin wrapper: decode a non-DC (AC / full-block) residual via the shared
 *  CABAC residual decoder (is_dc=0).
 *  NOTE(review): the closing brace is elided in this excerpt. */
5294 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5295 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/** Dispatcher for CABAC residual decoding. Categories 0 and 3 are the DC
 *  categories (luma DC uses dc_scan, chroma DC uses chroma_dc_scan at the
 *  call sites below); all other categories go to the non-DC path.
 *  NOTE(review): the CONFIG_SMALL #ifdef/#else scaffolding between these
 *  lines is elided in this excerpt — the single-call and two-wrapper
 *  variants appear merged here; confirm against the full file. */
5299 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5301 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5303 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5304 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The defaults (mb above / mb to the left) are then adjusted for MBAFF
 * pairs and field pictures, where the neighbour in decoding order is not
 * the spatial neighbour.
 * NOTE(review): interior lines (branch headers / closing braces) are
 * elided in this excerpt — gaps in the embedded numbering.
 */
5308 static inline void compute_mb_neighbors(H264Context *h)
5310 MpegEncContext * const s = &h->s;
5311 const int mb_xy = h->mb_xy;
/* default frame-coded neighbours */
5312 h->top_mb_xy = mb_xy - s->mb_stride;
5313 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF path: neighbours are computed from the top MB of each pair */
5315 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5316 const int top_pair_xy = pair_xy - s->mb_stride;
5317 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5318 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5319 const int curr_mb_frame_flag = !MB_FIELD;
5320 const int bottom = (s->mb_y & 1);
5322 ? !curr_mb_frame_flag // bottom macroblock
5323 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5325 h->top_mb_xy -= s->mb_stride;
/* left neighbour crosses a frame/field coding boundary */
5327 if (left_mb_frame_flag != curr_mb_frame_flag) {
5328 h->left_mb_xy[0] = pair_xy - 1;
5330 } else if (FIELD_PICTURE) {
5331 h->top_mb_xy -= s->mb_stride;
5337 * decodes a macroblock
5338 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one macroblock's syntax elements with the CABAC entropy coder:
 * skip flag, mb_type, prediction modes, motion data, CBP, delta-QP and
 * residual coefficients.
 * @return 0 on success, -1 on a decode error
 * NOTE(review): many interior lines (closing braces, else branches,
 * 'return 0' paths) are elided in this excerpt — gaps in the embedded
 * numbering. Comments below describe only what the visible lines show.
 */
5340 static int decode_mb_cabac(H264Context *h) {
5341 MpegEncContext * const s = &h->s;
5343 int mb_type, partition_count, cbp = 0;
5344 int dct8x8_allowed= h->pps.transform_8x8_mode;
5346 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5348 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5350 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- mb_skip_flag handling (P/B slices only; I slices have no skip) --- */
5351 if( h->slice_type_nos != FF_I_TYPE ) {
5353 /* a skipped mb needs the aff flag from the following mb */
5354 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5355 predict_field_decoding_flag(h);
5356 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5357 skip = h->next_mb_skipped;
5359 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5360 /* read skip flags */
5362 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5363 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* peek the skip flag of the bottom MB of the pair */
5364 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5365 if(h->next_mb_skipped)
5366 predict_field_decoding_flag(h);
5368 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* reset per-mb context for the skipped macroblock */
5373 h->cbp_table[mb_xy] = 0;
5374 h->chroma_pred_mode_table[mb_xy] = 0;
5375 h->last_qscale_diff = 0;
/* --- mb_field_decoding_flag for the top MB of an MBAFF pair --- */
5382 if( (s->mb_y&1) == 0 )
5384 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5387 h->prev_mb_skipped = 0;
5389 compute_mb_neighbors(h);
5390 mb_type = decode_cabac_mb_type( h );
5391 assert(mb_type >= 0);
/* --- map the raw mb_type index through the per-slice-type info tables --- */
5393 if( h->slice_type_nos == FF_B_TYPE ) {
5395 partition_count= b_mb_type_info[mb_type].partition_count;
5396 mb_type= b_mb_type_info[mb_type].type;
5399 goto decode_intra_mb;
5401 } else if( h->slice_type_nos == FF_P_TYPE ) {
5403 partition_count= p_mb_type_info[mb_type].partition_count;
5404 mb_type= p_mb_type_info[mb_type].type;
5407 goto decode_intra_mb;
5410 if(h->slice_type == FF_SI_TYPE && mb_type)
5412 assert(h->slice_type_nos == FF_I_TYPE);
5414 partition_count = 0;
5415 cbp= i_mb_type_info[mb_type].cbp;
5416 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5417 mb_type= i_mb_type_info[mb_type].type;
5420 mb_type |= MB_TYPE_INTERLACED;
5422 h->slice_table[ mb_xy ]= h->slice_num;
/* --- PCM macroblock: raw pixel bytes follow in the bitstream --- */
5424 if(IS_INTRA_PCM(mb_type)) {
5427 // We assume these blocks are very rare so we do not optimize it.
5428 // FIXME The two following lines get the bitstream position in the cabac
5429 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5430 ptr= h->cabac.bytestream;
5431 if(h->cabac.low&0x1) ptr--;
5433 if(h->cabac.low&0x1FF) ptr--;
5436 // The pixels are stored in the same order as levels in h->mb array.
5437 memcpy(h->mb, ptr, 256); ptr+=256;
5439 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* restart the CABAC decoder after the raw PCM bytes */
5442 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5444 // All blocks are present
5445 h->cbp_table[mb_xy] = 0x1ef;
5446 h->chroma_pred_mode_table[mb_xy] = 0;
5447 // In deblocking, the quantizer is 0
5448 s->current_picture.qscale_table[mb_xy]= 0;
5449 // All coeffs are present
5450 memset(h->non_zero_count[mb_xy], 16, 16);
5451 s->current_picture.mb_type[mb_xy]= mb_type;
5452 h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled while decoding a field-coded pair
 * (undone by the >>= 1 near the end of this function) */
5457 h->ref_count[0] <<= 1;
5458 h->ref_count[1] <<= 1;
5461 fill_caches(h, mb_type, 0);
/* --- intra prediction mode decoding --- */
5463 if( IS_INTRA( mb_type ) ) {
5465 if( IS_INTRA4x4( mb_type ) ) {
/* 8x8 transform: one pred mode per 8x8 block, replicated in the cache */
5466 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5467 mb_type |= MB_TYPE_8x8DCT;
5468 for( i = 0; i < 16; i+=4 ) {
5469 int pred = pred_intra_mode( h, i );
5470 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5471 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5474 for( i = 0; i < 16; i++ ) {
5475 int pred = pred_intra_mode( h, i );
5476 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5478 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5481 write_back_intra_pred_mode(h);
5482 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5484 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5485 if( h->intra16x16_pred_mode < 0 ) return -1;
5488 h->chroma_pred_mode_table[mb_xy] =
5489 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5491 pred_mode= check_intra_pred_mode( h, pred_mode );
5492 if( pred_mode < 0 ) return -1;
5493 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-macroblock partitions --- */
5495 } else if( partition_count == 4 ) {
5496 int i, j, sub_partition_count[4], list, ref[2][4];
5498 if( h->slice_type_nos == FF_B_TYPE ) {
5499 for( i = 0; i < 4; i++ ) {
5500 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5501 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5502 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5504 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5505 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5506 pred_direct_motion(h, &mb_type);
5507 h->ref_cache[0][scan8[4]] =
5508 h->ref_cache[1][scan8[4]] =
5509 h->ref_cache[0][scan8[12]] =
5510 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5511 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5512 for( i = 0; i < 4; i++ )
5513 if( IS_DIRECT(h->sub_mb_type[i]) )
5514 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5518 for( i = 0; i < 4; i++ ) {
5519 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5520 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5521 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each non-direct 8x8 partition */
5525 for( list = 0; list < h->list_count; list++ ) {
5526 for( i = 0; i < 4; i++ ) {
5527 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5528 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5529 if( h->ref_count[list] > 1 ){
5530 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
/* bitstream sanity check: reject out-of-range reference indices */
5531 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5532 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5540 h->ref_cache[list][ scan8[4*i]+1 ]=
5541 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5546 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors: decoded as deltas against the spatial prediction */
5548 for(list=0; list<h->list_count; list++){
5550 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5551 if(IS_DIRECT(h->sub_mb_type[i])){
5552 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5556 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5557 const int sub_mb_type= h->sub_mb_type[i];
5558 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5559 for(j=0; j<sub_partition_count[i]; j++){
5562 const int index= 4*i + block_width*j;
5563 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5564 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5565 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5567 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5568 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5569 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate mv/mvd into all cache cells covered by this sub-block shape */
5571 if(IS_SUB_8X8(sub_mb_type)){
5573 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5575 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5578 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5580 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5581 }else if(IS_SUB_8X4(sub_mb_type)){
5582 mv_cache[ 1 ][0]= mx;
5583 mv_cache[ 1 ][1]= my;
5585 mvd_cache[ 1 ][0]= mx - mpx;
5586 mvd_cache[ 1 ][1]= my - mpy;
5587 }else if(IS_SUB_4X8(sub_mb_type)){
5588 mv_cache[ 8 ][0]= mx;
5589 mv_cache[ 8 ][1]= my;
5591 mvd_cache[ 8 ][0]= mx - mpx;
5592 mvd_cache[ 8 ][1]= my - mpy;
5594 mv_cache[ 0 ][0]= mx;
5595 mv_cache[ 0 ][1]= my;
5597 mvd_cache[ 0 ][0]= mx - mpx;
5598 mvd_cache[ 0 ][1]= my - mpy;
/* list not used for this partition: zero the 2x2 mv/mvd cache cells */
5601 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5602 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5603 p[0] = p[1] = p[8] = p[9] = 0;
5604 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- whole-MB direct mode --- */
5608 } else if( IS_DIRECT(mb_type) ) {
5609 pred_direct_motion(h, &mb_type);
5610 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5611 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5612 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5614 int list, mx, my, i, mpx, mpy;
5615 if(IS_16X16(mb_type)){
5616 for(list=0; list<h->list_count; list++){
5617 if(IS_DIR(mb_type, 0, list)){
5619 if(h->ref_count[list] > 1){
5620 ref= decode_cabac_mb_ref(h, list, 0);
5621 if(ref >= (unsigned)h->ref_count[list]){
5622 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5627 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5629 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5631 for(list=0; list<h->list_count; list++){
5632 if(IS_DIR(mb_type, 0, list)){
5633 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5635 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5636 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5637 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5639 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5640 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5642 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5645 else if(IS_16X8(mb_type)){
5646 for(list=0; list<h->list_count; list++){
5648 if(IS_DIR(mb_type, i, list)){
5650 if(h->ref_count[list] > 1){
5651 ref= decode_cabac_mb_ref( h, list, 8*i );
5652 if(ref >= (unsigned)h->ref_count[list]){
5653 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5658 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5660 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5663 for(list=0; list<h->list_count; list++){
5665 if(IS_DIR(mb_type, i, list)){
5666 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5667 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5668 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5669 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5671 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5672 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5674 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5675 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5680 assert(IS_8X16(mb_type));
5681 for(list=0; list<h->list_count; list++){
5683 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5685 if(h->ref_count[list] > 1){
5686 ref= decode_cabac_mb_ref( h, list, 4*i );
5687 if(ref >= (unsigned)h->ref_count[list]){
5688 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5693 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5695 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5698 for(list=0; list<h->list_count; list++){
5700 if(IS_DIR(mb_type, i, list)){
5701 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5702 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5703 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5705 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5706 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5707 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5709 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5710 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5717 if( IS_INTER( mb_type ) ) {
5718 h->chroma_pred_mode_table[mb_xy] = 0;
5719 write_back_motion( h, mb_type );
/* --- coded block pattern (only signalled for non-I16x16 mb types) --- */
5722 if( !IS_INTRA16x16( mb_type ) ) {
5723 cbp = decode_cabac_mb_cbp_luma( h );
5725 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5728 h->cbp_table[mb_xy] = h->cbp = cbp;
5730 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5731 if( decode_cabac_mb_transform_size( h ) )
5732 mb_type |= MB_TYPE_8x8DCT;
5734 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual coefficient decoding --- */
5736 if( cbp || IS_INTRA16x16( mb_type ) ) {
5737 const uint8_t *scan, *scan8x8, *dc_scan;
5738 const uint32_t *qmul;
/* choose frame or field scan order */
5741 if(IS_INTERLACED(mb_type)){
5742 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5743 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5744 dc_scan= luma_dc_field_scan;
5746 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5747 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5748 dc_scan= luma_dc_zigzag_scan;
/* delta-QP, then wrap qscale back into [0,51] */
5751 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5752 if( dqp == INT_MIN ){
5753 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5757 if(((unsigned)s->qscale) > 51){
5758 if(s->qscale<0) s->qscale+= 52;
5759 else s->qscale-= 52;
5761 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5762 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* I16x16: luma DC block first, then 15-coeff AC blocks */
5764 if( IS_INTRA16x16( mb_type ) ) {
5766 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5767 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5770 qmul = h->dequant4_coeff[0][s->qscale];
5771 for( i = 0; i < 16; i++ ) {
5772 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5773 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5776 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other mb types: per-8x8 luma blocks, 8x8 or 4x4 transform */
5780 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5781 if( cbp & (1<<i8x8) ) {
5782 if( IS_8x8DCT(mb_type) ) {
5783 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5784 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5786 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5787 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5788 const int index = 4*i8x8 + i4x4;
5789 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5791 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5792 //STOP_TIMER("decode_residual")
5796 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5797 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma: DC blocks first, then per-plane AC blocks */
5804 for( c = 0; c < 2; c++ ) {
5805 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5806 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5812 for( c = 0; c < 2; c++ ) {
5813 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5814 for( i = 0; i < 4; i++ ) {
5815 const int index = 16 + 4 * c + i;
5816 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5817 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5821 uint8_t * const nnz= &h->non_zero_count_cache[0];
5822 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5823 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residuals at all: clear the whole non-zero-count cache */
5826 uint8_t * const nnz= &h->non_zero_count_cache[0];
5827 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5828 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5829 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5830 h->last_qscale_diff = 0;
5833 s->current_picture.qscale_table[mb_xy]= s->qscale;
5834 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling from above */
5837 h->ref_count[0] >>= 1;
5838 h->ref_count[1] >>= 1;
/**
 * Deblocks one vertical luma edge (filters pixels left/right of the edge).
 * bS < 4: normal filtering via the dsp loop-filter with tc0 clipping;
 * bS == 4: strong filter computed in C over all 16 rows.
 * NOTE(review): interior lines (branch headers / closing braces) are
 * elided in this excerpt — gaps in the embedded numbering.
 */
5845 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5847 const int index_a = qp + h->slice_alpha_c0_offset;
5848 const int alpha = (alpha_table+52)[index_a];
5849 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 marks "no filtering" for the dsp routine */
5854 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5855 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5857 /* 16px edge length, because bS=4 is triggered by being at
5858 * the edge of an intra MB, so all 4 bS are the same */
5859 for( d = 0; d < 16; d++ ) {
5860 const int p0 = pix[-1];
5861 const int p1 = pix[-2];
5862 const int p2 = pix[-3];
5864 const int q0 = pix[0];
5865 const int q1 = pix[1];
5866 const int q2 = pix[2];
/* edge activity check: only filter across real block edges */
5868 if( FFABS( p0 - q0 ) < alpha &&
5869 FFABS( p1 - p0 ) < beta &&
5870 FFABS( q1 - q0 ) < beta ) {
5872 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5873 if( FFABS( p2 - p0 ) < beta)
5875 const int p3 = pix[-4];
5877 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5878 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5879 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5882 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5884 if( FFABS( q2 - q0 ) < beta)
5886 const int q3 = pix[3];
5888 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5889 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5890 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5893 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant of the strong filter: only p0/q0 are modified */
5897 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5898 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5900 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge: bS < 4 uses the normal chroma
 * loop-filter with tc clipping, bS == 4 the intra (strong) variant.
 * NOTE(review): branch headers are elided in this excerpt.
 */
5906 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5908 const int index_a = qp + h->slice_alpha_c0_offset;
5909 const int alpha = (alpha_table+52)[index_a];
5910 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1; tc[i]==0 marks "no filtering" */
5915 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5916 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5918 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * MBAFF variant of the vertical luma edge filter: qp, alpha, beta and the
 * bS index are recomputed per pixel row because the two MBs of a pair may
 * have different quantizers and field/frame coding.
 * NOTE(review): interior lines (closing braces, some branch headers) are
 * elided in this excerpt — gaps in the embedded numbering.
 */
5922 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5924 for( i = 0; i < 16; i++, pix += stride) {
5930 int bS_index = (i >> 1);
5933 bS_index |= (i & 1);
5936 if( bS[bS_index] == 0 ) {
/* pick the qp of whichever MB of the pair owns this row */
5940 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5941 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5942 alpha = (alpha_table+52)[index_a];
5943 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filtering with tc0 clipping */
5945 if( bS[bS_index] < 4 ) {
5946 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5947 const int p0 = pix[-1];
5948 const int p1 = pix[-2];
5949 const int p2 = pix[-3];
5950 const int q0 = pix[0];
5951 const int q1 = pix[1];
5952 const int q2 = pix[2];
5954 if( FFABS( p0 - q0 ) < alpha &&
5955 FFABS( p1 - p0 ) < beta &&
5956 FFABS( q1 - q0 ) < beta ) {
5960 if( FFABS( p2 - p0 ) < beta ) {
5961 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5964 if( FFABS( q2 - q0 ) < beta ) {
5965 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5969 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5970 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5971 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5972 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filter */
5975 const int p0 = pix[-1];
5976 const int p1 = pix[-2];
5977 const int p2 = pix[-3];
5979 const int q0 = pix[0];
5980 const int q1 = pix[1];
5981 const int q2 = pix[2];
5983 if( FFABS( p0 - q0 ) < alpha &&
5984 FFABS( p1 - p0 ) < beta &&
5985 FFABS( q1 - q0 ) < beta ) {
5987 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5988 if( FFABS( p2 - p0 ) < beta)
5990 const int p3 = pix[-4];
5992 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5993 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5994 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5997 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5999 if( FFABS( q2 - q0 ) < beta)
6001 const int q3 = pix[3];
6003 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6004 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6005 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6008 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6012 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6013 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6015 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * MBAFF variant of the vertical chroma edge filter: 8 rows, with per-row
 * qp/alpha/beta selection like the luma MBAFF filter.
 * NOTE(review): interior lines (bS_index computation, closing braces) are
 * elided in this excerpt — gaps in the embedded numbering.
 */
6020 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6022 for( i = 0; i < 8; i++, pix += stride) {
6030 if( bS[bS_index] == 0 ) {
6034 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6035 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6036 alpha = (alpha_table+52)[index_a];
6037 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal chroma filtering (tc0+1 clipping) */
6039 if( bS[bS_index] < 4 ) {
6040 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6041 const int p0 = pix[-1];
6042 const int p1 = pix[-2];
6043 const int q0 = pix[0];
6044 const int q1 = pix[1];
6046 if( FFABS( p0 - q0 ) < alpha &&
6047 FFABS( p1 - p0 ) < beta &&
6048 FFABS( q1 - q0 ) < beta ) {
6049 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6051 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6052 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6053 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (p0/q0 only) */
6056 const int p0 = pix[-1];
6057 const int p1 = pix[-2];
6058 const int q0 = pix[0];
6059 const int q1 = pix[1];
6061 if( FFABS( p0 - q0 ) < alpha &&
6062 FFABS( p1 - p0 ) < beta &&
6063 FFABS( q1 - q0 ) < beta ) {
6065 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6066 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6067 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge (filters pixels above/below the edge);
 * mirror of filter_mb_edgev with stride-scaled offsets instead of +/-1.
 * NOTE(review): interior lines (branch headers / closing braces) are
 * elided in this excerpt — gaps in the embedded numbering.
 */
6073 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6075 const int index_a = qp + h->slice_alpha_c0_offset;
6076 const int alpha = (alpha_table+52)[index_a];
6077 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6078 const int pix_next = stride;
/* tc[i] = -1 marks "no filtering" for the dsp routine */
6083 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6084 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6086 /* 16px edge length, see filter_mb_edgev */
6087 for( d = 0; d < 16; d++ ) {
6088 const int p0 = pix[-1*pix_next];
6089 const int p1 = pix[-2*pix_next];
6090 const int p2 = pix[-3*pix_next];
6091 const int q0 = pix[0];
6092 const int q1 = pix[1*pix_next];
6093 const int q2 = pix[2*pix_next];
6095 if( FFABS( p0 - q0 ) < alpha &&
6096 FFABS( p1 - p0 ) < beta &&
6097 FFABS( q1 - q0 ) < beta ) {
6099 const int p3 = pix[-4*pix_next];
6100 const int q3 = pix[ 3*pix_next];
6102 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6103 if( FFABS( p2 - p0 ) < beta) {
6105 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6106 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6107 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6110 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6112 if( FFABS( q2 - q0 ) < beta) {
6114 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6115 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6116 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6119 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant of the strong filter: only p0/q0 are modified */
6123 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6124 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6126 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge: bS < 4 uses the normal chroma
 * loop-filter with tc clipping, bS == 4 the intra (strong) variant.
 * NOTE(review): branch headers are elided in this excerpt.
 */
6133 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6135 const int index_a = qp + h->slice_alpha_c0_offset;
6136 const int alpha = (alpha_table+52)[index_a];
6137 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1; tc[i]==0 marks "no filtering" */
6142 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6143 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6145 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking for one macroblock: falls back to the full
 * filter_mb() for border MBs, per-MB chroma-qp offsets, slice-boundary
 * deblocking mode 2 and unless CODEC_FLAG2_FAST is set; otherwise derives
 * bS values cheaply (intra MBs get constant bS, inter MBs use the dsp
 * h264_loop_filter_strength helper) and filters the four edges per
 * direction.
 * NOTE(review): interior lines and the function's tail (the FILTER()
 * invocations after the macro) are elided in this excerpt — gaps in the
 * embedded numbering.
 */
6149 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6150 MpegEncContext * const s = &h->s;
6151 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6153 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* fallback: cases the fast path cannot handle correctly */
6157 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6158 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6159 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6160 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6161 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6164 assert(!FRAME_MBAFF);
/* average qp with the left (qp0) and top (qp1) neighbours per spec */
6166 mb_type = s->current_picture.mb_type[mb_xy];
6167 qp = s->current_picture.qscale_table[mb_xy];
6168 qp0 = s->current_picture.qscale_table[mb_xy-1];
6169 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6170 qpc = get_chroma_qp( h, 0, qp );
6171 qpc0 = get_chroma_qp( h, 0, qp0 );
6172 qpc1 = get_chroma_qp( h, 0, qp1 );
6173 qp0 = (qp + qp0 + 1) >> 1;
6174 qp1 = (qp + qp1 + 1) >> 1;
6175 qpc0 = (qpc + qpc0 + 1) >> 1;
6176 qpc1 = (qpc + qpc1 + 1) >> 1;
6177 qp_thresh = 15 - h->slice_alpha_c0_offset;
/* all qps below threshold: the filter would be a no-op, skip entirely */
6178 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6179 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed bS (4 on MB edges, 3 inside; 3 on the top edge of
 * field pictures) */
6182 if( IS_INTRA(mb_type) ) {
6183 int16_t bS4[4] = {4,4,4,4};
6184 int16_t bS3[4] = {3,3,3,3};
6185 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6186 if( IS_8x8DCT(mb_type) ) {
6187 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6188 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6189 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6190 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6192 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6193 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6194 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6195 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6196 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6197 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6198 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6199 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6201 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6202 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6203 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6204 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6205 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6206 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6207 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6208 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS via the dsp helper, then filter via FILTER() */
6211 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6212 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6214 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6216 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6218 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6219 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6220 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6221 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6223 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6224 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6225 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6226 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* intra neighbours force the MB-border edge to maximum strength */
6228 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6229 bSv[0][0] = 0x0004000400040004ULL;
6230 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6231 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6233 #define FILTER(hv,dir,edge)\
6234 if(bSv[dir][edge]) {\
6235 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6237 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6238 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6244 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Apply the in-loop deblocking filter to one macroblock in ONE direction.
 *
 * dir == 0 filters the vertical edges (left MB neighbour is mbm_xy = mb_xy-1),
 * dir == 1 filters the horizontal edges (top MB neighbour is h->top_mb_xy).
 * The boundary strength bS is 3/4 for intra MBs, otherwise derived from
 * non-zero coefficient flags and motion-vector / reference differences.
 *
 * NOTE(review): this listing is an elided excerpt — several original lines
 * (the declarations of bS/qp/edge and some braces/else branches) are not
 * shown here; comments below describe only what the visible code establishes.
 */
6264 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6265 MpegEncContext * const s = &h->s;
6267 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6268 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers for the current slice;
 * ref2frmm is the same table for the neighbouring MB's slice (may differ). */
6269 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6270 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* start==1 skips edge 0 when the neighbour MB is outside the picture
 * (slice_table entry 0xFFFF marks unavailable MBs). */
6271 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* A skipped 16x16 MB can only have a non-trivial bS on its outer edge. */
6273 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6274 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6275 // how often to recheck mv-based bS when iterating between edges
6276 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6277 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6278 // how often to recheck mv-based bS when iterating along each edge
6279 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6281 if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries */
6285 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6288 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6289 && !IS_INTERLACED(mb_type)
6290 && IS_INTERLACED(mbm_type)
6292 // This is a special case in the norm where the filtering must
6293 // be done twice (one each of the field) even if we are in a
6294 // frame macroblock.
6296 static const int nnz_idx[4] = {4,5,6,3};
6297 unsigned int tmp_linesize = 2 * linesize;
6298 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6299 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* Filter against both field MBs of the interlaced pair above. */
6304 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6305 if( IS_INTRA(mb_type) ||
6306 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6307 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6309 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6310 for( i = 0; i < 4; i++ ) {
6311 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6312 mbn_nnz[nnz_idx[i]] != 0 )
6318 // Do not use s->qscale as luma quantizer because it has not the same
6319 // value in IPCM macroblocks.
6320 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6321 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6322 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6323 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6324 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6325 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6326 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6327 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main loop: edge 0 is the MB boundary (neighbour mbm_xy), edges 1..3
 * are internal 4-pel boundaries. */
6334 for( edge = start; edge < edges; edge++ ) {
6335 /* mbn_xy: neighbor macroblock */
6336 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6337 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6338 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform: the odd internal edges do not exist. */
6342 if( (edge&1) && IS_8x8DCT(mb_type) )
6345 if( IS_INTRA(mb_type) ||
6346 IS_INTRA(mbn_type) ) {
6349 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6350 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6359 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter/inter boundary: bS depends on nnz and mv/ref differences. */
6364 if( edge & mask_edge ) {
6365 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6368 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6369 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6372 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6373 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6374 int bn_idx= b_idx - (dir ? 8:1);
/* bS=1 if refs differ or any mv component differs by >= 1 full pel
 * (mvy_limit accounts for field motion vectors). */
6377 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6378 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6379 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6380 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the other list (L0 vs L1 swap). */
6383 if(h->slice_type_nos == FF_B_TYPE && v){
6385 for( l = 0; !v && l < 2; l++ ) {
6387 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6388 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6389 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6393 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Per-4x4 path: compute bS individually for each of the 4 positions. */
6399 for( i = 0; i < 4; i++ ) {
6400 int x = dir == 0 ? edge : i;
6401 int y = dir == 0 ? i : edge;
6402 int b_idx= 8 + 4 + x + 8*y;
6403 int bn_idx= b_idx - (dir ? 8:1);
6405 if( h->non_zero_count_cache[b_idx] |
6406 h->non_zero_count_cache[bn_idx] ) {
6412 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6413 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6414 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6415 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6421 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6423 for( l = 0; l < 2; l++ ) {
6425 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6426 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6427 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6436 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6441 // Do not use s->qscale as luma quantizer because it has not the same
6442 // value in IPCM macroblocks.
6443 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6444 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6445 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6446 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Chroma is subsampled 2:1, so only even luma edges have a chroma edge. */
6448 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6449 if( (edge&1) == 0 ) {
6450 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6451 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6452 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6453 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6456 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6457 if( (edge&1) == 0 ) {
6458 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6459 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6460 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6461 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Deblock one complete macroblock (luma + both chroma planes).
 *
 * Handles the low-QP early-skip, the CAVLC+8x8-transform non_zero_count
 * fixup, the MBAFF special first vertical edge, and finally dispatches to
 * filter_mb_dir() for both filtering directions.
 *
 * NOTE(review): elided excerpt — some original lines (returns, braces,
 * a few declarations) are not visible here.
 */
6467 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6468 MpegEncContext * const s = &h->s;
6469 const int mb_xy= mb_x + mb_y*s->mb_stride;
6470 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Field MBs use a tighter vertical mv threshold (half-pel units differ). */
6471 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6472 int first_vertical_edge_done = 0;
6475 //for sufficiently low qp, filtering wouldn't do anything
6476 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6478 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6479 int qp = s->current_picture.qscale_table[mb_xy];
6481 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6482 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6487 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6488 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6489 int top_type, left_type[2];
6490 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6491 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6492 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the neighbour nnz cache entries from the cbp bits when the
 * neighbour used the 8x8 transform. */
6494 if(IS_8x8DCT(top_type)){
6495 h->non_zero_count_cache[4+8*0]=
6496 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6497 h->non_zero_count_cache[6+8*0]=
6498 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6500 if(IS_8x8DCT(left_type[0])){
6501 h->non_zero_count_cache[3+8*1]=
6502 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6504 if(IS_8x8DCT(left_type[1])){
6505 h->non_zero_count_cache[3+8*3]=
6506 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* For the current MB, each 8x8 block's cbp bit drives all four of its
 * 4x4 nnz cache entries. */
6509 if(IS_8x8DCT(mb_type)){
6510 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6511 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6513 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6514 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6516 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6517 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6519 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6520 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6525 // left mb is in picture
6526 && h->slice_table[mb_xy-1] != 0xFFFF
6527 // and current and left pair do not have the same interlaced type
6528 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6529 // and left mb is in the same slice if deblocking_filter == 2
6530 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6531 /* First vertical edge is different in MBAFF frames
6532 * There are 8 different bS to compute and 2 different Qp
6534 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6535 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6540 int mb_qp, mbn0_qp, mbn1_qp;
6542 first_vertical_edge_done = 1;
6544 if( IS_INTRA(mb_type) )
6545 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6547 for( i = 0; i < 8; i++ ) {
6548 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6550 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
/* CAVLC 8x8dct neighbours: nnz must come from the cbp bits instead of
 * the stored non_zero_count (see fixup above). */
6552 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6553 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6554 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6556 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Two QP sets: one per field MB of the left pair, for luma (qp),
 * Cb (bqp) and Cr (rqp). */
6563 mb_qp = s->current_picture.qscale_table[mb_xy];
6564 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6565 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6566 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6567 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6568 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6569 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6570 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6571 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6572 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6573 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6574 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6575 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6578 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6579 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6580 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6581 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6582 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Normal path: filter both directions (0 = vertical, 1 = horizontal). */
6586 for( dir = 0; dir < 2; dir++ )
6587 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6589 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6590 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode one slice: iterate over its macroblocks with either the CABAC
 * or the CAVLC entropy decoder until end-of-slice, reporting progress to
 * the error-resilience layer via ff_er_add_slice().
 *
 * @param avctx codec context
 * @param arg   pointer to the per-thread H264Context pointer
 * @return 0 on normal slice end, -1 on error (visible in elided parts)
 *
 * NOTE(review): elided excerpt — loop braces, some declarations (i, pre,
 * eos, ret) and preprocessor lines are not visible here.
 */
6594 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6595 H264Context *h = *(void**)arg;
6596 MpegEncContext * const s = &h->s;
6597 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
/* is_complex selects the slower, fully general hl_decode_mb path. */
6601 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6602 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6604 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header. */
6608 align_get_bits( &s->gb );
6611 ff_init_cabac_states( &h->cabac);
6612 ff_init_cabac_decoder( &h->cabac,
6613 s->gb.buffer + get_bits_count(&s->gb)/8,
6614 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6615 /* calculate pre-state */
6616 for( i= 0; i < 460; i++ ) {
6618 if( h->slice_type_nos == FF_I_TYPE )
6619 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6621 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte: 2*state + mps. */
6624 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6626 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop. */
6631 int ret = decode_mb_cabac(h);
6633 //STOP_TIMER("decode_mb_cabac")
6635 if(ret>=0) hl_decode_mb(h);
6637 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6640 if(ret>=0) ret = decode_mb_cabac(h);
6642 if(ret>=0) hl_decode_mb(h);
6645 eos = get_cabac_terminate( &h->cabac );
/* Overread of more than 2 bytes past the buffer means corrupt data. */
6647 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6648 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6649 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6653 if( ++s->mb_x >= s->mb_width ) {
6655 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6657 if(FIELD_OR_MBAFF_PICTURE) {
6662 if( eos || s->mb_y >= s->mb_height ) {
6663 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6664 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop. */
6671 int ret = decode_mb_cavlc(h);
6673 if(ret>=0) hl_decode_mb(h);
6675 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6677 ret = decode_mb_cavlc(h);
6679 if(ret>=0) hl_decode_mb(h);
6684 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6685 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6690 if(++s->mb_x >= s->mb_width){
6692 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6694 if(FIELD_OR_MBAFF_PICTURE) {
6697 if(s->mb_y >= s->mb_height){
6698 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Slice is valid only if we consumed the bitstream exactly. */
6700 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6701 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6705 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6712 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6713 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6714 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6715 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6719 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below duplicates the CAVLC loop and passes
 * s->gb by value to get_bits_count(); it appears to come from a
 * disabled (#if 0) section in the original source — the preprocessor
 * lines are elided here. Verify against the full file. */
6728 for(;s->mb_y < s->mb_height; s->mb_y++){
6729 for(;s->mb_x < s->mb_width; s->mb_x++){
6730 int ret= decode_mb(h);
6735 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6736 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6741 if(++s->mb_x >= s->mb_width){
6743 if(++s->mb_y >= s->mb_height){
6744 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6745 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6749 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Fixed: stray '?' characters had corrupted this line (was
 * "s->?gb" / "s->gb?.size_in_bits"); restored to match the sibling
 * lines above and below. */
6756 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6757 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6758 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6762 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6769 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6772 return -1; //not reached
/**
 * Parse a picture timing SEI message.
 *
 * Skips the HRD delays when HRD parameters were signalled in the SPS,
 * then reads pic_struct and, when present, each clock timestamp
 * (ct_type, counting fields and the optional seconds/minutes/hours).
 * All values except sei_pic_struct are skipped, not stored.
 *
 * NOTE(review): elided excerpt — the closing braces / return statements
 * are not visible here.
 */
6775 static int decode_picture_timing(H264Context *h){
6776 MpegEncContext * const s = &h->s;
6777 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6778 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6779 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6781 if(h->sps.pic_struct_present_flag){
6782 unsigned int i, num_clock_ts;
6783 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Reject pic_struct values beyond the last defined enum entry. */
6785 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6788 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6790 for (i = 0 ; i < num_clock_ts ; i++){
6791 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6792 unsigned int full_timestamp_flag;
6793 skip_bits(&s->gb, 2); /* ct_type */
6794 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6795 skip_bits(&s->gb, 5); /* counting_type */
6796 full_timestamp_flag = get_bits(&s->gb, 1);
6797 skip_bits(&s->gb, 1); /* discontinuity_flag */
6798 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6799 skip_bits(&s->gb, 8); /* n_frames */
6800 if(full_timestamp_flag){
6801 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6802 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6803 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* Without a full timestamp, each time component is optional. */
6805 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6806 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6807 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6808 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6809 if(get_bits(&s->gb, 1)) /* hours_flag */
6810 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6814 if(h->sps.time_offset_length > 0)
6815 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an unregistered user-data SEI message.
 *
 * Copies up to sizeof(user_data)-1 bytes (16-byte UUID + payload) and
 * scans the payload for an x264 version banner; if found, the build
 * number is stored in h->x264_build (used elsewhere for bug workarounds).
 * Remaining payload bytes are skipped.
 *
 * @param size payload size in bytes
 *
 * NOTE(review): elided excerpt — the declarations of i/e/build and the
 * NUL-termination of user_data are on lines not visible here.
 */
6822 static int decode_unregistered_user_data(H264Context *h, int size){
6823 MpegEncContext * const s = &h->s;
6824 uint8_t user_data[16+256];
6830 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6831 user_data[i]= get_bits(&s->gb, 8);
/* Payload starts after the 16-byte UUID; match the x264 banner. */
6835 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6836 if(e==1 && build>=0)
6837 h->x264_build= build;
6839 if(s->avctx->debug & FF_DEBUG_BUGS)
6840 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond the local buffer. */
6843 skip_bits(&s->gb, 8);
/**
 * Parse all SEI messages in the current NAL unit.
 *
 * Each message carries a type and a size, both coded as a sequence of
 * 0xFF bytes plus a final byte (ff_ff_..._vv accumulation). Known types
 * (picture timing, unregistered user data) are dispatched; everything
 * else is skipped by size.
 *
 * NOTE(review): elided excerpt — declarations of type/size and some
 * switch labels are on lines not visible here.
 */
6848 static int decode_sei(H264Context *h){
6849 MpegEncContext * const s = &h->s;
6851 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* Accumulate the 255-escaped payload type. */
6856 type+= show_bits(&s->gb, 8);
6857 }while(get_bits(&s->gb, 8) == 255);
/* Accumulate the 255-escaped payload size (in bytes). */
6861 size+= show_bits(&s->gb, 8);
6862 }while(get_bits(&s->gb, 8) == 255);
6865 case 1: // Picture timing SEI
6866 if(decode_picture_timing(h) < 0)
6870 if(decode_unregistered_user_data(h, size) < 0)
6874 skip_bits(&s->gb, 8*size);
6877 //FIXME check bits here
6878 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters from a VUI.
 *
 * Reads the CPB count (validated against the spec maximum of 32), the
 * bit-rate/CPB-size scales, the per-CPB values, and the delay field
 * lengths that decode_picture_timing() later relies on.
 *
 * @return negative on invalid cpb_count (return statements elided here)
 */
6884 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6885 MpegEncContext * const s = &h->s;
6887 cpb_count = get_ue_golomb(&s->gb) + 1;
/* Unsigned compare also catches negative values from a broken ue(v). */
6889 if(cpb_count > 32U){
6890 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6894 get_bits(&s->gb, 4); /* bit_rate_scale */
6895 get_bits(&s->gb, 4); /* cpb_size_scale */
6896 for(i=0; i<cpb_count; i++){
6897 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6898 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6899 get_bits1(&s->gb); /* cbr_flag */
6901 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* These lengths are consumed later by decode_picture_timing(). */
6902 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6903 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6904 sps->time_offset_length = get_bits(&s->gb, 5);
/**
 * Parse the VUI (video usability information) block of an SPS.
 *
 * Reads sample aspect ratio, overscan/video-signal/chroma-location
 * info (mostly skipped), timing info, optional NAL/VCL HRD parameters,
 * pic_struct_present_flag and the bitstream restriction block
 * (num_reorder_frames is validated).
 *
 * NOTE(review): elided excerpt — some error returns and closing braces
 * are on lines not visible here.
 */
6908 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6909 MpegEncContext * const s = &h->s;
6910 int aspect_ratio_info_present_flag;
6911 unsigned int aspect_ratio_idc;
6913 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6915 if( aspect_ratio_info_present_flag ) {
6916 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow. */
6917 if( aspect_ratio_idc == EXTENDED_SAR ) {
6918 sps->sar.num= get_bits(&s->gb, 16);
6919 sps->sar.den= get_bits(&s->gb, 16);
6920 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6921 sps->sar= pixel_aspect[aspect_ratio_idc];
6923 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6930 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6932 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6933 get_bits1(&s->gb); /* overscan_appropriate_flag */
6936 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6937 get_bits(&s->gb, 3); /* video_format */
6938 get_bits1(&s->gb); /* video_full_range_flag */
6939 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6940 get_bits(&s->gb, 8); /* colour_primaries */
6941 get_bits(&s->gb, 8); /* transfer_characteristics */
6942 get_bits(&s->gb, 8); /* matrix_coefficients */
6946 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6947 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6948 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6951 sps->timing_info_present_flag = get_bits1(&s->gb);
6952 if(sps->timing_info_present_flag){
6953 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6954 sps->time_scale = get_bits_long(&s->gb, 32);
6955 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may be present twice: once for NAL, once for VCL. */
6958 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6959 if(sps->nal_hrd_parameters_present_flag)
6960 if(decode_hrd_parameters(h, sps) < 0)
6962 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6963 if(sps->vcl_hrd_parameters_present_flag)
6964 if(decode_hrd_parameters(h, sps) < 0)
6966 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6967 get_bits1(&s->gb); /* low_delay_hrd_flag */
6968 sps->pic_struct_present_flag = get_bits1(&s->gb);
6970 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6971 if(sps->bitstream_restriction_flag){
6972 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6973 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6974 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6975 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6976 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6977 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6978 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Unsigned compare also rejects negative values from a broken ue(v). */
6980 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6981 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parse one scaling list (4x4: size==16, 8x8: size==64).
 *
 * If the list is absent, the predicted fallback_list is copied.
 * Otherwise deltas are se(v)-decoded in zigzag order; a zero first
 * value selects the JVT default list. A zero delta repeats the last
 * value (handled by the `next ? next : last` expression).
 *
 * @param factors       output array (size entries)
 * @param jvt_list      spec-default list used when the stream selects it
 * @param fallback_list predicted list used when none is transmitted
 *
 * NOTE(review): elided excerpt — the else/break lines of the original
 * are not visible here.
 */
6989 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6990 const uint8_t *jvt_list, const uint8_t *fallback_list){
6991 MpegEncContext * const s = &h->s;
6992 int i, last = 8, next = 8;
6993 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6994 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6995 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6997 for(i=0;i<size;i++){
6999 next = (last + get_se_golomb(&s->gb)) & 0xff;
7000 if(!i && !next){ /* matrix not written, we use the preset one */
7001 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7004 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the complete set of scaling matrices for an SPS or PPS.
 *
 * Fallback rules: when parsing a PPS and the SPS carried matrices, the
 * SPS matrices are the prediction source; otherwise the spec defaults
 * are used. Within a set, each chroma list falls back to the previous
 * list of the same intra/inter class.
 *
 * @param is_sps non-zero when parsing SPS matrices (enables 8x8 lists
 *               unconditionally; for PPS they depend on transform_8x8_mode)
 */
7008 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7009 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7010 MpegEncContext * const s = &h->s;
7011 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7012 const uint8_t *fallback[4] = {
7013 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7014 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7015 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7016 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7018 if(get_bits1(&s->gb)){
7019 sps->scaling_matrix_present |= is_sps;
7020 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7021 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7022 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7023 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7024 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7025 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7026 if(is_sps || pps->transform_8x8_mode){
7027 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7028 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set (SPS) NAL unit and store it in
 * h->sps_buffers[sps_id], replacing any previous SPS with the same id.
 *
 * Validates sps_id, poc_cycle_length, ref_frame_count and the picture
 * dimensions; parses high-profile extras (chroma format, bit depth,
 * scaling matrices), POC parameters, cropping and optional VUI.
 *
 * NOTE(review): elided excerpt — error returns, some declarations (i,
 * sps) and closing braces are on lines not visible here.
 */
7033 static inline int decode_seq_parameter_set(H264Context *h){
7034 MpegEncContext * const s = &h->s;
7035 int profile_idc, level_idc;
7036 unsigned int sps_id;
7040 profile_idc= get_bits(&s->gb, 8);
7041 get_bits1(&s->gb); //constraint_set0_flag
7042 get_bits1(&s->gb); //constraint_set1_flag
7043 get_bits1(&s->gb); //constraint_set2_flag
7044 get_bits1(&s->gb); //constraint_set3_flag
7045 get_bits(&s->gb, 4); // reserved
7046 level_idc= get_bits(&s->gb, 8);
7047 sps_id= get_ue_golomb(&s->gb);
7049 if(sps_id >= MAX_SPS_COUNT) {
7050 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7053 sps= av_mallocz(sizeof(SPS));
7057 sps->profile_idc= profile_idc;
7058 sps->level_idc= level_idc;
/* Flat 16 is the default (no scaling) matrix. */
7060 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7061 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7062 sps->scaling_matrix_present = 0;
7064 if(sps->profile_idc >= 100){ //high profile
7065 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7066 if(sps->chroma_format_idc == 3)
7067 get_bits1(&s->gb); //residual_color_transform_flag
7068 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7069 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7070 sps->transform_bypass = get_bits1(&s->gb);
7071 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are always 4:2:0. */
7073 sps->chroma_format_idc= 1;
7076 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7077 sps->poc_type= get_ue_golomb(&s->gb);
7079 if(sps->poc_type == 0){ //FIXME #define
7080 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7081 } else if(sps->poc_type == 1){//FIXME #define
7082 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7083 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7084 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7085 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7087 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7088 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7092 for(i=0; i<sps->poc_cycle_length; i++)
7093 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7094 }else if(sps->poc_type != 2){
7095 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7099 sps->ref_frame_count= get_ue_golomb(&s->gb);
7100 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7101 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7104 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7105 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7106 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb_width/height against integer overflow before the check. */
7107 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7108 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7109 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7113 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7114 if(!sps->frame_mbs_only_flag)
7115 sps->mb_aff= get_bits1(&s->gb);
7119 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7121 #ifndef ALLOW_INTERLACE
7123 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7125 sps->crop= get_bits1(&s->gb);
7127 sps->crop_left = get_ue_golomb(&s->gb);
7128 sps->crop_right = get_ue_golomb(&s->gb);
7129 sps->crop_top = get_ue_golomb(&s->gb);
7130 sps->crop_bottom= get_ue_golomb(&s->gb);
7131 if(sps->crop_left || sps->crop_top){
7132 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7134 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7135 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7141 sps->crop_bottom= 0;
7144 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7145 if( sps->vui_parameters_present_flag )
7146 decode_vui_parameters(h, sps);
7148 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7149 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7150 sps_id, sps->profile_idc, sps->level_idc,
7152 sps->ref_frame_count,
7153 sps->mb_width, sps->mb_height,
7154 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7155 sps->direct_8x8_inference_flag ? "8B8" : "",
7156 sps->crop_left, sps->crop_right,
7157 sps->crop_top, sps->crop_bottom,
7158 sps->vui_parameters_present_flag ? "VUI" : "",
7159 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Replace any previously stored SPS with the same id. */
7162 av_free(h->sps_buffers[sps_id]);
7163 h->sps_buffers[sps_id]= sps;
/*
 * Precompute the luma-QP -> chroma-QP lookup table for chroma plane t
 * (0 = Cb, 1 = Cr), applying the PPS chroma_qp_index_offset `index`.
 * The 52 luma QP values map through the spec's chroma_qp[] table with
 * the offset clipped into [0, 51].
 * NOTE(review): the storage class / return type of this function is on
 * an elided line above.
 */
7171 build_qp_table(PPS *pps, int t, int index)
7174 for(i = 0; i < 52; i++)
7175 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set (PPS) NAL unit and store it in
 * h->pps_buffers[pps_id], replacing any previous PPS with the same id.
 *
 * Validates pps_id, the referenced sps_id and the reference counts;
 * rejects FMO (slice_group_count > 1); parses the optional trailing
 * extension (transform_8x8_mode, PPS scaling matrices, second chroma
 * QP offset) and builds both chroma QP lookup tables.
 *
 * @param bit_length size of the PPS RBSP in bits, used to detect the
 *                   optional trailing fields
 *
 * NOTE(review): elided excerpt — error returns, some declarations and
 * closing braces are on lines not visible here.
 */
7178 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7179 MpegEncContext * const s = &h->s;
7180 unsigned int pps_id= get_ue_golomb(&s->gb);
7183 if(pps_id >= MAX_PPS_COUNT) {
7184 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7188 pps= av_mallocz(sizeof(PPS));
7191 pps->sps_id= get_ue_golomb(&s->gb);
/* The referenced SPS must already have been decoded. */
7192 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7193 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7197 pps->cabac= get_bits1(&s->gb);
7198 pps->pic_order_present= get_bits1(&s->gb);
7199 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7200 if(pps->slice_group_count > 1 ){
7201 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7202 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* The switch body below is only a commented-out copy of the spec's
 * FMO syntax tables; the fields are never actually parsed. */
7203 switch(pps->mb_slice_group_map_type){
7206 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7207 | run_length[ i ] |1 |ue(v) |
7212 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7214 | top_left_mb[ i ] |1 |ue(v) |
7215 | bottom_right_mb[ i ] |1 |ue(v) |
7223 | slice_group_change_direction_flag |1 |u(1) |
7224 | slice_group_change_rate_minus1 |1 |ue(v) |
7229 | slice_group_id_cnt_minus1 |1 |ue(v) |
7230 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7232 | slice_group_id[ i ] |1 |u(v) |
7237 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7238 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7239 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7240 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7244 pps->weighted_pred= get_bits1(&s->gb);
7245 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7246 pps->init_qp= get_se_golomb(&s->gb) + 26;
7247 pps->init_qs= get_se_golomb(&s->gb) + 26;
7248 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7249 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7250 pps->constrained_intra_pred= get_bits1(&s->gb);
7251 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7253 pps->transform_8x8_mode= 0;
7254 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Start from the SPS matrices; the PPS extension may override them. */
7255 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7256 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Optional trailing fields exist iff bits remain before bit_length. */
7258 if(get_bits_count(&s->gb) < bit_length){
7259 pps->transform_8x8_mode= get_bits1(&s->gb);
7260 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7261 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7263 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7266 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7267 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7268 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7269 h->pps.chroma_qp_diff= 1;
7271 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7272 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7273 pps_id, pps->sps_id,
7274 pps->cabac ? "CABAC" : "CAVLC",
7275 pps->slice_group_count,
7276 pps->ref_count[0], pps->ref_count[1],
7277 pps->weighted_pred ? "weighted" : "",
7278 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7279 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7280 pps->constrained_intra_pred ? "CONSTR" : "",
7281 pps->redundant_pic_cnt_present ? "REDU" : "",
7282 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with the same id. */
7286 av_free(h->pps_buffers[pps_id]);
7287 h->pps_buffers[pps_id]= pps;
7295 * Call decode_slice() for each context.
7297 * @param h h264 master context
7298 * @param context_count number of contexts to execute
/**
 * Run decode_slice() for every active slice context.
 *
 * With a single context the slice is decoded directly on the master
 * context; otherwise each thread context is primed (error recognition
 * copied from avctx, error count cleared) and all contexts are dispatched
 * through avctx->execute(). Afterwards position and error state from the
 * last slice context are pulled back into the master MpegEncContext.
 *
 * NOTE(review): the inline numbering (7300, 7301, ...) jumps, so this
 * listing is missing lines — e.g. the declarations of `i`/`hx`, the
 * `else` opener before 7309, and the closing braces. Braces below do
 * not balance as shown; confirm against the full file before editing.
 */
7300 static void execute_decode_slices(H264Context *h, int context_count){
7301 MpegEncContext * const s = &h->s;
7302 AVCodecContext * const avctx= s->avctx;
/* Fast path: one context, no threading dispatch needed. */
7306 if(context_count == 1) {
7307 decode_slice(avctx, &h);
/* Multi-context path: prepare each per-thread slice context. */
7309 for(i = 1; i < context_count; i++) {
7310 hx = h->thread_context[i];
7311 hx->s.error_recognition = avctx->error_recognition;
7312 hx->s.error_count = 0;
/* Hand all slice contexts to the threading backend at once. */
7315 avctx->execute(avctx, (void *)decode_slice,
7316 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7318 /* pull back stuff from slices to master context */
7319 hx = h->thread_context[context_count - 1];
7320 s->mb_x = hx->s.mb_x;
7321 s->mb_y = hx->s.mb_y;
7322 s->dropable = hx->s.dropable;
7323 s->picture_structure = hx->s.picture_structure;
/* Accumulate per-thread error counts into the master context. */
7324 for(i = 1; i < context_count; i++)
7325 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 *
 * Handles both byte-stream (Annex B start-code) and AVC/avcC
 * (length-prefixed, h->is_avc) framing. Slice NAL units are parsed into
 * per-thread contexts and batched; execute_decode_slices() is invoked
 * whenever max_contexts slices are queued, and once more at the end.
 * SPS/PPS/SEI and other non-slice NAL units are decoded inline on the
 * master context.
 *
 * NOTE(review): the inline numbering shows many elided lines (variable
 * declarations such as buf_index/nalsize/i/err, case labels for the
 * slice/DPA/DPB/DPC/SEI/SPS/PPS branches, `break`s, error-path gotos,
 * and closing braces). Do not trust brace balance in this excerpt.
 */
7330 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7331 MpegEncContext * const s = &h->s;
7332 AVCodecContext * const avctx= s->avctx;
7334 H264Context *hx; ///< thread context
7335 int context_count = 0;
7337 h->max_contexts = avctx->thread_count;
/* Debug dump of the first input bytes (elided #if/debug guard above). */
7340 for(i=0; i<50; i++){
7341 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Outside CHUNKS mode each call starts a fresh picture. */
7344 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7345 h->current_slice = 0;
7346 if (!s->first_field)
7347 s->current_picture_ptr= NULL;
/* --- per-NAL loop (loop header elided in this listing) --- */
7359 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL length prefix. */
7361 for(i = 0; i < h->nal_length_size; i++)
7362 nalsize = (nalsize << 8) | buf[buf_index++];
7363 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7368 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7373 // start code prefix search
7374 for(; buf_index + 3 < buf_size; buf_index++){
7375 // This should always succeed in the first iteration.
7376 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7380 if(buf_index+3 >= buf_size) break;
/* Unescape the NAL payload into the current slice context. */
7385 hx = h->thread_context[context_count];
7387 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7388 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then compute the exact RBSP bit length. */
7391 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7393 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7395 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7396 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7399 if (h->is_avc && (nalsize != consumed)){
7400 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7404 buf_index += consumed;
/* Skip non-reference NAL units when hurry_up/skip_frame requests it. */
7406 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7407 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
/* Dispatch on NAL unit type (several case labels elided below). */
7412 switch(hx->nal_unit_type){
7414 if (h->nal_unit_type != NAL_IDR_SLICE) {
7415 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7418 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular (non-partitioned) slice. */
7420 init_get_bits(&hx->s.gb, ptr, bit_length);
7422 hx->inter_gb_ptr= &hx->s.gb;
7423 hx->s.data_partitioning = 0;
7425 if((err = decode_slice_header(hx, h)))
7428 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Queue the slice unless skip settings say to drop it. */
7429 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7430 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7431 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7432 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7433 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partition A: slice header + partitioning setup. */
7437 init_get_bits(&hx->s.gb, ptr, bit_length);
7439 hx->inter_gb_ptr= NULL;
7440 hx->s.data_partitioning = 1;
7442 err = decode_slice_header(hx, h);
/* Data partition B: intra residual bitstream. */
7445 init_get_bits(&hx->intra_gb, ptr, bit_length);
7446 hx->intra_gb_ptr= &hx->intra_gb;
/* Data partition C: inter residual bitstream. */
7449 init_get_bits(&hx->inter_gb, ptr, bit_length);
7450 hx->inter_gb_ptr= &hx->inter_gb;
/* Partitioned slice becomes decodable once partition A was seen. */
7452 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7453 && s->context_initialized
7455 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7456 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7457 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7458 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI message (handler call elided after init_get_bits). */
7462 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set. */
7466 init_get_bits(&s->gb, ptr, bit_length);
7467 decode_seq_parameter_set(h);
7469 if(s->flags& CODEC_FLAG_LOW_DELAY)
7472 if(avctx->has_b_frames < 2)
7473 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set. */
7476 init_get_bits(&s->gb, ptr, bit_length);
7478 decode_picture_parameter_set(h, bit_length);
7482 case NAL_END_SEQUENCE:
7483 case NAL_END_STREAM:
7484 case NAL_FILLER_DATA:
7486 case NAL_AUXILIARY_SLICE:
7489 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush the queued slice contexts once the batch is full. */
7492 if(context_count == h->max_contexts) {
7493 execute_decode_slices(h, context_count);
7498 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7500 /* Slice could not be decoded in parallel mode, copy down
7501 * NAL unit stuff to context 0 and restart. Note that
7502 * rbsp_buffer is not transferred, but since we no longer
7503 * run in parallel mode this should not be an issue. */
7504 h->nal_unit_type = hx->nal_unit_type;
7505 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any slices still pending after the NAL loop ends. */
7511 execute_decode_slices(h, context_count);
7516 * returns the number of bytes consumed for building the current frame
/**
 * Clamp/sanitize the byte position consumed while building the current
 * frame, so callers always make forward progress.
 *
 * @param s   codec context (unused in the visible lines)
 * @param pos byte position reached in the input buffer
 * @param buf_size total input buffer size
 *
 * NOTE(review): the `return pos;` and closing brace (original lines
 * after 7520) are elided from this listing.
 */
7518 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7519 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7520 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level frame decode entry point (AVCodec.decode callback).
 *
 * Responsibilities visible in this excerpt:
 *  - flush remaining delayed pictures when buf_size == 0 (end of stream);
 *  - on first call for AVC-framed input, parse SPS/PPS out of the avcC
 *    extradata and derive nal_length_size;
 *  - decode the NAL units of this packet via decode_nal_units();
 *  - once a full picture is complete, run reference-picture marking,
 *    derive interlacing/top_field_first flags (SEI pic_struct preferred),
 *    insert the picture into the delayed-output queue, and emit the
 *    lowest-POC picture in display order when reordering permits.
 *
 * @param data_size set to sizeof(AVFrame) when *pict is filled, else 0
 * @return number of input bytes consumed (via get_consumed_bytes())
 *
 * NOTE(review): the inline numbering jumps frequently — declarations
 * (buf_index, out, out_idx, ...), else-branches, `break`s, and closing
 * braces are elided, so braces do not balance in this listing.
 */
7525 static int decode_frame(AVCodecContext *avctx,
7526 void *data, int *data_size,
7527 const uint8_t *buf, int buf_size)
7529 H264Context *h = avctx->priv_data;
7530 MpegEncContext *s = &h->s;
7531 AVFrame *pict = data;
7534 s->flags= avctx->flags;
7535 s->flags2= avctx->flags2;
7537 /* end of stream, output what is still in the buffers */
7538 if (buf_size == 0) {
7542 //FIXME factorize this with the output code below
/* Pick the delayed picture with the lowest POC for output. */
7543 out = h->delayed_pic[0];
7545 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7546 if(h->delayed_pic[i]->poc < out->poc){
7547 out = h->delayed_pic[i];
/* Compact the delayed-picture queue after removal. */
7551 for(i=out_idx; h->delayed_pic[i]; i++)
7552 h->delayed_pic[i] = h->delayed_pic[i+1];
7555 *data_size = sizeof(AVFrame);
7556 *pict= *(AVFrame*)out;
/* First packet of AVC-framed input: parse SPS/PPS from avcC extradata. */
7562 if(h->is_avc && !h->got_avcC) {
7563 int i, cnt, nalsize;
7564 unsigned char *p = avctx->extradata;
7565 if(avctx->extradata_size < 7) {
7566 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7570 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7573 /* sps and pps in the avcC always have length coded with 2 bytes,
7574 so put a fake nal_length_size = 2 while parsing them */
7575 h->nal_length_size = 2;
7576 // Decode sps from avcC
7577 cnt = *(p+5) & 0x1f; // Number of sps
7579 for (i = 0; i < cnt; i++) {
7580 nalsize = AV_RB16(p) + 2;
7581 if(decode_nal_units(h, p, nalsize) < 0) {
7582 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7587 // Decode pps from avcC
7588 cnt = *(p++); // Number of pps
7589 for (i = 0; i < cnt; i++) {
7590 nalsize = AV_RB16(p) + 2;
7591 if(decode_nal_units(h, p, nalsize) != nalsize) {
7592 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7597 // Now store right nal length size, that will be use to parse all other nals
7598 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7599 // Do not reparse avcC
/* Annex-B extradata (non-avcC): decode it as plain NAL units once. */
7603 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7604 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7609 buf_index=decode_nal_units(h, buf, buf_size);
7613 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7614 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7615 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Picture completed (or non-CHUNKS mode): finish and maybe output it. */
7619 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7620 Picture *out = s->current_picture_ptr;
7621 Picture *cur = s->current_picture_ptr;
7622 int i, pics, cross_idr, out_of_order, out_idx;
7626 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7627 s->current_picture_ptr->pict_type= s->pict_type;
/* Apply memory-management control ops and roll POC state forward. */
7630 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7631 h->prev_poc_msb= h->poc_msb;
7632 h->prev_poc_lsb= h->poc_lsb;
7634 h->prev_frame_num_offset= h->frame_num_offset;
7635 h->prev_frame_num= h->frame_num;
7638 * FIXME: Error handling code does not seem to support interlaced
7639 * when slices span multiple rows
7640 * The ff_er_add_slice calls don't work right for bottom
7641 * fields; they cause massive erroneous error concealing
7642 * Error marking covers both fields (top and bottom).
7643 * This causes a mismatched s->error_count
7644 * and a bad error table. Further, the error count goes to
7645 * INT_MAX when called for bottom field, because mb_y is
7646 * past end by one (callers fault) and resync_mb_y != 0
7647 * causes problems for the first MB line, too.
7654 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7655 /* Wait for second field. */
7659 cur->repeat_pict = 0;
7661 /* Signal interlacing information externally. */
7662 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7663 if(h->sps.pic_struct_present_flag){
7664 switch (h->sei_pic_struct)
7666 case SEI_PIC_STRUCT_FRAME:
7667 cur->interlaced_frame = 0;
7669 case SEI_PIC_STRUCT_TOP_FIELD:
7670 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7671 case SEI_PIC_STRUCT_TOP_BOTTOM:
7672 case SEI_PIC_STRUCT_BOTTOM_TOP:
7673 cur->interlaced_frame = 1;
7675 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7676 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7677 // Signal the possibility of telecined film externally (pic_struct 5,6)
7678 // From these hints, let the applications decide if they apply deinterlacing.
7679 cur->repeat_pict = 1;
7680 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7682 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7683 // Force progressive here, as doubling interlaced frame is a bad idea.
7684 cur->interlaced_frame = 0;
7685 cur->repeat_pict = 2;
7687 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7688 cur->interlaced_frame = 0;
7689 cur->repeat_pict = 4;
7693 /* Derive interlacing flag from used decoding process. */
7694 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7697 if (cur->field_poc[0] != cur->field_poc[1]){
7698 /* Derive top_field_first from field pocs. */
7699 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7701 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7702 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7703 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7704 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7705 cur->top_field_first = 1;
7707 cur->top_field_first = 0;
7709 /* Most likely progressive */
7710 cur->top_field_first = 0;
7714 //FIXME do something with unavailable reference frames
7716 /* Sort B-frames into display order */
7718 if(h->sps.bitstream_restriction_flag
7719 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7720 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7724 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7725 && !h->sps.bitstream_restriction_flag){
7726 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* Append the new picture to the delayed-output queue. */
7731 while(h->delayed_pic[pics]) pics++;
7733 assert(pics <= MAX_DELAYED_PIC_COUNT);
7735 h->delayed_pic[pics++] = cur;
7736 if(cur->reference == 0)
7737 cur->reference = DELAYED_PIC_REF;
/* Choose the lowest-POC delayed picture as the output candidate. */
7739 out = h->delayed_pic[0];
7741 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7742 if(h->delayed_pic[i]->poc < out->poc){
7743 out = h->delayed_pic[i];
7746 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7748 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames when reordering evidence says the delay is too small. */
7750 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7752 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7754 ((!cross_idr && out->poc > h->outputed_poc + 2)
7755 || cur->pict_type == FF_B_TYPE)))
7758 s->avctx->has_b_frames++;
/* Remove the candidate from the queue when it is due (or forced out). */
7761 if(out_of_order || pics > s->avctx->has_b_frames){
7762 out->reference &= ~DELAYED_PIC_REF;
7763 for(i=out_idx; h->delayed_pic[i]; i++)
7764 h->delayed_pic[i] = h->delayed_pic[i+1];
7766 if(!out_of_order && pics > s->avctx->has_b_frames){
7767 *data_size = sizeof(AVFrame);
7769 h->outputed_poc = out->poc;
7770 *pict= *(AVFrame*)out;
7772 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7777 assert(pict->data[0] || !*data_size);
7778 ff_print_debug_info(s, pict);
7779 //printf("out %d\n", (int)pict->data[0]);
7782 /* Return the Picture timestamp as the frame number */
7783 /* we subtract 1 because it is added on utils.c */
7784 avctx->frame_number = s->picture_number - 1;
7786 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: entries 0-2 are the top-left / top / top-right neighbours
 * (valid only when in the same slice), entry 3 is the left neighbour,
 * and entries 4/5 are hard-coded (see FIXMEs).
 *
 * NOTE(review): lines 7792-7793 and 7797-7801 are elided here; in the
 * full file they presumably guard entries 0-2 for the first MB row —
 * confirm before relying on this excerpt.
 */
7789 static inline void fill_mb_avail(H264Context *h){
7790 MpegEncContext * const s = &h->s;
7791 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Top-row neighbours: same-slice check via slice_table lookups. */
7794 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7795 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7796 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* Left neighbour: requires not being in the first column. */
7802 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7803 h->mb_avail[4]= 1; //FIXME move out
7804 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self-test harness (compiled only under a test define in the
 * full file). It exercises, in order: unsigned/signed Exp-Golomb
 * write+read round-trips, the 4x4 (I)DCT error bound, the quantizer,
 * and the NAL escape/unescape layer (encode_nal()/decode_nal()).
 *
 * NOTE(review): the enclosing function header (likely `int main(void)`)
 * and many interior lines are elided from this listing — the inline
 * numbering jumps throughout. Treat this fragment as reference only.
 */
7812 #define SIZE (COUNT*40)
7818 // int int_temp[10000];
7820 AVCodecContext avctx;
7822 dsputil_init(&dsp, &avctx);
/* --- Exp-Golomb, unsigned: write COUNT codes then read them back --- */
7824 init_put_bits(&pb, temp, SIZE);
7825 printf("testing unsigned exp golomb\n");
7826 for(i=0; i<COUNT; i++){
7828 set_ue_golomb(&pb, i);
7829 STOP_TIMER("set_ue_golomb");
7831 flush_put_bits(&pb);
7833 init_get_bits(&gb, temp, 8*SIZE);
7834 for(i=0; i<COUNT; i++){
7837 s= show_bits(&gb, 24);
7840 j= get_ue_golomb(&gb);
7842 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7845 STOP_TIMER("get_ue_golomb");
/* --- Exp-Golomb, signed: same round-trip centred on zero --- */
7849 init_put_bits(&pb, temp, SIZE);
7850 printf("testing signed exp golomb\n");
7851 for(i=0; i<COUNT; i++){
7853 set_se_golomb(&pb, i - COUNT/2);
7854 STOP_TIMER("set_se_golomb");
7856 flush_put_bits(&pb);
7858 init_get_bits(&gb, temp, 8*SIZE);
7859 for(i=0; i<COUNT; i++){
7862 s= show_bits(&gb, 24);
7865 j= get_se_golomb(&gb);
7866 if(j != i - COUNT/2){
7867 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7870 STOP_TIMER("get_se_golomb");
/* --- forward DCT -> crude dequant -> IDCT; accumulate error stats --- */
7874 printf("testing 4x4 (I)DCT\n");
7877 uint8_t src[16], ref[16];
7878 uint64_t error= 0, max_error=0;
7880 for(i=0; i<COUNT; i++){
7882 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7883 for(j=0; j<16; j++){
7884 ref[j]= random()%255;
7885 src[j]= random()%255;
7888 h264_diff_dct_c(block, src, ref, 4);
7891 for(j=0; j<16; j++){
7892 // printf("%d ", block[j]);
7893 block[j]= block[j]*4;
7894 if(j&1) block[j]= (block[j]*4 + 2)/5;
7895 if(j&4) block[j]= (block[j]*4 + 2)/5;
7899 s->dsp.h264_idct_add(ref, block, 4);
7900 /* for(j=0; j<16; j++){
7901 printf("%d ", ref[j]);
7905 for(j=0; j<16; j++){
7906 int diff= FFABS(src[j] - ref[j]);
7909 max_error= FFMAX(max_error, diff);
7912 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values --- */
7913 printf("testing quantizer\n");
7914 for(qp=0; qp<52; qp++){
7916 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: escape a random bitstream and verify the round-trip --- */
7919 printf("Testing NAL layer\n");
7921 uint8_t bitstream[COUNT];
7922 uint8_t nal[COUNT*2];
7924 memset(&h, 0, sizeof(H264Context));
7926 for(i=0; i<COUNT; i++){
7934 for(j=0; j<COUNT; j++){
7935 bitstream[j]= (random() % 255) + 1;
/* Sprinkle in zero bytes so the escaping logic is actually exercised. */
7938 for(j=0; j<zeros; j++){
7939 int pos= random() % COUNT;
7940 while(bitstream[pos] == 0){
7949 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7951 printf("encoding failed\n");
7955 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7959 if(out_length != COUNT){
7960 printf("incorrect length %d %d\n", out_length, COUNT);
7964 if(consumed != nal_length){
7965 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7969 if(memcmp(bitstream, out, COUNT)){
7970 printf("mismatch\n");
7976 printf("Testing RBSP\n");
/**
 * Codec close callback: release all decoder-owned allocations —
 * both RBSP unescape buffers, the per-picture tables (free_tables()),
 * and every stored SPS/PPS. av_freep() NULLs the freed pointers, so a
 * double close is safe for these fields.
 *
 * NOTE(review): the loop variable declaration, the MPV/common cleanup
 * call, the `return 0;` and the closing brace are elided from this
 * listing (numbering jumps 7998 -> 8002).
 */
7984 static av_cold int decode_end(AVCodecContext *avctx)
7986 H264Context *h = avctx->priv_data;
7987 MpegEncContext *s = &h->s;
7990 av_freep(&h->rbsp_buffer[0]);
7991 av_freep(&h->rbsp_buffer[1]);
7992 free_tables(h); //FIXME cleanup init stuff perhaps
/* Release all cached parameter sets. */
7994 for(i = 0; i < MAX_SPS_COUNT; i++)
7995 av_freep(h->sps_buffers + i);
7997 for(i = 0; i < MAX_PPS_COUNT; i++)
7998 av_freep(h->pps_buffers + i);
8002 // memset(h, 0, sizeof(H264Context));
8008 AVCodec h264_decoder = {
8012 sizeof(H264Context),
8017 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8019 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),