2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC code tables: VLC readers plus their statically sized backing
 * storage. The *_size constants record the entry capacity of each table
 * and must match the dimensions of the corresponding arrays above them. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
/* NOTE(review): the VLC object for run7 appears to be declared on a line
 * missing from this excerpt; only its backing table is visible here. */
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations for helpers defined later in this file. */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into one uint32_t such that, when the word is
 * stored to memory, 'a' occupies the first int16 slot and 'b' the second
 * on either endianness.
 * NOTE(review): the #else/#endif and closing brace of this function lie
 * outside this excerpt. */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* rem6[qp] == qp % 6 for the H.264 QP range 0..51; table lookup instead
 * of a runtime modulo. */
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* div6[qp] == qp / 6 for the H.264 QP range 0..51; companion to rem6. */
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
97 static const int left_block_options[4][8]={
/* Loads the per-macroblock prediction caches (intra4x4 modes,
 * non_zero_count, mv/ref/mvd, CBP, B-direct flags) from the neighbouring
 * macroblocks, including the MBAFF frame/field neighbour remapping.
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the loop filter,
 *                    which relaxes the same-slice neighbour requirement
 * NOTE(review): this excerpt is missing interleaved source lines (several
 * closing braces, else branches and loop headers); the code below is
 * documented as-is and is not complete on its own. */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* default (non-MBAFF) neighbour addresses in raster order. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF: re-derive neighbour addresses from the enclosing MB pair,
 * depending on which of the pair (top/bottom) we are and whether each
 * neighbouring pair is frame- or field-coded. */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
/* publish the resolved neighbour addresses for other code to reuse. */
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* deblocking path: a neighbour is usable whenever its slice_table entry
 * is below the 0xFFFF sentinel (i.e. it belongs to some decoded slice). */
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* decode path: neighbours count only if they belong to the same slice. */
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra prediction: compute the per-4x4-block sample-availability
 * bitmasks, starting from "everything available" and clearing bits for
 * each unusable neighbour. type_mask additionally rejects inter
 * neighbours when constrained_intra_pred is set. */
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
/* cache the intra4x4 prediction modes of the top/left neighbours. */
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* non_zero_count cache: copy the counts from the top neighbour row. */
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* left-neighbour nnz, via the MBAFF-aware left_block index remapping. */
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* cache the neighbours' coded block patterns. */
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter prediction: fill the mv/ref caches from the four neighbours,
 * using LIST_NOT_USED / PART_NOT_AVAILABLE markers where a neighbour
 * exists but contributes nothing. */
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the neighbours' direct-mode flags for CABAC. */
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of cached neighbour refs/mvs via the
 * MAP_F2F macro, instantiated once per direction.
 * NOTE(review): the surrounding #define MAP_MVS / #undef scaffolding is
 * partially outside this excerpt. */
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copies the intra4x4 prediction modes of the current macroblock's
 * bottom and right edge blocks from the prediction cache back into the
 * per-macroblock intra4x4_pred_mode array, where subsequent macroblocks
 * will read them as top/left neighbours. */
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
/* remap tables: top[mode]/left[mode] give the substitute mode when the
 * corresponding neighbour samples are missing, or -1 for illegal modes. */
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* fix up the top row when the top samples are unavailable. */
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* fix up the left column; each row has its own availability bit. */
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
/* substitute modes (16x16/chroma 8x8 variants) when a neighbour is
 * unavailable; -1 marks modes that become illegal. */
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 if((h->left_samples_available&0x8080) != 0x8080){
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
/* only one of the two left halves is available: pick the special
 * half-left DC mode matching which half is usable. */
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
/* neighbour modes from the cache; negative values mean "unavailable". */
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
/* per the spec the prediction is the minimum of the two neighbour modes. */
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
/* either neighbour unavailable -> fall back to DC prediction. */
652 if(min<0) return DC_PRED;
/* Copies the cached non-zero coefficient counts of the current
 * macroblock's bottom/right edge blocks back into the per-macroblock
 * non_zero_count array for use by later macroblocks as neighbours. */
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma blocks (indices 7..12 mirror the cache's chroma region). */
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
/* neighbour counts from the cache. */
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
/* NOTE(review): the line combining left+top into i is outside this
 * excerpt; values >= 64 bypass the rounding halving below. */
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Selects the diagonal neighbour C used for motion vector prediction:
 * normally the top-right block, falling back to top-left when top-right
 * is unavailable, with extra MBAFF remapping/rescaling special cases.
 * Points *C at the chosen mv and returns its reference index.
 * NOTE(review): several interleaved source lines are missing from this
 * excerpt. */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scratch cache slot scan8[0]-2 holds the synthesized C vector. */
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* field MB reading a frame neighbour: halve ref, double mv_y (and vice
 * versa below). */
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF path: use top-right if available, else fall back to
 * top-left. */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
/* A = left neighbour, B = top neighbour, C = diagonal (via
 * fetch_diagonal_mv). */
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture. */
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
/* more than one match -> component-wise median of the three mvs. */
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
/* exactly one neighbour matches -> take its mv directly. */
786 }else if(top_ref==ref){
/* no match: left-only fallback, otherwise the median. */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top 16x8 partition: prefer the top neighbour's mv when it uses the
 * same reference. */
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom 16x8 partition: prefer the left neighbour's mv instead. */
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* neither directional shortcut applied: use the generic predictor. */
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left 8x16 partition: prefer the left neighbour's mv when it uses the
 * same reference. */
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* right 8x16 partition: prefer the diagonal neighbour. */
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
/* neither directional shortcut applied: use the generic predictor. */
875 pred_motion(h, n, 2, list, ref, mx, my);
/* Predicts the motion vector for a P-skip macroblock: zero when either
 * neighbour is unavailable or a ref-0 neighbour has a zero mv, otherwise
 * the generic median prediction on list 0 / ref 0. */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct-mode distance scale factor for reference
 * i of list 0, from the clipped POC distances tb (cur - ref0) and
 * td (ref1 - ref0).
 * NOTE(review): the body of the td==0 / long_ref branch lies outside
 * this excerpt. */
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
/* tx = round(16384 / td); combined with tb below this implements the
 * spec's DistScaleFactor derivation. */
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Precomputes the temporal-direct distance scale factors for every
 * list-0 reference (and per-field variants when needed) into
 * h->dist_scale_factor / h->dist_scale_factor_field. */
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
/* per-field table; field references live at indices 16+ (see the i+16
 * argument). */
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
/* frame-level table. */
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* Builds map[list][old_ref] translating reference indices of the
 * co-located picture (ref_list[1][0]) into indices of the current
 * reference list, by matching 4*frame_num + reference keys; used by
 * temporal direct mode. mbafi selects the MBAFF field-pair variant
 * (entries offset by 16). */
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
/* find the current-list reference with the same poc key. */
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Per-slice setup for B-slice direct prediction: record the current
 * picture's reference counts/POC codes for later colocated lookup, then
 * precompute the colocated->list0 maps (frame and per-field variants).
 * NOTE(review): variable declarations (list, j, field) and the early return
 * after the pict_type test are elided in this listing.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int sidx= (s->picture_structure&1)^1;   /* structure slot: 0 top/frame, 1 bottom */
    int ref1sidx= (ref1->reference&1)^1;    /* same, for the colocated picture */

    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            /* encode 4*frame_num + parity so fill_colmap() can match it */
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);

    if(s->picture_structure == PICT_FRAME){
        /* a frame covers both parities -> duplicate into the other slot */
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));

    cur->mbaff= FRAME_MBAFF;

    /* maps are only needed for temporal direct in B slices */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derive motion vectors and reference indices for a B_DIRECT macroblock
 * (or for the direct 8x8 partitions of a B_8x8 macroblock, is_b8x8 case),
 * writing the results into h->mv_cache / h->ref_cache and updating *mb_type
 * and h->sub_mb_type[]. Handles both spatial (h->direct_spatial_mv_pred)
 * and temporal direct prediction, including the field/frame (MBAFF)
 * colocated-macroblock selection.
 *
 * NOTE(review): this listing elides many interior lines (declarations of
 * mb_type_col[], ref[], mv[][], i8/x8/y8, ref_offset, scale, several
 * braces/else lines and continue statements); comments below annotate only
 * what is visible.
 */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = h->b8_stride;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy;
    const int16_t (*l1mv0)[2], (*l1mv1)[2];   /* colocated MVs, lists 0/1 */
    const int8_t *l1ref0, *l1ref1;            /* colocated ref indices */
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* ---- select the colocated macroblock (frame/field adjustments) ---- */
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
            /* pick the colocated field whose POC is closest to the current POC */
            int cur_poc = s->current_picture_ptr->poc;
            int *col_poc = h->ref_list[1]->field_poc;
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
            mb_xy += s->mb_stride*fieldoff;
    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){                     // AFL /FL -> AFR/FR
            /* one field MB maps onto a vertical pair of frame MBs */
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
        }else{                                           // AFR/FR  -> AFR/FR
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
                /* FIXME save sub mb types from previous frames (or derive from MVs)
                 * so we know exactly what block size to use */
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;

    /* colocated MV/ref pointers for the selected macroblock */
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
        /* advance to the second (bottom) half for odd field parity */
        l1ref0 += h->b8_stride;
        l1ref1 += h->b8_stride;
        l1mv0  +=  2*b4_stride;
        l1mv1  +=  2*b4_stride;

    if(h->direct_spatial_mv_pred){
        /* ======================= spatial direct ======================= */
        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == PART_NOT_AVAILABLE)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            /* unsigned compare makes negative (unavailable) refs largest */
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);

        if(ref[0] < 0 && ref[1] < 0){
            /* no usable neighbor refs -> zero refs and zero MVs */
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
            for(list=0; list<2; list++){
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                    mv[list][0] = mv[list][1] = 0;

            *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* current/colocated frame-field mismatch: per-8x8 handling */
            for(i8=0; i8<4; i8++){
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* colocated is "moving slowly" (|mv| <= 1) -> force zero MV */
                if(!IS_INTRA(mb_type_col[y8])
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                        a= pack16to32(mv[0][0],mv[0][1]);
                        b= pack16to32(mv[1][0],mv[1][1]);
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
        }else if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            if(!IS_INTRA(mb_type_col[0])
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){  /* old-x264 bug workaround */
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* col_zero_flag per 4x4 when the colocated 8x8 is not SUB_8X8 */
                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
                                                || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
                                                    && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;

        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            /* field MB in an MBAFF frame -> use the per-field tables */
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
            dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* FIXME assumes direct_8x8_inference == 1 */
            int y_shift  = 2*!IS_INTERLACED(*mb_type);

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[y8])){
                    /* intra colocated -> zero refs/MVs */
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);

                ref0 = l1ref0[x8 + y8*b8_stride];
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                    int my_col = (mv_col[1]<<y_shift)/2;    /* rescale vertical MV to field/frame units */
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col    + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col[0])){
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                /* list0 MV = scaled colocated MV; list1 MV = list0 - colocated */
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[0])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);

                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
                    ref0 = map_col_to_list0[0][ref0];
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                        *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                            pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copy the per-macroblock MV/ref caches (h->mv_cache, h->ref_cache,
 * h->mvd_cache) back into the frame-wide tables of the current picture,
 * plus the CABAC mvd and B-direct flag tables.
 * NOTE(review): loop headers (the per-row y loops), a list1 skip/continue
 * and several braces are elided in this listing.
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;   /* 4x4-block coords */
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;  /* 8x8-block coords */

    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        if(!USES_LIST(mb_type, list))
            /* copy each 4x4 MV row as two 64-bit (2-MV) chunks */
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        if( h->pps.cabac ) {
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
                *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];

            /* one ref index per 8x8 block */
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            /* remember which 8x8 partitions were B_DIRECT for CABAC context */
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Decodes a network abstraction layer unit.
 * Parses the NAL header into h->nal_ref_idc / h->nal_unit_type, then strips
 * the 00 00 03 emulation-prevention bytes into h->rbsp_buffer when present.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
 * @returns decoded bytes, might be src+1 if no escapes
 */
static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    // src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;   /* 2-bit importance of this NAL */
    h->nal_unit_type= src[0]&0x1F;

    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
    /* scan for the first 00 00 (potential escape or start code); stepping by
     * 2 is safe because an escape always contains a zero at an even or odd
     * position that this probe will land on */
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
                /* startcode, so we must be past the end */
    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
    dst= h->rbsp_buffer[bufidx];

//printf("decoding esc\n");
        //remove escapes (very rare 1:2^22)
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
            if(src[si+2]==3){ //escape
            }else //next start code

        dst[di++]= src[si++];

    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
/**
 * identifies the exact end of the bitstream
 * @return the length of the trailing, or 0 if damaged
 * NOTE(review): the body after the trace (locating the rbsp_stop_one_bit)
 * is elided in this listing.
 */
static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
    tprintf(h->s.avctx, "rbsp trailing %X\n", v);
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * 4x4 Hadamard butterflies are applied first down columns (results kept in
 * temp[]), then across rows, with the dequant (qmul, Q8 rounding) merged
 * into the second pass.
 * @param qp quantization parameter
 * NOTE(review): the stride constant, loop headers (for i over 0..3) and the
 * temp[] store lines of the first pass are elided in this listing.
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

//memset(block, 64, 2*256);
        const int offset= y_offset[i];
        /* vertical butterflies over the 4 DC rows of this column group */
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];
        const int offset= x_offset[i];
        /* horizontal butterflies on the intermediate values */
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
/**
 * DCT transforms the 16 dc values (forward direction, encoder side).
 * Same two-pass Hadamard structure as the inverse above, with a final >>1
 * instead of dequantization.
 * @param qp quantization parameter ??? FIXME
 * NOTE(review): loop headers and the temp[] stores of the first pass are
 * elided in this listing.
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse transform + dequant of the chroma DC coefficients.
 * The four DC values sit at (0,0),(0,1),(1,0),(1,1) within the chroma
 * block array (stride 32, xStride 16).
 * NOTE(review): the declarations of a..e and the intermediate butterfly
 * (e=a-b; a=a+b; b=c-d; c=c+d;) are elided in this listing.
 */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    const int stride= 16*2;
    const int xStride= 16;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* 2x2 butterfly result, dequantized with qmul in Q7 */
    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * 2x2 forward transform of the chroma DC coefficients (encoder side).
 * Same layout and butterfly as chroma_dc_dequant_idct_c, without scaling.
 * NOTE(review): the declarations of a..e and the intermediate butterfly
 * lines are elided in this listing.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
/**
 * gets the chroma qp.
 * @param t      chroma plane index (selects the per-plane QP table)
 * @param qscale luma quantizer to map through the PPS chroma QP table
 */
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
    return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensate one partition from one reference picture (one list).
 * Reads the MV from h->mv_cache[list][scan8[n]], performs quarter-pel luma
 * and eighth-pel chroma interpolation via qpix_op/chroma_op, with edge
 * emulation when the MV points outside the picture.
 *
 * @param n      partition index (scan8 order)
 * @param square nonzero when one square qpix_op call covers the partition
 * @param delta  byte offset to the second half when !square
 * NOTE(review): some lines (the emu flag declaration/branches, an MB_FIELD
 * chroma-parity guard, the second chroma emu check) are elided here.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; /* quarter-pel x */
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; /* quarter-pel y */
    const int luma_xy= (mx&3) + ((my&3)<<2);      /* sub-pel phase selects the qpel function */
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames

    /* sub-pel interpolation needs 2 extra pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

    if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;

        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);

    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: list0 prediction with
 * the "put" functions, then (for bi-prediction) list1 averaged in with the
 * "avg" functions.
 * NOTE(review): the list0/list1 if-guards, the qpix_op= qpix_avg switch and
 * closing braces are elided in this listing.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;          /* first prediction writes ("put") */
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance dest pointers to this partition (x/y offsets are in 8-pel units) */
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);

        /* second prediction averages into the first */
        chroma_op= chroma_avg;

        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);
/**
 * Weighted (explicit or implicit) motion compensation for one partition.
 * Bi-prediction: both references are interpolated (list1 into a scratchpad)
 * and combined with the biweight functions; uni-prediction: one reference is
 * interpolated in place and the per-list weight/offset applied.
 * NOTE(review): the if(list0 && list1) guard, implicit-weight else branch
 * markers and closing braces are elided in this listing.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weighting: table lookup, log2_denom fixed at 5 */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y ,  tmp_y , h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
            /* explicit weighting: per-list weights and offsets */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);

        /* uni-prediction with explicit weights */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatch one partition's motion compensation to the weighted path
 * (explicit weights, or implicit weights with a non-trivial bi weight)
 * or the plain put/avg path.
 * NOTE(review): the 'else' before the mc_part_std call is elided in this
 * listing.
 */
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1){
    /* implicit weight of exactly 32 is a plain average -> std path is equivalent */
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Prefetch reference-picture pixels for the estimated position of the MV
 * 4 macroblocks ahead, to hide memory latency.
 * NOTE(review): the guard checking that refn is a valid (>=0) reference and
 * its braces are elided in this listing.
 */
static inline void prefetch_motion(H264Context *h, int list){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        /* cb and cr are contiguous in memory -> one prefetch covers both */
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Perform all inter prediction for the current macroblock: walks the
 * macroblock partitioning (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions)
 * and calls mc_part() for each partition with the matching qpel/chroma
 * function sizes and weight function indices.
 * NOTE(review): the 8x8 loop header, sub-partition index setup (n = 4*i)
 * and inner 4x4 loop header are elided in this listing.
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                &weight_op[0], &weight_avg[0],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* delta = 8 rows: second qpix call handles the lower half */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
        assert(IS_8X8(mb_type));
            const int sub_mb_type= h->sub_mb_type[i];
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                assert(IS_SUB_4X4(sub_mb_type));
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));

    prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, total_zeros,
 * run) into the static, pre-sized arrays declared at the top of the file.
 * Guarded by 'done' so repeated decoder instances share the tables.
 * NOTE(review): the if(!done) guard, loop headers (for i) and the offset
 * declaration for the packed coeff_token tables are elided in this listing.
 */
static av_cold void decode_init_vlc(void){
    static int done = 0;

        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

            /* the four coeff_token tables are packed into one static array */
            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                     &coeff_token_len [i][0], 1, 1,
                     &coeff_token_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
            offset += coeff_token_vlc_tables_size[i];
        /*
         * This is a one time safety check to make sure that
         * the packed static coeff_token_vlc table sizes
         * were initialized correctly.
         */
        assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));

            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
            init_vlc(&chroma_dc_total_zeros_vlc[i],
                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        for(i=0; i<15; i++){
            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
            init_vlc(&total_zeros_vlc[i],
                     TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
            run_vlc[i].table = run_vlc_tables[i];
            run_vlc[i].table_allocated = run_vlc_tables_size;
            init_vlc(&run_vlc[i],
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
        run7_vlc.table = run7_vlc_table,
        run7_vlc.table_allocated = run7_vlc_table_size;
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);
/**
 * Release every per-context table allocated by alloc_tables() plus the
 * per-thread top-border and OBMC scratch buffers. av_freep() NULLs the
 * pointers, so a later re-allocation is safe.
 * NOTE(review): extract omits some lines (declarations of i/hx and
 * closing braces); code left byte-identical.
 */
1942 static void free_tables(H264Context *h){
1945 av_freep(&h->intra4x4_pred_mode);
1946 av_freep(&h->chroma_pred_mode_table);
1947 av_freep(&h->cbp_table);
1948 av_freep(&h->mvd_table[0]);
1949 av_freep(&h->mvd_table[1]);
1950 av_freep(&h->direct_table);
1951 av_freep(&h->non_zero_count);
1952 av_freep(&h->slice_table_base);
/* slice_table is an offset view into slice_table_base — clear it too */
1953 h->slice_table= NULL;
1955 av_freep(&h->mb2b_xy);
1956 av_freep(&h->mb2b8_xy);
/* per-thread contexts own their own border/scratch buffers */
1958 for(i = 0; i < h->s.avctx->thread_count; i++) {
1959 hx = h->thread_context[i];
1961 av_freep(&hx->top_borders[1]);
1962 av_freep(&hx->top_borders[0]);
1963 av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization coefficient tables (one per qp 0..51)
 * from the PPS scaling matrices. If both 8x8 scaling matrices are equal
 * the second table aliases the first to save work. The layout is
 * transposed when a non-C IDCT implementation is in use.
 * NOTE(review): extract omits lines (declarations of i/q/x/idx, inner
 * loop headers, closing braces); code left byte-identical.
 */
1967 static void init_dequant8_coeff_table(H264Context *h){
1969 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1970 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1971 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1973 for(i=0; i<2; i++ ){
/* identical scaling matrices -> share one buffer and skip recompute */
1974 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1975 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1979 for(q=0; q<52; q++){
1980 int shift = div6[q];
/* (x>>3)|((x&7)<<3) swaps row/column for the transposed layout */
1983 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1984 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1985 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the six 4x4 dequantization tables (intra/inter x Y/Cb/Cr) for
 * every qp 0..51 from the PPS 4x4 scaling matrices, sharing buffers
 * between indices whose scaling matrices compare equal.
 * NOTE(review): extract omits lines (declarations of i/j/q/x/idx, inner
 * loop headers, closing braces); code left byte-identical.
 */
1990 static void init_dequant4_coeff_table(H264Context *h){
1992 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1993 for(i=0; i<6; i++ ){
1994 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* alias an earlier buffer when the scaling matrices are identical */
1996 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1997 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2004 for(q=0; q<52; q++){
2005 int shift = div6[q] + 2;
/* (x>>2)|((x<<2)&0xF) transposes the 4x4 index for non-C IDCT layout */
2008 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2009 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2010 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build the 4x4 — and, when the PPS enables 8x8 transforms, the
 * 8x8 — dequant tables. With lossless transform bypass, qp 0 entries
 * are forced to the identity scale (1<<6).
 * NOTE(review): extract omits lines (loop headers for i/x and closing
 * braces); code left byte-identical.
 */
2015 static void init_dequant_tables(H264Context *h){
2017 init_dequant4_coeff_table(h);
2018 if(h->pps.transform_8x8_mode)
2019 init_dequant8_coeff_table(h);
2020 if(h->sps.transform_bypass){
/* identity dequant for the lossless qp==0 path */
2023 h->dequant4_coeff[i][0][x] = 1<<6;
2024 if(h->pps.transform_8x8_mode)
2027 h->dequant8_coeff[i][0][x] = 1<<6;
2034 * needs width/height
/**
 * Allocate all per-stream macroblock tables sized from mb_stride and
 * mb_height, and precompute the mb -> motion-block index maps.
 * Requires width/height to be known. Returns 0 on success; on
 * allocation failure the CHECKED_ALLOCZ macro presumably jumps to a
 * cleanup label outside this extract — TODO confirm.
 * NOTE(review): extract omits lines (declarations of x/y, fail label,
 * return statements, closing braces); code left byte-identical.
 */
2036 static int alloc_tables(H264Context *h){
2037 MpegEncContext * const s = &h->s;
/* one extra row so neighbour accesses above row 0 stay in bounds */
2038 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2041 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2043 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2044 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2045 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2047 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2048 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2049 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2050 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table points past the guard row/column */
2052 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2053 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2055 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2056 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map each mb address to its 4x4 (b) and 8x8 (b8) block coordinates */
2057 for(y=0; y<s->mb_height; y++){
2058 for(x=0; x<s->mb_width; x++){
2059 const int mb_xy= x + y*s->mb_stride;
2060 const int b_xy = 4*x + 4*y*h->b_stride;
2061 const int b8_xy= 2*x + 2*y*h->b8_stride;
2063 h->mb2b_xy [mb_xy]= b_xy;
2064 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is allocated lazily in frame_start() once linesize is known */
2068 s->obmc_scratchpad = NULL;
2070 if(!h->dequant4_coeff[0])
2071 init_dequant_tables(h);
2080 * Mimic alloc_tables(), but for every context thread.
/**
 * Share the big per-stream tables of src with a secondary thread
 * context dst (shallow pointer copies — src remains the owner; see
 * free_tables(), which frees shared tables only once). Per-thread
 * buffers (obmc_scratchpad) are reset and allocated later.
 * NOTE(review): extract may omit trailing lines of this function;
 * visible code left byte-identical.
 */
2082 static void clone_tables(H264Context *dst, H264Context *src){
2083 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2084 dst->non_zero_count = src->non_zero_count;
2085 dst->slice_table = src->slice_table;
2086 dst->cbp_table = src->cbp_table;
2087 dst->mb2b_xy = src->mb2b_xy;
2088 dst->mb2b8_xy = src->mb2b8_xy;
2089 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2090 dst->mvd_table[0] = src->mvd_table[0];
2091 dst->mvd_table[1] = src->mvd_table[1];
2092 dst->direct_table = src->direct_table;
/* per-thread scratch buffer: not shared, allocated in frame_start() */
2094 dst->s.obmc_scratchpad = NULL;
2095 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2100 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate the buffers that must be private to each thread context:
 * the two top-border rows (16 luma + 8+8 chroma bytes per mb column).
 * Returns 0 on success; on allocation failure control reaches the
 * visible "return -1" path and free_tables() cleans up.
 * NOTE(review): extract omits the success return/closing brace.
 */
2102 static int context_init(H264Context *h){
2103 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2108 return -1; // free_tables will clean up for us
/**
 * Initialization shared by the H.264 decoder (and, presumably, the
 * SVQ3 decoder which reuses this file — TODO confirm callers): copies
 * dimensions from the AVCodecContext, sets up intra prediction, and
 * seeds flat (=16) default scaling matrices.
 * NOTE(review): extract omits trailing lines; visible code left
 * byte-identical.
 */
2111 static av_cold void common_init(H264Context *h){
2112 MpegEncContext * const s = &h->s;
2114 s->width = s->avctx->width;
2115 s->height = s->avctx->height;
2116 s->codec_id= s->avctx->codec->id;
2118 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS dequant tables built yet" sentinel */
2120 h->dequant_coeff_pps= -1;
2121 s->unrestricted_mv=1;
2122 s->decode=1; //FIXME
/* default (flat) scaling matrices until an SPS/PPS overrides them */
2124 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2125 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback: set up the MpegEncContext defaults, pick the
 * output pixel format, detect AVC (mp4-style, length-prefixed NAL)
 * extradata, and initialize POC bookkeeping.
 * NOTE(review): extract omits lines (common_init call, is_avc/NAL
 * length parsing inside the extradata branch, return statement);
 * visible code left byte-identical.
 */
2128 static av_cold int decode_init(AVCodecContext *avctx){
2129 H264Context *h= avctx->priv_data;
2130 MpegEncContext * const s = &h->s;
2132 MPV_decode_defaults(s);
2137 s->out_format = FMT_H264;
2138 s->workaround_bugs= avctx->workaround_bugs;
2141 // s->decode_mb= ff_h263_decode_mb;
2142 s->quarter_sample = 1;
/* SVQ3 stores full-range (JPEG) YUV; plain H.264 uses limited range */
2145 if(avctx->codec_id == CODEC_ID_SVQ3)
2146 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2148 avctx->pix_fmt= PIX_FMT_YUV420P;
/* leading byte 1 identifies an AVCDecoderConfigurationRecord */
2152 if(avctx->extradata_size > 0 && avctx->extradata &&
2153 *(char *)avctx->extradata == 1){
2160 h->thread_context[0] = h;
2161 h->outputed_poc = INT_MIN;
2162 h->prev_poc_msb= 1<<16;
/**
 * Per-frame setup: start the MPV frame, reset key_frame/reference
 * marking, precompute block pixel offsets for frame and field coding,
 * allocate the per-thread bipred scratch buffer (needs linesize, hence
 * not done in alloc_tables), and invalidate the slice table.
 * Returns 0 on success (return path outside this extract — TODO
 * confirm), negative on MPV_frame_start failure.
 * NOTE(review): extract omits lines (declaration of i, some loop
 * headers/braces, error return); visible code left byte-identical.
 */
2166 static int frame_start(H264Context *h){
2167 MpegEncContext * const s = &h->s;
2170 if(MPV_frame_start(s, s->avctx) < 0)
2172 ff_er_frame_start(s);
2174 * MPV_frame_start uses pict_type to derive key_frame.
2175 * This is incorrect for H.264; IDR markings must be used.
2176 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2177 * See decode_nal_units().
2179 s->current_picture_ptr->key_frame= 0;
2181 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-coded offsets; [24..47]: field-coded
 * (doubled line stride) offsets, for luma then the two chroma planes */
2183 for(i=0; i<16; i++){
2184 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2185 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2188 h->block_offset[16+i]=
2189 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2190 h->block_offset[24+16+i]=
2191 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2194 /* can't be in alloc_tables because linesize isn't known there.
2195 * FIXME: redo bipred weight to not require extra buffer? */
2196 for(i = 0; i < s->avctx->thread_count; i++)
2197 if(!h->thread_context[i]->s.obmc_scratchpad)
2198 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2200 /* some macroblocks will be accessed before they're available */
2201 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2202 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2204 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2206 // We mark the current picture as non-reference after allocating it, so
2207 // that if we break out due to an error it can be released automatically
2208 // in the next MPV_frame_start().
2209 // SVQ3 as well as most other codecs have only last/next/current and thus
2210 // get released even with set reference, besides SVQ3 and others do not
2211 // mark frames as reference later "naturally".
2212 if(s->codec_id != CODEC_ID_SVQ3)
2213 s->current_picture_ptr->reference= 0;
/* field POCs stay INT_MAX until decode_slice_header fills them in */
2215 s->current_picture_ptr->field_poc[0]=
2216 s->current_picture_ptr->field_poc[1]= INT_MAX;
2217 assert(s->current_picture_ptr->long_ref==0);
/**
 * Save this macroblock's bottom row into top_borders[] and its right
 * column into left_border[], so the deblocking filter of neighbouring
 * macroblocks can read pre-filter pixels. The MBAFF path saves an
 * extra line for the mb pair and uses offset/step to interleave fields.
 * NOTE(review): extract omits lines (declarations of offset/uvoffset/
 * top_idx/step/skiplast, some braces/else arms); code left
 * byte-identical.
 */
2222 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2223 MpegEncContext * const s = &h->s;
/* step back one row so src_* point at the last row inside the mb */
2232 src_cb -= uvlinesize;
2233 src_cr -= uvlinesize;
2235 if(!simple && FRAME_MBAFF){
2237 offset = MB_MBAFF ? 1 : 17;
2238 uvoffset= MB_MBAFF ? 1 : 9;
/* save bottom luma row (two 8-byte halves) and chroma rows */
2240 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2242 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2243 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2249 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2250 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2251 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2252 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2258 top_idx = MB_MBAFF ? 0 : 1;
2260 step= MB_MBAFF ? 2 : 1;
2263 // There are two lines saved, the line above the the top macroblock of a pair,
2264 // and the line above the bottom macroblock
2265 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2266 for(i=1; i<17 - skiplast; i++){
2267 h->left_border[offset+i*step]= src_y[15+i* linesize];
2270 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2273 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2274 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2275 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2276 for(i=1; i<9 - skiplast; i++){
2277 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2278 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2280 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg!=0) or copy the saved left/top border pixels with the
 * edge pixels of the current macroblock. Called around intra
 * prediction when deblocking is enabled, so prediction reads
 * *unfiltered* neighbour samples; a second call restores the filtered
 * ones. deblock_left/top gate the edges that actually need swapping.
 * NOTE(review): extract omits lines (mb_xy/deblock_* declarations, the
 * XCHG macro body, several braces/else arms); code left byte-identical.
 */
2285 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2286 MpegEncContext * const s = &h->s;
2297 if(!simple && FRAME_MBAFF){
2299 offset = MB_MBAFF ? 1 : 17;
2300 uvoffset= MB_MBAFF ? 1 : 9;
2304 top_idx = MB_MBAFF ? 0 : 1;
2306 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter only within the current slice */
2309 if(h->deblocking_filter == 2) {
2311 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2312 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2314 deblock_left = (s->mb_x > 0);
2315 deblock_top = (s->mb_y > !!MB_FIELD);
/* move to the row/column just outside the macroblock */
2318 src_y -= linesize + 1;
2319 src_cb -= uvlinesize + 1;
2320 src_cr -= uvlinesize + 1;
2322 #define XCHG(a,b,t,xchg)\
2329 for(i = !deblock_top; i<16; i++){
2330 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2332 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2336 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* also swap the first 8 pixels of the next mb's top border (top-right) */
2338 if(s->mb_x+1 < s->mb_width){
2339 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2343 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2345 for(i = !deblock_top; i<8; i++){
2346 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2347 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2349 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2350 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2353 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock into the current picture: compute
 * destination pointers, select IDCT functions, then either copy PCM
 * samples, run intra prediction + residual add, or run motion
 * compensation + residual add, followed by chroma and (optionally)
 * deblocking. `simple`!=0 compiles out the uncommon paths (PCM,
 * MBAFF/field, gray, SVQ3) so the compiler generates a fast variant;
 * the two wrappers below instantiate both.
 * NOTE(review): this extract omits many original lines (declarations
 * of i/list/nnz/tr/topright, numerous else/closing braces, and some
 * branch arms); comments below only describe visible code, which is
 * left byte-identical.
 */
2359 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2360 MpegEncContext * const s = &h->s;
2361 const int mb_x= s->mb_x;
2362 const int mb_y= s->mb_y;
2363 const int mb_xy= h->mb_xy;
2364 const int mb_type= s->current_picture.mb_type[mb_xy];
2365 uint8_t *dest_y, *dest_cb, *dest_cr;
2366 int linesize, uvlinesize /*dct_offset*/;
2368 int *block_offset = &h->block_offset[0];
/* lossless bypass only applies at qp 0 with SPS transform_bypass set */
2369 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2370 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2371 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2372 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2374 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2375 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2376 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2378 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2379 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the strides, use the field block offsets,
 * and for the bottom mb of a pair step back to the field's first line */
2381 if (!simple && MB_FIELD) {
2382 linesize = h->mb_linesize = s->linesize * 2;
2383 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2384 block_offset = &h->block_offset[24];
2385 if(mb_y&1){ //FIXME move out of this function?
2386 dest_y -= s->linesize*15;
2387 dest_cb-= s->uvlinesize*7;
2388 dest_cr-= s->uvlinesize*7;
/* remap ref_cache indices to the field-parity reference numbering */
2392 for(list=0; list<h->list_count; list++){
2393 if(!USES_LIST(mb_type, list))
2395 if(IS_16X16(mb_type)){
2396 int8_t *ref = &h->ref_cache[list][scan8[0]];
2397 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2399 for(i=0; i<16; i+=4){
2400 int ref = h->ref_cache[list][scan8[i]];
2402 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2408 linesize = h->mb_linesize = s->linesize;
2409 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2410 // dct_offset = s->linesize * 16;
/* choose residual-add functions: plain pixel add in bypass mode,
 * otherwise 8x8 or 4x4 IDCT variants */
2413 if(transform_bypass){
2415 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2416 }else if(IS_8x8DCT(mb_type)){
2417 idct_dc_add = s->dsp.h264_idct8_dc_add;
2418 idct_add = s->dsp.h264_idct8_add;
2420 idct_dc_add = s->dsp.h264_idct_dc_add;
2421 idct_add = s->dsp.h264_idct_add;
/* I_PCM: raw samples were parsed into h->mb; just copy them out */
2424 if (!simple && IS_INTRA_PCM(mb_type)) {
2425 for (i=0; i<16; i++) {
2426 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2428 for (i=0; i<8; i++) {
2429 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2430 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2433 if(IS_INTRA(mb_type)){
/* swap in unfiltered border pixels for intra prediction */
2434 if(h->deblocking_filter)
2435 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2437 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2438 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2439 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2442 if(IS_INTRA4x4(mb_type)){
2443 if(simple || !s->encoding){
2444 if(IS_8x8DCT(mb_type)){
2445 for(i=0; i<16; i+=4){
2446 uint8_t * const ptr= dest_y + block_offset[i];
2447 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 (High 4:4:4) bypass: fused predict+add for V/H modes */
2448 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2449 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2451 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2452 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2453 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set uses the cheap DC-only add */
2455 if(nnz == 1 && h->mb[i*16])
2456 idct_dc_add(ptr, h->mb + i*16, linesize);
2458 idct_add (ptr, h->mb + i*16, linesize);
2463 for(i=0; i<16; i++){
2464 uint8_t * const ptr= dest_y + block_offset[i];
2465 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2467 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2468 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* these modes read the top-right samples; synthesize them by
 * replicating the last available top pixel when unavailable */
2472 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2473 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2474 assert(mb_y || linesize <= block_offset[i]);
2475 if(!topright_avail){
2476 tr= ptr[3 - linesize]*0x01010101;
2477 topright= (uint8_t*) &tr;
2479 topright= ptr + 4 - linesize;
2483 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2484 nnz = h->non_zero_count_cache[ scan8[i] ];
2487 if(nnz == 1 && h->mb[i*16])
2488 idct_dc_add(ptr, h->mb + i*16, linesize);
2490 idct_add (ptr, h->mb + i*16, linesize);
2492 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: one full-mb prediction plus a hadamard-transformed DC */
2498 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2500 if(!transform_bypass)
2501 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2503 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
/* restore the filtered border pixels swapped out above */
2505 if(h->deblocking_filter)
2506 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation fills the prediction */
2508 hl_motion(h, dest_y, dest_cb, dest_cr,
2509 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2510 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2511 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals (intra4x4 already added them per block above) */
2515 if(!IS_INTRA4x4(mb_type)){
2517 if(IS_INTRA16x16(mb_type)){
2518 if(transform_bypass && h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2519 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2521 for(i=0; i<16; i++){
2522 if(h->non_zero_count_cache[ scan8[i] ])
2523 idct_add (dest_y + block_offset[i], h->mb + i*16, linesize);
2524 else if(h->mb[i*16])
2525 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2528 }else if(h->cbp&15){
2529 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2530 for(i=0; i<16; i+=di){
2531 int nnz = h->non_zero_count_cache[ scan8[i] ];
2533 if(nnz==1 && h->mb[i*16])
2534 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2536 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2541 for(i=0; i<16; i++){
2542 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2543 uint8_t * const ptr= dest_y + block_offset[i];
2544 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals (cbp bits 4-5), skipped entirely in gray mode */
2550 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2551 uint8_t *dest[2] = {dest_cb, dest_cr};
2552 if(transform_bypass){
2553 idct_add = idct_dc_add = s->dsp.add_pixels4;
2555 idct_add = s->dsp.h264_idct_add;
2556 idct_dc_add = s->dsp.h264_idct_dc_add;
/* dequant tables 1/2 are intra Cb/Cr, 4/5 are inter Cb/Cr */
2557 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2558 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2561 if(transform_bypass && IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2562 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2563 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2565 for(i=16; i<16+8; i++){
2566 if(h->non_zero_count_cache[ scan8[i] ])
2567 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2568 else if(h->mb[i*16])
2569 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2573 for(i=16; i<16+8; i++){
2574 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2575 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2576 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* in-loop deblocking: back up pre-filter borders first, then filter */
2582 if(h->deblocking_filter) {
2583 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2584 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2585 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2586 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2587 if (!simple && FRAME_MBAFF) {
2588 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2590 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2596 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast-path macroblock reconstruction: instantiates
 * hl_decode_mb_internal() with simple=1, compiling out PCM/MBAFF/gray
 * handling. NOTE(review): closing brace is outside this extract. */
2598 static void hl_decode_mb_simple(H264Context *h){
2599 hl_decode_mb_internal(h, 1);
2603 * Process a macroblock; this handles edge cases, such as interlacing.
/* Full-featured macroblock reconstruction (simple=0): handles PCM,
 * interlacing/MBAFF, gray and SVQ3 paths. av_noinline keeps this cold
 * variant out of the hot caller. NOTE(review): closing brace is
 * outside this extract. */
2605 static void av_noinline hl_decode_mb_complex(H264Context *h){
2606 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex variant
 * based on per-stream/per-mb complexity flags; skipped entirely when
 * the encoder build runs without decoding.
 * NOTE(review): extract omits a line between the early-out and the
 * is_complex branch; visible code left byte-identical.
 */
2609 static void hl_decode_mb(H264Context *h){
2610 MpegEncContext * const s = &h->s;
2611 const int mb_xy= h->mb_xy;
2612 const int mb_type= s->current_picture.mb_type[mb_xy];
/* ENABLE_SMALL builds drop the duplicated simple variant entirely */
2613 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2615 if(ENABLE_H264_ENCODER && !s->decode)
2619 hl_decode_mb_complex(h);
2620 else hl_decode_mb_simple(h);
/**
 * Convert an in-place Picture from frame to single-field addressing:
 * double each plane's linesize, offset the data pointers by one line
 * for the bottom field, set the reference marking to the parity, and
 * pick the matching field POC.
 * NOTE(review): extract omits the declaration of i and closing braces;
 * visible code left byte-identical.
 */
2623 static void pic_as_field(Picture *pic, const int parity){
2625 for (i = 0; i < 4; ++i) {
2626 if (parity == PICT_BOTTOM_FIELD)
2627 pic->data[i] += pic->linesize[i];
2628 pic->reference = parity;
2629 pic->linesize[i] *= 2;
2631 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is referenced with the requested parity,
 * converting to field addressing when parity is a single field and
 * applying id_add to pic_id (distinguishes same-frame top/bottom
 * entries). Returns whether the parity matched.
 * NOTE(review): extract omits the dest assignment, return statement
 * and braces; visible code left byte-identical.
 */
2634 static int split_field_copy(Picture *dest, Picture *src,
2635 int parity, int id_add){
2636 int match = !!(src->reference & parity);
2640 if(parity != PICT_FRAME){
2641 pic_as_field(dest, parity);
2643 dest->pic_id += id_add;
/**
 * Build a default reference list segment from `in`, alternating fields
 * of the selected parity (sel) and the opposite parity (sel^3), per
 * the H.264 field-pair interleaving rules. pic_id becomes frame_num
 * for short-term or the array index for long-term refs. Returns the
 * number of entries written (index), per the original — return
 * statement lies outside this extract.
 * NOTE(review): extract omits declarations of i[2]/index, some
 * if-conditions and braces; visible code left byte-identical.
 */
2650 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2654 while(i[0]<len || i[1]<len){
/* advance each cursor to the next picture referenced with its parity */
2655 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2657 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2660 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2661 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2664 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2665 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Selection-sort the pictures of src whose POC lies on one side of
 * `limit` into `sorted`: dir==0 picks POCs above the limit in
 * ascending order, dir!=0 picks POCs below it in descending order
 * (the two halves of a B-frame default list). Returns the count
 * (out_i) — return statement lies outside this extract.
 * NOTE(review): extract omits declarations of i/out_i/best_poc, the
 * outer loop header and braces; visible code left byte-identical.
 */
2672 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2677 best_poc= dir ? INT_MIN : INT_MAX;
2679 for(i=0; i<len; i++){
2680 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the descending pass */
2681 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2683 sorted[out_i]= src[i];
/* sentinel unchanged -> no candidate found this round, stop */
2686 if(best_poc == (dir ? INT_MIN : INT_MAX))
2688 limit= sorted[out_i++]->poc - dir;
2694 * fills the default_ref_list.
/**
 * Build default_ref_list[0] (and [1] for B slices) per H.264 8.2.4.2:
 * B lists sort short-term refs around the current POC (list0 past-
 * first, list1 future-first) then append long-term refs; P lists use
 * plain short-term order. If both B lists come out identical their
 * first two entries of list1 are swapped, as the spec requires.
 * NOTE(review): extract omits lines (declarations of i/len/lens/
 * cur_poc/list, several braces and the return); visible code left
 * byte-identical.
 */
2696 static int fill_default_ref_list(H264Context *h){
2697 MpegEncContext * const s = &h->s;
2700 if(h->slice_type_nos==FF_B_TYPE){
2701 Picture *sorted[32];
/* field decode compares against the current field's POC, not frame POC */
2706 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2708 cur_poc= s->current_picture_ptr->poc;
2710 for(list= 0; list<2; list++){
2711 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2712 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2714 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2715 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2718 if(len < h->ref_count[list])
2719 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* identical lists -> swap first two entries of list1 (spec 8.2.4.2.3) */
2723 if(lens[0] == lens[1] && lens[1] > 1){
2724 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2726 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2729 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2730 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2732 if(len < h->ref_count[0])
2733 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* trace output only; compiled out unless TRACE is enabled */
2736 for (i=0; i<h->ref_count[0]; i++) {
2737 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2739 if(h->slice_type_nos==FF_B_TYPE){
2740 for (i=0; i<h->ref_count[1]; i++) {
2741 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2748 static void print_short_term(H264Context *h);
2749 static void print_long_term(H264Context *h);
2752 * Extract structure information about the picture described by pic_num in
2753 * the current decoding context (frame or field). Note that pic_num is
2754 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2755 * @param pic_num picture number for which to extract structure information
2756 * @param structure one of PICT_XXX describing structure of picture
2758 * @return frame number (short term) or long term index of picture
2759 * described by pic_num
/* See the doc comment above: splits a field pic_num into its structure
 * (same or opposite parity as the current field, encoded in the low
 * bit — the condition testing that bit is omitted from this extract)
 * and the frame number / long-term index (pic_num>>1 for fields,
 * returned outside this extract). */
2761 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2762 MpegEncContext * const s = &h->s;
2764 *structure = s->picture_structure;
2767 /* opposite field */
2768 *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering (H.264 7.3.3.1 / 8.2.4.3): start from
 * the default lists, then for each reordering command locate the named
 * short-term (idc 0/1, differential pic_num) or long-term (idc 2)
 * picture and move it to the current index, shifting later entries
 * down. Missing references are zeroed/patched with the current
 * picture. Returns 0 on success, -1 on syntax errors (return paths
 * partially outside this extract — TODO confirm).
 * NOTE(review): extract omits many lines (declarations of i/frame_num/
 * long_idx, break statements, several else/closing braces); visible
 * code left byte-identical.
 */
2775 static int decode_ref_pic_list_reordering(H264Context *h){
2776 MpegEncContext * const s = &h->s;
2777 int list, index, pic_structure;
2779 print_short_term(h);
2782 for(list=0; list<h->list_count; list++){
2783 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_lX */
2785 if(get_bits1(&s->gb)){
2786 int pred= h->curr_pic_num;
2788 for(index=0; ; index++){
2789 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2790 unsigned int pic_id;
2792 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
2794 if(reordering_of_pic_nums_idc==3)
2797 if(index >= h->ref_count[list]){
2798 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2802 if(reordering_of_pic_nums_idc<3){
2803 if(reordering_of_pic_nums_idc<2){
/* short-term: pic_num predicted differentially, modulo max_pic_num */
2804 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2807 if(abs_diff_pic_num > h->max_pic_num){
2808 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2812 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2813 else pred+= abs_diff_pic_num;
2814 pred &= h->max_pic_num - 1;
2816 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search newest-first through the short-term list */
2818 for(i= h->short_ref_count-1; i>=0; i--){
2819 ref = h->short_ref[i];
2820 assert(ref->reference);
2821 assert(!ref->long_ref);
2823 ref->frame_num == frame_num &&
2824 (ref->reference & pic_structure)
/* long-term: pic_id is long_term_pic_idx, looked up directly */
2832 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2834 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2837 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2840 ref = h->long_ref[long_idx];
2841 assert(!(ref && !ref->reference));
2842 if(ref && (ref->reference & pic_structure)){
2843 ref->pic_id= pic_id;
2844 assert(ref->long_ref);
2852 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2853 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift any existing occurrence of ref down to make room at index */
2855 for(i=index; i+1<h->ref_count[list]; i++){
2856 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2859 for(; i > index; i--){
2860 h->ref_list[list][i]= h->ref_list[list][i-1];
2862 h->ref_list[list][index]= *ref;
2864 pic_as_field(&h->ref_list[list][index], pic_structure);
2868 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* final sanity pass: no list entry may be an empty Picture */
2874 for(list=0; list<h->list_count; list++){
2875 for(index= 0; index < h->ref_count[list]; index++){
2876 if(!h->ref_list[list][index].data[0]){
2877 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2878 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF decoding, derive per-field reference entries: each frame
 * reference i gets a top-field copy at ref_list[16+2*i] and a bottom-
 * field copy at 16+2*i+1 (doubled linesize, offset data for bottom),
 * and the explicit/implicit weights are duplicated accordingly.
 * NOTE(review): extract omits lines (declarations of list/i/j, the
 * field[0]=*frame copy, inner loop headers, closing braces); visible
 * code left byte-identical.
 */
2886 static void fill_mbaff_ref_list(H264Context *h){
2888 for(list=0; list<2; list++){ //FIXME try list_count
2889 for(i=0; i<h->ref_count[list]; i++){
2890 Picture *frame = &h->ref_list[list][i];
2891 Picture *field = &h->ref_list[list][16+2*i];
2894 field[0].linesize[j] <<= 1;
2895 field[0].reference = PICT_TOP_FIELD;
2896 field[0].poc= field[0].field_poc[0];
2897 field[1] = field[0];
/* bottom field starts one (frame) line below the top field */
2899 field[1].data[j] += frame->linesize[j];
2900 field[1].reference = PICT_BOTTOM_FIELD;
2901 field[1].poc= field[1].field_poc[1];
2903 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2904 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2906 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2907 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights need both the column and row duplicated */
2911 for(j=0; j<h->ref_count[1]; j++){
2912 for(i=0; i<h->ref_count[0]; i++)
2913 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2914 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2915 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse pred_weight_table (H.264 7.3.3.2): explicit luma/chroma weight
 * and offset per reference of each list, with defaults of
 * 1<<log2_denom weight and 0 offset when the per-ref flag is absent.
 * Sets h->use_weight / use_weight_chroma when any non-default value
 * appears. Returns 0 — return statement lies outside this extract.
 * NOTE(review): extract omits lines (declarations of list/i/j, the
 * use_weight reset, chroma j-loops, else arms and braces); visible
 * code left byte-identical.
 */
2919 static int pred_weight_table(H264Context *h){
2920 MpegEncContext * const s = &h->s;
2922 int luma_def, chroma_def;
2925 h->use_weight_chroma= 0;
2926 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2927 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2928 luma_def = 1<<h->luma_log2_weight_denom;
2929 chroma_def = 1<<h->chroma_log2_weight_denom;
2931 for(list=0; list<2; list++){
2932 for(i=0; i<h->ref_count[list]; i++){
2933 int luma_weight_flag, chroma_weight_flag;
2935 luma_weight_flag= get_bits1(&s->gb);
2936 if(luma_weight_flag){
2937 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2938 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only a non-default weight/offset activates weighted prediction */
2939 if( h->luma_weight[list][i] != luma_def
2940 || h->luma_offset[list][i] != 0)
2943 h->luma_weight[list][i]= luma_def;
2944 h->luma_offset[list][i]= 0;
2948 chroma_weight_flag= get_bits1(&s->gb);
2949 if(chroma_weight_flag){
2952 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2953 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2954 if( h->chroma_weight[list][i][j] != chroma_def
2955 || h->chroma_offset[list][i][j] != 0)
2956 h->use_weight_chroma= 1;
2961 h->chroma_weight[list][i][j]= chroma_def;
2962 h->chroma_offset[list][i][j]= 0;
/* only B slices have a second weighted list */
2967 if(h->slice_type_nos != FF_B_TYPE) break;
2969 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derive implicit B-slice bi-prediction weights (H.264 8.4.2.3.2) from
 * POC distances: w1 = 64 - dist_scale_factor, falling back to equal
 * 32/32 weights when the scale factor is out of range, td is zero
 * (omitted guard) or the references are symmetric about the current
 * POC (fast path that disables weighting entirely).
 * NOTE(review): extract omits lines (declarations of ref0/ref1, the
 * use_weight assignments in the fast path, td==0 guard, early return,
 * closing braces); visible code left byte-identical.
 */
2973 static void implicit_weight_table(H264Context *h){
2974 MpegEncContext * const s = &h->s;
2976 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric ref pair -> implicit weights degenerate to 32/32 */
2978 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2979 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2981 h->use_weight_chroma= 0;
/* use_weight==2 marks "implicit mode" for the MC code */
2986 h->use_weight_chroma= 2;
2987 h->luma_log2_weight_denom= 5;
2988 h->chroma_log2_weight_denom= 5;
2990 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
2991 int poc0 = h->ref_list[0][ref0].poc;
2992 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
2993 int poc1 = h->ref_list[1][ref1].poc;
2994 int td = av_clip(poc1 - poc0, -128, 127);
2996 int tb = av_clip(cur_poc - poc0, -128, 127);
2997 int tx = (16384 + (FFABS(td) >> 1)) / td;
2998 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
2999 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3000 h->implicit_weight[ref0][ref1] = 32;
3002 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3004 h->implicit_weight[ref0][ref1] = 32;
3010  * Mark a picture as no longer needed for reference. The refmask
3011  * argument allows unreferencing of individual fields or the whole frame.
3012  * If the picture becomes entirely unreferenced, but is being held for
3013  * display purposes, it is marked as such.
3014  * @param refmask mask of fields to unreference; the mask is bitwise
3015  * anded with the reference marking of pic
3016  * @return non-zero if pic becomes entirely unreferenced (except possibly
3017  * for display purposes) zero if one of the fields remains in
3020 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
     /* keep only the field reference bits selected by refmask;
        a non-zero result means some field is still a reference */
3022     if (pic->reference &= refmask) {
3025         for(i = 0; h->delayed_pic[i]; i++)
3026             if(pic == h->delayed_pic[i]){
                 /* picture is still queued for output: keep it alive
                    with the special delayed-output marker */
3027                 pic->reference=DELAYED_PIC_REF;
3035  * instantaneous decoder refresh.
     /* Empties both reference lists and resets frame_num tracking,
        as required when an IDR slice is decoded (spec 8.2.5.1). */
3037 static void idr(H264Context *h){
     /* drop every long-term reference slot */
3040     for(i=0; i<16; i++){
3041         remove_long(h, i, 0);
3043     assert(h->long_ref_count==0);
     /* drop every short-term reference */
3045     for(i=0; i<h->short_ref_count; i++){
3046         unreference_pic(h, h->short_ref[i], 0);
3047         h->short_ref[i]= NULL;
3049     h->short_ref_count=0;
     /* IDR restarts frame numbering */
3050     h->prev_frame_num= 0;
3051     h->prev_frame_num_offset= 0;
3056 /* forget old pics after a seek */
3057 static void flush_dpb(AVCodecContext *avctx){
3058     H264Context *h= avctx->priv_data;
     /* drop all pictures waiting for output; they will never be shown */
3060     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3061         if(h->delayed_pic[i])
3062             h->delayed_pic[i]->reference= 0;
3063         h->delayed_pic[i]= NULL;
     /* reset output ordering so the next picture is always emitted */
3065     h->outputed_poc= INT_MIN;
3067     if(h->s.current_picture_ptr)
3068         h->s.current_picture_ptr->reference= 0;
3069     h->s.first_field= 0;
     /* let the generic mpegvideo layer flush its own state too */
3070     ff_mpeg_flush(avctx);
3074  * Find a Picture in the short term reference list by frame number.
3075  * @param frame_num frame number to search for
3076  * @param idx the index into h->short_ref where returned picture is found
3077  * undefined if no picture found.
3078  * @return pointer to the found picture, or NULL if no pic with the provided
3079  * frame number is found
3081 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3082     MpegEncContext * const s = &h->s;
     /* linear scan; short_ref is small (<=16 entries) so this is cheap */
3085     for(i=0; i<h->short_ref_count; i++){
3086         Picture *pic= h->short_ref[i];
3087         if(s->avctx->debug&FF_DEBUG_MMCO)
3088             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3089         if(pic->frame_num == frame_num) {
3098  * Remove a picture from the short term reference list by its index in
3099  * that list. This does no checking on the provided index; it is assumed
3100  * to be valid. Other list entries are shifted down.
3101  * @param i index into h->short_ref of picture to remove.
3103 static void remove_short_at_index(H264Context *h, int i){
3104     assert(i >= 0 && i < h->short_ref_count);
3105     h->short_ref[i]= NULL;
     /* close the gap; count was already decremented, so
        (short_ref_count - i) entries remain above index i */
3106     if (--h->short_ref_count)
3107         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3112  * @return the removed picture or NULL if an error occurs
     /* Removes (fully or per-field, depending on ref_mask) the short-term
        reference with the given frame_num. */
3114 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3115     MpegEncContext * const s = &h->s;
3119     if(s->avctx->debug&FF_DEBUG_MMCO)
3120         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3122     pic = find_short(h, frame_num, &i);
     /* only drop the list entry if the picture became fully unreferenced
        (a single field may remain referenced when ref_mask is a field) */
3124         if(unreference_pic(h, pic, ref_mask))
3125             remove_short_at_index(h, i);
3132  * Remove a picture from the long term reference list by its index in
3134  * @return the removed picture or NULL if an error occurs
3136 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3139     pic= h->long_ref[i];
     /* clear the slot only when the picture is entirely unreferenced */
3141         if(unreference_pic(h, pic, ref_mask)){
3142             assert(h->long_ref[i]->long_ref == 1);
3143             h->long_ref[i]->long_ref= 0;
3144             h->long_ref[i]= NULL;
3145             h->long_ref_count--;
3153  * print short term list
     /* Debug-only dump of the short-term reference list (FF_DEBUG_MMCO). */
3155 static void print_short_term(H264Context *h) {
3157     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3158         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3159         for(i=0; i<h->short_ref_count; i++){
3160             Picture *pic= h->short_ref[i];
3161             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3167  * print long term list
     /* Debug-only dump of all 16 long-term reference slots (FF_DEBUG_MMCO). */
3169 static void print_long_term(H264Context *h) {
3171     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3172         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3173         for(i = 0; i < 16; i++){
3174             Picture *pic= h->long_ref[i];
3176                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3183  * Executes the reference picture marking (memory management control operations).
     /* Applies the parsed MMCO list (or the implicit sliding-window rule when
        mmco_count==0 was turned into an MMCO by the caller) to the short- and
        long-term reference lists; see H.264 spec 8.2.5. Statement order is
        significant throughout. */
3185 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3186     MpegEncContext * const s = &h->s;
3188     int current_ref_assigned=0;
3191     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3192         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3194     for(i=0; i<mmco_count; i++){
3195         int structure, frame_num;
3196         if(s->avctx->debug&FF_DEBUG_MMCO)
3197             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
         /* opcodes that target a short-term picture look it up first */
3199         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
3200            || mmco[i].opcode == MMCO_SHORT2LONG){
3201             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3202             pic = find_short(h, frame_num, &j);
             /* a SHORT2LONG that already holds the same frame in the target
                long slot is tolerated; anything else is a stream error */
3204                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3205                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3206                 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3211         switch(mmco[i].opcode){
3212         case MMCO_SHORT2UNUSED:
3213             if(s->avctx->debug&FF_DEBUG_MMCO)
3214                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
             /* structure ^ PICT_FRAME: unreference the complementary part */
3215             remove_short(h, frame_num, structure ^ PICT_FRAME);
3217         case MMCO_SHORT2LONG:
             /* move a short-term picture into long-term slot long_arg,
                evicting any different occupant first */
3218                 if (h->long_ref[mmco[i].long_arg] != pic)
3219                     remove_long(h, mmco[i].long_arg, 0);
3221             remove_short_at_index(h, j);
3222             h->long_ref[ mmco[i].long_arg ]= pic;
3223             if (h->long_ref[ mmco[i].long_arg ]){
3224                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3225                 h->long_ref_count++;
3228         case MMCO_LONG2UNUSED:
3229             j = pic_num_extract(h, mmco[i].long_arg, &structure);
3230             pic = h->long_ref[j];
3232                 remove_long(h, j, structure ^ PICT_FRAME);
3233             } else if(s->avctx->debug&FF_DEBUG_MMCO)
3234                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3237                     // Comment below left from previous code as it is an interresting note.
3238                     /* First field in pair is in short term list or
3239                      * at a different long term index.
3240                      * This is not allowed; see 7.4.3.3, notes 2 and 3.
3241                      * Report the problem and keep the pair where it is,
3242                      * and mark this field valid.
             /* MMCO_LONG: mark the current picture itself as long-term */
3245             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3246                 remove_long(h, mmco[i].long_arg, 0);
3248                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3249                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3250                 h->long_ref_count++;
3253             s->current_picture_ptr->reference |= s->picture_structure;
3254             current_ref_assigned=1;
3256         case MMCO_SET_MAX_LONG:
3257             assert(mmco[i].long_arg <= 16);
3258             // just remove the long term which index is greater than new max
3259             for(j = mmco[i].long_arg; j<16; j++){
3260                 remove_long(h, j, 0);
             /* MMCO_RESET: drop everything, restart POC/frame_num at zero */
3264             while(h->short_ref_count){
3265                 remove_short(h, h->short_ref[0]->frame_num, 0);
3267             for(j = 0; j < 16; j++) {
3268                 remove_long(h, j, 0);
3270             s->current_picture_ptr->poc=
3271             s->current_picture_ptr->field_poc[0]=
3272             s->current_picture_ptr->field_poc[1]=
3276             s->current_picture_ptr->frame_num= 0;
3282     if (!current_ref_assigned) {
3283         /* Second field of complementary field pair; the first field of
3284          * which is already referenced. If short referenced, it
3285          * should be first entry in short_ref. If not, it must exist
3286          * in long_ref; trying to put it on the short list here is an
3287          * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3289         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3290             /* Just mark the second field valid */
3291             s->current_picture_ptr->reference = PICT_FRAME;
3292         } else if (s->current_picture_ptr->long_ref) {
3293             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3294                                              "assignment for second field "
3295                                              "in complementary field pair "
3296                                              "(first field is long term)\n");
             /* normal case: push the current picture onto the front of the
                short-term list (newest first) */
3298             pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3300                 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3303             if(h->short_ref_count)
3304                 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3306             h->short_ref[0]= s->current_picture_ptr;
3307             h->short_ref_count++;
3308             s->current_picture_ptr->reference |= s->picture_structure;
3312     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3314         /* We have too many reference frames, probably due to corrupted
3315          * stream. Need to discard one frame. Prevents overrun of the
3316          * short_ref and long_ref buffers.
3318         av_log(h->s.avctx, AV_LOG_ERROR,
3319                "number of reference frames exceeds max (probably "
3320                "corrupt input), discarding one\n");
3322         if (h->long_ref_count && !h->short_ref_count) {
3323             for (i = 0; i < 16; ++i)
3328             remove_long(h, i, 0);
             /* otherwise drop the oldest short-term reference */
3330             pic = h->short_ref[h->short_ref_count - 1];
3331             remove_short(h, pic->frame_num, 0);
3335     print_short_term(h);
/**
 * Parses the dec_ref_pic_marking() syntax from the slice header and fills
 * h->mmco[] (spec 7.4.3.3); also synthesizes a sliding-window
 * MMCO_SHORT2UNUSED when the short-term list is full and no explicit
 * adaptive marking was signalled.
 */
3340 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3341     MpegEncContext * const s = &h->s;
     /* IDR slices carry only no_output_of_prior_pics / long_term_reference
        flags; map the latter onto a single MMCO_LONG */
3345     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3346         s->broken_link= get_bits1(gb) -1;
3348             h->mmco[0].opcode= MMCO_LONG;
3349             h->mmco[0].long_arg= 0;
3353         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3354             for(i= 0; i<MAX_MMCO_COUNT; i++) {
3355                 MMCOOpcode opcode= get_ue_golomb(gb);
3357                 h->mmco[i].opcode= opcode;
3358                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                 /* difference_of_pic_nums_minus1 -> absolute pic num,
                    wrapped modulo max_pic_num */
3359                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3360 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3361                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3365                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3366                     unsigned int long_arg= get_ue_golomb(gb);
                 /* 32 (not 16) is allowed for LONG2UNUSED on field pictures
                    because pic nums are doubled in field mode */
3367                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3368                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3371                     h->mmco[i].long_arg= long_arg;
3374                 if(opcode > (unsigned)MMCO_LONG){
3375                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3378                 if(opcode == MMCO_END)
3383             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
         /* sliding window: evict the oldest short-term ref when full,
            except for the second field of an already-referenced pair */
3385             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3386                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3387                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3388                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3390                 if (FIELD_PICTURE) {
                 /* field mode: unreference both fields of that frame */
3391                     h->mmco[0].short_pic_num *= 2;
3392                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3393                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types defined by the spec (8.2.1); fills field_poc[] and
 * cur->poc. Relies on prev_* state carried over from the previous picture.
 */
3403 static int init_poc(H264Context *h){
3404     MpegEncContext * const s = &h->s;
3405     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3407     Picture *cur = s->current_picture_ptr;
     /* frame_num wrapped -> advance the offset by one wrap period */
3409     h->frame_num_offset= h->prev_frame_num_offset;
3410     if(h->frame_num < h->prev_frame_num)
3411         h->frame_num_offset += max_frame_num;
3413     if(h->sps.poc_type==0){
         /* type 0: explicit poc_lsb in the bitstream, msb inferred from
            wrap-around relative to the previous picture (8.2.1.1) */
3414         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3416         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3417             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3418         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3419             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3421             h->poc_msb = h->prev_poc_msb;
3422 //        printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3424         field_poc[1] = h->poc_msb + h->poc_lsb;
3425         if(s->picture_structure == PICT_FRAME)
3426             field_poc[1] += h->delta_poc_bottom;
3427     }else if(h->sps.poc_type==1){
         /* type 1: POC derived from frame_num plus per-cycle offsets
            signalled in the SPS (8.2.1.2) */
3428         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3431         if(h->sps.poc_cycle_length != 0)
3432             abs_frame_num = h->frame_num_offset + h->frame_num;
3436         if(h->nal_ref_idc==0 && abs_frame_num > 0)
3439         expected_delta_per_poc_cycle = 0;
3440         for(i=0; i < h->sps.poc_cycle_length; i++)
3441             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3443         if(abs_frame_num > 0){
3444             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3445             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3447             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3448             for(i = 0; i <= frame_num_in_poc_cycle; i++)
3449                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3453         if(h->nal_ref_idc == 0)
3454             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3456         field_poc[0] = expectedpoc + h->delta_poc[0];
3457         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3459         if(s->picture_structure == PICT_FRAME)
3460             field_poc[1] += h->delta_poc[1];
         /* type 2: POC is simply twice the (offset-corrected) frame_num */
3462         int poc= 2*(h->frame_num_offset + h->frame_num);
     /* only store the POC of fields actually present in this picture */
3471     if(s->picture_structure != PICT_BOTTOM_FIELD)
3472         s->current_picture_ptr->field_poc[0]= field_poc[0];
3473     if(s->picture_structure != PICT_TOP_FIELD)
3474         s->current_picture_ptr->field_poc[1]= field_poc[1];
3475     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3482  * initialize scan tables
     /* Sets up the zigzag/field scan tables, permuting them when the
        platform IDCT uses a non-canonical coefficient layout, and selects
        the q0 (transform-bypass / lossless) variants. */
3484 static void init_scan_tables(H264Context *h){
3485     MpegEncContext * const s = &h->s;
     /* C reference IDCT keeps the canonical coefficient order */
3487     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3488         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3489         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
         /* optimized IDCTs expect a transposed 4x4 layout -> permute */
3491         for(i=0; i<16; i++){
3492 #define T(x) (x>>2) | ((x<<2) & 0xF)
3493             h->zigzag_scan[i] = T(zigzag_scan[i]);
3494             h-> field_scan[i] = T( field_scan[i]);
3498     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3499         memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
3500         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3501         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
3502         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
         /* same transposition idea for the 8x8 transform */
3504         for(i=0; i<64; i++){
3505 #define T(x) (x>>3) | ((x&7)<<3)
3506             h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
3507             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3508             h->field_scan8x8[i]        = T(field_scan8x8[i]);
3509             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
     /* lossless (qp==0 bypass) blocks always use the unpermuted tables */
3513     if(h->sps.transform_bypass){ //FIXME same ugly
3514         h->zigzag_scan_q0          = zigzag_scan;
3515         h->zigzag_scan8x8_q0       = zigzag_scan8x8;
3516         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3517         h->field_scan_q0           = field_scan;
3518         h->field_scan8x8_q0        = field_scan8x8;
3519         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
3521         h->zigzag_scan_q0          = h->zigzag_scan;
3522         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
3523         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3524         h->field_scan_q0           = h->field_scan;
3525         h->field_scan8x8_q0        = h->field_scan8x8;
3526         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
3531  * Replicates H264 "master" context to thread contexts.
     /* Shallow-copies the per-picture decoding state a slice thread needs;
        pointer members (ref lists, picture ptrs) are shared, not duplicated. */
3533 static void clone_slice(H264Context *dst, H264Context *src)
3535     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
3536     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
3537     dst->s.current_picture      = src->s.current_picture;
3538     dst->s.linesize             = src->s.linesize;
3539     dst->s.uvlinesize           = src->s.uvlinesize;
3540     dst->s.first_field          = src->s.first_field;
3542     dst->prev_poc_msb           = src->prev_poc_msb;
3543     dst->prev_poc_lsb           = src->prev_poc_lsb;
3544     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
3545     dst->prev_frame_num         = src->prev_frame_num;
3546     dst->short_ref_count        = src->short_ref_count;
3548     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3549     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3550     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3551     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
3553     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
3554     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
3558  * decodes a slice header.
3559  * This will also call MPV_common_init() and frame_start() as needed.
3561  * @param h h264context
3562  * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3564  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3566 static int decode_slice_header(H264Context *h, H264Context *h0){
3567     MpegEncContext * const s = &h->s;
3568     MpegEncContext * const s0 = &h0->s;
3569     unsigned int first_mb_in_slice;
3570     unsigned int pps_id;
3571     int num_ref_idx_active_override_flag;
3572     unsigned int slice_type, tmp, i, j;
3573     int default_ref_list_done = 0;
3574     int last_pic_structure;
     /* pictures with nal_ref_idc==0 are never referenced -> droppable */
3576     s->dropable= h->nal_ref_idc == 0;
     /* FAST flag: cheaper 2-tap qpel filter for non-reference pictures */
3578     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3579         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3580         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3582         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3583         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3586     first_mb_in_slice= get_ue_golomb(&s->gb);
     /* first slice of a new picture in CHUNKS mode: restart picture state */
3588     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3589         h0->current_slice = 0;
3590         if (!s0->first_field)
3591             s->current_picture_ptr= NULL;
3594     slice_type= get_ue_golomb(&s->gb);
3596         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
     /* slice_type >= 5 means "fixed" (all slices of the picture same type) */
3601         h->slice_type_fixed=1;
3603         h->slice_type_fixed=0;
3605     slice_type= golomb_to_pict_type[ slice_type ];
     /* I slices (and repeats of the previous type) can reuse the default
        reference list built once per picture */
3606     if (slice_type == FF_I_TYPE
3607         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3608         default_ref_list_done = 1;
3610     h->slice_type= slice_type;
3611     h->slice_type_nos= slice_type & 3;
3613     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3614     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3615         av_log(h->s.avctx, AV_LOG_ERROR,
3616                "B picture before any references, skipping\n");
     /* resolve and copy the active PPS/SPS into the context */
3620     pps_id= get_ue_golomb(&s->gb);
3621     if(pps_id>=MAX_PPS_COUNT){
3622         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3625     if(!h0->pps_buffers[pps_id]) {
3626         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3629     h->pps= *h0->pps_buffers[pps_id];
3631     if(!h0->sps_buffers[h->pps.sps_id]) {
3632         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3635     h->sps = *h0->sps_buffers[h->pps.sps_id];
     /* dequant tables depend on the PPS scaling lists -> rebuild on change */
3637     if(h == h0 && h->dequant_coeff_pps != pps_id){
3638         h->dequant_coeff_pps = pps_id;
3639         init_dequant_tables(h);
3642     s->mb_width= h->sps.mb_width;
3643     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3645     h->b_stride=  s->mb_width*4;
3646     h->b8_stride= s->mb_width*2;
     /* apply SPS cropping to derive the displayed dimensions */
3648     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3649     if(h->sps.frame_mbs_only_flag)
3650         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3652         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3654     if (s->context_initialized
3655         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
3657             return -1;   // width / height changed during parallelized decoding
3659         flush_dpb(s->avctx);
3662     if (!s->context_initialized) {
3664             return -1;  // we cant (re-)initialize context during parallel decoding
3665         if (MPV_common_init(s) < 0)
3669         init_scan_tables(h);
         /* allocate + initialize one H264Context per slice thread; the
            MpegEncContext part comes from the generic thread contexts */
3672         for(i = 1; i < s->avctx->thread_count; i++) {
3674             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3675             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3676             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3679             init_scan_tables(c);
3683         for(i = 0; i < s->avctx->thread_count; i++)
3684             if(context_init(h->thread_context[i]) < 0)
3687         s->avctx->width = s->width;
3688         s->avctx->height = s->height;
3689         s->avctx->sample_aspect_ratio= h->sps.sar;
3690         if(!s->avctx->sample_aspect_ratio.den)
3691             s->avctx->sample_aspect_ratio.den = 1;
3693         if(h->sps.timing_info_present_flag){
3694             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
             /* work around old x264 writing half the correct time_scale */
3695             if(h->x264_build > 0 && h->x264_build < 44)
3696                 s->avctx->time_base.den *= 2;
3697             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3698                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3702     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
     /* determine frame/field picture structure for this slice */
3705     h->mb_aff_frame = 0;
3706     last_pic_structure = s0->picture_structure;
3707     if(h->sps.frame_mbs_only_flag){
3708         s->picture_structure= PICT_FRAME;
3710         if(get_bits1(&s->gb)) { //field_pic_flag
3711             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3713             s->picture_structure= PICT_FRAME;
3714             h->mb_aff_frame = h->sps.mb_aff;
3717     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3719     if(h0->current_slice == 0){
         /* conceal gaps in frame_num by synthesizing missing reference
            frames (spec 8.2.5.2 behaviour for gaps_in_frame_num) */
3720         while(h->frame_num !=  h->prev_frame_num &&
3721               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3722             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3724             h->prev_frame_num++;
3725             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3726             s->current_picture_ptr->frame_num= h->prev_frame_num;
3727             execute_ref_pic_marking(h, NULL, 0);
3730         /* See if we have a decoded first field looking for a pair... */
3731         if (s0->first_field) {
3732             assert(s0->current_picture_ptr);
3733             assert(s0->current_picture_ptr->data[0]);
3734             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3736             /* figure out if we have a complementary field pair */
3737             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3739                  * Previous field is unmatched. Don't display it, but let it
3740                  * remain for reference if marked as such.
3742                 s0->current_picture_ptr = NULL;
3743                 s0->first_field = FIELD_PICTURE;
3746                 if (h->nal_ref_idc &&
3747                     s0->current_picture_ptr->reference &&
3748                     s0->current_picture_ptr->frame_num != h->frame_num) {
3750                      * This and previous field were reference, but had
3751                      * different frame_nums. Consider this field first in
3752                      * pair. Throw away previous field except for reference
3755                     s0->first_field = 1;
3756                     s0->current_picture_ptr = NULL;
3759                     /* Second field in complementary pair */
3760                     s0->first_field = 0;
3765             /* Frame or first field in a potentially complementary pair */
3766             assert(!s0->current_picture_ptr);
3767             s0->first_field = FIELD_PICTURE;
3770         if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3771             s0->first_field = 0;
3778     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3780     assert(s->mb_num == s->mb_width * s->mb_height);
3781     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3782        first_mb_in_slice                    >= s->mb_num){
3783         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3786     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3787     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3788     if (s->picture_structure == PICT_BOTTOM_FIELD)
3789         s->resync_mb_y = s->mb_y = s->mb_y + 1;
3790     assert(s->mb_y < s->mb_height);
     /* field pictures use doubled pic nums (spec 7.4.3) */
3792     if(s->picture_structure==PICT_FRAME){
3793         h->curr_pic_num=   h->frame_num;
3794         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3796         h->curr_pic_num= 2*h->frame_num + 1;
3797         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3800     if(h->nal_unit_type == NAL_IDR_SLICE){
3801         get_ue_golomb(&s->gb); /* idr_pic_id */
     /* POC-related slice header fields, per sps.poc_type */
3804     if(h->sps.poc_type==0){
3805         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3807         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3808             h->delta_poc_bottom= get_se_golomb(&s->gb);
3812     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3813         h->delta_poc[0]= get_se_golomb(&s->gb);
3815         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3816             h->delta_poc[1]= get_se_golomb(&s->gb);
3821     if(h->pps.redundant_pic_cnt_present){
3822         h->redundant_pic_count= get_ue_golomb(&s->gb);
3825     //set defaults, might be overridden a few lines later
3826     h->ref_count[0]= h->pps.ref_count[0];
3827     h->ref_count[1]= h->pps.ref_count[1];
3829     if(h->slice_type_nos != FF_I_TYPE){
3830         if(h->slice_type_nos == FF_B_TYPE){
3831             h->direct_spatial_mv_pred= get_bits1(&s->gb);
3833         num_ref_idx_active_override_flag= get_bits1(&s->gb);
3835         if(num_ref_idx_active_override_flag){
3836             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3837             if(h->slice_type_nos==FF_B_TYPE)
3838                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
         /* unsigned trick: catches both 0 and >32 in one compare */
3840             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3841                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3842                 h->ref_count[0]= h->ref_count[1]= 1;
3846         if(h->slice_type_nos == FF_B_TYPE)
     /* build and (optionally) reorder the reference picture lists */
3853     if(!default_ref_list_done){
3854         fill_default_ref_list(h);
3857     if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3860     if(h->slice_type_nos!=FF_I_TYPE){
3861         s->last_picture_ptr= &h->ref_list[0][0];
3862         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3864     if(h->slice_type_nos==FF_B_TYPE){
3865         s->next_picture_ptr= &h->ref_list[1][0];
3866         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
     /* explicit vs implicit weighted prediction selection */
3869     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
3870        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3871         pred_weight_table(h);
3872     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3873         implicit_weight_table(h);
3878         decode_ref_pic_marking(h0, &s->gb);
3881         fill_mbaff_ref_list(h);
3883     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3884         direct_dist_scale_factor(h);
3885     direct_ref_list_init(h);
3887     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3888         tmp = get_ue_golomb(&s->gb);
3890             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3893         h->cabac_init_idc= tmp;
3896     h->last_qscale_diff = 0;
3897     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3899         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3903     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3904     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3905     //FIXME qscale / qp ... stuff
3906     if(h->slice_type == FF_SP_TYPE){
3907         get_bits1(&s->gb); /* sp_for_switch_flag */
3909     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3910         get_se_golomb(&s->gb); /* slice_qs_delta */
     /* deblocking filter control; internal convention is inverted
        relative to the bitstream's disable_deblocking_filter_idc */
3913     h->deblocking_filter = 1;
3914     h->slice_alpha_c0_offset = 0;
3915     h->slice_beta_offset = 0;
3916     if( h->pps.deblocking_filter_parameters_present ) {
3917         tmp= get_ue_golomb(&s->gb);
3919             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3922         h->deblocking_filter= tmp;
3923         if(h->deblocking_filter < 2)
3924             h->deblocking_filter^= 1; // 1<->0
3926         if( h->deblocking_filter ) {
3927             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3928             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
     /* honor the user's skip_loop_filter discard level */
3932     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
3933        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3934        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
3935        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3936         h->deblocking_filter= 0;
     /* type-1 deblocking crosses slice edges -> incompatible with
        slice-parallel decoding */
3938     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3939         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3940             /* Cheat slightly for speed:
3941                Do not bother to deblock across slices. */
3942             h->deblocking_filter = 2;
3944             h0->max_contexts = 1;
3945             if(!h0->single_decode_warning) {
3946                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3947                 h0->single_decode_warning = 1;
3950                 return 1; // deblocking switched inside frame
     /* NOTE(review): the literal '?' below is not valid C; in the upstream
        tree this line sits inside an #if 0 block not visible here — verify
        before building this excerpt standalone */
3955     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3956         slice_group_change_cycle= get_bits(&s->gb, ?);
3959     h0->last_slice_type = slice_type;
3960     h->slice_num = ++h0->current_slice;
3961     if(h->slice_num >= MAX_SLICES){
3962         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
     /* precompute per-slice ref_index -> frame mapping used by loopfilter */
3966         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3970             ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3971                           +(h->ref_list[j][i].reference&3);
3974         for(i=16; i<48; i++)
3975             ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3976                           +(h->ref_list[j][i].reference&3);
3979     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3980     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3982     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3983         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3985                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3987                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3988                pps_id, h->frame_num,
3989                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3990                h->ref_count[0], h->ref_count[1],
3992                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
3994                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
3995                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the CAVLC level_prefix: the number of leading zero bits before
 * the first 1 in the bitstream (spec 9.2.2.1).
 */
4005 static inline int get_level_prefix(GetBitContext *gb){
4009     OPEN_READER(re, gb);
4010     UPDATE_CACHE(re, gb);
4011     buf=GET_CACHE(re, gb);
     /* position of the leading 1 bit gives prefix length + 1 */
4013     log= 32 - av_log2(buf);
4015     print_bin(buf>>(32-log), log);
4016     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
     /* consume the zeros and the terminating 1 */
4019     LAST_SKIP_BITS(re, gb, log);
4020     CLOSE_READER(re, gb);
/* Returns whether the 8x8 transform may be used for the current MB:
   disallowed if any sub-block is smaller than 8x8, or direct sub-MBs
   are present without direct_8x8_inference. */
4025 static inline int get_dct8x8_allowed(H264Context *h){
4028         if(!IS_SUB_8X8(h->sub_mb_type[i])
4029            || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4036 * decodes a residual block.
4037 * @param n block index
4038 * @param scantable scantable
4039 * @param max_coeff number of coefficients in the block
4040 * @return <0 if an error occurred
/* CAVLC residual parser: reads coeff_token (total coefficients +
 * trailing ones), then the non-trivial levels, then total_zeros and the
 * run_before values, and scatters the levels into block[] via scantable.
 * NOTE(review): many interior lines are missing from this extraction
 * (braces, else branches, loop bodies); comments below describe only
 * what the visible lines establish. */
4042 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4043 MpegEncContext * const s = &h->s;
4044 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4046 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4048 //FIXME put trailing_onex into the context
/* coeff_token selection: chroma DC uses its own VLC; luma DC and
 * regular blocks pick a VLC table from the predicted non-zero count. */
4050 if(n == CHROMA_DC_BLOCK_INDEX){
4051 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4052 total_coeff= coeff_token>>2;
4054 if(n == LUMA_DC_BLOCK_INDEX){
4055 total_coeff= pred_non_zero_count(h, 0);
4056 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4057 total_coeff= coeff_token>>2;
4059 total_coeff= pred_non_zero_count(h, n);
4060 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4061 total_coeff= coeff_token>>2;
4062 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4066 //FIXME set last_non_zero?
/* Sanity check on the parsed coefficient count. */
4070 if(total_coeff > (unsigned)max_coeff) {
4071 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* Low 2 bits of coeff_token carry the trailing-ones count (|level|==1,
 * only a sign bit each). */
4075 trailing_ones= coeff_token&3;
4076 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4077 assert(total_coeff<=16);
4079 for(i=0; i<trailing_ones; i++){
4080 level[i]= 1 - 2*get_bits1(gb);
/* First non-trailing level: prefix/suffix coding per H.264 9.2.2. */
4084 int level_code, mask;
4085 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4086 int prefix= get_level_prefix(gb);
4088 //first coefficient has suffix_length equal to 0 or 1
4089 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4091 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4093 level_code= (prefix<<suffix_length); //part
4094 }else if(prefix==14){
4096 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4098 level_code= prefix + get_bits(gb, 4); //part
4100 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4101 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4103 level_code += (1<<(prefix-3))-4096;
4106 if(trailing_ones < 3) level_code += 2;
/* Branchless sign decode: even level_code -> positive, odd -> negative. */
4111 mask= -(level_code&1);
4112 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4115 //remaining coefficients have suffix_length > 0
4116 for(;i<total_coeff;i++) {
4117 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4118 prefix = get_level_prefix(gb);
4120 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4122 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4124 level_code += (1<<(prefix-3))-4096;
4126 mask= -(level_code&1);
4127 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* suffix_length grows once the decoded magnitude exceeds its limit. */
4128 if(level_code > suffix_limit[suffix_length])
/* total_zeros: if the block is full there are none to read. */
4133 if(total_coeff == max_coeff)
4136 if(n == CHROMA_DC_BLOCK_INDEX)
4137 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4139 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* Scatter levels back-to-front; two copies of the loop: without qmul
 * (raw levels) and with qmul (dequantized, +32 >> 6 rounding). */
4142 coeff_num = zeros_left + total_coeff - 1;
4143 j = scantable[coeff_num];
4145 block[j] = level[0];
4146 for(i=1;i<total_coeff;i++) {
4149 else if(zeros_left < 7){
4150 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4152 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4154 zeros_left -= run_before;
4155 coeff_num -= 1 + run_before;
4156 j= scantable[ coeff_num ];
4161 block[j] = (level[0] * qmul[j] + 32)>>6;
4162 for(i=1;i<total_coeff;i++) {
4165 else if(zeros_left < 7){
4166 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4168 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4170 zeros_left -= run_before;
4171 coeff_num -= 1 + run_before;
4172 j= scantable[ coeff_num ];
4174 block[j]= (level[i] * qmul[j] + 32)>>6;
4179 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predicts mb_field_decoding_flag for a skipped MBAFF pair from the
 * left neighbour, falling back to the top neighbour (same slice only). */
4186 static void predict_field_decoding_flag(H264Context *h){
4187 MpegEncContext * const s = &h->s;
4188 const int mb_xy= h->mb_xy;
4189 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4190 ? s->current_picture.mb_type[mb_xy-1]
4191 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4192 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
/* NOTE(review): the final fallback operand (neither neighbour available)
 * is on a line missing from this extraction. */
4194 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4198 * decodes a P_SKIP or B_SKIP macroblock
/* Builds the MB state for a skipped macroblock: zeroes the coefficient
 * counts, derives motion (direct prediction for B, pskip prediction for
 * P), then writes back motion and per-MB tables. */
4200 static void decode_mb_skip(H264Context *h){
4201 MpegEncContext * const s = &h->s;
4202 const int mb_xy= h->mb_xy;
4205 memset(h->non_zero_count[mb_xy], 0, 16);
4206 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4209 mb_type|= MB_TYPE_INTERLACED;
/* B_SKIP: treated as direct-predicted 16x16 until pred_direct_motion
 * refines the type. */
4211 if( h->slice_type_nos == FF_B_TYPE )
4213 // just for fill_caches. pred_direct_motion will set the real mb_type
4214 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4216 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4217 pred_direct_motion(h, &mb_type);
4218 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: 16x16 with list-0 reference 0 and the P-skip predicted MV. */
4223 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4225 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4226 pred_pskip_motion(h, &mx, &my);
4227 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4228 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4231 write_back_motion(h, mb_type);
4232 s->current_picture.mb_type[mb_xy]= mb_type;
4233 s->current_picture.qscale_table[mb_xy]= s->qscale;
4234 h->slice_table[ mb_xy ]= h->slice_num;
4235 h->prev_mb_skipped= 1;
4239 * decodes a macroblock
4240 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Top-level CAVLC macroblock parser: skip-run handling, mb_type,
 * intra prediction modes or inter motion/ref parsing, CBP, dquant,
 * and the residual blocks via decode_residual().
 * NOTE(review): numerous interior lines (braces, else branches, loop
 * headers) are missing from this extraction — the visible lines are
 * documented, but control flow cannot be fully verified here. */
4242 static int decode_mb_cavlc(H264Context *h){
4243 MpegEncContext * const s = &h->s;
4245 int partition_count;
4246 unsigned int mb_type, cbp;
4247 int dct8x8_allowed= h->pps.transform_8x8_mode;
4249 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4251 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4253 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4254 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* Skip-run handling for P/B slices (mb_skip_run golomb code). */
4256 if(h->slice_type_nos != FF_I_TYPE){
4257 if(s->mb_skip_run==-1)
4258 s->mb_skip_run= get_ue_golomb(&s->gb);
4260 if (s->mb_skip_run--) {
4261 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4262 if(s->mb_skip_run==0)
4263 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4265 predict_field_decoding_flag(h);
4272 if( (s->mb_y&1) == 0 )
4273 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4276 h->prev_mb_skipped= 0;
/* mb_type mapping: B and P slices index into their info tables; large
 * values fall through to the intra path (decode_intra_mb). */
4278 mb_type= get_ue_golomb(&s->gb);
4279 if(h->slice_type_nos == FF_B_TYPE){
4281 partition_count= b_mb_type_info[mb_type].partition_count;
4282 mb_type= b_mb_type_info[mb_type].type;
4285 goto decode_intra_mb;
4287 }else if(h->slice_type_nos == FF_P_TYPE){
4289 partition_count= p_mb_type_info[mb_type].partition_count;
4290 mb_type= p_mb_type_info[mb_type].type;
4293 goto decode_intra_mb;
4296 assert(h->slice_type_nos == FF_I_TYPE);
4297 if(h->slice_type == FF_SI_TYPE && mb_type)
4301 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4305 cbp= i_mb_type_info[mb_type].cbp;
4306 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4307 mb_type= i_mb_type_info[mb_type].type;
4311 mb_type |= MB_TYPE_INTERLACED;
4313 h->slice_table[ mb_xy ]= h->slice_num;
/* PCM macroblock: raw samples copied straight from the bitstream. */
4315 if(IS_INTRA_PCM(mb_type)){
4318 // We assume these blocks are very rare so we do not optimize it.
4319 align_get_bits(&s->gb);
4321 // The pixels are stored in the same order as levels in h->mb array.
4322 for(x=0; x < (CHROMA ? 384 : 256); x++){
4323 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4326 // In deblocking, the quantizer is 0
4327 s->current_picture.qscale_table[mb_xy]= 0;
4328 // All coeffs are present
4329 memset(h->non_zero_count[mb_xy], 16, 16);
4331 s->current_picture.mb_type[mb_xy]= mb_type;
/* NOTE(review): ref_count doubling/halving brackets MBAFF field decode;
 * the guarding condition lines are missing from this extraction. */
4336 h->ref_count[0] <<= 1;
4337 h->ref_count[1] <<= 1;
4340 fill_caches(h, mb_type, 0);
/* Intra path: 4x4 prediction modes (with optional 8x8 DCT flag) or
 * 16x16 mode check, then the chroma prediction mode. */
4343 if(IS_INTRA(mb_type)){
4345 // init_top_left_availability(h);
4346 if(IS_INTRA4x4(mb_type)){
4349 if(dct8x8_allowed && get_bits1(&s->gb)){
4350 mb_type |= MB_TYPE_8x8DCT;
4354 // fill_intra4x4_pred_table(h);
4355 for(i=0; i<16; i+=di){
4356 int mode= pred_intra_mode(h, i);
4358 if(!get_bits1(&s->gb)){
4359 const int rem_mode= get_bits(&s->gb, 3);
4360 mode = rem_mode + (rem_mode >= mode);
4364 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4366 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4368 write_back_intra_pred_mode(h);
4369 if( check_intra4x4_pred_mode(h) < 0)
4372 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4373 if(h->intra16x16_pred_mode < 0)
4377 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4380 h->chroma_pred_mode= pred_mode;
/* 8x8 partition path: parse the four sub_mb_types (B or P tables),
 * then per-list reference indices and sub-partition motion vectors. */
4382 }else if(partition_count==4){
4383 int i, j, sub_partition_count[4], list, ref[2][4];
4385 if(h->slice_type_nos == FF_B_TYPE){
4387 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4388 if(h->sub_mb_type[i] >=13){
4389 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4392 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4393 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4395 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4396 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4397 pred_direct_motion(h, &mb_type);
4398 h->ref_cache[0][scan8[4]] =
4399 h->ref_cache[1][scan8[4]] =
4400 h->ref_cache[0][scan8[12]] =
4401 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4404 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4406 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4407 if(h->sub_mb_type[i] >=4){
4408 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4411 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4412 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4416 for(list=0; list<h->list_count; list++){
4417 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4419 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4420 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4421 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4423 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4435 dct8x8_allowed = get_dct8x8_allowed(h);
4437 for(list=0; list<h->list_count; list++){
4439 if(IS_DIRECT(h->sub_mb_type[i])) {
4440 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4443 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4444 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4446 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4447 const int sub_mb_type= h->sub_mb_type[i];
4448 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4449 for(j=0; j<sub_partition_count[i]; j++){
4451 const int index= 4*i + block_width*j;
4452 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4453 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4454 mx += get_se_golomb(&s->gb);
4455 my += get_se_golomb(&s->gb);
4456 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the decoded MV over the 4x4 cells a sub-partition covers. */
4458 if(IS_SUB_8X8(sub_mb_type)){
4460 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4462 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4463 }else if(IS_SUB_8X4(sub_mb_type)){
4464 mv_cache[ 1 ][0]= mx;
4465 mv_cache[ 1 ][1]= my;
4466 }else if(IS_SUB_4X8(sub_mb_type)){
4467 mv_cache[ 8 ][0]= mx;
4468 mv_cache[ 8 ][1]= my;
4470 mv_cache[ 0 ][0]= mx;
4471 mv_cache[ 0 ][1]= my;
4474 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4480 }else if(IS_DIRECT(mb_type)){
4481 pred_direct_motion(h, &mb_type);
4482 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* Non-8x8 inter partitions: 16x16, 16x8 or 8x16 — refs then MVs. */
4484 int list, mx, my, i;
4485 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4486 if(IS_16X16(mb_type)){
4487 for(list=0; list<h->list_count; list++){
4489 if(IS_DIR(mb_type, 0, list)){
4490 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4491 if(val >= h->ref_count[list]){
4492 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4496 val= LIST_NOT_USED&0xFF;
4497 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4499 for(list=0; list<h->list_count; list++){
4501 if(IS_DIR(mb_type, 0, list)){
4502 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4503 mx += get_se_golomb(&s->gb);
4504 my += get_se_golomb(&s->gb);
4505 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4507 val= pack16to32(mx,my);
4510 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4513 else if(IS_16X8(mb_type)){
4514 for(list=0; list<h->list_count; list++){
4517 if(IS_DIR(mb_type, i, list)){
4518 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4519 if(val >= h->ref_count[list]){
4520 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4524 val= LIST_NOT_USED&0xFF;
4525 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4528 for(list=0; list<h->list_count; list++){
4531 if(IS_DIR(mb_type, i, list)){
4532 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4533 mx += get_se_golomb(&s->gb);
4534 my += get_se_golomb(&s->gb);
4535 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4537 val= pack16to32(mx,my);
4540 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4544 assert(IS_8X16(mb_type));
4545 for(list=0; list<h->list_count; list++){
4548 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4549 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4550 if(val >= h->ref_count[list]){
4551 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4555 val= LIST_NOT_USED&0xFF;
4556 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4559 for(list=0; list<h->list_count; list++){
4562 if(IS_DIR(mb_type, i, list)){
4563 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4564 mx += get_se_golomb(&s->gb);
4565 my += get_se_golomb(&s->gb);
4566 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4568 val= pack16to32(mx,my);
4571 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4577 if(IS_INTER(mb_type))
4578 write_back_motion(h, mb_type);
/* Coded block pattern: golomb-coded for non-I16x16; mapped through
 * intra4x4 / inter tables (gray-chroma variants on the else path). */
4580 if(!IS_INTRA16x16(mb_type)){
4581 cbp= get_ue_golomb(&s->gb);
4583 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4588 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4589 else cbp= golomb_to_inter_cbp [cbp];
4591 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4592 else cbp= golomb_to_inter_cbp_gray[cbp];
4597 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4598 if(get_bits1(&s->gb)){
4599 mb_type |= MB_TYPE_8x8DCT;
4600 h->cbp_table[mb_xy]= cbp;
4603 s->current_picture.mb_type[mb_xy]= mb_type;
/* Residual decoding: pick scan tables (field vs frame), apply dquant,
 * then luma DC/AC and chroma DC/AC blocks. */
4605 if(cbp || IS_INTRA16x16(mb_type)){
4606 int i8x8, i4x4, chroma_idx;
4608 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4609 const uint8_t *scan, *scan8x8, *dc_scan;
4611 // fill_non_zero_count_cache(h);
4613 if(IS_INTERLACED(mb_type)){
4614 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4615 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4616 dc_scan= luma_dc_field_scan;
4618 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4619 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4620 dc_scan= luma_dc_zigzag_scan;
4623 dquant= get_se_golomb(&s->gb);
4625 if( dquant > 25 || dquant < -26 ){
4626 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4630 s->qscale += dquant;
/* qscale wraps modulo 52 per the spec. */
4631 if(((unsigned)s->qscale) > 51){
4632 if(s->qscale<0) s->qscale+= 52;
4633 else s->qscale-= 52;
4636 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4637 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4638 if(IS_INTRA16x16(mb_type)){
4639 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4640 return -1; //FIXME continue if partitioned and other return -1 too
4643 assert((cbp&15) == 0 || (cbp&15) == 15);
4646 for(i8x8=0; i8x8<4; i8x8++){
4647 for(i4x4=0; i4x4<4; i4x4++){
4648 const int index= i4x4 + 4*i8x8;
4649 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4655 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4658 for(i8x8=0; i8x8<4; i8x8++){
4659 if(cbp & (1<<i8x8)){
4660 if(IS_8x8DCT(mb_type)){
4661 DCTELEM *buf = &h->mb[64*i8x8];
4663 for(i4x4=0; i4x4<4; i4x4++){
4664 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4665 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4668 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4669 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4671 for(i4x4=0; i4x4<4; i4x4++){
4672 const int index= i4x4 + 4*i8x8;
4674 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4680 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4681 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4687 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4688 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4694 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4695 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4696 for(i4x4=0; i4x4<4; i4x4++){
4697 const int index= 16 + 4*chroma_idx + i4x4;
4698 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4704 uint8_t * const nnz= &h->non_zero_count_cache[0];
4705 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4706 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4709 uint8_t * const nnz= &h->non_zero_count_cache[0];
4710 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4711 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4712 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4714 s->current_picture.qscale_table[mb_xy]= s->qscale;
4715 write_back_non_zero_count(h);
4718 h->ref_count[0] >>= 1;
4719 h->ref_count[1] >>= 1;
/* CABAC mb_field_decoding_flag: context 70..72 selected by how many of
 * the left / above MB pairs (same slice) are field-coded. */
4725 static int decode_cabac_field_decoding_flag(H264Context *h) {
4726 MpegEncContext * const s = &h->s;
4727 const int mb_x = s->mb_x;
4728 const int mb_y = s->mb_y & ~1;
4729 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4730 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4732 unsigned int ctx = 0;
4734 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4737 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4741 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* CABAC intra mb_type: returns 0 for I4x4, 25 for PCM, otherwise an
 * I16x16 type index built from cbp_luma/cbp_chroma/pred-mode bins.
 * ctx_base selects the state block; intra_slice picks the contexted
 * first bin (neighbour-dependent) vs the single-state variant. */
4744 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4745 uint8_t *state= &h->cabac_state[ctx_base];
4749 MpegEncContext * const s = &h->s;
4750 const int mba_xy = h->left_mb_xy[0];
4751 const int mbb_xy = h->top_mb_xy;
4753 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4755 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4757 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4758 return 0; /* I4x4 */
4761 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4762 return 0; /* I4x4 */
4765 if( get_cabac_terminate( &h->cabac ) )
4766 return 25; /* PCM */
4768 mb_type = 1; /* I16x16 */
4769 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4770 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4771 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4772 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4773 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* CABAC mb_type dispatcher: I slices delegate to the intra decoder;
 * P slices decode a small tree (states 14..17) with an intra escape;
 * B slices decode the B-type bin tree (states 27+x) with direct,
 * uni-/bi-prediction and an intra escape at bits==13. */
4777 static int decode_cabac_mb_type( H264Context *h ) {
4778 MpegEncContext * const s = &h->s;
4780 if( h->slice_type_nos == FF_I_TYPE ) {
4781 return decode_cabac_intra_mb_type(h, 3, 1);
4782 } else if( h->slice_type_nos == FF_P_TYPE ) {
4783 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4785 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4786 /* P_L0_D16x16, P_8x8 */
4787 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4789 /* P_L0_D8x16, P_L0_D16x8 */
4790 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4793 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4795 } else if( h->slice_type_nos == FF_B_TYPE ) {
4796 const int mba_xy = h->left_mb_xy[0];
4797 const int mbb_xy = h->top_mb_xy;
/* ctx counts non-direct neighbours in the same slice. */
4801 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4803 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4806 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4807 return 0; /* B_Direct_16x16 */
4809 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4810 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4813 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4814 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4815 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4816 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4818 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4819 else if( bits == 13 ) {
4820 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4821 } else if( bits == 14 )
4822 return 11; /* B_L1_L0_8x16 */
4823 else if( bits == 15 )
4824 return 22; /* B_8x8 */
4826 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4827 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4829 /* TODO SI/SP frames? */
/* CABAC mb_skip_flag: context 11..13 (P) or +13 (B) chosen by whether
 * the left/above neighbours (same slice) are themselves skipped.
 * MBAFF needs pair-aware neighbour addressing.
 * NOTE(review): several neighbour-setup lines are missing from this
 * extraction (mba_xy initialisation in the MBAFF branch). */
4834 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4835 MpegEncContext * const s = &h->s;
4839 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4840 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4843 && h->slice_table[mba_xy] == h->slice_num
4844 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4845 mba_xy += s->mb_stride;
4847 mbb_xy = mb_xy - s->mb_stride;
4849 && h->slice_table[mbb_xy] == h->slice_num
4850 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4851 mbb_xy -= s->mb_stride;
4853 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4855 int mb_xy = h->mb_xy;
4857 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4860 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4862 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4865 if( h->slice_type_nos == FF_B_TYPE )
4867 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* CABAC intra4x4 prediction mode: state 68 codes "use predicted mode";
 * otherwise a 3-bit rem mode (state 69) is read and adjusted past the
 * predicted mode. */
4870 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4873 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4876 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4877 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4878 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4880 if( mode >= pred_mode )
/* CABAC chroma prediction mode: context 64..66 from non-zero neighbour
 * modes, then a truncated-unary suffix with state 64+3. */
4886 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4887 const int mba_xy = h->left_mb_xy[0];
4888 const int mbb_xy = h->top_mb_xy;
4892 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4893 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4896 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4899 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4902 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4904 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* CABAC luma CBP: four bins (states 73..76), each contexted on the
 * already-decoded bits of this MB and the left/top neighbour CBPs
 * (-1 when the neighbour belongs to another slice). */
4910 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4911 int cbp_b, cbp_a, ctx, cbp = 0;
4913 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4914 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4916 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4917 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4918 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4919 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4920 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4921 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4922 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4923 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* CABAC chroma CBP (0=none, 1=DC only, 2=DC+AC): two bins with state
 * 77+ctx, contexted on the neighbours' chroma CBP nibbles. */
4926 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4930 cbp_a = (h->left_cbp>>4)&0x03;
4931 cbp_b = (h-> top_cbp>>4)&0x03;
4934 if( cbp_a > 0 ) ctx++;
4935 if( cbp_b > 0 ) ctx += 2;
4936 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4940 if( cbp_a == 2 ) ctx++;
4941 if( cbp_b == 2 ) ctx += 2;
4942 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* CABAC mb_qp_delta: unary code with states 60+ctx, bounded at 102 to
 * stop runaway loops on corrupt data; value is unfolded to signed.
 * NOTE(review): the positive-value return path is on lines missing
 * from this extraction. */
4944 static int decode_cabac_mb_dqp( H264Context *h) {
4948 if( h->last_qscale_diff != 0 )
4951 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4957 if(val > 102) //prevent infinite loop
4964 return -(val + 1)/2;
/* CABAC P sub_mb_type: small binary tree over states 21..23. */
4966 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4967 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4969 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4971 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* CABAC B sub_mb_type: binary tree over states 36..39; 0 is direct,
 * 1/2 the L0/L1 8x8 types, larger values the split sub-partitions. */
4975 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4977 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4978 return 0; /* B_Direct_8x8 */
4979 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4980 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4982 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4983 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4984 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4987 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4988 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC transform_size_8x8_flag: single bin, context 399 + number of
 * 8x8-transform neighbours (h->neighbor_transform_size). */
4992 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4993 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* CABAC ref_idx: unary code with states 54+ctx, context from the left
 * and top cached refs (direct-predicted neighbours don't count in B).
 * Clamped at 32 to avoid unbounded loops on corrupt streams. */
4996 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4997 int refa = h->ref_cache[list][scan8[n] - 1];
4998 int refb = h->ref_cache[list][scan8[n] - 8];
5002 if( h->slice_type_nos == FF_B_TYPE) {
5003 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5005 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5014 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5020 if(ref >= 32 /*h->ref_list[list]*/){
5021 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5022 return 0; //FIXME we should return -1 and check the return everywhere
/* CABAC motion vector difference (one component, l=0 horizontal /
 * l=1 vertical): context from the summed neighbour |mvd|, then a
 * truncated-unary prefix (up to 9), an exp-golomb bypass suffix for
 * large values, and finally a bypass-coded sign. */
5028 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5029 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5030 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5031 int ctxbase = (l == 0) ? 40 : 47;
5036 else if( amvd > 32 )
5041 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5046 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5054 while( get_cabac_bypass( &h->cabac ) ) {
5058 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5063 if( get_cabac_bypass( &h->cabac ) )
5067 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Context for the CABAC coded_block_flag: looks up the left/top
 * non-zero indicators appropriate for the block category (DC flags
 * come from the packed CBP bits, AC/luma from non_zero_count_cache)
 * and returns ctx + 4*cat. */
5070 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5076 nza = h->left_cbp&0x100;
5077 nzb = h-> top_cbp&0x100;
5079 nza = (h->left_cbp>>(6+idx))&0x01;
5080 nzb = (h-> top_cbp>>(6+idx))&0x01;
5084 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5085 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5087 assert(cat == 1 || cat == 2);
5088 nza = h->non_zero_count_cache[scan8[idx] - 1];
5089 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5099 return ctx + 4 * cat;
/* Per-position context offsets for the last_significant_coeff_flag of
 * 8x8 blocks (63 scan positions; the final position needs no flag). */
5102 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5103 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5104 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5105 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5106 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* CABAC residual decoder: coded_block_flag, significance map, then the
 * coefficient levels (node-context state machine) and signs, writing
 * dequantized values into block[]. is_dc is a compile-time switch used
 * by the _dc/_nondc wrappers to specialize the code.
 * NOTE(review): several lines (local declarations, branch keywords) are
 * missing from this extraction; comments describe only visible code. */
5109 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5110 static const int significant_coeff_flag_offset[2][6] = {
5111 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5112 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5114 static const int last_coeff_flag_offset[2][6] = {
5115 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5116 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5118 static const int coeff_abs_level_m1_offset[6] = {
5119 227+0, 227+10, 227+20, 227+30, 227+39, 426
5121 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5122 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5123 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5124 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5125 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5126 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5127 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5128 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5129 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5131 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5132 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5133 * map node ctx => cabac ctx for level=1 */
5134 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5135 /* map node ctx => cabac ctx for level>1 */
5136 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5137 static const uint8_t coeff_abs_level_transition[2][8] = {
5138 /* update node ctx after decoding a level=1 */
5139 { 1, 2, 3, 3, 4, 5, 6, 7 },
5140 /* update node ctx after decoding a level>1 */
5141 { 4, 4, 4, 4, 5, 6, 7, 7 }
5147 int coeff_count = 0;
5150 uint8_t *significant_coeff_ctx_base;
5151 uint8_t *last_coeff_ctx_base;
5152 uint8_t *abs_level_m1_ctx_base;
/* Optional local copy of the CABAC state for better register use. */
5155 #define CABAC_ON_STACK
5157 #ifdef CABAC_ON_STACK
5160 cc.range = h->cabac.range;
5161 cc.low = h->cabac.low;
5162 cc.bytestream= h->cabac.bytestream;
5164 #define CC &h->cabac
5168 /* cat: 0-> DC 16x16 n = 0
5169 * 1-> AC 16x16 n = luma4x4idx
5170 * 2-> Luma4x4 n = luma4x4idx
5171 * 3-> DC Chroma n = iCbCr
5172 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5173 * 5-> Luma8x8 n = 4 * luma8x8idx
5176 /* read coded block flag */
5177 if( is_dc || cat != 5 ) {
5178 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5181 h->non_zero_count_cache[scan8[16+n]] = 0;
5183 h->non_zero_count_cache[scan8[n]] = 0;
5186 #ifdef CABAC_ON_STACK
5187 h->cabac.range = cc.range ;
5188 h->cabac.low = cc.low ;
5189 h->cabac.bytestream= cc.bytestream;
5195 significant_coeff_ctx_base = h->cabac_state
5196 + significant_coeff_flag_offset[MB_FIELD][cat];
5197 last_coeff_ctx_base = h->cabac_state
5198 + last_coeff_flag_offset[MB_FIELD][cat];
5199 abs_level_m1_ctx_base = h->cabac_state
5200 + coeff_abs_level_m1_offset[cat];
5202 if( !is_dc && cat == 5 ) {
5203 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5204 for(last= 0; last < coefs; last++) { \
5205 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5206 if( get_cabac( CC, sig_ctx )) { \
5207 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5208 index[coeff_count++] = last; \
5209 if( get_cabac( CC, last_ctx ) ) { \
5215 if( last == max_coeff -1 ) {\
5216 index[coeff_count++] = last;\
5218 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5219 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5220 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5222 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5224 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5226 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5229 assert(coeff_count > 0);
/* Record non-zero status: DC via cbp_table bits, AC via nnz cache. */
5233 h->cbp_table[h->mb_xy] |= 0x100;
5235 h->cbp_table[h->mb_xy] |= 0x40 << n;
5238 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5240 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5242 assert( cat == 1 || cat == 2 );
5243 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level loop: back-to-front over the significant positions; node_ctx
 * tracks the abs-level state machine from the tables above. */
5248 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5250 int j= scantable[index[--coeff_count]];
5252 if( get_cabac( CC, ctx ) == 0 ) {
5253 node_ctx = coeff_abs_level_transition[0][node_ctx];
5255 block[j] = get_cabac_bypass_sign( CC, -1);
5257 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5261 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5262 node_ctx = coeff_abs_level_transition[1][node_ctx];
5264 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5268 if( coeff_abs >= 15 ) {
5270 while( get_cabac_bypass( CC ) ) {
5276 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5282 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5284 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5287 } while( coeff_count );
5288 #ifdef CABAC_ON_STACK
5289 h->cabac.range = cc.range ;
5290 h->cabac.low = cc.low ;
5291 h->cabac.bytestream= cc.bytestream;
// decode_cabac_residual(): dispatch for CABAC residual-block decoding.
// When CONFIG_SMALL is not defined, the DC (cat == 0 || cat == 3) and non-DC
// paths are split into two thin wrappers so that the final 'is_dc' argument of
// decode_cabac_residual_internal is a compile-time constant in each call site;
// the CONFIG_SMALL variant passes it at runtime instead.
// NOTE(review): this listing elides several original lines here (closing
// braces and the #ifdef CONFIG_SMALL / #else pair) — the inline numbering
// jumps from 5298 to 5301 and 5306 to 5308/5310.
5296 #ifndef CONFIG_SMALL
// Wrapper for DC blocks: is_dc fixed to 1 at compile time.
5297 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5298 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
// Wrapper for AC / non-DC blocks: is_dc fixed to 0 at compile time.
5301 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5302 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5306 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
// CONFIG_SMALL build: single call, is_dc computed at runtime from cat.
5308 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
// Non-CONFIG_SMALL build: pick the specialized wrapper by category.
5310 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5311 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
// compute_mb_neighbors(): fill h->top_mb_xy and h->left_mb_xy[0] with the
// raster indices of the macroblocks above and to the left of the current one.
// The plain-frame defaults (mb_xy - mb_stride, mb_xy - 1) are adjusted when
// MBAFF pairs are involved, because a field MB's spatial neighbors live in a
// different row of the MB-pair than simple raster arithmetic suggests.
// NOTE(review): several lines are elided in this listing (the FRAME_MBAFF
// guard around the pair logic and the closing braces) — numbering jumps
// 5320→5322 and 5330→5332.
5315 static inline void compute_mb_neighbors(H264Context *h)
5317 MpegEncContext * const s = &h->s;
5318 const int mb_xy = h->mb_xy;
// Frame-coded defaults: directly above and directly left.
5319 h->top_mb_xy = mb_xy - s->mb_stride;
5320 h->left_mb_xy[0] = mb_xy - 1;
// MBAFF: work on MB pairs. pair_xy is the top MB of the current pair.
5322 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5323 const int top_pair_xy = pair_xy - s->mb_stride;
// A pair is "frame" when its mb_type is not interlaced.
5324 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5325 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5326 const int curr_mb_frame_flag = !MB_FIELD;
5327 const int bottom = (s->mb_y & 1);
5329 ? !curr_mb_frame_flag // bottom macroblock
5330 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
// Move top neighbor one more row up when field/frame parity requires it.
5332 h->top_mb_xy -= s->mb_stride;
5334 if (left_mb_frame_flag != curr_mb_frame_flag) {
// Left neighbor is taken from the top MB of the left pair on a
// frame/field mismatch.
5335 h->left_mb_xy[0] = pair_xy - 1;
5337 } else if (FIELD_PICTURE) {
// Pure field pictures: rows are interleaved, skip one extra stride.
5338 h->top_mb_xy -= s->mb_stride;
5344 * decodes a macroblock
5345 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// decode_mb_cabac(): CABAC entropy decoding of one macroblock — skip flags,
// mb_type, intra prediction modes or inter motion (refs + MVDs), CBP, delta
// QP, and finally all residual coefficient blocks.
// NOTE(review): this listing is a sampled excerpt; many original lines
// (braces, else branches, declarations such as mb_xy/skip/i/dqp) are elided,
// visible as jumps in the inline numbering.
5347 static int decode_mb_cabac(H264Context *h) {
5348 MpegEncContext * const s = &h->s;
5350 int mb_type, partition_count, cbp = 0;
5351 int dct8x8_allowed= h->pps.transform_8x8_mode;
5353 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5355 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5357 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// --- Skip flag handling (P/B slices only; I slices never signal skip). ---
5358 if( h->slice_type_nos != FF_I_TYPE ) {
5360 /* a skipped mb needs the aff flag from the following mb */
5361 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5362 predict_field_decoding_flag(h);
5363 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5364 skip = h->next_mb_skipped;
5366 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5367 /* read skip flags */
// Under MBAFF, the top MB of a skipped pair also reads the bottom MB's
// skip flag, since the field-decoding flag is coded after both skips.
5369 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5370 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5371 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5372 if(h->next_mb_skipped)
5373 predict_field_decoding_flag(h);
5375 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
// Skipped MB: reset per-MB tables and bail out (decode_mb_skip path is
// elided in this listing).
5380 h->cbp_table[mb_xy] = 0;
5381 h->chroma_pred_mode_table[mb_xy] = 0;
5382 h->last_qscale_diff = 0;
5389 if( (s->mb_y&1) == 0 )
5391 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5394 h->prev_mb_skipped = 0;
// --- mb_type decoding and translation via the per-slice-type tables. ---
5396 compute_mb_neighbors(h);
5397 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5398 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5402 if( h->slice_type_nos == FF_B_TYPE ) {
5404 partition_count= b_mb_type_info[mb_type].partition_count;
5405 mb_type= b_mb_type_info[mb_type].type;
5408 goto decode_intra_mb;
5410 } else if( h->slice_type_nos == FF_P_TYPE ) {
5412 partition_count= p_mb_type_info[mb_type].partition_count;
5413 mb_type= p_mb_type_info[mb_type].type;
5416 goto decode_intra_mb;
5419 if(h->slice_type == FF_SI_TYPE && mb_type)
5421 assert(h->slice_type_nos == FF_I_TYPE);
// Intra MB: look up cbp / 16x16 pred mode / type from i_mb_type_info.
5423 partition_count = 0;
5424 cbp= i_mb_type_info[mb_type].cbp;
5425 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5426 mb_type= i_mb_type_info[mb_type].type;
5429 mb_type |= MB_TYPE_INTERLACED;
5431 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples follow in the bytestream; copy them and restart
// the CABAC decoder after the sample data. ---
5433 if(IS_INTRA_PCM(mb_type)) {
5436 // We assume these blocks are very rare so we do not optimize it.
5437 // FIXME The two following lines get the bitstream position in the cabac
5438 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5439 ptr= h->cabac.bytestream;
5440 if(h->cabac.low&0x1) ptr--;
5442 if(h->cabac.low&0x1FF) ptr--;
5445 // The pixels are stored in the same order as levels in h->mb array.
5446 memcpy(h->mb, ptr, 256); ptr+=256;
5448 memcpy(h->mb+128, ptr, 128); ptr+=128;
5451 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5453 // All blocks are present
5454 h->cbp_table[mb_xy] = 0x1ef;
5455 h->chroma_pred_mode_table[mb_xy] = 0;
5456 // In deblocking, the quantizer is 0
5457 s->current_picture.qscale_table[mb_xy]= 0;
5458 // All coeffs are present
5459 memset(h->non_zero_count[mb_xy], 16, 16);
5460 s->current_picture.mb_type[mb_xy]= mb_type;
5461 h->last_qscale_diff = 0;
// MBAFF field MBs address references per field: double the ref counts
// for the duration of this MB (undone at the bottom, lines 5818/5819).
5466 h->ref_count[0] <<= 1;
5467 h->ref_count[1] <<= 1;
5470 fill_caches(h, mb_type, 0);
// --- Intra prediction mode decoding (4x4 / 8x8 / 16x16 + chroma). ---
5472 if( IS_INTRA( mb_type ) ) {
5474 if( IS_INTRA4x4( mb_type ) ) {
5475 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5476 mb_type |= MB_TYPE_8x8DCT;
// 8x8 transform: one pred mode per 8x8 block, replicated into
// the 2x2 group of 4x4 cache entries.
5477 for( i = 0; i < 16; i+=4 ) {
5478 int pred = pred_intra_mode( h, i );
5479 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5480 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5483 for( i = 0; i < 16; i++ ) {
5484 int pred = pred_intra_mode( h, i );
5485 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5487 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5490 write_back_intra_pred_mode(h);
5491 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5493 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5494 if( h->intra16x16_pred_mode < 0 ) return -1;
5497 h->chroma_pred_mode_table[mb_xy] =
5498 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5500 pred_mode= check_intra_pred_mode( h, pred_mode );
5501 if( pred_mode < 0 ) return -1;
5502 h->chroma_pred_mode= pred_mode;
// --- Inter, 8x8 partitions: decode sub_mb_types, refs and MVDs. ---
5504 } else if( partition_count == 4 ) {
5505 int i, j, sub_partition_count[4], list, ref[2][4];
5507 if( h->slice_type_nos == FF_B_TYPE ) {
5508 for( i = 0; i < 4; i++ ) {
5509 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5510 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5511 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct sub-MB triggers spatial/temporal direct prediction.
5513 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5514 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5515 pred_direct_motion(h, &mb_type);
5516 h->ref_cache[0][scan8[4]] =
5517 h->ref_cache[1][scan8[4]] =
5518 h->ref_cache[0][scan8[12]] =
5519 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5520 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5521 for( i = 0; i < 4; i++ )
5522 if( IS_DIRECT(h->sub_mb_type[i]) )
5523 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5527 for( i = 0; i < 4; i++ ) {
5528 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5529 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5530 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices: coded only when more than one ref is active.
5534 for( list = 0; list < h->list_count; list++ ) {
5535 for( i = 0; i < 4; i++ ) {
5536 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5537 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5538 if( h->ref_count[list] > 1 )
5539 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5545 h->ref_cache[list][ scan8[4*i]+1 ]=
5546 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5551 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences, one per sub-partition.
5553 for(list=0; list<h->list_count; list++){
5555 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5556 if(IS_DIRECT(h->sub_mb_type[i])){
5557 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5561 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5562 const int sub_mb_type= h->sub_mb_type[i];
5563 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5564 for(j=0; j<sub_partition_count[i]; j++){
5567 const int index= 4*i + block_width*j;
5568 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5569 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5570 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5572 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5573 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5574 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate mv/mvd into all 4x4 cache cells the sub-partition covers.
5576 if(IS_SUB_8X8(sub_mb_type)){
5578 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5580 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5583 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5585 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5586 }else if(IS_SUB_8X4(sub_mb_type)){
5587 mv_cache[ 1 ][0]= mx;
5588 mv_cache[ 1 ][1]= my;
5590 mvd_cache[ 1 ][0]= mx - mpx;
5591 mvd_cache[ 1 ][1]= my - mpy;
5592 }else if(IS_SUB_4X8(sub_mb_type)){
5593 mv_cache[ 8 ][0]= mx;
5594 mv_cache[ 8 ][1]= my;
5596 mvd_cache[ 8 ][0]= mx - mpx;
5597 mvd_cache[ 8 ][1]= my - mpy;
5599 mv_cache[ 0 ][0]= mx;
5600 mv_cache[ 0 ][1]= my;
5602 mvd_cache[ 0 ][0]= mx - mpx;
5603 mvd_cache[ 0 ][1]= my - mpy;
// Unused list for this sub-MB: zero the mv/mvd cache quad.
5606 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5607 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5608 p[0] = p[1] = p[8] = p[9] = 0;
5609 pd[0]= pd[1]= pd[8]= pd[9]= 0;
// --- Inter, B_Direct_16x16: no refs/MVDs coded. ---
5613 } else if( IS_DIRECT(mb_type) ) {
5614 pred_direct_motion(h, &mb_type);
5615 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5616 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5617 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- Inter, 16x16 / 16x8 / 8x16 partitions. ---
5619 int list, mx, my, i, mpx, mpy;
5620 if(IS_16X16(mb_type)){
5621 for(list=0; list<h->list_count; list++){
5622 if(IS_DIR(mb_type, 0, list)){
5623 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5624 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5626 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5628 for(list=0; list<h->list_count; list++){
5629 if(IS_DIR(mb_type, 0, list)){
5630 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5632 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5633 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5634 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5636 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5637 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5639 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5642 else if(IS_16X8(mb_type)){
5643 for(list=0; list<h->list_count; list++){
5645 if(IS_DIR(mb_type, i, list)){
5646 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5647 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5649 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5652 for(list=0; list<h->list_count; list++){
5654 if(IS_DIR(mb_type, i, list)){
5655 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5656 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5657 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5658 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5660 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5661 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5663 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5664 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5669 assert(IS_8X16(mb_type));
5670 for(list=0; list<h->list_count; list++){
5672 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5673 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5674 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5676 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5679 for(list=0; list<h->list_count; list++){
5681 if(IS_DIR(mb_type, i, list)){
5682 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5683 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5684 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5686 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5687 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5688 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5690 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5691 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5698 if( IS_INTER( mb_type ) ) {
5699 h->chroma_pred_mode_table[mb_xy] = 0;
5700 write_back_motion( h, mb_type );
// --- CBP: coded for everything except Intra_16x16 (whose cbp comes from
// the mb_type table above). ---
5703 if( !IS_INTRA16x16( mb_type ) ) {
5704 cbp = decode_cabac_mb_cbp_luma( h );
5706 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5709 h->cbp_table[mb_xy] = h->cbp = cbp;
5711 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5712 if( decode_cabac_mb_transform_size( h ) )
5713 mb_type |= MB_TYPE_8x8DCT;
5715 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residuals: delta-QP, then luma DC/AC and chroma DC/AC blocks. ---
5717 if( cbp || IS_INTRA16x16( mb_type ) ) {
5718 const uint8_t *scan, *scan8x8, *dc_scan;
5719 const uint32_t *qmul;
5722 if(IS_INTERLACED(mb_type)){
5723 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5724 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5725 dc_scan= luma_dc_field_scan;
5727 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5728 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5729 dc_scan= luma_dc_zigzag_scan;
5732 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5733 if( dqp == INT_MIN ){
5734 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec's mb_qp_delta semantics.
5738 if(((unsigned)s->qscale) > 51){
5739 if(s->qscale<0) s->qscale+= 52;
5740 else s->qscale-= 52;
5742 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5743 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5745 if( IS_INTRA16x16( mb_type ) ) {
5747 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5748 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5751 qmul = h->dequant4_coeff[0][s->qscale];
5752 for( i = 0; i < 16; i++ ) {
5753 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5754 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5757 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-16x16: one luma residual pass per coded 8x8 group.
5761 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5762 if( cbp & (1<<i8x8) ) {
5763 if( IS_8x8DCT(mb_type) ) {
5764 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5765 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5767 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5768 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5769 const int index = 4*i8x8 + i4x4;
5770 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5772 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5773 //STOP_TIMER("decode_residual")
5777 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5778 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (2x2 per component), then chroma AC per 4x4 block.
5785 for( c = 0; c < 2; c++ ) {
5786 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5787 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5793 for( c = 0; c < 2; c++ ) {
5794 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5795 for( i = 0; i < 4; i++ ) {
5796 const int index = 16 + 4 * c + i;
5797 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5798 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5802 uint8_t * const nnz= &h->non_zero_count_cache[0];
5803 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5804 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// cbp == 0 and not Intra_16x16: clear all non-zero-count cache entries.
5807 uint8_t * const nnz= &h->non_zero_count_cache[0];
5808 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5809 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5810 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5811 h->last_qscale_diff = 0;
5814 s->current_picture.qscale_table[mb_xy]= s->qscale;
5815 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling from lines 5466/5467.
5818 h->ref_count[0] >>= 1;
5819 h->ref_count[1] >>= 1;
// filter_mb_edgev(): deblock one vertical luma edge (16 pixels tall).
// For bS < 4 the work is delegated to the DSP h_loop_filter_luma with a
// per-4-pixel tc0 array; bS == 4 (intra MB edge) runs the strong filter
// inline. alpha/beta thresholds are table lookups offset by +52 so negative
// clipped qp indices stay in range.
// NOTE(review): the bS<4 / bS==4 branch structure and loop headers are
// partially elided in this listing (numbering jumps 5830→5835, 5836→5838).
5826 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5828 const int index_a = qp + h->slice_alpha_c0_offset;
5829 const int alpha = (alpha_table+52)[index_a];
5830 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// tc[i] = -1 marks "no filtering" for that 4-pixel group.
5835 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5836 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5838 /* 16px edge length, because bS=4 is triggered by being at
5839 * the edge of an intra MB, so all 4 bS are the same */
5840 for( d = 0; d < 16; d++ ) {
5841 const int p0 = pix[-1];
5842 const int p1 = pix[-2];
5843 const int p2 = pix[-3];
5845 const int q0 = pix[0];
5846 const int q1 = pix[1];
5847 const int q2 = pix[2];
// Edge-activity gate: only filter across real blocking artifacts.
5849 if( FFABS( p0 - q0 ) < alpha &&
5850 FFABS( p1 - p0 ) < beta &&
5851 FFABS( q1 - q0 ) < beta ) {
// Strong (3-tap/5-tap) filtering when the edge is smooth enough.
5853 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5854 if( FFABS( p2 - p0 ) < beta)
5856 const int p3 = pix[-4];
5858 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5859 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5860 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5863 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5865 if( FFABS( q2 - q0 ) < beta)
5867 const int q3 = pix[3];
5869 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5870 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5871 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5874 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak fallback: only p0/q0 are adjusted.
5878 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5879 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5881 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
// filter_mb_edgecv(): deblock one vertical chroma edge. Both the bS < 4
// (tc-based) and bS == 4 (intra) cases are fully delegated to the DSP
// chroma loop-filter routines; only the tc array is built here.
// NOTE(review): the branch selecting between the two dsp calls is partly
// elided in this listing (numbering jumps 5891→5896, 5897→5899).
5887 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5889 const int index_a = qp + h->slice_alpha_c0_offset;
5890 const int alpha = (alpha_table+52)[index_a];
5891 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// Chroma uses tc0+1; tc[i] == 0 marks "no filtering" for that group.
5896 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5897 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5899 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
// filter_mb_mbaff_edgev(): vertical luma edge deblocking for MBAFF MB pairs.
// Unlike filter_mb_edgev, bS has 8 entries and qp two entries, because the
// two fields of the pair may use different strengths/quantizers; each of the
// 16 rows selects its bS and qp by row parity or position.
// NOTE(review): parts of the bS_index computation and several closing braces
// are elided in this listing (numbering jumps 5905→5911, 5914→5917).
5903 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5905 for( i = 0; i < 16; i++, pix += stride) {
5911 int bS_index = (i >> 1);
5914 bS_index |= (i & 1);
5917 if( bS[bS_index] == 0 ) {
// Field MBs pair rows by field (i>>3); frame MBs alternate (i&1).
5921 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5922 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5923 alpha = (alpha_table+52)[index_a];
5924 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// Normal (tc-clipped) filtering for bS 1..3.
5926 if( bS[bS_index] < 4 ) {
5927 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5928 const int p0 = pix[-1];
5929 const int p1 = pix[-2];
5930 const int p2 = pix[-3];
5931 const int q0 = pix[0];
5932 const int q1 = pix[1];
5933 const int q2 = pix[2];
5935 if( FFABS( p0 - q0 ) < alpha &&
5936 FFABS( p1 - p0 ) < beta &&
5937 FFABS( q1 - q0 ) < beta ) {
5941 if( FFABS( p2 - p0 ) < beta ) {
5942 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5945 if( FFABS( q2 - q0 ) < beta ) {
5946 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5950 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5951 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5952 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5953 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Strong filtering for bS == 4 (same maths as filter_mb_edgev).
5956 const int p0 = pix[-1];
5957 const int p1 = pix[-2];
5958 const int p2 = pix[-3];
5960 const int q0 = pix[0];
5961 const int q1 = pix[1];
5962 const int q2 = pix[2];
5964 if( FFABS( p0 - q0 ) < alpha &&
5965 FFABS( p1 - p0 ) < beta &&
5966 FFABS( q1 - q0 ) < beta ) {
5968 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5969 if( FFABS( p2 - p0 ) < beta)
5971 const int p3 = pix[-4];
5973 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5974 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5975 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5978 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5980 if( FFABS( q2 - q0 ) < beta)
5982 const int q3 = pix[3];
5984 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5985 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5986 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5989 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5993 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5994 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5996 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// filter_mb_mbaff_edgecv(): vertical chroma edge deblocking for MBAFF MB
// pairs — 8 chroma rows, per-row bS/qp selection, filtering done inline
// (no DSP delegation, unlike filter_mb_edgecv).
// NOTE(review): the bS_index computation and closing braces are elided in
// this listing (numbering jumps 6003→6011).
6001 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6003 for( i = 0; i < 8; i++, pix += stride) {
6011 if( bS[bS_index] == 0 ) {
// Field MBs pair rows by field (i>>2 for 8 chroma rows); frame by parity.
6015 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6016 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6017 alpha = (alpha_table+52)[index_a];
6018 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// Normal chroma filter for bS 1..3: chroma tc is tc0 + 1.
6020 if( bS[bS_index] < 4 ) {
6021 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6022 const int p0 = pix[-1];
6023 const int p1 = pix[-2];
6024 const int q0 = pix[0];
6025 const int q1 = pix[1];
6027 if( FFABS( p0 - q0 ) < alpha &&
6028 FFABS( p1 - p0 ) < beta &&
6029 FFABS( q1 - q0 ) < beta ) {
6030 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6032 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6033 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6034 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Strong chroma filter for bS == 4: p0/q0 replaced by 3-tap averages.
6037 const int p0 = pix[-1];
6038 const int p1 = pix[-2];
6039 const int q0 = pix[0];
6040 const int q1 = pix[1];
6042 if( FFABS( p0 - q0 ) < alpha &&
6043 FFABS( p1 - p0 ) < beta &&
6044 FFABS( q1 - q0 ) < beta ) {
6046 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6047 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6048 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// filter_mb_edgeh(): deblock one horizontal luma edge — same structure as
// filter_mb_edgev but pixels are addressed with multiples of 'stride'
// (pix_next) instead of +/-1, and the bS < 4 case delegates to the DSP
// v_loop_filter_luma routine.
// NOTE(review): the bS<4 / bS==4 branch selection and loop headers are
// partially elided in this listing (numbering jumps 6059→6064, 6065→6067).
6054 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6056 const int index_a = qp + h->slice_alpha_c0_offset;
6057 const int alpha = (alpha_table+52)[index_a];
6058 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6059 const int pix_next = stride;
// tc[i] = -1 marks "no filtering" for that 4-pixel group.
6064 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6065 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6067 /* 16px edge length, see filter_mb_edgev */
6068 for( d = 0; d < 16; d++ ) {
6069 const int p0 = pix[-1*pix_next];
6070 const int p1 = pix[-2*pix_next];
6071 const int p2 = pix[-3*pix_next];
6072 const int q0 = pix[0];
6073 const int q1 = pix[1*pix_next];
6074 const int q2 = pix[2*pix_next];
6076 if( FFABS( p0 - q0 ) < alpha &&
6077 FFABS( p1 - p0 ) < beta &&
6078 FFABS( q1 - q0 ) < beta ) {
6080 const int p3 = pix[-4*pix_next];
6081 const int q3 = pix[ 3*pix_next];
// Strong (5-tap) filtering when the edge is smooth enough.
6083 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6084 if( FFABS( p2 - p0 ) < beta) {
6086 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6087 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6088 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6091 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6093 if( FFABS( q2 - q0 ) < beta) {
6095 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6096 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6097 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6100 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak fallback: only p0/q0 are adjusted.
6104 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6105 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6107 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
// filter_mb_edgech(): deblock one horizontal chroma edge; the tc-based and
// intra cases are fully delegated to the DSP vertical chroma loop filters
// (mirror of filter_mb_edgecv for the horizontal direction).
// NOTE(review): the branch selecting between the two dsp calls is partly
// elided in this listing (numbering jumps 6118→6123, 6124→6126).
6114 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6116 const int index_a = qp + h->slice_alpha_c0_offset;
6117 const int alpha = (alpha_table+52)[index_a];
6118 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// Chroma uses tc0+1; tc[i] == 0 marks "no filtering" for that group.
6123 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6124 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6126 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
// filter_mb_fast(): fast-path macroblock deblocking. Falls back to the full
// filter_mb() for the cases it cannot handle (first row/column, per-plane
// chroma qp offsets, cross-slice deblocking mode 2, or when CODEC_FLAG2_FAST
// is not set). Otherwise it computes averaged edge quantizers, skips the MB
// entirely below a qp threshold, and either uses fixed bS values (intra) or
// the dsp h264_loop_filter_strength helper (inter).
// NOTE(review): the tail of this function (the non-intra FILTER() invocation
// block after line 6225) is elided in this listing.
6130 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6131 MpegEncContext * const s = &h->s;
6132 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6134 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
// Conditions the fast path cannot handle: delegate to full filter_mb().
6138 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6139 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6140 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6141 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6142 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6145 assert(!FRAME_MBAFF);
// Edge quantizers are the average of the two adjacent MBs' qp.
6147 mb_type = s->current_picture.mb_type[mb_xy];
6148 qp = s->current_picture.qscale_table[mb_xy];
6149 qp0 = s->current_picture.qscale_table[mb_xy-1];
6150 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6151 qpc = get_chroma_qp( h, 0, qp );
6152 qpc0 = get_chroma_qp( h, 0, qp0 );
6153 qpc1 = get_chroma_qp( h, 0, qp1 );
6154 qp0 = (qp + qp0 + 1) >> 1;
6155 qp1 = (qp + qp1 + 1) >> 1;
6156 qpc0 = (qpc + qpc0 + 1) >> 1;
6157 qpc1 = (qpc + qpc1 + 1) >> 1;
// Below this threshold alpha/beta would disable all filtering: early out.
6158 qp_thresh = 15 - h->slice_alpha_c0_offset;
6159 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6160 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// Intra MB: fixed bS (4 on MB edges, 3 inside; 3 on the top edge of
// field pictures), so no per-edge strength computation is needed.
6163 if( IS_INTRA(mb_type) ) {
6164 int16_t bS4[4] = {4,4,4,4};
6165 int16_t bS3[4] = {3,3,3,3};
6166 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6167 if( IS_8x8DCT(mb_type) ) {
6168 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6169 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6170 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6171 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6173 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6174 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6175 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6176 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6177 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6178 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6179 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6180 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6182 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6183 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6184 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6185 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6186 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6187 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6188 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6189 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// Inter MB: compute bS per edge; bSv aliases bS as packed uint64 rows.
6192 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6193 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6195 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6197 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6199 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6200 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6201 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6202 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6204 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6205 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6206 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6207 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
// Neighboring intra MBs force bS 4 (3 on top edge of field pictures).
6209 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6210 bSv[0][0] = 0x0004000400040004ULL;
6211 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6212 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
// FILTER(): apply luma + chroma filters for one direction/edge pair;
// edge 0 uses the cross-MB averaged qp (qp0/qp1), inner edges use qp.
6214 #define FILTER(hv,dir,edge)\
6215 if(bSv[dir][edge]) {\
6216 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6218 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6219 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6225 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Apply the in-loop deblocking filter to one macroblock in one direction.
 *
 * @param dir 0 = filter vertical edges, 1 = filter horizontal edges
 * @param mvy_limit vertical MV difference threshold used for bS decisions
 * @param first_vertical_edge_done nonzero if the MBAFF special left edge
 *        was already filtered by the caller (see filter_mb())
 *
 * Computes the boundary strength array bS for each internal/external edge
 * (intra => 3/4, coded coefficients => 2, MV/ref differences => 1) and then
 * invokes the filter_mb_edge* helpers for luma and both chroma planes.
 * NOTE(review): this extraction elides some interior lines; comments refer
 * only to the code that is visible here.
 */
6245 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6246 MpegEncContext * const s = &h->s;
// mbm_xy: neighbouring macroblock across the first edge (left for dir==0,
// top for dir==1); ref2frm/ref2frmm map reference indices to frame numbers
// for the current and neighbouring slice respectively.
6248 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6249 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6250 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6251 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
// start==1 skips edge 0 when the neighbour is outside any slice (0xFFFF).
6252 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
// 16x16 skip MBs only need the outer edge; everything else needs 4 edges.
6254 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6255 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6256 // how often to recheck mv-based bS when iterating between edges
6257 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6258 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6259 // how often to recheck mv-based bS when iterating along each edge
6260 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6262 if (first_vertical_edge_done) {
// deblocking_filter==2: do not filter across slice boundaries.
6266 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6269 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6270 && !IS_INTERLACED(mb_type)
6271 && IS_INTERLACED(mbm_type)
6273 // This is a special case in the norm where the filtering must
6274 // be done twice (one each of the field) even if we are in a
6275 // frame macroblock.
6277 static const int nnz_idx[4] = {4,5,6,3};
6278 unsigned int tmp_linesize = 2 * linesize;
6279 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6280 int mbn_xy = mb_xy - 2 * s->mb_stride;
// filter once against each field of the interlaced MB pair above
6285 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6286 if( IS_INTRA(mb_type) ||
6287 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6288 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6290 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6291 for( i = 0; i < 4; i++ ) {
6292 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6293 mbn_nnz[nnz_idx[i]] != 0 )
6299 // Do not use s->qscale as luma quantizer because it has not the same
6300 // value in IPCM macroblocks.
6301 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6302 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6303 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6304 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6305 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6306 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6307 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6308 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// main loop: one iteration per edge in this direction
6315 for( edge = start; edge < edges; edge++ ) {
6316 /* mbn_xy: neighbor macroblock */
6317 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6318 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6319 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
// 8x8 transform: odd internal edges are not filtered
6323 if( (edge&1) && IS_8x8DCT(mb_type) )
6326 if( IS_INTRA(mb_type) ||
6327 IS_INTRA(mbn_type) ) {
6330 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6331 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6340 bS[0] = bS[1] = bS[2] = bS[3] = value;
6345 if( edge & mask_edge ) {
6346 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6349 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6350 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6353 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6354 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6355 int bn_idx= b_idx - (dir ? 8:1);
// bS=1 if refs differ or any MV component differs by >= 1 luma sample
6358 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6359 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6360 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6361 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
// B slices: also compare against the opposite list (ln = !l)
6364 if(h->slice_type_nos == FF_B_TYPE && v){
6366 for( l = 0; !v && l < 2; l++ ) {
6368 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6369 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6370 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6374 bS[0] = bS[1] = bS[2] = bS[3] = v;
// per-4x4-block bS computation when partitions differ along the edge
6380 for( i = 0; i < 4; i++ ) {
6381 int x = dir == 0 ? edge : i;
6382 int y = dir == 0 ? i : edge;
6383 int b_idx= 8 + 4 + x + 8*y;
6384 int bn_idx= b_idx - (dir ? 8:1);
6386 if( h->non_zero_count_cache[b_idx] |
6387 h->non_zero_count_cache[bn_idx] ) {
6393 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6394 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6395 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6396 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6402 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6404 for( l = 0; l < 2; l++ ) {
6406 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6407 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6408 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
// nothing to filter on this edge
6417 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6422 // Do not use s->qscale as luma quantizer because it has not the same
6423 // value in IPCM macroblocks.
6424 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6425 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6426 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6427 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// vertical edges (dir==0); chroma is only filtered on even luma edges
6429 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6430 if( (edge&1) == 0 ) {
6431 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6432 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6433 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6434 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// horizontal edges (dir==1)
6437 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6438 if( (edge&1) == 0 ) {
6439 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6440 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6441 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6442 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Deblock one complete macroblock: skip trivially-low-QP MBs, fix up NNZ
 * caches for CAVLC 8x8 transform mode, handle the MBAFF special first
 * vertical edge, then filter both directions via filter_mb_dir().
 * NOTE(review): some interior lines are elided in this extraction.
 */
6448 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6449 MpegEncContext * const s = &h->s;
6450 const int mb_xy= mb_x + mb_y*s->mb_stride;
6451 const int mb_type = s->current_picture.mb_type[mb_xy];
// field MBs use a tighter vertical MV threshold (2 instead of 4)
6452 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6453 int first_vertical_edge_done = 0;
6456 //for sufficiently low qp, filtering wouldn't do anything
6457 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6459 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6460 int qp = s->current_picture.qscale_table[mb_xy];
6462 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6463 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6468 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6469 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6470 int top_type, left_type[2];
6471 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6472 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6473 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// rebuild the NNZ cache entries for 8x8-transform neighbours from cbp
6475 if(IS_8x8DCT(top_type)){
6476 h->non_zero_count_cache[4+8*0]=
6477 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6478 h->non_zero_count_cache[6+8*0]=
6479 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6481 if(IS_8x8DCT(left_type[0])){
6482 h->non_zero_count_cache[3+8*1]=
6483 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6485 if(IS_8x8DCT(left_type[1])){
6486 h->non_zero_count_cache[3+8*3]=
6487 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6490 if(IS_8x8DCT(mb_type)){
6491 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6492 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6494 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6495 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6497 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6498 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6500 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6501 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6506 // left mb is in picture
6507 && h->slice_table[mb_xy-1] != 0xFFFF
6508 // and current and left pair do not have the same interlaced type
6509 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6510 // and left mb is in the same slice if deblocking_filter == 2
6511 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6512 /* First vertical edge is different in MBAFF frames
6513 * There are 8 different bS to compute and 2 different Qp
6515 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6516 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6521 int mb_qp, mbn0_qp, mbn1_qp;
6523 first_vertical_edge_done = 1;
6525 if( IS_INTRA(mb_type) )
6526 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
// mixed field/frame pair: derive one bS per 4-pixel span (8 total)
6528 for( i = 0; i < 8; i++ ) {
6529 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6531 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6533 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6534 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6535 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6537 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// average luma/chroma QPs between current MB and each left neighbour
6544 mb_qp = s->current_picture.qscale_table[mb_xy];
6545 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6546 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6547 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6548 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6549 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6550 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6551 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6552 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6553 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6554 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6555 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6556 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6559 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6560 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6561 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6562 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6563 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// filter vertical (dir 0) then horizontal (dir 1) edges
6567 for( dir = 0; dir < 2; dir++ )
6568 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6570 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6571 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode one slice; entry point used per thread context (arg is H264Context**).
 * Chooses the CABAC or CAVLC macroblock loop depending on pps.cabac, decodes
 * macroblocks until end-of-slice, and reports decoded regions to the error
 * resilience code via ff_er_add_slice().
 *
 * Fix: line "6737" below contained corrupted tokens `s->?gb` / `s->gb?.`
 * (stray '?' characters) — restored to match the identical checks on the
 * surrounding lines (6725/6738).
 */
6575 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6576 H264Context *h = *(void**)arg;
6577 MpegEncContext * const s = &h->s;
6578 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6582 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6583 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6585 if( h->pps.cabac ) {
// CABAC path: byte-align, then initialize the arithmetic decoder on the
// remaining bitstream bytes.
6589 align_get_bits( &s->gb );
6592 ff_init_cabac_states( &h->cabac);
6593 ff_init_cabac_decoder( &h->cabac,
6594 s->gb.buffer + get_bits_count(&s->gb)/8,
6595 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6596 /* calculate pre-state */
6597 for( i= 0; i < 460; i++ ) {
6599 if( h->slice_type_nos == FF_I_TYPE )
6600 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6602 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6605 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6607 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6612 int ret = decode_mb_cabac(h);
6614 //STOP_TIMER("decode_mb_cabac")
6616 if(ret>=0) hl_decode_mb(h);
6618 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6621 if(ret>=0) ret = decode_mb_cabac(h);
6623 if(ret>=0) hl_decode_mb(h);
6626 eos = get_cabac_terminate( &h->cabac );
// bytestream overrun of more than 2 bytes => corrupt slice
6628 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6629 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6630 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6634 if( ++s->mb_x >= s->mb_width ) {
6636 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6638 if(FIELD_OR_MBAFF_PICTURE) {
6643 if( eos || s->mb_y >= s->mb_height ) {
6644 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6645 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC path
6652 int ret = decode_mb_cavlc(h);
6654 if(ret>=0) hl_decode_mb(h);
6656 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6658 ret = decode_mb_cavlc(h);
6660 if(ret>=0) hl_decode_mb(h);
6665 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6666 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6671 if(++s->mb_x >= s->mb_width){
6673 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6675 if(FIELD_OR_MBAFF_PICTURE) {
6678 if(s->mb_y >= s->mb_height){
6679 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6681 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6682 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6686 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6693 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6694 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6695 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6696 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6700 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6709 for(;s->mb_y < s->mb_height; s->mb_y++){
6710 for(;s->mb_x < s->mb_width; s->mb_x++){
6711 int ret= decode_mb(h);
6716 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6717 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6722 if(++s->mb_x >= s->mb_width){
6724 if(++s->mb_y >= s->mb_height){
6725 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6726 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6737 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6738 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6739 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6743 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6750 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6753 return -1; //not reached
/**
 * Parse a picture timing SEI message (H.264 spec D.1.2/D.2.2).
 * Reads cpb/dpb delays when HRD parameters were signalled, then
 * pic_struct and the optional per-clock-timestamp fields, which are
 * skipped except for sei_pic_struct which is stored on the context.
 */
6756 static int decode_picture_timing(H264Context *h){
6757 MpegEncContext * const s = &h->s;
6758 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6759 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6760 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6762 if(h->sps.pic_struct_present_flag){
6763 unsigned int i, num_clock_ts;
6764 h->sei_pic_struct = get_bits(&s->gb, 4);
// values above FRAME_TRIPLING are reserved by the spec
6766 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6769 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6771 for (i = 0 ; i < num_clock_ts ; i++){
6772 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6773 unsigned int full_timestamp_flag;
6774 skip_bits(&s->gb, 2); /* ct_type */
6775 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6776 skip_bits(&s->gb, 5); /* counting_type */
6777 full_timestamp_flag = get_bits(&s->gb, 1);
6778 skip_bits(&s->gb, 1); /* discontinuity_flag */
6779 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6780 skip_bits(&s->gb, 8); /* n_frames */
6781 if(full_timestamp_flag){
6782 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6783 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6784 skip_bits(&s->gb, 5); /* hours_value 0..23 */
// partial timestamp: each component is preceded by a presence flag
6786 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6787 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6788 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6789 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6790 if(get_bits(&s->gb, 1)) /* hours_flag */
6791 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6795 if(h->sps.time_offset_length > 0)
6796 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an "unregistered user data" SEI message of the given byte size.
 * Copies up to sizeof(user_data)-1 bytes, sniffs the x264 version banner
 * to record h->x264_build (used for bug workarounds), and skips the rest.
 */
6803 static int decode_unregistered_user_data(H264Context *h, int size){
6804 MpegEncContext * const s = &h->s;
6805 uint8_t user_data[16+256];
6811 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6812 user_data[i]= get_bits(&s->gb, 8);
// payload text starts after the 16-byte UUID; match the x264 banner
6816 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6817 if(e==1 && build>=0)
6818 h->x264_build= build;
6820 if(s->avctx->debug & FF_DEBUG_BUGS)
6821 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// consume any remaining payload bytes beyond the local buffer
6824 skip_bits(&s->gb, 8);
/**
 * Parse all SEI messages in the current NAL unit.
 * Each message has a type and size coded as sequences of 0xFF-extended
 * bytes; known types are dispatched, unknown ones skipped by size.
 */
6829 static int decode_sei(H264Context *h){
6830 MpegEncContext * const s = &h->s;
6832 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// type/size use ff_byte escape coding: 255 means "add and continue"
6837 type+= show_bits(&s->gb, 8);
6838 }while(get_bits(&s->gb, 8) == 255);
6842 size+= show_bits(&s->gb, 8);
6843 }while(get_bits(&s->gb, 8) == 255);
6846 case 1: // Picture timing SEI
6847 if(decode_picture_timing(h) < 0)
6851 if(decode_unregistered_user_data(h, size) < 0)
6855 skip_bits(&s->gb, 8*size);
6858 //FIXME check bits here
6859 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters (spec E.1.2).
 * Most fields are skipped; only the delay-length fields needed by the
 * picture timing SEI parser are stored into the SPS.
 */
6865 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6866 MpegEncContext * const s = &h->s;
6868 cpb_count = get_ue_golomb(&s->gb) + 1;
// cpb_cnt_minus1 is at most 31 per the spec
6870 if(cpb_count > 32U){
6871 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6875 get_bits(&s->gb, 4); /* bit_rate_scale */
6876 get_bits(&s->gb, 4); /* cpb_size_scale */
6877 for(i=0; i<cpb_count; i++){
6878 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6879 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6880 get_bits1(&s->gb); /* cbr_flag */
6882 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6883 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6884 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6885 sps->time_offset_length = get_bits(&s->gb, 5);
/**
 * Parse VUI (video usability information) from the SPS (spec E.1.1).
 * Stores sample aspect ratio, timing info, HRD flags, pic_struct flag and
 * bitstream restrictions; purely informative fields are read and discarded.
 */
6889 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6890 MpegEncContext * const s = &h->s;
6891 int aspect_ratio_info_present_flag;
6892 unsigned int aspect_ratio_idc;
6894 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6896 if( aspect_ratio_info_present_flag ) {
6897 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR: explicit 16-bit numerator/denominator follow
6898 if( aspect_ratio_idc == EXTENDED_SAR ) {
6899 sps->sar.num= get_bits(&s->gb, 16);
6900 sps->sar.den= get_bits(&s->gb, 16);
6901 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6902 sps->sar= pixel_aspect[aspect_ratio_idc];
6904 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6911 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6913 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6914 get_bits1(&s->gb); /* overscan_appropriate_flag */
6917 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6918 get_bits(&s->gb, 3); /* video_format */
6919 get_bits1(&s->gb); /* video_full_range_flag */
6920 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6921 get_bits(&s->gb, 8); /* colour_primaries */
6922 get_bits(&s->gb, 8); /* transfer_characteristics */
6923 get_bits(&s->gb, 8); /* matrix_coefficients */
6927 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6928 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6929 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6932 sps->timing_info_present_flag = get_bits1(&s->gb);
6933 if(sps->timing_info_present_flag){
6934 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6935 sps->time_scale = get_bits_long(&s->gb, 32);
6936 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6939 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6940 if(sps->nal_hrd_parameters_present_flag)
6941 if(decode_hrd_parameters(h, sps) < 0)
6943 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6944 if(sps->vcl_hrd_parameters_present_flag)
6945 if(decode_hrd_parameters(h, sps) < 0)
6947 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6948 get_bits1(&s->gb); /* low_delay_hrd_flag */
6949 sps->pic_struct_present_flag = get_bits1(&s->gb);
6951 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6952 if(sps->bitstream_restriction_flag){
6953 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6954 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6955 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6956 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6957 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6958 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6959 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// sanity limit; the spec caps the DPB at 16 frames
6961 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6962 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parse one scaling list (4x4 => size 16, 8x8 => size 64).
 * If the list is absent, the predicted fallback_list is copied; if the
 * first delta yields 0, the JVT default jvt_list is used; otherwise the
 * deltas are decoded in zigzag order with "repeat last" on zero.
 */
6970 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6971 const uint8_t *jvt_list, const uint8_t *fallback_list){
6972 MpegEncContext * const s = &h->s;
6973 int i, last = 8, next = 8;
6974 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6975 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6976 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6978 for(i=0;i<size;i++){
6980 next = (last + get_se_golomb(&s->gb)) & 0xff;
6981 if(!i && !next){ /* matrix not written, we use the preset one */
6982 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// next==0 means "repeat previous value" for the rest of the list
6985 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Fallbacks follow the spec's prediction rules: when a PPS overrides an
 * SPS that carried matrices, the SPS matrices are the fallback, otherwise
 * the JVT defaults are; each chroma list falls back to the previous list.
 */
6989 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6990 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6991 MpegEncContext * const s = &h->s;
6992 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6993 const uint8_t *fallback[4] = {
6994 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6995 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6996 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6997 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6999 if(get_bits1(&s->gb)){
7000 sps->scaling_matrix_present |= is_sps;
7001 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7002 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7003 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7004 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7005 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7006 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7007 if(is_sps || pps->transform_8x8_mode){
7008 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7009 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set NAL (spec 7.3.2.1) into a freshly
 * allocated SPS and store it in h->sps_buffers[sps_id].
 * Validates sps_id, poc_cycle_length, ref_frame_count, dimensions and
 * cropping; returns negative (via the elided error paths) on bad input.
 */
7014 static inline int decode_seq_parameter_set(H264Context *h){
7015 MpegEncContext * const s = &h->s;
7016 int profile_idc, level_idc;
7017 unsigned int sps_id;
7021 profile_idc= get_bits(&s->gb, 8);
7022 get_bits1(&s->gb); //constraint_set0_flag
7023 get_bits1(&s->gb); //constraint_set1_flag
7024 get_bits1(&s->gb); //constraint_set2_flag
7025 get_bits1(&s->gb); //constraint_set3_flag
7026 get_bits(&s->gb, 4); // reserved
7027 level_idc= get_bits(&s->gb, 8);
7028 sps_id= get_ue_golomb(&s->gb);
7030 if(sps_id >= MAX_SPS_COUNT) {
7031 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7034 sps= av_mallocz(sizeof(SPS));
7038 sps->profile_idc= profile_idc;
7039 sps->level_idc= level_idc;
// default: flat matrices (all 16) until scaling data says otherwise
7041 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7042 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7043 sps->scaling_matrix_present = 0;
7045 if(sps->profile_idc >= 100){ //high profile
7046 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7047 if(sps->chroma_format_idc == 3)
7048 get_bits1(&s->gb); //residual_color_transform_flag
7049 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7050 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7051 sps->transform_bypass = get_bits1(&s->gb);
7052 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// non-high profiles are always 4:2:0
7054 sps->chroma_format_idc= 1;
7057 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7058 sps->poc_type= get_ue_golomb(&s->gb);
7060 if(sps->poc_type == 0){ //FIXME #define
7061 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7062 } else if(sps->poc_type == 1){//FIXME #define
7063 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7064 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7065 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7066 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7068 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7069 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7073 for(i=0; i<sps->poc_cycle_length; i++)
7074 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7075 }else if(sps->poc_type != 2){
7076 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7080 sps->ref_frame_count= get_ue_golomb(&s->gb);
7081 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7082 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7085 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7086 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7087 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7088 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7089 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7090 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7094 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7095 if(!sps->frame_mbs_only_flag)
7096 sps->mb_aff= get_bits1(&s->gb);
7100 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7102 #ifndef ALLOW_INTERLACE
7104 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7106 sps->crop= get_bits1(&s->gb);
7108 sps->crop_left = get_ue_golomb(&s->gb);
7109 sps->crop_right = get_ue_golomb(&s->gb);
7110 sps->crop_top = get_ue_golomb(&s->gb);
7111 sps->crop_bottom= get_ue_golomb(&s->gb);
7112 if(sps->crop_left || sps->crop_top){
7113 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7115 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7116 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7122 sps->crop_bottom= 0;
7125 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7126 if( sps->vui_parameters_present_flag )
7127 decode_vui_parameters(h, sps);
7129 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7130 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7131 sps_id, sps->profile_idc, sps->level_idc,
7133 sps->ref_frame_count,
7134 sps->mb_width, sps->mb_height,
7135 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7136 sps->direct_8x8_inference_flag ? "8B8" : "",
7137 sps->crop_left, sps->crop_right,
7138 sps->crop_top, sps->crop_bottom,
7139 sps->vui_parameters_present_flag ? "VUI" : "",
7140 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// replace any previous SPS with the same id
7143 av_free(h->sps_buffers[sps_id]);
7144 h->sps_buffers[sps_id]= sps;
/**
 * Precompute the luma-QP -> chroma-QP lookup table for chroma plane t
 * (0 = Cb, 1 = Cr), applying the PPS chroma_qp_index_offset and clipping
 * to the valid QP range [0, 51].
 */
7152 build_qp_table(PPS *pps, int t, int index)
7155 for(i = 0; i < 52; i++)
7156 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set NAL (spec 7.3.2.2) into a freshly
 * allocated PPS and store it in h->pps_buffers[pps_id].
 * @param bit_length size of the RBSP in bits, used to detect the optional
 *        trailing fields (transform_8x8_mode, PPS scaling matrices,
 *        second_chroma_qp_index_offset).
 */
7159 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7160 MpegEncContext * const s = &h->s;
7161 unsigned int pps_id= get_ue_golomb(&s->gb);
7164 if(pps_id >= MAX_PPS_COUNT) {
7165 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7169 pps= av_mallocz(sizeof(PPS));
7172 pps->sps_id= get_ue_golomb(&s->gb);
7173 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7174 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7178 pps->cabac= get_bits1(&s->gb);
7179 pps->pic_order_present= get_bits1(&s->gb);
7180 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7181 if(pps->slice_group_count > 1 ){
7182 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7183 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7184 switch(pps->mb_slice_group_map_type){
7187 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7188 | run_length[ i ] |1 |ue(v) |
7193 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7195 | top_left_mb[ i ] |1 |ue(v) |
7196 | bottom_right_mb[ i ] |1 |ue(v) |
7204 | slice_group_change_direction_flag |1 |u(1) |
7205 | slice_group_change_rate_minus1 |1 |ue(v) |
7210 | slice_group_id_cnt_minus1 |1 |ue(v) |
7211 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7213 | slice_group_id[ i ] |1 |u(v) |
7218 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7219 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7220 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7221 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7225 pps->weighted_pred= get_bits1(&s->gb);
7226 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7227 pps->init_qp= get_se_golomb(&s->gb) + 26;
7228 pps->init_qs= get_se_golomb(&s->gb) + 26;
7229 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7230 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7231 pps->constrained_intra_pred= get_bits1(&s->gb);
7232 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7234 pps->transform_8x8_mode= 0;
7235 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7236 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7237 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// optional trailing fields are only present if bits remain in the RBSP
7239 if(get_bits_count(&s->gb) < bit_length){
7240 pps->transform_8x8_mode= get_bits1(&s->gb);
7241 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7242 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7244 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7247 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7248 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7249 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7250 h->pps.chroma_qp_diff= 1;
7252 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7253 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7254 pps_id, pps->sps_id,
7255 pps->cabac ? "CABAC" : "CAVLC",
7256 pps->slice_group_count,
7257 pps->ref_count[0], pps->ref_count[1],
7258 pps->weighted_pred ? "weighted" : "",
7259 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7260 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7261 pps->constrained_intra_pred ? "CONSTR" : "",
7262 pps->redundant_pic_cnt_present ? "REDU" : "",
7263 pps->transform_8x8_mode ? "8x8DCT" : ""
// replace any previous PPS with the same id
7267 av_free(h->pps_buffers[pps_id]);
7268 h->pps_buffers[pps_id]= pps;
7276 * Call decode_slice() for each context.
7278 * @param h h264 master context
7279 * @param context_count number of contexts to execute
/**
 * Run decode_slice() over all per-thread slice contexts.
 *
 * With a single context, decode_slice() is called directly on the master
 * context; otherwise the per-thread contexts are primed (error settings,
 * zeroed error counts) and dispatched via avctx->execute(). Afterwards the
 * position/state of the last context and the summed error counts are pulled
 * back into the master context.
 *
 * @param h h264 master context
 * @param context_count number of contexts to execute
 */
7281 static void execute_decode_slices(H264Context *h, int context_count){
7282 MpegEncContext * const s = &h->s;
7283 AVCodecContext * const avctx= s->avctx;
7287 if(context_count == 1) {
     // Single-threaded fast path: no context copying needed.
7288 decode_slice(avctx, &h);
7290 for(i = 1; i < context_count; i++) {
     // Propagate master settings into each worker context before dispatch.
7291 hx = h->thread_context[i];
7292 hx->s.error_recognition = avctx->error_recognition;
7293 hx->s.error_count = 0;
7296 avctx->execute(avctx, (void *)decode_slice,
7297 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7299 /* pull back stuff from slices to master context */
7300 hx = h->thread_context[context_count - 1];
7301 s->mb_x = hx->s.mb_x;
7302 s->mb_y = hx->s.mb_y;
7303 s->dropable = hx->s.dropable;
7304 s->picture_structure = hx->s.picture_structure;
     // Aggregate error counts so error concealment sees the whole frame.
7305 for(i = 1; i < context_count; i++)
7306 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 *
 * Handles both AVC format (length-prefixed NALs, h->is_avc, using
 * h->nal_length_size bytes of big-endian length) and Annex-B format
 * (00 00 01 start-code search). Slice NALs are distributed across
 * h->thread_context[] and flushed through execute_decode_slices() once
 * h->max_contexts slices have been collected; SPS/PPS/SEI NALs are parsed
 * inline on the master context.
 *
 * @param h        h264 master context
 * @param buf      input bitstream (AVC or Annex-B framing)
 * @param buf_size size of buf in bytes
 * @return number of bytes consumed (per the visible call sites), negative on error
 */
7311 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7312 MpegEncContext * const s = &h->s;
7313 AVCodecContext * const avctx= s->avctx;
7315 H264Context *hx; ///< thread context
7316 int context_count = 0;
7318 h->max_contexts = avctx->thread_count;
     // Debug hexdump of the first bytes of the buffer (guarded by debug flags
     // in the elided surrounding code — note buf_size may be < 50; presumably
     // the elided guard also bounds this loop — TODO confirm against full source).
7321 for(i=0; i<50; i++){
7322 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7325 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
     // Not in chunked mode: a new call means a new access unit.
7326 h->current_slice = 0;
7327 if (!s->first_field)
7328 s->current_picture_ptr= NULL;
7340 if(buf_index >= buf_size) break;
     // AVC framing: read h->nal_length_size big-endian bytes as the NAL size.
7342 for(i = 0; i < h->nal_length_size; i++)
7343 nalsize = (nalsize << 8) | buf[buf_index++];
7344 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7349 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7354 // start code prefix search
7355 for(; buf_index + 3 < buf_size; buf_index++){
7356 // This should always succeed in the first iteration.
7357 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7361 if(buf_index+3 >= buf_size) break;
7366 hx = h->thread_context[context_count];
7368 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7369 if (ptr==NULL || dst_length < 0){
     // Strip trailing zero bytes before locating rbsp_trailing_bits.
     // NOTE(review): condition order reads ptr[dst_length - 1] BEFORE checking
     // dst_length > 0 — out-of-bounds read of ptr[-1] when dst_length == 0.
     // The operands should be swapped (short-circuit on dst_length > 0 first).
7372 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7374 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7376 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7377 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7380 if (h->is_avc && (nalsize != consumed)){
7381 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7385 buf_index += consumed;
     // Skip non-reference NALs entirely when the user asked for it.
7387 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7388 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7393 switch(hx->nal_unit_type){
     // --- IDR / non-IDR slice NALs ---
7395 if (h->nal_unit_type != NAL_IDR_SLICE) {
7396 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7399 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7401 init_get_bits(&hx->s.gb, ptr, bit_length);
7403 hx->inter_gb_ptr= &hx->s.gb;
7404 hx->s.data_partitioning = 0;
7406 if((err = decode_slice_header(hx, h)))
7409 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
     // Decode the slice only if it isn't being skipped by hurry_up/skip_frame.
7410 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7411 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7412 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7413 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7414 && avctx->skip_frame < AVDISCARD_ALL)
     // --- Data-partitioned slices (DPA/DPB/DPC) ---
7418 init_get_bits(&hx->s.gb, ptr, bit_length);
7420 hx->inter_gb_ptr= NULL;
7421 hx->s.data_partitioning = 1;
7423 err = decode_slice_header(hx, h);
7426 init_get_bits(&hx->intra_gb, ptr, bit_length);
7427 hx->intra_gb_ptr= &hx->intra_gb;
7430 init_get_bits(&hx->inter_gb, ptr, bit_length);
7431 hx->inter_gb_ptr= &hx->inter_gb;
7433 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7434 && s->context_initialized
7436 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7437 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7438 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7439 && avctx->skip_frame < AVDISCARD_ALL)
     // --- SEI ---
7443 init_get_bits(&s->gb, ptr, bit_length);
     // --- SPS ---
7447 init_get_bits(&s->gb, ptr, bit_length);
7448 decode_seq_parameter_set(h);
7450 if(s->flags& CODEC_FLAG_LOW_DELAY)
7453 if(avctx->has_b_frames < 2)
7454 avctx->has_b_frames= !s->low_delay;
     // --- PPS ---
7457 init_get_bits(&s->gb, ptr, bit_length);
7459 decode_picture_parameter_set(h, bit_length);
7463 case NAL_END_SEQUENCE:
7464 case NAL_END_STREAM:
7465 case NAL_FILLER_DATA:
7467 case NAL_AUXILIARY_SLICE:
     // Intentionally ignored NAL types fall through to the default log below.
7470 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7473 if(context_count == h->max_contexts) {
     // Enough slices gathered for all threads — flush them now.
7474 execute_decode_slices(h, context_count);
7479 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7481 /* Slice could not be decoded in parallel mode, copy down
7482 * NAL unit stuff to context 0 and restart. Note that
7483 * rbsp_buffer is not transferred, but since we no longer
7484 * run in parallel mode this should not be an issue. */
7485 h->nal_unit_type = hx->nal_unit_type;
7486 h->nal_ref_idc = hx->nal_ref_idc;
     // Flush any remaining slices collected after the loop.
7492 execute_decode_slices(h, context_count);
7497 * returns the number of bytes consumed for building the current frame
/**
 * Return the number of bytes consumed for building the current frame.
 *
 * @param s        MpegEncContext holding decoder state
 * @param pos      bitstream position reached by the parser
 * @param buf_size total size of the input buffer
 */
7499 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7500 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7501 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decoder entry point: decode one input packet into (at most) one output frame.
 *
 * Handles: end-of-stream draining of h->delayed_pic[], one-time parsing of
 * the avcC extradata (AVC framing), Annex-B extradata, NAL decoding via
 * decode_nal_units(), interlacing/repeat_pict signalling from picture-timing
 * SEI, and B-frame reordering through the delayed-picture queue.
 *
 * @param avctx     codec context
 * @param data      output AVFrame (written on success)
 * @param data_size set to sizeof(AVFrame) when a frame is output, else untouched/0
 * @param buf       input packet data (NULL/0 size signals end of stream)
 * @param buf_size  input packet size in bytes
 * @return bytes consumed (via get_consumed_bytes) on the visible success path
 */
7506 static int decode_frame(AVCodecContext *avctx,
7507 void *data, int *data_size,
7508 const uint8_t *buf, int buf_size)
7510 H264Context *h = avctx->priv_data;
7511 MpegEncContext *s = &h->s;
7512 AVFrame *pict = data;
7515 s->flags= avctx->flags;
7516 s->flags2= avctx->flags2;
7518 /* end of stream, output what is still in the buffers */
7519 if (buf_size == 0) {
7523 //FIXME factorize this with the output code below
7524 out = h->delayed_pic[0];
     // Pick the lowest-poc delayed picture to output first (display order).
7526 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7527 if(h->delayed_pic[i]->poc < out->poc){
7528 out = h->delayed_pic[i];
     // Compact the queue over the slot being output.
7532 for(i=out_idx; h->delayed_pic[i]; i++)
7533 h->delayed_pic[i] = h->delayed_pic[i+1];
7536 *data_size = sizeof(AVFrame);
7537 *pict= *(AVFrame*)out;
     // One-time avcC extradata parsing (ISO/IEC 14496-15 AVCDecoderConfigurationRecord).
7543 if(h->is_avc && !h->got_avcC) {
7544 int i, cnt, nalsize;
7545 unsigned char *p = avctx->extradata;
7546 if(avctx->extradata_size < 7) {
7547 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7551 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7554 /* sps and pps in the avcC always have length coded with 2 bytes,
7555 so put a fake nal_length_size = 2 while parsing them */
7556 h->nal_length_size = 2;
7557 // Decode sps from avcC
7558 cnt = *(p+5) & 0x1f; // Number of sps
7560 for (i = 0; i < cnt; i++) {
7561 nalsize = AV_RB16(p) + 2;
7562 if(decode_nal_units(h, p, nalsize) < 0) {
7563 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7568 // Decode pps from avcC
7569 cnt = *(p++); // Number of pps
7570 for (i = 0; i < cnt; i++) {
7571 nalsize = AV_RB16(p) + 2;
7572 if(decode_nal_units(h, p, nalsize) != nalsize) {
7573 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7578 // Now store right nal length size, that will be use to parse all other nals
7579 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7580 // Do not reparse avcC
     // Annex-B extradata (raw SPS/PPS NALs) is parsed once, lazily, here.
7584 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7585 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7590 buf_index=decode_nal_units(h, buf, buf_size);
7594 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7595 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7596 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     // A full picture is available (non-chunked mode, or all MB rows decoded).
7600 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7601 Picture *out = s->current_picture_ptr;
7602 Picture *cur = s->current_picture_ptr;
7603 int i, pics, cross_idr, out_of_order, out_idx;
7607 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7608 s->current_picture_ptr->pict_type= s->pict_type;
7611 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
     // Save POC state for decoding the next picture (spec clause 8.2.1).
7612 h->prev_poc_msb= h->poc_msb;
7613 h->prev_poc_lsb= h->poc_lsb;
7615 h->prev_frame_num_offset= h->frame_num_offset;
7616 h->prev_frame_num= h->frame_num;
7619 * FIXME: Error handling code does not seem to support interlaced
7620 * when slices span multiple rows
7621 * The ff_er_add_slice calls don't work right for bottom
7622 * fields; they cause massive erroneous error concealing
7623 * Error marking covers both fields (top and bottom).
7624 * This causes a mismatched s->error_count
7625 * and a bad error table. Further, the error count goes to
7626 * INT_MAX when called for bottom field, because mb_y is
7627 * past end by one (callers fault) and resync_mb_y != 0
7628 * causes problems for the first MB line, too.
7635 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7636 /* Wait for second field. */
7640 cur->repeat_pict = 0;
7642 /* Signal interlacing information externally. */
7643 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7644 if(h->sps.pic_struct_present_flag){
7645 switch (h->sei_pic_struct)
7647 case SEI_PIC_STRUCT_FRAME:
7648 cur->interlaced_frame = 0;
7650 case SEI_PIC_STRUCT_TOP_FIELD:
7651 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7652 case SEI_PIC_STRUCT_TOP_BOTTOM:
7653 case SEI_PIC_STRUCT_BOTTOM_TOP:
7654 cur->interlaced_frame = 1;
7656 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7657 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7658 // Signal the possibility of telecined film externally (pic_struct 5,6)
7659 // From these hints, let the applications decide if they apply deinterlacing.
7660 cur->repeat_pict = 1;
7661 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7663 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7664 // Force progressive here, as doubling interlaced frame is a bad idea.
7665 cur->interlaced_frame = 0;
7666 cur->repeat_pict = 2;
7668 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7669 cur->interlaced_frame = 0;
7670 cur->repeat_pict = 4;
7674 /* Derive interlacing flag from used decoding process. */
7675 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7678 if (cur->field_poc[0] != cur->field_poc[1]){
7679 /* Derive top_field_first from field pocs. */
7680 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7682 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7683 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7684 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7685 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7686 cur->top_field_first = 1;
7688 cur->top_field_first = 0;
7690 /* Most likely progressive */
7691 cur->top_field_first = 0;
7695 //FIXME do something with unavailable reference frames
7697 /* Sort B-frames into display order */
7699 if(h->sps.bitstream_restriction_flag
7700 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7701 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7705 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7706 && !h->sps.bitstream_restriction_flag){
7707 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7712 while(h->delayed_pic[pics]) pics++;
7714 assert(pics <= MAX_DELAYED_PIC_COUNT);
     // Queue the just-decoded picture; mark it so unreference code keeps it
     // alive until it has been output (see DELAYED_PIC_REF in HEAD).
7716 h->delayed_pic[pics++] = cur;
7717 if(cur->reference == 0)
7718 cur->reference = DELAYED_PIC_REF;
7720 out = h->delayed_pic[0];
     // Lowest-poc picture in the queue is the next one in display order.
7722 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7723 if(h->delayed_pic[i]->poc < out->poc){
7724 out = h->delayed_pic[i];
7727 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7729 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7731 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
     // Grow the reorder delay heuristically when pictures arrive out of order.
7733 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7735 ((!cross_idr && out->poc > h->outputed_poc + 2)
7736 || cur->pict_type == FF_B_TYPE)))
7739 s->avctx->has_b_frames++;
7742 if(out_of_order || pics > s->avctx->has_b_frames){
7743 out->reference &= ~DELAYED_PIC_REF;
7744 for(i=out_idx; h->delayed_pic[i]; i++)
7745 h->delayed_pic[i] = h->delayed_pic[i+1];
7747 if(!out_of_order && pics > s->avctx->has_b_frames){
7748 *data_size = sizeof(AVFrame);
7750 h->outputed_poc = out->poc;
7751 *pict= *(AVFrame*)out;
7753 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7758 assert(pict->data[0] || !*data_size);
7759 ff_print_debug_info(s, pict);
7760 //printf("out %d\n", (int)pict->data[0]);
7763 /* Return the Picture timestamp as the frame number */
7764 /* we subtract 1 because it is added on utils.c */
7765 avctx->frame_number = s->picture_number - 1;
7767 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the macroblocks neighbouring
 * the current one (same-slice check via h->slice_table against h->slice_num).
 * Index layout per the assignments below: 0=top-left, 1=top, 2=top-right,
 * 3=left; 4 and 5 are hard-coded placeholders (see FIXMEs).
 */
7770 static inline void fill_mb_avail(H264Context *h){
7771 MpegEncContext * const s = &h->s;
7772 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     // NOTE(review): the top-row guard (s->mb_y check) is elided in this
     // excerpt; these reads of mb_xy - mb_stride presumably sit under it.
7775 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7776 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7777 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7783 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7784 h->mb_avail[4]= 1; //FIXME move out
7785 h->mb_avail[5]= 0; //FIXME move out
// Self-test harness body. NOTE(review): the enclosing function entry point
// (presumably an #ifdef TEST main()) is not visible in this excerpt — verify
// against the full source. It exercises, in order: unsigned/signed
// Exp-Golomb round-trips, the 4x4 (I)DCT, the quantizer, and the NAL
// escaping/unescaping layer.
7793 #define SIZE (COUNT*40)
7799 // int int_temp[10000];
7801 AVCodecContext avctx;
7803 dsputil_init(&dsp, &avctx);
// --- Unsigned Exp-Golomb: write COUNT codes, read them back, compare. ---
7805 init_put_bits(&pb, temp, SIZE);
7806 printf("testing unsigned exp golomb\n");
7807 for(i=0; i<COUNT; i++){
7809 set_ue_golomb(&pb, i);
7810 STOP_TIMER("set_ue_golomb");
7812 flush_put_bits(&pb);
7814 init_get_bits(&gb, temp, 8*SIZE);
7815 for(i=0; i<COUNT; i++){
7818 s= show_bits(&gb, 24);
7821 j= get_ue_golomb(&gb);
7823 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7826 STOP_TIMER("get_ue_golomb");
// --- Signed Exp-Golomb: same round-trip, centred on zero via i - COUNT/2. ---
7830 init_put_bits(&pb, temp, SIZE);
7831 printf("testing signed exp golomb\n");
7832 for(i=0; i<COUNT; i++){
7834 set_se_golomb(&pb, i - COUNT/2);
7835 STOP_TIMER("set_se_golomb");
7837 flush_put_bits(&pb);
7839 init_get_bits(&gb, temp, 8*SIZE);
7840 for(i=0; i<COUNT; i++){
7843 s= show_bits(&gb, 24);
7846 j= get_se_golomb(&gb);
7847 if(j != i - COUNT/2){
7848 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7851 STOP_TIMER("get_se_golomb");
// --- 4x4 forward DCT + inverse: measure reconstruction error on random blocks. ---
7855 printf("testing 4x4 (I)DCT\n");
7858 uint8_t src[16], ref[16];
7859 uint64_t error= 0, max_error=0;
7861 for(i=0; i<COUNT; i++){
7863 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7864 for(j=0; j<16; j++){
7865 ref[j]= random()%255;
7866 src[j]= random()%255;
7869 h264_diff_dct_c(block, src, ref, 4);
// Dequant-style rescaling before the IDCT (the *4/5 factors compensate the
// DCT's non-uniform basis norms on odd rows/columns).
7872 for(j=0; j<16; j++){
7873 // printf("%d ", block[j]);
7874 block[j]= block[j]*4;
7875 if(j&1) block[j]= (block[j]*4 + 2)/5;
7876 if(j&4) block[j]= (block[j]*4 + 2)/5;
7880 s->dsp.h264_idct_add(ref, block, 4);
7881 /* for(j=0; j<16; j++){
7882 printf("%d ", ref[j]);
7886 for(j=0; j<16; j++){
7887 int diff= FFABS(src[j] - ref[j]);
7890 max_error= FFMAX(max_error, diff);
7893 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7894 printf("testing quantizer\n");
7895 for(qp=0; qp<52; qp++){
7897 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: escape random bitstreams, unescape, verify byte-exactness. ---
7900 printf("Testing NAL layer\n");
7902 uint8_t bitstream[COUNT];
7903 uint8_t nal[COUNT*2];
7905 memset(&h, 0, sizeof(H264Context));
7907 for(i=0; i<COUNT; i++){
// Fill with non-zero bytes, then sprinkle zeros at random positions so the
// emulation-prevention (00 00 0x) paths get exercised.
7915 for(j=0; j<COUNT; j++){
7916 bitstream[j]= (random() % 255) + 1;
7919 for(j=0; j<zeros; j++){
7920 int pos= random() % COUNT;
7921 while(bitstream[pos] == 0){
7930 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7932 printf("encoding failed\n");
7936 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7940 if(out_length != COUNT){
7941 printf("incorrect length %d %d\n", out_length, COUNT);
7945 if(consumed != nal_length){
7946 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7950 if(memcmp(bitstream, out, COUNT)){
7951 printf("mismatch\n");
7957 printf("Testing RBSP\n");
/**
 * Free all decoder-owned allocations at codec close time.
 *
 * Releases both rbsp scratch buffers, the per-frame tables (free_tables),
 * and every cached SPS/PPS slot. av_freep() NULLs the freed pointers,
 * guarding against double-free on repeated close.
 *
 * @param avctx codec context whose priv_data is the H264Context
 */
7965 static av_cold int decode_end(AVCodecContext *avctx)
7967 H264Context *h = avctx->priv_data;
7968 MpegEncContext *s = &h->s;
7971 av_freep(&h->rbsp_buffer[0]);
7972 av_freep(&h->rbsp_buffer[1]);
7973 free_tables(h); //FIXME cleanup init stuff perhaps
7975 for(i = 0; i < MAX_SPS_COUNT; i++)
7976 av_freep(h->sps_buffers + i);
7978 for(i = 0; i < MAX_PPS_COUNT; i++)
7979 av_freep(h->pps_buffers + i);
7983 // memset(h, 0, sizeof(H264Context));
// Public AVCodec registration for the H.264 decoder. The capability flags
// request direct rendering (CODEC_CAP_DR1) and delayed output
// (CODEC_CAP_DELAY, needed for the B-frame reordering queue above);
// draw_horiz_band support is deliberately disabled (commented out).
// NOTE(review): several designated fields of this initializer (name, type,
// id, callbacks) are elided in this excerpt.
7989 AVCodec h264_decoder = {
7993 sizeof(H264Context),
7998 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8000 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),