2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
36 #include "vdpau_internal.h"
40 #include "x86/h264_i386.h"
47 * Value of Picture.reference when Picture is not a reference picture, but
48 * is held for delayed output.
/* Sentinel for Picture.reference: not used for prediction, only retained
 * until it has been output in display order. */
50 #define DELAYED_PIC_REF 4
/* CAVLC decode tables. The VLC structs are runtime-built readers; each
 * *_tables array is the static storage they point into, and each *_size
 * constant is the number of (VLC_TYPE[2]) entries available to the builder.
 * Names follow the H.264 CAVLC syntax elements (coeff_token, total_zeros,
 * run_before). */
52 static VLC coeff_token_vlc[4];
53 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
54 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* coeff_token for chroma DC blocks has its own single table. */
56 static VLC chroma_dc_coeff_token_vlc;
57 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
58 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros: one VLC per possible total_coeff class (15 luma variants). */
60 static VLC total_zeros_vlc[15];
61 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
62 static const int total_zeros_vlc_tables_size = 512;
/* total_zeros for chroma DC (3 variants). */
64 static VLC chroma_dc_total_zeros_vlc[3];
65 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
66 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before: one VLC per zeros-left class (6 variants)... */
68 static VLC run_vlc[6];
69 static VLC_TYPE run_vlc_tables[6][8][2];
70 static const int run_vlc_tables_size = 8;
/* ...plus a separate, larger table for the remaining run_before cases. */
73 static VLC_TYPE run7_vlc_table[96][2];
74 static const int run7_vlc_table_size = 96;
/* Forward declarations for functions defined later in this file:
 * SVQ3 variants of the dequant/IDCT, the in-loop deblocking filters
 * (full and fast paths), and the long-term-reference removal helper. */
76 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
77 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
78 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
80 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into one uint32_t so that a subsequent 32-bit
 * store writes them in a fixed memory order on either endianness.
 * NOTE(review): the #else/#endif and closing brace of this function are
 * not visible in this excerpt; the two returns are the big-endian and
 * little-endian variants respectively. */
82 static av_always_inline uint32_t pack16to32(int a, int b){
83 #ifdef WORDS_BIGENDIAN
84 return (b&0xFFFF) + (a<<16);
86 return (a&0xFFFF) + (b<<16);
/* Table of qp % 6 for qp in 0..51 — avoids a runtime modulo. */
90 static const uint8_t rem6[52]={
91 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Table of qp / 6 for qp in 0..51 — avoids a runtime division. */
94 static const uint8_t div6[52]={
95 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Per-MBAFF-case mapping of left-neighbour 4x4 block indices; fill_caches
 * selects one of the four rows (initializers not visible in this excerpt). */
98 static const int left_block_options[4][8]={
/* Direct-lookup table for CAVLC level decoding, indexed by the next
 * LEVEL_TAB_BITS bitstream bits; presumably filled at init time —
 * the initialisation code is not visible in this excerpt. */
105 #define LEVEL_TAB_BITS 8
106 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fills the per-macroblock neighbour caches (intra pred modes, non-zero
 * coefficient counts, CBP, motion vectors, reference indices, mvd, and
 * direct-mode flags) from the already-decoded top/left/topleft/topright
 * neighbours, handling MBAFF field/frame neighbour remapping.
 *
 * @param h          decoder context
 * @param mb_type    type of the current macroblock
 * @param for_deblock non-zero when caches are filled for the loop filter
 *                   (different slice-boundary rules apply)
 *
 * NOTE(review): this excerpt omits many interleaved lines of the original
 * function, so braces/branches below appear unbalanced; the code lines are
 * preserved verbatim.
 */
108 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
109 MpegEncContext * const s = &h->s;
110 const int mb_xy= h->mb_xy;
111 int topleft_xy, top_xy, topright_xy, left_xy[2];
112 int topleft_type, top_type, topright_type, left_type[2];
113 const int * left_block;
114 int topleft_partition= -1;
117 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
119 //FIXME deblocking could skip the intra and nnz parts.
120 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
123 /* Wow, what a mess, why didn't they simplify the interlacing & intra
124 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour addresses: one row up, one MB left. */
126 topleft_xy = top_xy - 1;
127 topright_xy= top_xy + 1;
128 left_xy[1] = left_xy[0] = mb_xy-1;
129 left_block = left_block_options[0];
/* MBAFF: neighbours are addressed per MB *pair*; whether each neighbour
 * is field- or frame-coded changes which row of the pair to use. */
131 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
132 const int top_pair_xy = pair_xy - s->mb_stride;
133 const int topleft_pair_xy = top_pair_xy - 1;
134 const int topright_pair_xy = top_pair_xy + 1;
135 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
136 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
137 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
138 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
139 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
140 const int bottom = (s->mb_y & 1);
141 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
143 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
144 top_xy -= s->mb_stride;
146 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
147 topleft_xy -= s->mb_stride;
148 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
149 topleft_xy += s->mb_stride;
150 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
151 topleft_partition = 0;
153 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
154 topright_xy -= s->mb_stride;
156 if (left_mb_field_flag != curr_mb_field_flag) {
157 left_xy[1] = left_xy[0] = pair_xy - 1;
158 if (curr_mb_field_flag) {
159 left_xy[1] += s->mb_stride;
160 left_block = left_block_options[3];
162 left_block= left_block_options[2 - bottom];
/* Remember resolved neighbour addresses for later stages. */
167 h->top_mb_xy = top_xy;
168 h->left_mb_xy[0] = left_xy[0];
169 h->left_mb_xy[1] = left_xy[1];
/* Deblock path: any decoded MB (slice_table < 0xFFFF) counts as available. */
173 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
174 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
175 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
177 if(MB_MBAFF && !IS_INTRA(mb_type)){
179 for(list=0; list<h->list_count; list++){
180 //These values where changed for ease of performing MC, we need to change them back
181 //FIXME maybe we can make MC and loop filter use the same values or prevent
182 //the MC code from changing ref_cache and rather use a temporary array.
183 if(USES_LIST(mb_type,list)){
184 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
185 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
186 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
188 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
189 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: neighbours only count when in the same slice. */
194 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
195 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
196 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
197 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
198 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra MB: build the *_samples_available bitmasks; with constrained
 * intra prediction only intra neighbours count (type_mask). */
200 if(IS_INTRA(mb_type)){
201 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
202 h->topleft_samples_available=
203 h->top_samples_available=
204 h->left_samples_available= 0xFFFF;
205 h->topright_samples_available= 0xEEEA;
207 if(!(top_type & type_mask)){
208 h->topleft_samples_available= 0xB3FF;
209 h->top_samples_available= 0x33FF;
210 h->topright_samples_available= 0x26EA;
212 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
213 if(IS_INTERLACED(mb_type)){
214 if(!(left_type[0] & type_mask)){
215 h->topleft_samples_available&= 0xDFFF;
216 h->left_samples_available&= 0x5FFF;
218 if(!(left_type[1] & type_mask)){
219 h->topleft_samples_available&= 0xFF5F;
220 h->left_samples_available&= 0xFF5F;
223 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
224 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
225 assert(left_xy[0] == left_xy[1]);
226 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
227 h->topleft_samples_available&= 0xDF5F;
228 h->left_samples_available&= 0x5F5F;
232 if(!(left_type[0] & type_mask)){
233 h->topleft_samples_available&= 0xDF5F;
234 h->left_samples_available&= 0x5F5F;
238 if(!(topleft_type & type_mask))
239 h->topleft_samples_available&= 0x7FFF;
241 if(!(topright_type & type_mask))
242 h->topright_samples_available&= 0xFBFF;
/* Intra 4x4: import the neighbours' per-4x4-block prediction modes. */
244 if(IS_INTRA4x4(mb_type)){
245 if(IS_INTRA4x4(top_type)){
246 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
247 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
248 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
249 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 if(!(top_type & type_mask))
257 h->intra4x4_pred_mode_cache[4+8*0]=
258 h->intra4x4_pred_mode_cache[5+8*0]=
259 h->intra4x4_pred_mode_cache[6+8*0]=
260 h->intra4x4_pred_mode_cache[7+8*0]= pred;
263 if(IS_INTRA4x4(left_type[i])){
264 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
265 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 if(!(left_type[i] & type_mask))
273 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
274 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non_zero_count cache: neighbour nnz values, or 0/64 defaults
 * depending on CABAC and intra status. */
290 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
292 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
293 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
294 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
295 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
297 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
298 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
300 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
301 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
304 h->non_zero_count_cache[4+8*0]=
305 h->non_zero_count_cache[5+8*0]=
306 h->non_zero_count_cache[6+8*0]=
307 h->non_zero_count_cache[7+8*0]=
309 h->non_zero_count_cache[1+8*0]=
310 h->non_zero_count_cache[2+8*0]=
312 h->non_zero_count_cache[1+8*3]=
313 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
317 for (i=0; i<2; i++) {
319 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
320 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
321 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
322 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
324 h->non_zero_count_cache[3+8*1 + 2*8*i]=
325 h->non_zero_count_cache[3+8*2 + 2*8*i]=
326 h->non_zero_count_cache[0+8*1 + 8*i]=
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of the top/left neighbours, for CABAC context derivation. */
334 h->top_cbp = h->cbp_table[top_xy];
335 } else if(IS_INTRA(mb_type)) {
342 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
343 } else if(IS_INTRA(mb_type)) {
349 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
352 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct MB: fill motion-vector and reference-index caches from the
 * top, left, topleft and topright neighbours (or zeros/NOT_AVAILABLE). */
357 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
359 for(list=0; list<h->list_count; list++){
360 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
361 /*if(!h->mv_cache_clean[list]){
362 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
363 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
364 h->mv_cache_clean[list]= 1;
368 h->mv_cache_clean[list]= 0;
370 if(USES_LIST(top_type, list)){
371 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
372 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
377 h->ref_cache[list][scan8[0] + 0 - 1*8]=
378 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
379 h->ref_cache[list][scan8[0] + 2 - 1*8]=
380 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
386 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
390 int cache_idx = scan8[0] - 1 + i*2*8;
391 if(USES_LIST(left_type[i], list)){
392 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
393 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
394 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
395 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
396 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
397 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
399 *(uint32_t*)h->mv_cache [list][cache_idx ]=
400 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
401 h->ref_cache[list][cache_idx ]=
402 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
406 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
409 if(USES_LIST(topleft_type, list)){
410 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
411 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
412 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
413 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
415 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
416 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
419 if(USES_LIST(topright_type, list)){
420 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
421 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
422 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
423 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
425 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
426 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
429 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
432 h->ref_cache[list][scan8[5 ]+1] =
433 h->ref_cache[list][scan8[7 ]+1] =
434 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
435 h->ref_cache[list][scan8[4 ]] =
436 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
437 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
438 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
440 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
441 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* mvd (motion vector difference) caches, used for CABAC contexts. */
444 /* XXX beurk, Load mvd */
445 if(USES_LIST(top_type, list)){
446 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
457 if(USES_LIST(left_type[0], list)){
458 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
465 if(USES_LIST(left_type[1], list)){
466 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
473 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
474 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
476 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
477 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flag cache from top/left neighbours. */
479 if(h->slice_type_nos == FF_B_TYPE){
480 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
482 if(IS_DIRECT(top_type)){
483 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
484 }else if(IS_8X8(top_type)){
485 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
486 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
487 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
489 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
492 if(IS_DIRECT(left_type[0]))
493 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
494 else if(IS_8X8(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
497 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
499 if(IS_DIRECT(left_type[1]))
500 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
501 else if(IS_8X8(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
504 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field conversion of cached neighbour data: MAP_MVS applies
 * the (locally redefined) MAP_F2F macro to every cached neighbour slot. */
510 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
511 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
516 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
519 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame neighbour -> field MB: double ref index, halve vertical mv/mvd. */
521 #define MAP_F2F(idx, mb_type)\
522 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
523 h->ref_cache[list][idx] <<= 1;\
524 h->mv_cache[list][idx][1] /= 2;\
525 h->mvd_cache[list][idx][1] /= 2;\
/* field neighbour -> frame MB: the inverse mapping. */
530 #define MAP_F2F(idx, mb_type)\
531 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
532 h->ref_cache[list][idx] >>= 1;\
533 h->mv_cache[list][idx][1] <<= 1;\
534 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for the 8x8 transform CABAC context. */
544 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copies the bottom-row and right-column intra4x4 prediction modes from the
 * per-MB cache back into the frame-wide intra4x4_pred_mode array, where the
 * next row/column of macroblocks will read them as neighbour data.
 * (Closing brace not visible in this excerpt.)
 */
547 static inline void write_back_intra_pred_mode(H264Context *h){
548 const int mb_xy= h->mb_xy;
550 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
551 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
552 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
553 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
554 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
555 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
556 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
560 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Returns 0 on success; on an illegal mode/availability combination it logs
 * an error (the failure return path is not visible in this excerpt).
 * top[]/left[] remap a mode to its fallback when that edge is unavailable;
 * -1 entries mark modes that are invalid without that edge. */
562 static inline int check_intra4x4_pred_mode(H264Context *h){
563 MpegEncContext * const s = &h->s;
564 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
565 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top row of 4x4 blocks, only when the top edge is unavailable. */
568 if(!(h->top_samples_available&0x8000)){
570 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
572 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left column: each bit of the mask covers one of the four 4x4 rows. */
580 if((h->left_samples_available&0x8888)!=0x8888){
581 static const int mask[4]={0x8000,0x2000,0x80,0x20};
583 if(!(h->left_samples_available&mask[i])){
584 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
586 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
596 } //FIXME cleanup like next
599 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for whole-MB (16x16/chroma)
 * prediction modes; returns the possibly-remapped mode (error return path
 * not visible in this excerpt). */
601 static inline int check_intra_pred_mode(H264Context *h, int mode){
602 MpegEncContext * const s = &h->s;
603 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
604 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
607 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
611 if(!(h->top_samples_available&0x8000)){
614 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
619 if((h->left_samples_available&0x8080) != 0x8080){
621 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
/* Only half of the left edge exists: switch to a special partial-DC mode. */
622 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
625 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 * gets the predicted intra4x4 prediction mode.
/* Prediction is the minimum of the left and top neighbour modes; if either
 * is unavailable (negative), DC_PRED is used. (The non-DC return statement
 * and closing brace are not visible in this excerpt.) */
636 static inline int pred_intra_mode(H264Context *h, int n){
637 const int index8= scan8[n];
638 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640 const int min= FFMIN(left, top);
642 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
644 if(min<0) return DC_PRED;
/**
 * Copies the edge non-zero-coefficient counts from the per-MB cache back
 * into the frame-wide non_zero_count array (luma bottom row / right column,
 * then the chroma entries), for use as neighbour data by later MBs.
 * (Closing brace not visible in this excerpt.)
 */
648 static inline void write_back_non_zero_count(H264Context *h){
649 const int mb_xy= h->mb_xy;
651 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
652 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
653 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
654 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
655 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
656 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
657 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
659 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
660 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
661 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
663 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
664 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
665 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
669 * gets the predicted number of non-zero coefficients.
670 * @param n block index
/* Combines the left and top neighbour nnz values; the i<64 test averages
 * (rounding up) when both neighbours carried real counts. (The declaration
 * of i and the return statement are not visible in this excerpt.) */
672 static inline int pred_non_zero_count(H264Context *h, int n){
673 const int index8= scan8[n];
674 const int left= h->non_zero_count_cache[index8 - 1];
675 const int top = h->non_zero_count_cache[index8 - 8];
678 if(i<64) i= (i+1)>>1;
680 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the diagonal (top-right, falling back to top-left) neighbour MV
 * used as candidate C in motion vector prediction.
 *
 * @param C          out: points at the selected cached MV
 * @param i          scan8 index of the current block
 * @param part_width partition width in 4x4 units
 * @return the reference index belonging to *C
 *
 * NOTE(review): several lines of the original are omitted from this excerpt
 * (e.g. the MBAFF guard condition preceding line 698 and the #undef of
 * SET_DIAG_MV); code lines are preserved verbatim.
 */
685 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
686 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
687 MpegEncContext *s = &h->s;
689 /* there is no consistent mapping of mvs to neighboring locations that will
690 * make mbaff happy, so we can't move all this logic to fill_caches */
692 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot scan8[0]-2 holds the MBAFF-adjusted MV we may return. */
694 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
695 *C = h->mv_cache[list][scan8[0]-2];
698 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
699 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
700 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: reads the MV/ref at 4x4 coords (X4,Y4) from the current
 * picture, scales the vertical component by MV_OP and the ref by REF_OP
 * (field<->frame conversion), stores into the scratch slot, and returns. */
701 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
702 const int x4 = X4, y4 = Y4;\
703 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
704 if(!USES_LIST(mb_type,list))\
705 return LIST_NOT_USED;\
706 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
707 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
708 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
709 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
711 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* Top-right unavailable: try the left neighbour column instead. */
714 if(topright_ref == PART_NOT_AVAILABLE
715 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
716 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
718 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
719 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
722 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
724 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
725 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF / simple case: use topright if available, else topleft. */
731 if(topright_ref != PART_NOT_AVAILABLE){
732 *C= h->mv_cache[list][ i - 8 + part_width ];
735 tprintf(s->avctx, "topright MV not available\n");
737 *C= h->mv_cache[list][ i - 8 - 1 ];
738 return h->ref_cache[list][ i - 8 - 1 ];
743 * gets the predicted MV.
744 * @param n the block index
745 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
746 * @param mx the x component of the predicted motion vector
747 * @param my the y component of the predicted motion vector
/* Median motion vector prediction from neighbours A (left), B (top) and
 * C (diagonal, via fetch_diagonal_mv): median when >1 neighbour shares the
 * target ref, the single matching neighbour when exactly one does, and the
 * left MV when only the left neighbour is available. Some branches of the
 * original are not visible in this excerpt. */
749 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
750 const int index8= scan8[n];
751 const int top_ref= h->ref_cache[list][ index8 - 8 ];
752 const int left_ref= h->ref_cache[list][ index8 - 1 ];
753 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
754 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
756 int diagonal_ref, match_count;
758 assert(part_width==1 || part_width==2 || part_width==4);
768 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
769 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
770 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
771 if(match_count > 1){ //most common
772 *mx= mid_pred(A[0], B[0], C[0]);
773 *my= mid_pred(A[1], B[1], C[1]);
774 }else if(match_count==1){
778 }else if(top_ref==ref){
786 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
790 *mx= mid_pred(A[0], B[0], C[0]);
791 *my= mid_pred(A[1], B[1], C[1]);
795 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
799 * gets the directionally predicted 16x8 MV.
800 * @param n the block index
801 * @param mx the x component of the predicted motion vector
802 * @param my the y component of the predicted motion vector
/* 16x8 partitions have a preferred single neighbour: top for the upper
 * partition, left for the lower; when that neighbour does not match the
 * target ref (branches omitted in this excerpt), the generic pred_motion()
 * fallback at the end is used. */
804 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
806 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
807 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
809 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
817 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
818 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
820 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
830 pred_motion(h, n, 4, list, ref, mx, my);
834 * gets the directionally predicted 8x16 MV.
835 * @param n the block index
836 * @param mx the x component of the predicted motion vector
837 * @param my the y component of the predicted motion vector
/* 8x16 partitions prefer the left neighbour for the left partition and the
 * diagonal neighbour for the right one; on a ref mismatch (branches omitted
 * in this excerpt) the generic pred_motion() fallback is used. */
839 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
841 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
842 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
844 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
855 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
857 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
859 if(diagonal_ref == ref){
867 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Motion prediction for P_Skip macroblocks: the MV is zero when either the
 * top or left neighbour is unavailable, or when either uses ref 0 with a
 * zero MV (the zero-assignment branch itself is not visible in this
 * excerpt); otherwise it falls back to the generic pred_motion().
 */
870 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
871 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
872 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
874 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
876 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
877 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
878 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
884 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factor for reference i of
 * list 0, from the POC distances tb = poc - poc0 and td = poc1 - poc0
 * (both clipped to [-128,127]); long-term references and td==0 take an
 * early branch whose body is not visible in this excerpt.
 */
889 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
890 int poc0 = h->ref_list[0][i].poc;
891 int td = av_clip(poc1 - poc0, -128, 127);
892 if(td == 0 || h->ref_list[0][i].long_ref){
895 int tb = av_clip(poc - poc0, -128, 127);
896 int tx = (16384 + (FFABS(td) >> 1)) / td;
897 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Precomputes per-reference distance scale factors for temporal direct
 * mode: the field variant fills dist_scale_factor_field per field (the
 * enclosing condition for that branch is not visible in this excerpt),
 * and the final loop fills the frame-level dist_scale_factor table.
 */
901 static inline void direct_dist_scale_factor(H264Context * const h){
902 MpegEncContext * const s = &h->s;
903 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
904 const int poc1 = h->ref_list[1][0].poc;
906 for(field=0; field<2; field++){
907 const int poc = h->s.current_picture_ptr->field_poc[field];
908 const int poc1 = h->ref_list[1][0].field_poc[field];
909 for(i=0; i < 2*h->ref_count[0]; i++)
910 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
913 for(i=0; i<h->ref_count[0]; i++){
914 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the map from the co-located picture's (list-1 ref) reference
 * indices to the current list-0 reference indices, matching pictures by
 * their encoded POC key (4*frame_num + reference-parity bits).
 *
 * @param map      output table; entries 16+ are the MBAFF/field half
 * @param field    current field parity (when mbafi)
 * @param colfield field parity of the co-located reference
 * @param mbafi    non-zero for the MBAFF field-pair variant
 *
 * NOTE(review): several lines (e.g. the poc adjustment before line 935 and
 * inner-loop closing braces) are not visible in this excerpt.
 */
918 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 int j, old_ref, rfield;
922 int start= mbafi ? 16 : 0;
923 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
924 int interl= mbafi || s->picture_structure != PICT_FRAME;
926 /* bogus; fills in for missing frames */
927 memset(map[list], 0, sizeof(map[list]));
929 for(rfield=0; rfield<2; rfield++){
930 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
931 int poc = ref1->ref_poc[colfield][list][old_ref];
935 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
936 poc= (poc&~3) + rfield + 1;
938 for(j=start; j<end; j++){
939 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
940 int cur_ref= mbafi ? (j-16)^field : j;
941 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
943 map[list][old_ref] = cur_ref;
// Records this picture's reference-list metadata (counts and frame_num/parity
// keys) into the current Picture, then — for B slices using temporal direct —
// builds the col-to-list0 maps via fill_colmap for frame and both field cases.
// NOTE(review): variable declarations and some control lines are elided
// (numbering gaps at 955, 958, 963-964, 968-974), e.g. the early return for
// non-B/spatial slices is only implied by line 972.
951 static inline void direct_ref_list_init(H264Context * const h){
952 MpegEncContext * const s = &h->s;
953 Picture * const ref1 = &h->ref_list[1][0];
954 Picture * const cur = s->current_picture_ptr;
// sidx/ref1sidx select the field half (0/1) opposite to the coded parity bit.
956 int sidx= (s->picture_structure&1)^1;
957 int ref1sidx= (ref1->reference&1)^1;
959 for(list=0; list<2; list++){
960 cur->ref_count[sidx][list] = h->ref_count[list];
961 for(j=0; j<h->ref_count[list]; j++)
// Same 4*frame_num + parity key that fill_colmap matches against.
962 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
965 if(s->picture_structure == PICT_FRAME){
// Frame pictures: duplicate the data so both field halves are valid.
966 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
967 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
970 cur->mbaff= FRAME_MBAFF;
// Temporal direct maps are only needed for B slices without spatial prediction.
972 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
975 for(list=0; list<2; list++){
976 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
977 for(field=0; field<2; field++)
978 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
// Direct-mode motion prediction for B macroblocks (H.264 spec 8.4.1.2).
// Fills h->mv_cache / h->ref_cache for both lists and updates *mb_type /
// h->sub_mb_type, handling both spatial and temporal direct prediction and
// the interlaced (MBAFF / field) co-located-picture cases.
// NOTE(review): this dump has many elided interior lines (closing braces,
// declarations of mb_type_col/ref/mv/a/b/scale/ref0, some else branches) —
// the embedded numbering gaps mark every omission. Comments below only
// describe what the visible lines establish.
982 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
983 MpegEncContext * const s = &h->s;
984 int b8_stride = h->b8_stride;
985 int b4_stride = h->b_stride;
986 int mb_xy = h->mb_xy;
988 const int16_t (*l1mv0)[2], (*l1mv1)[2];
989 const int8_t *l1ref0, *l1ref1;
990 const int is_b8x8 = IS_8X8(*mb_type);
991 unsigned int sub_mb_type;
994 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// --- Locate the co-located macroblock in the list-1 picture, adjusting
// mb_xy when the current MB and the co-located picture differ in
// frame/field coding (the AFL/AFR/FR/FL comments name those transitions).
996 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
997 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
998 int cur_poc = s->current_picture_ptr->poc;
999 int *col_poc = h->ref_list[1]->field_poc;
// Pick the co-located field whose POC is closest to the current picture.
1000 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1001 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1003 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1004 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1005 mb_xy += s->mb_stride*fieldoff;
1008 }else{ // AFL/AFR/FR/FL -> AFR/FR
1009 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1010 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1011 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1012 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1015 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1016 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1017 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1019 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1020 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1022 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1023 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1025 }else{ // AFR/FR -> AFR/FR
1028 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
// Choose the partitioning of the direct MB from the co-located MB type.
1029 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1030 /* FIXME save sub mb types from previous frames (or derive from MVs)
1031 * so we know exactly what block size to use */
1032 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1033 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1034 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1035 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1036 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1038 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1039 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
// --- Pointers into the co-located picture's motion vectors and ref indices.
1044 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1045 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1046 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1047 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// Advance to the bottom half under an elided condition (numbering gap at 1048-1049).
1050 l1ref0 += h->b8_stride;
1051 l1ref1 += h->b8_stride;
1052 l1mv0 += 2*b4_stride;
1053 l1mv1 += 2*b4_stride;
// ================= Spatial direct prediction =================
1057 if(h->direct_spatial_mv_pred){
1062 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1064 /* ref = min(neighbors) */
1065 for(list=0; list<2; list++){
1066 int refa = h->ref_cache[list][scan8[0] - 1];
1067 int refb = h->ref_cache[list][scan8[0] - 8];
1068 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1069 if(refc == PART_NOT_AVAILABLE)
1070 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// Unsigned compare makes negative (unavailable) refs sort last.
1071 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1076 if(ref[0] < 0 && ref[1] < 0){
// No usable neighbor in either list: zero refs and zero MVs.
1077 ref[0] = ref[1] = 0;
1078 mv[0][0] = mv[0][1] =
1079 mv[1][0] = mv[1][1] = 0;
1081 for(list=0; list<2; list++){
1083 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1085 mv[list][0] = mv[list][1] = 0;
// Drop the unused list when one side has no reference.
1091 *mb_type &= ~MB_TYPE_L1;
1092 sub_mb_type &= ~MB_TYPE_L1;
1093 }else if(ref[0] < 0){
1095 *mb_type &= ~MB_TYPE_L0;
1096 sub_mb_type &= ~MB_TYPE_L0;
// Field/frame mismatch with the co-located MB: per-8x8 handling.
1099 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1100 for(i8=0; i8<4; i8++){
1103 int xy8 = x8+y8*b8_stride;
1104 int xy4 = 3*x8+y8*b4_stride;
1107 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1109 h->sub_mb_type[i8] = sub_mb_type;
1111 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1112 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// Co-located block is "stationary" (ref 0, |mv| <= 1): force zero MVs per spec.
1113 if(!IS_INTRA(mb_type_col[y8])
1114 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1115 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1117 a= pack16to32(mv[0][0],mv[0][1]);
1119 b= pack16to32(mv[1][0],mv[1][1]);
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 b= pack16to32(mv[1][0],mv[1][1]);
1124 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1127 }else if(IS_16X16(*mb_type)){
1130 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1131 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1132 if(!IS_INTRA(mb_type_col[0])
1133 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1134 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264_build check works around a known bug in old x264 encoders.
1135 && (h->x264_build>33 || !h->x264_build)))){
1137 a= pack16to32(mv[0][0],mv[0][1]);
1139 b= pack16to32(mv[1][0],mv[1][1]);
1141 a= pack16to32(mv[0][0],mv[0][1]);
1142 b= pack16to32(mv[1][0],mv[1][1]);
1144 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1145 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1147 for(i8=0; i8<4; i8++){
1148 const int x8 = i8&1;
1149 const int y8 = i8>>1;
1151 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1153 h->sub_mb_type[i8] = sub_mb_type;
1155 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1156 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1157 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1158 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1161 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1162 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1163 && (h->x264_build>33 || !h->x264_build)))){
1164 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1165 if(IS_SUB_8X8(sub_mb_type)){
1166 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1167 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1169 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1171 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 for(i4=0; i4<4; i4++){
1175 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1176 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1178 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1180 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ================= Temporal direct prediction =================
1186 }else{ /* direct temporal mv pred */
1187 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1188 const int *dist_scale_factor = h->dist_scale_factor;
1191 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
// Field MBs in an MBAFF frame use the per-field maps built in direct_dist_scale_factor.
1192 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1193 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1194 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1196 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1199 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1200 /* FIXME assumes direct_8x8_inference == 1 */
// Vertical MV scaling between frame and field coordinate systems.
1201 int y_shift = 2*!IS_INTERLACED(*mb_type);
1203 for(i8=0; i8<4; i8++){
1204 const int x8 = i8&1;
1205 const int y8 = i8>>1;
1207 const int16_t (*l1mv)[2]= l1mv0;
1209 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1211 h->sub_mb_type[i8] = sub_mb_type;
1213 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1214 if(IS_INTRA(mb_type_col[y8])){
// Intra co-located block: ref 0, zero MVs in both lists.
1215 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1216 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1217 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 ref0 = l1ref0[x8 + y8*b8_stride];
1223 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1225 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1228 scale = dist_scale_factor[ref0];
1229 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1232 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1233 int my_col = (mv_col[1]<<y_shift)/2;
// MV = (DistScaleFactor * mvCol + 128) >> 8, per H.264 8.4.1.2.3.
1234 int mx = (scale * mv_col[0] + 128) >> 8;
1235 int my = (scale * my_col + 128) >> 8;
1236 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
// List-1 MV is the complementary vector: mv - mvCol.
1237 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1243 /* one-to-one mv scaling */
1245 if(IS_16X16(*mb_type)){
1248 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1249 if(IS_INTRA(mb_type_col[0])){
1252 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1253 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1254 const int scale = dist_scale_factor[ref0];
1255 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1257 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1260 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1261 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1263 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1264 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1265 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1267 for(i8=0; i8<4; i8++){
1268 const int x8 = i8&1;
1269 const int y8 = i8>>1;
1271 const int16_t (*l1mv)[2]= l1mv0;
1273 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1275 h->sub_mb_type[i8] = sub_mb_type;
1276 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1277 if(IS_INTRA(mb_type_col[0])){
1278 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1279 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1280 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1286 ref0 = map_col_to_list0[0][ref0];
1288 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1291 scale = dist_scale_factor[ref0];
1293 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1294 if(IS_SUB_8X8(sub_mb_type)){
1295 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1296 int mx = (scale * mv_col[0] + 128) >> 8;
1297 int my = (scale * mv_col[1] + 128) >> 8;
1298 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1299 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1301 for(i4=0; i4<4; i4++){
1302 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1303 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1304 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1305 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1306 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1307 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
// Copies the per-MB motion data from the decode caches (h->mv_cache,
// h->ref_cache, h->mvd_cache) back into the frame-wide arrays of the
// current picture, plus the CABAC mvd and direct-mode tables.
// NOTE(review): loop headers for the y-copies and several closing braces are
// elided from this dump (numbering gaps at 1318-1319, 1326-1328, 1335-1336, 1348-1350).
1314 static inline void write_back_motion(H264Context *h, int mb_type){
1315 MpegEncContext * const s = &h->s;
// b_xy/b8_xy: this MB's origin in 4x4-block and 8x8-block coordinates.
1316 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1317 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1320 if(!USES_LIST(mb_type, 0))
1321 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1323 for(list=0; list<h->list_count; list++){
1325 if(!USES_LIST(mb_type, list))
// Copy 4 MVs (two uint64 halves) per row from the cache into the picture.
1329 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1332 if( h->pps.cabac ) {
1333 if(IS_SKIP(mb_type))
1334 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1337 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// One reference index per 8x8 quadrant.
1343 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1344 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1345 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1346 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1347 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// Direct flags are only needed for CABAC B slices; quadrant 0's store is elided here.
1351 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1352 if(IS_8X8(mb_type)){
1353 uint8_t *direct_table = &h->direct_table[b8_xy];
1354 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1355 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1356 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1362 * Decodes a network abstraction layer unit.
1363 * @param consumed is the number of bytes used as input
1364 * @param length is the length of the array
1365 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1366 * @returns decoded bytes, might be src+1 if no escapes
// Unescapes one NAL unit: parses nal_ref_idc / nal_unit_type from the header
// byte, then removes 00 00 03 emulation-prevention bytes, returning a pointer
// to the RBSP payload (src+1 itself when no escapes are present).
// NOTE(review): declarations (i, dst, si, di, bufidx), #else/#endif lines and
// loop/brace closings are elided from this dump — the numbering gaps mark them.
1368 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1373 // src[0]&0x80; //forbidden bit
1374 h->nal_ref_idc= src[0]>>5;
1375 h->nal_unit_type= src[0]&0x1F;
1379 for(i=0; i<length; i++)
1380 printf("%2X ", src[i]);
// Fast scan for a 00 00 xx pattern using word-at-a-time zero-byte detection.
1383 #ifdef HAVE_FAST_UNALIGNED
1384 # ifdef HAVE_FAST_64BIT
1386 for(i=0; i+1<length; i+=9){
1387 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1390 for(i=0; i+1<length; i+=5){
1391 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1394 if(i>0 && !src[i]) i--;
// Portable fallback: scan byte pairs for a zero.
1398 for(i=0; i+1<length; i+=2){
1399 if(src[i]) continue;
1400 if(i>0 && src[i-1]==0) i--;
1402 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1404 /* startcode, so we must be past the end */
1412 if(i>=length-1){ //no escaped 0
1413 *dst_length= length;
1414 *consumed= length+1; //+1 for the header
1418 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1419 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1420 dst= h->rbsp_buffer[bufidx];
1426 //printf("decoding esc\n");
// Copy the escape-free prefix verbatim, then filter the remainder.
1427 memcpy(dst, src, i);
1430 //remove escapes (very rare 1:2^22)
1432 dst[di++]= src[si++];
1433 dst[di++]= src[si++];
1434 }else if(src[si]==0 && src[si+1]==0){
1435 if(src[si+2]==3){ //escape
1440 }else //next start code
1444 dst[di++]= src[si++];
1447 dst[di++]= src[si++];
// Zero the padding so downstream bit readers cannot over-read garbage.
1450 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1453 *consumed= si + 1;//+1 for the header
1454 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1459 * identifies the exact end of the bitstream
1460 * @return the length of the trailing, or 0 if damaged
// Locates the rbsp_stop_one_bit that terminates the RBSP; the body after the
// trace line (and the variable v's definition) is elided from this dump.
1462 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1466 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1476 * IDCT transforms the 16 dc values and dequantizes them.
1477 * @param qp quantization parameter
// 4x4 Hadamard inverse transform + dequant of the 16 luma DC coefficients
// (Intra16x16 MBs). Works in place on `block`, which holds the DC values at
// stride-spaced positions; results are written back scaled by qmul with
// rounding ((x*qmul + 128) >> 8).
// NOTE(review): the `stride` constant, loop headers and the vertical-pass
// temp[] stores are elided here (numbering gaps at 1480-1481, 1487-1488, 1494-1501).
1479 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1482 int temp[16]; //FIXME check if this is a good idea
1483 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1484 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1486 //memset(block, 64, 2*256);
// Vertical butterfly pass (results stored to temp[] on elided lines).
1489 const int offset= y_offset[i];
1490 const int z0= block[offset+stride*0] + block[offset+stride*4];
1491 const int z1= block[offset+stride*0] - block[offset+stride*4];
1492 const int z2= block[offset+stride*1] - block[offset+stride*5];
1493 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Horizontal butterfly pass over temp[], then dequantize and store.
1502 const int offset= x_offset[i];
1503 const int z0= temp[4*0+i] + temp[4*2+i];
1504 const int z1= temp[4*0+i] - temp[4*2+i];
1505 const int z2= temp[4*1+i] - temp[4*3+i];
1506 const int z3= temp[4*1+i] + temp[4*3+i];
1508 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1509 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1510 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1511 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1517 * DCT transforms the 16 dc values.
1518 * @param qp quantization parameter ??? FIXME
// Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
// mirror of h264_luma_dc_dequant_idct_c but outputs (sum)>>1 with no quant.
// NOTE(review): the `stride` constant, loop headers and temp[] stores are
// elided (numbering gaps at 1522, 1526-1527, 1533-1540).
1520 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1521 // const int qmul= dequant_coeff[qp][0];
1523 int temp[16]; //FIXME check if this is a good idea
1524 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1525 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// Vertical butterfly pass.
1528 const int offset= y_offset[i];
1529 const int z0= block[offset+stride*0] + block[offset+stride*4];
1530 const int z1= block[offset+stride*0] - block[offset+stride*4];
1531 const int z2= block[offset+stride*1] - block[offset+stride*5];
1532 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Horizontal butterfly pass; >>1 keeps the transform within DCTELEM range.
1541 const int offset= x_offset[i];
1542 const int z0= temp[4*0+i] + temp[4*2+i];
1543 const int z1= temp[4*0+i] - temp[4*2+i];
1544 const int z2= temp[4*1+i] - temp[4*3+i];
1545 const int z3= temp[4*1+i] + temp[4*3+i];
1547 block[stride*0 +offset]= (z0 + z3)>>1;
1548 block[stride*2 +offset]= (z1 + z2)>>1;
1549 block[stride*8 +offset]= (z1 - z2)>>1;
1550 block[stride*10+offset]= (z0 - z3)>>1;
// 2x2 Hadamard inverse transform + dequant of the 4 chroma DC coefficients,
// in place. The declarations of a..e and the computation of e/d-combinations
// are elided (numbering gaps at 1561-1562, 1567-1572); the stores suggest
// e = a+b and a,b are rewritten before the final lines — confirm in full source.
1558 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1559 const int stride= 16*2;
1560 const int xStride= 16;
1563 a= block[stride*0 + xStride*0];
1564 b= block[stride*0 + xStride*1];
1565 c= block[stride*1 + xStride*0];
1566 d= block[stride*1 + xStride*1];
// Butterfly outputs, dequantized with >>7 (2x2 transform needs half the shift).
1573 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1574 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1575 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1576 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
// Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder side),
// in place, no scaling. Declarations of a..e and the intermediate butterfly
// lines are elided (numbering gaps at 1583-1584, 1589-1594).
1580 static void chroma_dc_dct_c(DCTELEM *block){
1581 const int stride= 16*2;
1582 const int xStride= 16;
1585 a= block[stride*0 + xStride*0];
1586 b= block[stride*0 + xStride*1];
1587 c= block[stride*1 + xStride*0];
1588 d= block[stride*1 + xStride*1];
1595 block[stride*0 + xStride*0]= (a+c);
1596 block[stride*0 + xStride*1]= (e+b);
1597 block[stride*1 + xStride*0]= (a-c);
1598 block[stride*1 + xStride*1]= (e-b);
1603 * gets the chroma qp.
// Maps a luma qscale to the chroma QP for chroma plane t (0=Cb, 1=Cr)
// via the PPS-derived lookup table. Closing brace elided from this dump.
1605 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1606 return h->pps.chroma_qp_table[t][qscale];
// Motion compensation for one partition in one direction (one list):
// quarter-pel luma via qpix_op and eighth-pel chroma via chroma_op, with
// edge emulation when the MV points outside the padded picture.
// NOTE(review): the `emu` flag declaration, the emu checks guarding the
// chroma edge-emulation calls, and closing braces are elided (numbering
// gaps at 1621, 1636-1638, 1640, 1642-1643, 1645-1646, 1650, 1653-1654, etc.).
1609 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1610 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1611 int src_x_offset, int src_y_offset,
1612 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1613 MpegEncContext * const s = &h->s;
// Full-pel position = (cached quarter-pel MV + block offset*8) >> 2.
1614 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1615 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1616 const int luma_xy= (mx&3) + ((my&3)<<2);
1617 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1618 uint8_t * src_cb, * src_cr;
1619 int extra_width= h->emu_edge_width;
1620 int extra_height= h->emu_edge_height;
1622 const int full_mx= mx>>2;
1623 const int full_my= my>>2;
1624 const int pic_width = 16*s->mb_width;
1625 const int pic_height = 16*s->mb_height >> MB_FIELD;
// Sub-pel interpolation reads 3 extra pixels on each side of the block.
1627 if(mx&7) extra_width -= 3;
1628 if(my&7) extra_height -= 3;
1630 if( full_mx < 0-extra_width
1631 || full_my < 0-extra_height
1632 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1633 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1634 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1635 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1639 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
// Non-square partitions do a second call offset by `delta`.
1641 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1644 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1647 // chroma offset when predicting from a field of opposite parity
1648 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1649 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1651 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1652 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1655 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1656 src_cb= s->edge_emu_buffer;
1658 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1661 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1662 src_cr= s->edge_emu_buffer;
1664 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
// Standard (unweighted) MC for one partition: predict from list 0 with the
// *_put functions, then — if bi-predicted — average in list 1 with *_avg.
// NOTE(review): the list0/list1 guards and the qpix_op=qpix_avg switch-over
// are partially elided (numbering gaps at 1682-1683, 1688-1689, 1691-1693).
1667 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1668 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1669 int x_offset, int y_offset,
1670 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1671 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1672 int list0, int list1){
1673 MpegEncContext * const s = &h->s;
1674 qpel_mc_func *qpix_op= qpix_put;
1675 h264_chroma_mc_func chroma_op= chroma_put;
// Advance dest pointers to this partition; offsets are in 8-pel units.
1677 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1678 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1679 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1680 x_offset += 8*s->mb_x;
1681 y_offset += 8*(s->mb_y >> MB_FIELD);
1684 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1685 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1686 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1687 qpix_op, chroma_op);
// After the first prediction, switch to averaging ops for the second list.
1690 chroma_op= chroma_avg;
1694 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1695 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1696 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1697 qpix_op, chroma_op);
// Weighted-prediction MC for one partition. Bi-directional: predict both
// lists into dest and a scratchpad, then blend with implicit (use_weight==2)
// or explicit weights. Uni-directional: predict, then apply the explicit
// per-list weight/offset in place.
// NOTE(review): the list0&&list1 guard, else branches, and closing braces
// are elided (numbering gaps at 1709, 1715-1716, 1724, 1731, 1738, 1748-1749, 1756).
1701 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1702 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1703 int x_offset, int y_offset,
1704 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1705 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1706 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1707 int list0, int list1){
1708 MpegEncContext * const s = &h->s;
1710 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1711 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1712 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1713 x_offset += 8*s->mb_x;
1714 y_offset += 8*(s->mb_y >> MB_FIELD);
1717 /* don't optimize for luma-only case, since B-frames usually
1718 * use implicit weights => chroma too. */
1719 uint8_t *tmp_cb = s->obmc_scratchpad;
1720 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1721 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1722 int refn0 = h->ref_cache[0][ scan8[n] ];
1723 int refn1 = h->ref_cache[1][ scan8[n] ];
1725 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1726 dest_y, dest_cb, dest_cr,
1727 x_offset, y_offset, qpix_put, chroma_put);
1728 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1729 tmp_y, tmp_cb, tmp_cr,
1730 x_offset, y_offset, qpix_put, chroma_put);
1732 if(h->use_weight == 2){
// Implicit weighting: weights depend only on the reference pair, sum to 64.
1733 int weight0 = h->implicit_weight[refn0][refn1];
1734 int weight1 = 64 - weight0;
1735 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1736 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1737 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// Explicit bi-weighting with per-list weights and summed offsets.
1739 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1740 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1741 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1742 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1743 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1744 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1745 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1746 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1747 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Uni-directional path: predict then weight in place.
1750 int list = list1 ? 1 : 0;
1751 int refn = h->ref_cache[list][ scan8[n] ];
1752 Picture *ref= &h->ref_list[list][refn];
1753 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1754 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1755 qpix_put, chroma_put);
1757 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1758 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1759 if(h->use_weight_chroma){
1760 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1761 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1762 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1763 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
// Dispatcher: routes one partition to the weighted MC path (explicit
// weighting, or implicit weighting when the pair's weight is not the
// trivial 32/32 split) or to the standard averaged path.
1768 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1769 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770 int x_offset, int y_offset,
1771 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1773 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1774 int list0, int list1){
1775 if((h->use_weight==2 && list0 && list1
// Implicit weight of 32 means a plain average — the std path is equivalent and cheaper.
1776 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1777 || h->use_weight==1)
1778 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1779 x_offset, y_offset, qpix_put, chroma_put,
1780 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1782 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1783 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
// Issues cache prefetches into the reference picture at the position the
// current MB's MV suggests upcoming MBs will read.
// NOTE(review): a guard on refn (numbering gap at 1791) and the closing
// braces are elided from this dump.
1786 static inline void prefetch_motion(H264Context *h, int list){
1787 /* fetch pixels for estimated mv 4 macroblocks ahead
1788 * optimized for 64byte cache lines */
1789 MpegEncContext * const s = &h->s;
1790 const int refn = h->ref_cache[list][scan8[0]];
// +16*mb_x+8 / +64: bias the address ahead of the current MB position.
1792 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1793 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1794 uint8_t **src= h->ref_list[list][refn].data;
1795 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1796 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Cb and Cr are assumed contiguous: stride src[2]-src[1] covers both planes.
1797 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1798 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
// Top-level inter motion compensation for one macroblock: dispatches to
// mc_part per partition shape (16x16, 16x8, 8x16, or per-8x8 sub-partitions),
// bracketed by prefetches for both reference lists.
// NOTE(review): the 8x8 loop header, `n` computation, and several closing
// braces are elided (numbering gaps at 1837-1842, 1844, 1847, 1871-1874, 1881-1885).
1802 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1803 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1804 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1805 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1806 MpegEncContext * const s = &h->s;
1807 const int mb_xy= h->mb_xy;
1808 const int mb_type= s->current_picture.mb_type[mb_xy];
1810 assert(IS_INTER(mb_type));
1812 prefetch_motion(h, 0);
1814 if(IS_16X16(mb_type)){
1815 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1816 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1817 &weight_op[0], &weight_avg[0],
1818 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1819 }else if(IS_16X8(mb_type)){
// Two 16x8 halves; delta=8 handles the non-square second luma call.
1820 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1821 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1822 &weight_op[1], &weight_avg[1],
1823 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1824 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1825 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1826 &weight_op[1], &weight_avg[1],
1827 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1828 }else if(IS_8X16(mb_type)){
1829 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1830 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1831 &weight_op[2], &weight_avg[2],
1832 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1833 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1834 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1835 &weight_op[2], &weight_avg[2],
1836 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1840 assert(IS_8X8(mb_type));
// Per-8x8 dispatch; n is the 4x4-block index of sub-partition i (declaration elided).
1843 const int sub_mb_type= h->sub_mb_type[i];
1845 int x_offset= (i&1)<<2;
1846 int y_offset= (i&2)<<1;
1848 if(IS_SUB_8X8(sub_mb_type)){
1849 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1850 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1851 &weight_op[3], &weight_avg[3],
1852 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853 }else if(IS_SUB_8X4(sub_mb_type)){
1854 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1855 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1856 &weight_op[4], &weight_avg[4],
1857 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1858 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1859 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1860 &weight_op[4], &weight_avg[4],
1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1862 }else if(IS_SUB_4X8(sub_mb_type)){
1863 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1864 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1865 &weight_op[5], &weight_avg[5],
1866 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1867 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1868 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1869 &weight_op[5], &weight_avg[5],
1870 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1873 assert(IS_SUB_4X4(sub_mb_type));
1875 int sub_x_offset= x_offset + 2*(j&1);
1876 int sub_y_offset= y_offset + (j&2);
1877 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1878 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1879 &weight_op[6], &weight_avg[6],
1880 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1886 prefetch_motion(h, 1);
// Builds cavlc_level_tab: for each CAVLC suffix length and each LEVEL_TAB_BITS
// bit pattern, the precomputed (decoded level, bits consumed) pair; entries
// that don't fit in the table get prefix+100 / LEVEL_TAB_BITS+100 escape
// markers, which the residual decoder handles separately.
// NOTE(review): the declaration of i and closing braces are elided
// (numbering gaps at 1891-1892, 1897, 1906, 1909+).
1889 static av_cold void init_cavlc_level_tab(void){
1890 int suffix_length, mask;
1893 for(suffix_length=0; suffix_length<7; suffix_length++){
1894 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// prefix = number of leading zeros before the marker bit in pattern i.
1895 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1896 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
// Map the unsigned code to a signed level: even -> positive, odd -> negative.
1898 mask= -(level_code&1);
1899 level_code= (((2+level_code)>>1) ^ mask) - mask;
1900 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1901 cavlc_level_tab[suffix_length][i][0]= level_code;
1902 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1903 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1904 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1905 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1907 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1908 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/*
 * One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run, and their chroma-DC variants).
 * Each VLC is given a preallocated static table via
 * table/table_allocated + INIT_VLC_USE_NEW_STATIC, so no heap allocation
 * happens here.
 * NOTE(review): the `done` guard body and several loop headers are missing
 * from this dump; code left byte-identical.
 */
1914 static av_cold void decode_init_vlc(void){
1915 static int done = 0;
// chroma DC coeff_token: 4*5 codes.
1922 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1923 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1924 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1925 &chroma_dc_coeff_token_len [0], 1, 1,
1926 &chroma_dc_coeff_token_bits[0], 1, 1,
1927 INIT_VLC_USE_NEW_STATIC);
// luma coeff_token: 4 context tables packed into one static buffer,
// sliced by `offset`.
1931 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1932 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1933 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1934 &coeff_token_len [i][0], 1, 1,
1935 &coeff_token_bits[i][0], 1, 1,
1936 INIT_VLC_USE_NEW_STATIC);
1937 offset += coeff_token_vlc_tables_size[i];
1940 * This is a one time safety check to make sure that
1941 * the packed static coeff_token_vlc table sizes
1942 * were initialized correctly.
1944 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// chroma DC total_zeros: 3 tables, 4 codes each.
1947 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1948 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1949 init_vlc(&chroma_dc_total_zeros_vlc[i],
1950 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1951 &chroma_dc_total_zeros_len [i][0], 1, 1,
1952 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1953 INIT_VLC_USE_NEW_STATIC);
// luma total_zeros: one table per total_coeff-1 (15 tables).
1955 for(i=0; i<15; i++){
1956 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1957 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1958 init_vlc(&total_zeros_vlc[i],
1959 TOTAL_ZEROS_VLC_BITS, 16,
1960 &total_zeros_len [i][0], 1, 1,
1961 &total_zeros_bits[i][0], 1, 1,
1962 INIT_VLC_USE_NEW_STATIC);
// run_before: 6 small tables, plus a separate run7 table for runs >= 7.
1966 run_vlc[i].table = run_vlc_tables[i];
1967 run_vlc[i].table_allocated = run_vlc_tables_size;
1968 init_vlc(&run_vlc[i],
1970 &run_len [i][0], 1, 1,
1971 &run_bits[i][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 run7_vlc.table = run7_vlc_table,
1975 run7_vlc.table_allocated = run7_vlc_table_size;
1976 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1977 &run_len [6][0], 1, 1,
1978 &run_bits[6][0], 1, 1,
1979 INIT_VLC_USE_NEW_STATIC);
1981 init_cavlc_level_tab();
/*
 * Free all per-context decoder tables allocated by alloc_tables() /
 * context_init(), including the per-thread top_borders and scratchpads.
 * av_freep() NULLs each pointer, so this is safe to call on a partially
 * allocated context (used as the error-cleanup path).
 */
1985 static void free_tables(H264Context *h){
1988 av_freep(&h->intra4x4_pred_mode);
1989 av_freep(&h->chroma_pred_mode_table);
1990 av_freep(&h->cbp_table);
1991 av_freep(&h->mvd_table[0]);
1992 av_freep(&h->mvd_table[1]);
1993 av_freep(&h->direct_table);
1994 av_freep(&h->non_zero_count);
1995 av_freep(&h->slice_table_base);
// slice_table points into slice_table_base (see alloc_tables), so only the
// base is freed and the alias is cleared.
1996 h->slice_table= NULL;
1998 av_freep(&h->mb2b_xy);
1999 av_freep(&h->mb2b8_xy);
// Per-thread buffers: each thread context owns its own borders/scratchpad.
2001 for(i = 0; i < h->s.avctx->thread_count; i++) {
2002 hx = h->thread_context[i];
2004 av_freep(&hx->top_borders[1]);
2005 av_freep(&hx->top_borders[0]);
2006 av_freep(&hx->s.obmc_scratchpad);
/*
 * Build the 8x8 dequantization tables for all 52 QP values from the PPS
 * scaling matrices. If both 8x8 scaling matrices are identical, table 1
 * aliases table 0 instead of being recomputed. `transpose` compensates for
 * IDCT implementations with a permuted coefficient layout.
 * NOTE(review): loop headers for x and some braces are missing from this
 * dump; code left byte-identical.
 */
2010 static void init_dequant8_coeff_table(H264Context *h){
2012 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2013 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2014 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2016 for(i=0; i<2; i++ ){
// Matrices equal -> share table 0, skip recomputation.
2017 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2018 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2022 for(q=0; q<52; q++){
2023 int shift = div6[q];
2026 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2027 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2028 h->pps.scaling_matrix8[i][x]) << shift;
/*
 * Build the 4x4 dequantization tables (6 matrices: intra/inter Y/Cb/Cr)
 * for all 52 QP values from the PPS scaling matrices. Identical scaling
 * matrices share one computed table via the j-scan aliasing below.
 * NOTE(review): the inner j/x loop headers are missing from this dump;
 * code left byte-identical.
 */
2033 static void init_dequant4_coeff_table(H264Context *h){
2035 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2036 for(i=0; i<6; i++ ){
2037 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// If an earlier matrix j is identical, alias its table and skip computing.
2039 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2040 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2047 for(q=0; q<52; q++){
2048 int shift = div6[q] + 2;
2051 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2052 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2053 h->pps.scaling_matrix4[i][x]) << shift;
/*
 * Initialize all dequant tables; 8x8 tables only when the PPS enables
 * 8x8 transforms. With lossless transform bypass (SPS), QP 0 entries are
 * overwritten with the identity scale 1<<6 so dequantization is a no-op.
 * NOTE(review): the bypass loop headers are missing from this dump.
 */
2058 static void init_dequant_tables(H264Context *h){
2060 init_dequant4_coeff_table(h);
2061 if(h->pps.transform_8x8_mode)
2062 init_dequant8_coeff_table(h);
2063 if(h->sps.transform_bypass){
2066 h->dequant4_coeff[i][0][x] = 1<<6;
2067 if(h->pps.transform_8x8_mode)
2070 h->dequant8_coeff[i][0][x] = 1<<6;
2077 * needs width/height
/*
 * Allocate all per-picture decoder tables that depend on mb_width/mb_height
 * (must be known before calling). Returns 0 on success; on allocation
 * failure CHECKED_ALLOCZ jumps to the (not visible here) fail path, which
 * is expected to call free_tables().
 */
2079 static int alloc_tables(H264Context *h){
2080 MpegEncContext * const s = &h->s;
// One extra row of macroblocks for the edge/deblocking context.
2081 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2084 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2086 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2087 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2088 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2090 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2091 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2092 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2093 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset into the base so that
// out-of-frame neighbor lookups stay in bounds.
2095 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2096 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// Macroblock-index -> motion-vector-block-index maps (4x4 and 8x8 grids).
2098 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2099 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2100 for(y=0; y<s->mb_height; y++){
2101 for(x=0; x<s->mb_width; x++){
2102 const int mb_xy= x + y*s->mb_stride;
2103 const int b_xy = 4*x + 4*y*h->b_stride;
2104 const int b8_xy= 2*x + 2*y*h->b8_stride;
2106 h->mb2b_xy [mb_xy]= b_xy;
2107 h->mb2b8_xy[mb_xy]= b8_xy;
// Allocated lazily in frame_start() (needs linesize).
2111 s->obmc_scratchpad = NULL;
2113 if(!h->dequant4_coeff[0])
2114 init_dequant_tables(h);
2123 * Mimic alloc_tables(), but for every context thread.
/*
 * Share alloc_tables() buffers with a per-thread context: every pointer is
 * copied (dst does NOT own them; free_tables must only run on the owner),
 * except obmc_scratchpad, which stays per-thread and is allocated later
 * in frame_start().
 */
2125 static void clone_tables(H264Context *dst, H264Context *src){
2126 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2127 dst->non_zero_count = src->non_zero_count;
2128 dst->slice_table = src->slice_table;
2129 dst->cbp_table = src->cbp_table;
2130 dst->mb2b_xy = src->mb2b_xy;
2131 dst->mb2b8_xy = src->mb2b8_xy;
2132 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2133 dst->mvd_table[0] = src->mvd_table[0];
2134 dst->mvd_table[1] = src->mvd_table[1];
2135 dst->direct_table = src->direct_table;
2137 dst->s.obmc_scratchpad = NULL;
2138 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2143 * Allocate buffers which are not shared amongst multiple threads.
/*
 * Allocate per-thread (non-shared) buffers: one row of saved top borders
 * per field (16 luma + 8+8 chroma bytes per MB). Returns 0 on success;
 * on failure, -1 — the caller's free_tables() releases what was allocated.
 */
2145 static int context_init(H264Context *h){
2146 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2147 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2151 return -1; // free_tables will clean up for us
/*
 * Context setup shared by all entry points: copy codec dimensions, init the
 * intra prediction function table and DSP (the IDCT permutation must be
 * known before the dequant tables are built), and default both scaling
 * matrices to flat 16 (= no custom scaling until a PPS overrides them).
 */
2154 static av_cold void common_init(H264Context *h){
2155 MpegEncContext * const s = &h->s;
2157 s->width = s->avctx->width;
2158 s->height = s->avctx->height;
2159 s->codec_id= s->avctx->codec->id;
2161 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 = "no PPS seen yet"; forces dequant re-init on first PPS.
2163 h->dequant_coeff_pps= -1;
2164 s->unrestricted_mv=1;
2165 s->decode=1; //FIXME
2167 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2169 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2170 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/*
 * AVCodec init callback: set up the MpegEncContext defaults, pick the output
 * pixel format per codec variant (SVQ3 / VDPAU-accelerated / plain H.264),
 * and detect AVC-style ("avcC") extradata by its leading version byte 1.
 * NOTE(review): several lines (flag setup, avcC handling body, return) are
 * missing from this dump; code left byte-identical.
 */
2173 static av_cold int decode_init(AVCodecContext *avctx){
2174 H264Context *h= avctx->priv_data;
2175 MpegEncContext * const s = &h->s;
2177 MPV_decode_defaults(s);
2182 s->out_format = FMT_H264;
2183 s->workaround_bugs= avctx->workaround_bugs;
2186 // s->decode_mb= ff_h263_decode_mb;
2187 s->quarter_sample = 1;
2190 if(avctx->codec_id == CODEC_ID_SVQ3)
2191 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2192 else if(avctx->codec_id == CODEC_ID_H264_VDPAU)
2193 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2195 avctx->pix_fmt= PIX_FMT_YUV420P;
// avcC extradata starts with configurationVersion == 1, as opposed to an
// Annex-B start code.
2199 if(avctx->extradata_size > 0 && avctx->extradata &&
2200 *(char *)avctx->extradata == 1){
2207 h->thread_context[0] = h;
// Sentinels so the first real POC always compares as "new".
2208 h->outputed_poc = INT_MIN;
2209 h->prev_poc_msb= 1<<16;
/*
 * Per-frame setup: start the MPV frame and error resilience, precompute the
 * per-block pixel offsets (frame and field variants), lazily allocate the
 * per-thread bipred scratchpads (needs linesize, hence not in alloc_tables),
 * and reset slice/POC bookkeeping for the new picture.
 * NOTE(review): some lines (error return, loop header, final return) are
 * missing from this dump; code left byte-identical.
 */
2213 static int frame_start(H264Context *h){
2214 MpegEncContext * const s = &h->s;
2217 if(MPV_frame_start(s, s->avctx) < 0)
2219 ff_er_frame_start(s);
2221 * MPV_frame_start uses pict_type to derive key_frame.
2222 * This is incorrect for H.264; IDR markings must be used.
2223 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2224 * See decode_nal_units().
2226 s->current_picture_ptr->key_frame= 0;
2228 assert(s->linesize && s->uvlinesize);
// block_offset[0..15]: luma 4x4 blocks, frame order; [24..39]: field order
// (doubled line stride). Chroma variants follow at +16/+20.
2230 for(i=0; i<16; i++){
2231 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2232 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2235 h->block_offset[16+i]=
2236 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2237 h->block_offset[24+16+i]=
2238 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2241 /* can't be in alloc_tables because linesize isn't known there.
2242 * FIXME: redo bipred weight to not require extra buffer? */
2243 for(i = 0; i < s->avctx->thread_count; i++)
2244 if(!h->thread_context[i]->s.obmc_scratchpad)
2245 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2247 /* some macroblocks will be accessed before they're available */
2248 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2249 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2251 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2253 // We mark the current picture as non-reference after allocating it, so
2254 // that if we break out due to an error it can be released automatically
2255 // in the next MPV_frame_start().
2256 // SVQ3 as well as most other codecs have only last/next/current and thus
2257 // get released even with set reference, besides SVQ3 and others do not
2258 // mark frames as reference later "naturally".
2259 if(s->codec_id != CODEC_ID_SVQ3)
2260 s->current_picture_ptr->reference= 0;
2262 s->current_picture_ptr->field_poc[0]=
2263 s->current_picture_ptr->field_poc[1]= INT_MAX;
2264 assert(s->current_picture_ptr->long_ref==0);
/*
 * Save the bottom row (top_borders) and right-side column (left_border) of
 * the just-decoded macroblock so the deblocking filter of the next MB row /
 * column can read pre-filter pixels. `simple` compiles out the MBAFF path;
 * gray-only decoding skips the chroma saves.
 * NOTE(review): several lines (declarations, else branches, braces) are
 * missing from this dump; code left byte-identical.
 */
2269 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2270 MpegEncContext * const s = &h->s;
2279 src_cb -= uvlinesize;
2280 src_cr -= uvlinesize;
2282 if(!simple && FRAME_MBAFF){
2284 offset = MB_MBAFF ? 1 : 17;
2285 uvoffset= MB_MBAFF ? 1 : 9;
// MBAFF: save the pair's bottom lines (row 15 luma, row 7 chroma).
2287 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2288 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2289 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2290 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2291 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2296 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2297 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2298 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2299 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2305 top_idx = MB_MBAFF ? 0 : 1;
2307 step= MB_MBAFF ? 2 : 1;
2310 // There are two lines saved, the line above the top macroblock of a pair,
2311 // and the line above the bottom macroblock
2312 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2313 for(i=1; i<17 - skiplast; i++){
2314 h->left_border[offset+i*step]= src_y[15+i* linesize];
2317 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2318 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2320 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2321 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2322 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2323 for(i=1; i<9 - skiplast; i++){
2324 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2325 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2327 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2328 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/*
 * Swap (xchg=1) or restore (xchg=0) the saved neighbor border pixels with
 * the frame buffer around the current MB, so intra prediction sees
 * pre-deblocking neighbor samples. deblock_left/top limit the exchange to
 * edges that will actually be filtered (slice-boundary aware when
 * deblocking_filter==2).
 * NOTE(review): the XCHG macro body and several branch lines are missing
 * from this dump; code left byte-identical.
 */
2332 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2333 MpegEncContext * const s = &h->s;
2344 if(!simple && FRAME_MBAFF){
2346 offset = MB_MBAFF ? 1 : 17;
2347 uvoffset= MB_MBAFF ? 1 : 9;
2351 top_idx = MB_MBAFF ? 0 : 1;
2353 step= MB_MBAFF ? 2 : 1;
// deblocking_filter==2: filter stops at slice boundaries, so only exchange
// borders shared with the same slice.
2356 if(h->deblocking_filter == 2) {
2358 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2359 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2361 deblock_left = (s->mb_x > 0);
2362 deblock_top = (s->mb_y > !!MB_FIELD);
// Step back to include the top-left corner pixel.
2365 src_y -= linesize + 1;
2366 src_cb -= uvlinesize + 1;
2367 src_cr -= uvlinesize + 1;
2369 #define XCHG(a,b,t,xchg)\
2376 for(i = !deblock_top; i<16; i++){
2377 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2379 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2383 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2384 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2385 if(s->mb_x+1 < s->mb_width){
2386 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2390 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2392 for(i = !deblock_top; i<8; i++){
2393 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2394 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2396 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2397 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2400 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2401 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/*
 * Reconstruct one macroblock into the current picture: intra prediction or
 * motion compensation, followed by the inverse transform, chroma
 * reconstruction, and deblocking-related border bookkeeping.
 * `simple` is a compile-time flag: when 1, the MBAFF/field, gray-only,
 * intra-PCM and SVQ3 paths are compiled out for the common fast case.
 * NOTE(review): this dump is missing many interleaved lines (else branches,
 * braces, declarations); code left byte-identical throughout.
 */
2406 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2407 MpegEncContext * const s = &h->s;
2408 const int mb_x= s->mb_x;
2409 const int mb_y= s->mb_y;
2410 const int mb_xy= h->mb_xy;
2411 const int mb_type= s->current_picture.mb_type[mb_xy];
2412 uint8_t *dest_y, *dest_cb, *dest_cr;
2413 int linesize, uvlinesize /*dct_offset*/;
2415 int *block_offset = &h->block_offset[0];
2416 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2417 /* is_h264 should always be true if SVQ3 is disabled. */
2418 const int is_h264 = !ENABLE_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2419 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2420 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Destination pointers for this MB in the current picture planes.
2422 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2423 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2424 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2426 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2427 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblock: double strides, use the field block_offset table, and
// for bottom MBs of a pair rewind to the field's own first line.
2429 if (!simple && MB_FIELD) {
2430 linesize = h->mb_linesize = s->linesize * 2;
2431 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2432 block_offset = &h->block_offset[24];
2433 if(mb_y&1){ //FIXME move out of this function?
2434 dest_y -= s->linesize*15;
2435 dest_cb-= s->uvlinesize*7;
2436 dest_cr-= s->uvlinesize*7;
// Re-map ref_cache indices to per-field reference numbering.
2440 for(list=0; list<h->list_count; list++){
2441 if(!USES_LIST(mb_type, list))
2443 if(IS_16X16(mb_type)){
2444 int8_t *ref = &h->ref_cache[list][scan8[0]];
2445 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2447 for(i=0; i<16; i+=4){
2448 int ref = h->ref_cache[list][scan8[i]];
2450 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2456 linesize = h->mb_linesize = s->linesize;
2457 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2458 // dct_offset = s->linesize * 16;
// I_PCM: raw samples were parsed into h->mb; just copy them out.
2461 if (!simple && IS_INTRA_PCM(mb_type)) {
2462 for (i=0; i<16; i++) {
2463 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2465 for (i=0; i<8; i++) {
2466 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2467 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
// Intra: temporarily swap in pre-deblock neighbor borders, predict, then
// swap back.
2470 if(IS_INTRA(mb_type)){
2471 if(h->deblocking_filter)
2472 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2474 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2475 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2476 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2479 if(IS_INTRA4x4(mb_type)){
2480 if(simple || !s->encoding){
2481 if(IS_8x8DCT(mb_type)){
2482 if(transform_bypass){
2484 idct_add = s->dsp.add_pixels8;
2486 idct_dc_add = s->dsp.h264_idct8_dc_add;
2487 idct_add = s->dsp.h264_idct8_add;
// 8x8 intra: predict each 8x8 block then add its residual; nnz==1 with a
// DC-only block uses the cheaper dc_add.
2489 for(i=0; i<16; i+=4){
2490 uint8_t * const ptr= dest_y + block_offset[i];
2491 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2492 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2493 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2495 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2496 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2497 (h->topright_samples_available<<i)&0x4000, linesize);
2499 if(nnz == 1 && h->mb[i*16])
2500 idct_dc_add(ptr, h->mb + i*16, linesize);
2502 idct_add (ptr, h->mb + i*16, linesize);
2507 if(transform_bypass){
2509 idct_add = s->dsp.add_pixels4;
2511 idct_dc_add = s->dsp.h264_idct_dc_add;
2512 idct_add = s->dsp.h264_idct_add;
// 4x4 intra: per-block prediction; topright samples are replicated from
// the rightmost available pixel when unavailable.
2514 for(i=0; i<16; i++){
2515 uint8_t * const ptr= dest_y + block_offset[i];
2516 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2518 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2519 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2523 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2524 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2525 assert(mb_y || linesize <= block_offset[i]);
2526 if(!topright_avail){
2527 tr= ptr[3 - linesize]*0x01010101;
2528 topright= (uint8_t*) &tr;
2530 topright= ptr + 4 - linesize;
2534 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2535 nnz = h->non_zero_count_cache[ scan8[i] ];
2538 if(nnz == 1 && h->mb[i*16])
2539 idct_dc_add(ptr, h->mb + i*16, linesize);
2541 idct_add (ptr, h->mb + i*16, linesize);
2543 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: whole-MB prediction plus separate luma DC transform.
2550 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2552 if(!transform_bypass)
2553 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2555 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2557 if(h->deblocking_filter)
2558 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// Inter: motion compensation (possibly weighted bi-prediction).
2560 hl_motion(h, dest_y, dest_cb, dest_cr,
2561 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2562 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2563 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add luma residuals for non-I4x4 macroblocks.
2567 if(!IS_INTRA4x4(mb_type)){
2569 if(IS_INTRA16x16(mb_type)){
2570 if(transform_bypass){
2571 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2572 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2574 for(i=0; i<16; i++){
2575 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2576 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2580 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2582 }else if(h->cbp&15){
2583 if(transform_bypass){
2584 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2585 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2586 for(i=0; i<16; i+=di){
2587 if(h->non_zero_count_cache[ scan8[i] ]){
2588 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2592 if(IS_8x8DCT(mb_type)){
2593 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2595 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
// SVQ3 residual path.
2600 for(i=0; i<16; i++){
2601 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2602 uint8_t * const ptr= dest_y + block_offset[i];
2603 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residuals (cbp bits 4-5), with the 2x2 chroma DC transform.
2609 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2610 uint8_t *dest[2] = {dest_cb, dest_cr};
2611 if(transform_bypass){
2612 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2613 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2614 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2616 idct_add = s->dsp.add_pixels4;
2617 for(i=16; i<16+8; i++){
2618 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2619 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2623 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2624 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2626 idct_add = s->dsp.h264_idct_add;
2627 idct_dc_add = s->dsp.h264_idct_dc_add;
2628 for(i=16; i<16+8; i++){
2629 if(h->non_zero_count_cache[ scan8[i] ])
2630 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2631 else if(h->mb[i*16])
2632 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2635 for(i=16; i<16+8; i++){
2636 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2637 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2638 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2645 if(h->cbp || IS_INTRA(mb_type))
2646 s->dsp.clear_blocks(h->mb);
// Deblock: save borders first, refill neighbor caches for the filter, then
// run the appropriate (MBAFF vs fast) filter variant.
2648 if(h->deblocking_filter) {
2649 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2650 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2651 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2652 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2653 if (!simple && FRAME_MBAFF) {
2654 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2656 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2662 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast-path wrapper: hl_decode_mb_internal with simple=1, letting the
// always-inline body compile out MBAFF/gray/PCM/SVQ3 handling.
2664 static void hl_decode_mb_simple(H264Context *h){
2665 hl_decode_mb_internal(h, 1);
2669 * Process a macroblock; this handles edge cases, such as interlacing.
// Full-path wrapper (simple=0); av_noinline keeps this rarely-taken
// specialization out of the hot caller.
2671 static void av_noinline hl_decode_mb_complex(H264Context *h){
2672 hl_decode_mb_internal(h, 0);
/*
 * Dispatch macroblock reconstruction to the simple or complex variant.
 * The complex path is needed for small builds, interlaced/complex streams,
 * intra-PCM macroblocks, or lossless (qscale==0) content.
 * NOTE(review): the dispatch `if` line is missing from this dump.
 */
2675 static void hl_decode_mb(H264Context *h){
2676 MpegEncContext * const s = &h->s;
2677 const int mb_xy= h->mb_xy;
2678 const int mb_type= s->current_picture.mb_type[mb_xy];
2679 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2681 if(ENABLE_H264_ENCODER && !s->decode)
2685 hl_decode_mb_complex(h);
2686 else hl_decode_mb_simple(h);
/*
 * Convert a frame Picture in place into a single-field view: for the bottom
 * field, shift each plane down one line; double all linesizes; set the
 * field's reference parity and POC.
 * NOTE(review): pic->reference is (redundantly) re-assigned on every loop
 * iteration — harmless, same final value.
 */
2689 static void pic_as_field(Picture *pic, const int parity){
2691 for (i = 0; i < 4; ++i) {
2692 if (parity == PICT_BOTTOM_FIELD)
2693 pic->data[i] += pic->linesize[i];
2694 pic->reference = parity;
2695 pic->linesize[i] *= 2;
2697 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/*
 * Copy src into dest if src is a reference with the requested parity;
 * field parities additionally convert the copy to a field view and bump
 * pic_id by id_add. Returns whether the copy was made.
 * NOTE(review): the `*dest = *src` copy line and return are missing from
 * this dump; code left byte-identical.
 */
2700 static int split_field_copy(Picture *dest, Picture *src,
2701 int parity, int id_add){
2702 int match = !!(src->reference & parity);
2706 if(parity != PICT_FRAME){
2707 pic_as_field(dest, parity);
2709 dest->pic_id += id_add;
/*
 * Build a default reference list from `in`, alternating between the two
 * field parities (sel and sel^3) as the H.264 field-list construction rules
 * require. pic_id is the long-term index for long refs, otherwise
 * frame_num. Returns the number of entries written (return line missing
 * from this dump).
 */
2716 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2720 while(i[0]<len || i[1]<len){
// Advance each cursor to the next picture referencing the wanted parity.
2721 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2723 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2726 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2730 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2731 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/*
 * Append to `sorted` the pictures from `src` with POC beyond `limit`,
 * ordered by POC: descending when dir==0 (closest smaller first… per the
 * comparison below, pictures with poc > limit chosen by decreasing poc),
 * ascending when dir==1. Used to order B-frame reference lists by POC
 * distance. Returns the count appended (return line missing in this dump).
 */
2738 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2743 best_poc= dir ? INT_MIN : INT_MAX;
2745 for(i=0; i<len; i++){
2746 const int poc= src[i]->poc;
// Select the extreme POC on the `dir` side of limit not yet taken.
2747 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2749 sorted[out_i]= src[i];
2752 if(best_poc == (dir ? INT_MIN : INT_MAX))
2754 limit= sorted[out_i++]->poc - dir;
2760 * fills the default_ref_list.
/*
 * Build the initial (default) reference picture lists for the current
 * slice. B slices: short-term refs sorted by POC around the current POC
 * (mirrored for list 1) then long-term refs; if both lists come out equal
 * and have >1 entry, the first two entries of list 1 are swapped, per the
 * spec. P slices: short-term in decoding order, then long-term.
 * NOTE(review): several lines (loop headers, lens[] bookkeeping, return)
 * are missing from this dump; code left byte-identical.
 */
2762 static int fill_default_ref_list(H264Context *h){
2763 MpegEncContext * const s = &h->s;
2766 if(h->slice_type_nos==FF_B_TYPE){
2767 Picture *sorted[32];
// Field decoding compares against the current field's POC, not frame POC.
2772 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2774 cur_poc= s->current_picture_ptr->poc;
2776 for(list= 0; list<2; list++){
2777 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2778 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2780 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2781 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2784 if(len < h->ref_count[list])
2785 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
// Spec rule: if the two B lists are identical, swap list 1's first pair.
2789 if(lens[0] == lens[1] && lens[1] > 1){
2790 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2792 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2795 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2796 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2798 if(len < h->ref_count[0])
2799 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
// Trace output of the constructed lists (debug builds).
2802 for (i=0; i<h->ref_count[0]; i++) {
2803 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2805 if(h->slice_type_nos==FF_B_TYPE){
2806 for (i=0; i<h->ref_count[1]; i++) {
2807 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2814 static void print_short_term(H264Context *h);
2815 static void print_long_term(H264Context *h);
2818 * Extract structure information about the picture described by pic_num in
2819 * the current decoding context (frame or field). Note that pic_num is
2820 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2821 * @param pic_num picture number for which to extract structure information
2822 * @param structure one of PICT_XXX describing structure of picture
2824 * @return frame number (short term) or long term index of picture
2825 * described by pic_num
/*
 * (See the block comment above.) Splits a field pic_num into structure +
 * number: in field decoding the low bit selects same (1) vs opposite (0)
 * field relative to the current picture structure.
 * NOTE(review): the field/frame branch and shift/return lines are missing
 * from this dump; code left byte-identical.
 */
2827 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2828 MpegEncContext * const s = &h->s;
2830 *structure = s->picture_structure;
2833 /* opposite field */
2834 *structure ^= PICT_FRAME;
/*
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering command move the
 * named short-term (idc 0/1, by pic-num delta) or long-term (idc 2, by
 * index) picture to the current position, shifting the rest down.
 * Afterwards, any hole in the lists is patched with the current picture
 * (flagged FIXME). Returns 0 on success, -1 on bitstream errors (error
 * return lines are among those missing from this dump).
 */
2841 static int decode_ref_pic_list_reordering(H264Context *h){
2842 MpegEncContext * const s = &h->s;
2843 int list, index, pic_structure;
2845 print_short_term(h);
2848 for(list=0; list<h->list_count; list++){
2849 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_lX
2851 if(get_bits1(&s->gb)){
2852 int pred= h->curr_pic_num;
2854 for(index=0; ; index++){
2855 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2856 unsigned int pic_id;
2858 Picture *ref = NULL;
// idc 3 terminates the reordering command list.
2860 if(reordering_of_pic_nums_idc==3)
2863 if(index >= h->ref_count[list]){
2864 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2868 if(reordering_of_pic_nums_idc<3){
2869 if(reordering_of_pic_nums_idc<2){
// Short-term: pic num predicted from the previous command, wrapped
// modulo max_pic_num.
2870 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2873 if(abs_diff_pic_num > h->max_pic_num){
2874 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2878 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2879 else pred+= abs_diff_pic_num;
2880 pred &= h->max_pic_num - 1;
2882 frame_num = pic_num_extract(h, pred, &pic_structure);
// Search short-term refs newest-first for a parity-compatible match.
2884 for(i= h->short_ref_count-1; i>=0; i--){
2885 ref = h->short_ref[i];
2886 assert(ref->reference);
2887 assert(!ref->long_ref);
2889 ref->frame_num == frame_num &&
2890 (ref->reference & pic_structure)
// Long-term: direct index lookup.
2898 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2900 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2903 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2906 ref = h->long_ref[long_idx];
2907 assert(!(ref && !ref->reference));
2908 if(ref && (ref->reference & pic_structure)){
2909 ref->pic_id= pic_id;
2910 assert(ref->long_ref);
2918 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2919 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Remove any existing occurrence of ref, shift entries down, insert at
// `index`; field refs are converted to a field view in place.
2921 for(i=index; i+1<h->ref_count[list]; i++){
2922 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2925 for(; i > index; i--){
2926 h->ref_list[list][i]= h->ref_list[list][i-1];
2928 h->ref_list[list][index]= *ref;
2930 pic_as_field(&h->ref_list[list][index], pic_structure);
2934 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Final sanity pass: no list entry may be missing picture data.
2940 for(list=0; list<h->list_count; list++){
2941 for(index= 0; index < h->ref_count[list]; index++){
2942 if(!h->ref_list[list][index].data[0]){
2943 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2944 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/*
 * For MBAFF decoding, derive per-field reference entries: for each frame
 * reference i, build a top-field view at [16+2*i] and a bottom-field view
 * at [16+2*i+1] (halved height via doubled linesize, data offset for the
 * bottom field), and duplicate the explicit and implicit weights/offsets
 * for both field entries.
 * NOTE(review): inner j-loop headers and the field[0]=*frame copy are
 * missing from this dump; code left byte-identical.
 */
2952 static void fill_mbaff_ref_list(H264Context *h){
2954 for(list=0; list<2; list++){ //FIXME try list_count
2955 for(i=0; i<h->ref_count[list]; i++){
2956 Picture *frame = &h->ref_list[list][i];
2957 Picture *field = &h->ref_list[list][16+2*i];
2960 field[0].linesize[j] <<= 1;
2961 field[0].reference = PICT_TOP_FIELD;
2962 field[0].poc= field[0].field_poc[0];
2963 field[1] = field[0];
2965 field[1].data[j] += frame->linesize[j];
2966 field[1].reference = PICT_BOTTOM_FIELD;
2967 field[1].poc= field[1].field_poc[1];
// Field entries inherit the frame's explicit weighting parameters.
2969 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2970 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2972 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2973 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Implicit weights are duplicated along both ref-index axes.
2977 for(j=0; j<h->ref_count[1]; j++){
2978 for(i=0; i<h->ref_count[0]; i++)
2979 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2980 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2981 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/*
 * Parse pred_weight_table() from the slice header: luma/chroma log2 weight
 * denominators, then per-reference explicit weights and offsets for each
 * list (list 1 only for B slices). use_weight / use_weight_chroma are set
 * as soon as any weight deviates from the implicit default
 * (1 << denom, offset 0). Returns 0 (return line missing from this dump).
 */
2985 static int pred_weight_table(H264Context *h){
2986 MpegEncContext * const s = &h->s;
2988 int luma_def, chroma_def;
2991 h->use_weight_chroma= 0;
2992 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2993 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2994 luma_def = 1<<h->luma_log2_weight_denom;
2995 chroma_def = 1<<h->chroma_log2_weight_denom;
2997 for(list=0; list<2; list++){
2998 for(i=0; i<h->ref_count[list]; i++){
2999 int luma_weight_flag, chroma_weight_flag;
3001 luma_weight_flag= get_bits1(&s->gb);
3002 if(luma_weight_flag){
3003 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3004 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Only a non-default weight/offset activates weighted prediction.
3005 if( h->luma_weight[list][i] != luma_def
3006 || h->luma_offset[list][i] != 0)
3009 h->luma_weight[list][i]= luma_def;
3010 h->luma_offset[list][i]= 0;
3014 chroma_weight_flag= get_bits1(&s->gb);
3015 if(chroma_weight_flag){
3018 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3019 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3020 if( h->chroma_weight[list][i][j] != chroma_def
3021 || h->chroma_offset[list][i][j] != 0)
3022 h->use_weight_chroma= 1;
3027 h->chroma_weight[list][i][j]= chroma_def;
3028 h->chroma_offset[list][i][j]= 0;
3033 if(h->slice_type_nos != FF_B_TYPE) break;
3035 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Compute the implicit weighted-prediction table for B slices
 * (weighted_bipred_idc == 2, H.264 8.4.2.3.2).
 * For every (ref0, ref1) pair the weight is derived from the POC distances:
 * td = poc1 - poc0, tb = cur - poc0, then the clipped distance scale factor;
 * out-of-range factors fall back to the equal weight 32. The special case of
 * a single symmetric reference pair disables weighting entirely.
 * NOTE(review): lines between the visible ones (early return, td==0 guard,
 * closing braces) are missing from this excerpt.
 */
3039 static void implicit_weight_table(H264Context *h){
3040     MpegEncContext * const s = &h->s;
3042     int cur_poc = s->current_picture_ptr->poc;
// One reference each side, placed symmetrically around the current picture:
// implicit weighting degenerates to 32/32, so skip the table.
3044     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
3045        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3047         h->use_weight_chroma= 0;
// use_weight==2 marks implicit (table-driven) weighting; denominators fixed at 5.
3052     h->use_weight_chroma= 2;
3053     h->luma_log2_weight_denom= 5;
3054     h->chroma_log2_weight_denom= 5;
3056     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3057         int poc0 = h->ref_list[0][ref0].poc;
3058         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3059             int poc1 = h->ref_list[1][ref1].poc;
// td/tb clipped to [-128,127] per the spec before computing the scale factor.
3060             int td = av_clip(poc1 - poc0, -128, 127);
3062                 int tb = av_clip(cur_poc - poc0, -128, 127);
3063                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3064                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Outside the legal range the spec mandates the default equal weight.
3065                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3066                     h->implicit_weight[ref0][ref1] = 32;
3068                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3070                 h->implicit_weight[ref0][ref1] = 32;
3076  * Mark a picture as no longer needed for reference. The refmask
3077  * argument allows unreferencing of individual fields or the whole frame.
3078  * If the picture becomes entirely unreferenced, but is being held for
3079  * display purposes, it is marked as such.
3080  * @param refmask mask of fields to unreference; the mask is bitwise
3081  * anded with the reference marking of pic
3082  * @return non-zero if pic becomes entirely unreferenced (except possibly
3083  * for display purposes) zero if one of the fields remains in
3086 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// If any field bit survives the mask, the picture is still a reference.
3088     if (pic->reference &= refmask) {
// Entirely unreferenced: keep it alive only if it is still awaiting output.
3091         for(i = 0; h->delayed_pic[i]; i++)
3092             if(pic == h->delayed_pic[i]){
// DELAYED_PIC_REF keeps the frame buffer pinned until it is displayed.
3093                 pic->reference=DELAYED_PIC_REF;
3101  * instantaneous decoder refresh.
3103 static void idr(H264Context *h){
// An IDR invalidates every reference picture: drop all long-term refs...
3106     for(i=0; i<16; i++){
3107         remove_long(h, i, 0);
3109     assert(h->long_ref_count==0);
// ...and all short-term refs.
3111     for(i=0; i<h->short_ref_count; i++){
3112         unreference_pic(h, h->short_ref[i], 0);
3113         h->short_ref[i]= NULL;
3115     h->short_ref_count=0;
// frame_num restarts from 0 after an IDR, so reset the prediction state.
3116     h->prev_frame_num= 0;
3117     h->prev_frame_num_offset= 0;
3122 /* forget old pics after a seek */
3123 static void flush_dpb(AVCodecContext *avctx){
3124     H264Context *h= avctx->priv_data;
// Release every picture held only for delayed output.
3126     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3127         if(h->delayed_pic[i])
3128             h->delayed_pic[i]->reference= 0;
3129         h->delayed_pic[i]= NULL;
// INT_MIN == "no picture output yet", so the next POC always qualifies.
3131     h->outputed_poc= INT_MIN;
3133     if(h->s.current_picture_ptr)
3134         h->s.current_picture_ptr->reference= 0;
3135     h->s.first_field= 0;
// Let the generic MPEG layer flush its own buffers as well.
3136     ff_mpeg_flush(avctx);
3140  * Find a Picture in the short term reference list by frame number.
3141  * @param frame_num frame number to search for
3142  * @param idx the index into h->short_ref where returned picture is found
3143  * undefined if no picture found.
3144  * @return pointer to the found picture, or NULL if no pic with the provided
3145  * frame number is found
3147 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3148     MpegEncContext * const s = &h->s;
// Linear scan; short_ref is small (<= 16 entries), so this is cheap.
3151     for(i=0; i<h->short_ref_count; i++){
3152         Picture *pic= h->short_ref[i];
3153         if(s->avctx->debug&FF_DEBUG_MMCO)
3154             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3155         if(pic->frame_num == frame_num) {
3164  * Remove a picture from the short term reference list by its index in
3165  * that list. This does no checking on the provided index; it is assumed
3166  * to be valid. Other list entries are shifted down.
3167  * @param i index into h->short_ref of picture to remove.
3169 static void remove_short_at_index(H264Context *h, int i){
3170     assert(i >= 0 && i < h->short_ref_count);
3171     h->short_ref[i]= NULL;
// Close the gap left by the removed entry; count was already decremented,
// so (short_ref_count - i) is exactly the number of entries to shift.
3172     if (--h->short_ref_count)
3173         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3178  * @return the removed picture or NULL if an error occurs
3180 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3181     MpegEncContext * const s = &h->s;
3185     if(s->avctx->debug&FF_DEBUG_MMCO)
3186         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
// Locate by frame number; i receives the list index for the removal below.
3188     pic = find_short(h, frame_num, &i);
// Only drop the list entry if the picture became fully unreferenced
// (ref_mask may unreference just one field of a field pair).
3190         if(unreference_pic(h, pic, ref_mask))
3191             remove_short_at_index(h, i);
3198  * Remove a picture from the long term reference list by its index in
3200  * @return the removed picture or NULL if an error occurs
3202 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3205     pic= h->long_ref[i];
// Clear the slot only when the picture is fully unreferenced; a field-only
// mask may leave the other field marked and the slot occupied.
3207         if(unreference_pic(h, pic, ref_mask)){
3208             assert(h->long_ref[i]->long_ref == 1);
3209             h->long_ref[i]->long_ref= 0;
3210             h->long_ref[i]= NULL;
3211             h->long_ref_count--;
3219  * print short term list
3221 static void print_short_term(H264Context *h) {
// Debug-only dump; gated on the MMCO debug flag so it costs nothing otherwise.
3223     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3224         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3225         for(i=0; i<h->short_ref_count; i++){
3226             Picture *pic= h->short_ref[i];
3227             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3233  * print long term list
3235 static void print_long_term(H264Context *h) {
// Debug-only dump of all 16 long-term slots (empty slots presumably skipped
// by a NULL check on a line missing from this excerpt).
3237     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3238         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3239         for(i = 0; i < 16; i++){
3240             Picture *pic= h->long_ref[i];
3242                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3249  * Executes the reference picture marking (memory management control operations).
3251 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3252     MpegEncContext * const s = &h->s;
// Set once an MMCO has placed the current picture in a reference list.
3254     int current_ref_assigned=0;
3257     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3258         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
// Apply each decoded MMCO in bitstream order (H.264 8.2.5.4).
3260     for(i=0; i<mmco_count; i++){
3261         int structure, frame_num;
3262         if(s->avctx->debug&FF_DEBUG_MMCO)
3263             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// Both opcodes that name a short-term picture resolve it up front.
3265         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
3266            || mmco[i].opcode == MMCO_SHORT2LONG){
3267             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3268             pic = find_short(h, frame_num, &j);
// Missing short ref is only an error unless SHORT2LONG already moved it
// to the named long-term slot in a previous pass/field.
3270                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3271                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3272                     av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3277         switch(mmco[i].opcode){
3278         case MMCO_SHORT2UNUSED:
3279             if(s->avctx->debug&FF_DEBUG_MMCO)
3280                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// structure ^ PICT_FRAME yields the field mask to KEEP referenced.
3281             remove_short(h, frame_num, structure ^ PICT_FRAME);
3283         case MMCO_SHORT2LONG:
// Evict whatever currently occupies the target long-term slot,
// then move the short-term picture into it.
3284                 if (h->long_ref[mmco[i].long_arg] != pic)
3285                     remove_long(h, mmco[i].long_arg, 0);
3287                 remove_short_at_index(h, j);
3288                 h->long_ref[ mmco[i].long_arg ]= pic;
3289                 if (h->long_ref[ mmco[i].long_arg ]){
3290                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3291                     h->long_ref_count++;
3294         case MMCO_LONG2UNUSED:
3295             j = pic_num_extract(h, mmco[i].long_arg, &structure);
3296             pic = h->long_ref[j];
3298                 remove_long(h, j, structure ^ PICT_FRAME);
3299             } else if(s->avctx->debug&FF_DEBUG_MMCO)
3300                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
// MMCO_LONG: assign the CURRENT picture to a long-term slot.
3303             // Comment below left from previous code as it is an interresting note.
3304             /* First field in pair is in short term list or
3305              * at a different long term index.
3306              * This is not allowed; see 7.4.3.3, notes 2 and 3.
3307              * Report the problem and keep the pair where it is,
3308              * and mark this field valid.
3311             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3312                 remove_long(h, mmco[i].long_arg, 0);
3314                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3315                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3316                 h->long_ref_count++;
3319             s->current_picture_ptr->reference |= s->picture_structure;
3320             current_ref_assigned=1;
3322         case MMCO_SET_MAX_LONG:
3323             assert(mmco[i].long_arg <= 16);
3324             // just remove the long term which index is greater than new max
3325             for(j = mmco[i].long_arg; j<16; j++){
3326                 remove_long(h, j, 0);
// MMCO_RESET (presumably; opcode label missing from this excerpt):
// drop every reference and restart POC/frame_num from zero.
3330             while(h->short_ref_count){
3331                 remove_short(h, h->short_ref[0]->frame_num, 0);
3333             for(j = 0; j < 16; j++) {
3334                 remove_long(h, j, 0);
3336             s->current_picture_ptr->poc=
3337             s->current_picture_ptr->field_poc[0]=
3338             s->current_picture_ptr->field_poc[1]=
3342             s->current_picture_ptr->frame_num= 0;
// No MMCO put the current picture in a list: apply default sliding-window
// marking, or recognize the second field of a complementary pair.
3348     if (!current_ref_assigned) {
3349         /* Second field of complementary field pair; the first field of
3350          * which is already referenced. If short referenced, it
3351          * should be first entry in short_ref. If not, it must exist
3352          * in long_ref; trying to put it on the short list here is an
3353          * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3355         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3356             /* Just mark the second field valid */
3357             s->current_picture_ptr->reference = PICT_FRAME;
3358         } else if (s->current_picture_ptr->long_ref) {
3359             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3360                    "assignment for second field "
3361                    "in complementary field pair "
3362                    "(first field is long term)\n");
// A short ref with this frame_num must not already exist; purge it if so.
3364             pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3366                 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// Insert the current picture at the head of the short-term list.
3369             if(h->short_ref_count)
3370                 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3372             h->short_ref[0]= s->current_picture_ptr;
3373             h->short_ref_count++;
3374             s->current_picture_ptr->reference |= s->picture_structure;
// Defensive overflow handling for corrupt streams: drop one reference so the
// fixed-size short_ref/long_ref arrays cannot be overrun.
3378     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3380         /* We have too many reference frames, probably due to corrupted
3381          * stream. Need to discard one frame. Prevents overrun of the
3382          * short_ref and long_ref buffers.
3384         av_log(h->s.avctx, AV_LOG_ERROR,
3385                "number of reference frames exceeds max (probably "
3386                "corrupt input), discarding one\n");
3388         if (h->long_ref_count && !h->short_ref_count) {
3389             for (i = 0; i < 16; ++i)
3394             remove_long(h, i, 0);
// Otherwise discard the oldest short-term reference (last list entry).
3396             pic = h->short_ref[h->short_ref_count - 1];
3397             remove_short(h, pic->frame_num, 0);
3401     print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header (H.264 7.3.3.3) into
 * h->mmco[]. IDR slices get a synthetic marking; otherwise, if the adaptive
 * flag is set, explicit MMCOs are read until MMCO_END; if not, a sliding-
 * window MMCO_SHORT2UNUSED is synthesized when the DPB is full.
 * NOTE(review): several lines (IDR long_term_reference_flag branch, loop
 * closing braces, mmco count assignment, return) are missing from this view.
 */
3406 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3407     MpegEncContext * const s = &h->s;
3411     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag: 1 -> broken_link=0, 0 -> broken_link=-1.
3412         s->broken_link= get_bits1(gb) -1;
// IDR with long_term_reference_flag set: mark current pic long-term at idx 0.
3414             h->mmco[0].opcode= MMCO_LONG;
3415             h->mmco[0].long_arg= 0;
3419         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3420             for(i= 0; i<MAX_MMCO_COUNT; i++) {
3421                 MMCOOpcode opcode= get_ue_golomb_31(gb);
3423                 h->mmco[i].opcode= opcode;
3424                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num.
3425                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3426 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3427                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3431                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3432                     unsigned int long_arg= get_ue_golomb_31(gb);
// 16..31 is only legal for LONG2UNUSED in field pictures (field pic nums).
3433                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3434                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3437                     h->mmco[i].long_arg= long_arg;
3440                 if(opcode > (unsigned)MMCO_LONG){
3441                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3444                 if(opcode == MMCO_END)
// Non-adaptive mode: sliding window. If the window is full, synthesize an
// unreference of the oldest short-term picture...
3449             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3451             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3452                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3453                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3454                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// ...and for field pictures drop both fields (two pic nums per frame).
3456                 if (FIELD_PICTURE) {
3457                     h->mmco[0].short_pic_num *= 2;
3458                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3459                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) for the current picture according
 * to the SPS poc_type (H.264 8.2.1):
 *  - type 0: MSB/LSB arithmetic from poc_lsb and the previous picture's state;
 *  - type 1: expected POC derived from the offset_for_ref_frame cycle;
 *  - type 2 (else branch, partly missing here): POC from frame_num directly.
 * Stores the result into the current picture's field_poc[] (respecting the
 * picture structure) and sets poc to the smaller field POC.
 * NOTE(review): several lines (field_poc declaration, field_poc[0] assignment
 * for type 0, parts of the type-2 branch, return) are missing from this view.
 */
3469 static int init_poc(H264Context *h){
3470     MpegEncContext * const s = &h->s;
3471     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3473     Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture -> bump the offset by one period.
3475     h->frame_num_offset= h->prev_frame_num_offset;
3476     if(h->frame_num < h->prev_frame_num)
3477         h->frame_num_offset += max_frame_num;
3479     if(h->sps.poc_type==0){
3480         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// Standard MSB carry/borrow detection on the wrapped LSB (8.2.1.1).
3482         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3483             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3484         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3485             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3487             h->poc_msb = h->prev_poc_msb;
3488 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3490         field_poc[1] = h->poc_msb + h->poc_lsb;
3491         if(s->picture_structure == PICT_FRAME)
3492             field_poc[1] += h->delta_poc_bottom;
3493     }else if(h->sps.poc_type==1){
3494         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3497         if(h->sps.poc_cycle_length != 0)
3498             abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures count one less in the cycle (8.2.1.2).
3502         if(h->nal_ref_idc==0 && abs_frame_num > 0)
3505         expected_delta_per_poc_cycle = 0;
3506         for(i=0; i < h->sps.poc_cycle_length; i++)
3507             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3509         if(abs_frame_num > 0){
3510             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3511             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3513             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3514             for(i = 0; i <= frame_num_in_poc_cycle; i++)
3515                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3519         if(h->nal_ref_idc == 0)
3520             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3522         field_poc[0] = expectedpoc + h->delta_poc[0];
3523         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3525         if(s->picture_structure == PICT_FRAME)
3526             field_poc[1] += h->delta_poc[1];
// poc_type 2: POC follows decoding order, derived directly from frame_num.
3528         int poc= 2*(h->frame_num_offset + h->frame_num);
// Only write the field POC(s) that belong to this picture structure.
3537     if(s->picture_structure != PICT_BOTTOM_FIELD)
3538         s->current_picture_ptr->field_poc[0]= field_poc[0];
3539     if(s->picture_structure != PICT_TOP_FIELD)
3540         s->current_picture_ptr->field_poc[1]= field_poc[1];
3541     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3548  * initialize scan tables
3550 static void init_scan_tables(H264Context *h){
3551     MpegEncContext * const s = &h->s;
// If the IDCT is the plain C version, coefficients are in natural order and
// the stock scan tables apply; SIMD IDCTs use a permuted coefficient layout,
// so the scan tables must be permuted accordingly (the T() macros below).
3553     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3554         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3555         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
3557         for(i=0; i<16; i++){
// 4x4 permutation: swap the two bit-pairs of the coefficient index.
3558 #define T(x) (x>>2) | ((x<<2) & 0xF)
3559             h->zigzag_scan[i] = T(zigzag_scan[i]);
3560             h-> field_scan[i] = T( field_scan[i]);
3564     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3565         memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
3566         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3567         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
3568         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
3570         for(i=0; i<64; i++){
// 8x8 permutation: swap row and column halves of the 6-bit index.
3571 #define T(x) (x>>3) | ((x&7)<<3)
3572             h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
3573             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3574             h->field_scan8x8[i]        = T(field_scan8x8[i]);
3575             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
// Lossless (transform-bypass) blocks always use the unpermuted tables,
// since no IDCT (and hence no coefficient permutation) is applied.
3579     if(h->sps.transform_bypass){ //FIXME same ugly
3580         h->zigzag_scan_q0          = zigzag_scan;
3581         h->zigzag_scan8x8_q0       = zigzag_scan8x8;
3582         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3583         h->field_scan_q0           = field_scan;
3584         h->field_scan8x8_q0        = field_scan8x8;
3585         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
3587         h->zigzag_scan_q0          = h->zigzag_scan;
3588         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
3589         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3590         h->field_scan_q0           = h->field_scan;
3591         h->field_scan8x8_q0        = h->field_scan8x8;
3592         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
3597  * Replicates H264 "master" context to thread contexts.
3599 static void clone_slice(H264Context *dst, H264Context *src)
// Shallow copy of the per-frame state a slice thread needs; the copied
// arrays hold Picture structs / pointers, not pixel data, so this is cheap.
3601     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
3602     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
3603     dst->s.current_picture      = src->s.current_picture;
3604     dst->s.linesize             = src->s.linesize;
3605     dst->s.uvlinesize           = src->s.uvlinesize;
3606     dst->s.first_field          = src->s.first_field;
// POC / frame_num prediction state must match so ref-list code agrees.
3608     dst->prev_poc_msb           = src->prev_poc_msb;
3609     dst->prev_poc_lsb           = src->prev_poc_lsb;
3610     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
3611     dst->prev_frame_num         = src->prev_frame_num;
3612     dst->short_ref_count        = src->short_ref_count;
3614     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3615     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3616     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3617     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
3619     memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3620     memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3624  * decodes a slice header.
3625  * This will also call MPV_common_init() and frame_start() as needed.
3627  * @param h h264context
3628  * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3630  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3632 static int decode_slice_header(H264Context *h, H264Context *h0){
3633     MpegEncContext * const s = &h->s;
3634     MpegEncContext * const s0 = &h0->s;
3635     unsigned int first_mb_in_slice;
3636     unsigned int pps_id;
3637     int num_ref_idx_active_override_flag;
3638     unsigned int slice_type, tmp, i, j;
3639     int default_ref_list_done = 0;
3640     int last_pic_structure;
// A picture with nal_ref_idc==0 is never used for reference -> droppable.
3642     s->dropable= h->nal_ref_idc == 0;
// FAST flag: use cheaper 2-tap qpel interpolation on non-reference frames.
3644     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3645         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3646         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3648         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3649         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3652     first_mb_in_slice= get_ue_golomb(&s->gb);
// first_mb==0 starts a new picture when feeding the decoder in chunks.
3654     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3655         h0->current_slice = 0;
3656         if (!s0->first_field)
3657             s->current_picture_ptr= NULL;
3660     slice_type= get_ue_golomb_31(&s->gb);
3662         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "fixed for the whole picture" (spec: type - 5).
3667         h->slice_type_fixed=1;
3669         h->slice_type_fixed=0;
3671     slice_type= golomb_to_pict_type[ slice_type ];
// I slices and repeats of the previous slice type can reuse the default
// reference list built earlier.
3672     if (slice_type == FF_I_TYPE
3673         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3674         default_ref_list_done = 1;
3676     h->slice_type= slice_type;
3677     h->slice_type_nos= slice_type & 3;
3679     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3680     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3681         av_log(h->s.avctx, AV_LOG_ERROR,
3682                "B picture before any references, skipping\n");
// --- PPS / SPS activation ---
3686     pps_id= get_ue_golomb(&s->gb);
3687     if(pps_id>=MAX_PPS_COUNT){
3688         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3691     if(!h0->pps_buffers[pps_id]) {
3692         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3695     h->pps= *h0->pps_buffers[pps_id];
3697     if(!h0->sps_buffers[h->pps.sps_id]) {
3698         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3701     h->sps = *h0->sps_buffers[h->pps.sps_id];
// Dequant tables depend on the PPS scaling lists; rebuild on PPS change
// (master context only).
3703     if(h == h0 && h->dequant_coeff_pps != pps_id){
3704         h->dequant_coeff_pps = pps_id;
3705         init_dequant_tables(h);
// --- Derive picture dimensions from the active SPS ---
3708     s->mb_width= h->sps.mb_width;
3709     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3711     h->b_stride=  s->mb_width*4;
3712     h->b8_stride= s->mb_width*2;
3714     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3715     if(h->sps.frame_mbs_only_flag)
3716         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
// Interlaced content: cropping applies per field, hence the doubled factor.
3718         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3720     if (s->context_initialized
3721         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
3723             return -1;   // width / height changed during parallelized decoding
3725         flush_dpb(s->avctx);
3728     if (!s->context_initialized) {
3730             return -1;  // we cant (re-)initialize context during parallel decoding
3731         if (MPV_common_init(s) < 0)
3735         init_scan_tables(h);
// Clone the master context into per-thread H264Contexts (slice threading).
3738         for(i = 1; i < s->avctx->thread_count; i++) {
3740             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3741             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
// Zero only the H264-specific tail; the MpegEncContext part was just copied.
3742             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3745             init_scan_tables(c);
3749         for(i = 0; i < s->avctx->thread_count; i++)
3750             if(context_init(h->thread_context[i]) < 0)
3753         s->avctx->width = s->width;
3754         s->avctx->height = s->height;
3755         s->avctx->sample_aspect_ratio= h->sps.sar;
3756         if(!s->avctx->sample_aspect_ratio.den)
3757             s->avctx->sample_aspect_ratio.den = 1;
3759         if(h->sps.timing_info_present_flag){
3760             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Old x264 builds (< 44) wrote a wrong time_scale; compensate here.
3761             if(h->x264_build > 0 && h->x264_build < 44)
3762                 s->avctx->time_base.den *= 2;
3763             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3764                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3768     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// --- Picture structure (frame / field / MBAFF) ---
3771     h->mb_aff_frame = 0;
3772     last_pic_structure = s0->picture_structure;
3773     if(h->sps.frame_mbs_only_flag){
3774         s->picture_structure= PICT_FRAME;
3776         if(get_bits1(&s->gb)) { //field_pic_flag
3777             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3779             s->picture_structure= PICT_FRAME;
3780             h->mb_aff_frame = h->sps.mb_aff;
3783     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3785     if(h0->current_slice == 0){
// Frame-num gap: synthesize the missing reference frames so prediction
// state stays consistent (H.264 8.2.5.2 "gaps in frame_num").
3786         while(h->frame_num !=  h->prev_frame_num &&
3787               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3788             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3790             h->prev_frame_num++;
3791             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3792             s->current_picture_ptr->frame_num= h->prev_frame_num;
3793             execute_ref_pic_marking(h, NULL, 0);
3796         /* See if we have a decoded first field looking for a pair... */
3797         if (s0->first_field) {
3798             assert(s0->current_picture_ptr);
3799             assert(s0->current_picture_ptr->data[0]);
3800             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3802             /* figure out if we have a complementary field pair */
3803             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3805                  * Previous field is unmatched. Don't display it, but let it
3806                  * remain for reference if marked as such.
3808                 s0->current_picture_ptr = NULL;
3809                 s0->first_field = FIELD_PICTURE;
3812                 if (h->nal_ref_idc &&
3813                     s0->current_picture_ptr->reference &&
3814                     s0->current_picture_ptr->frame_num != h->frame_num) {
3816                      * This and previous field were reference, but had
3817                      * different frame_nums. Consider this field first in
3818                      * pair. Throw away previous field except for reference
3821                     s0->first_field = 1;
3822                     s0->current_picture_ptr = NULL;
3825                     /* Second field in complementary pair */
3826                     s0->first_field = 0;
3831             /* Frame or first field in a potentially complementary pair */
3832             assert(!s0->current_picture_ptr);
3833             s0->first_field = FIELD_PICTURE;
// Allocate/start a new frame unless this is the second field of a pair.
3836         if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3837             s0->first_field = 0;
3844     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3846     assert(s->mb_num == s->mb_width * s->mb_height);
3847     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3848        first_mb_in_slice                    >= s->mb_num){
3849         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3852     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3853     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3854     if (s->picture_structure == PICT_BOTTOM_FIELD)
3855         s->resync_mb_y = s->mb_y = s->mb_y + 1;
3856     assert(s->mb_y < s->mb_height);
// Frame pictures use frame_num directly; field pictures double it (+1 for
// the second field) and gain one extra bit of pic-num range.
3858     if(s->picture_structure==PICT_FRAME){
3859         h->curr_pic_num=   h->frame_num;
3860         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3862         h->curr_pic_num= 2*h->frame_num + 1;
3863         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3866     if(h->nal_unit_type == NAL_IDR_SLICE){
3867         get_ue_golomb(&s->gb); /* idr_pic_id */
// --- POC-related syntax elements ---
3870     if(h->sps.poc_type==0){
3871         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3873         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3874             h->delta_poc_bottom= get_se_golomb(&s->gb);
3878     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3879         h->delta_poc[0]= get_se_golomb(&s->gb);
3881         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3882             h->delta_poc[1]= get_se_golomb(&s->gb);
3887     if(h->pps.redundant_pic_cnt_present){
3888         h->redundant_pic_count= get_ue_golomb(&s->gb);
3891     //set defaults, might be overridden a few lines later
3892     h->ref_count[0]= h->pps.ref_count[0];
3893     h->ref_count[1]= h->pps.ref_count[1];
3895     if(h->slice_type_nos != FF_I_TYPE){
3896         if(h->slice_type_nos == FF_B_TYPE){
3897             h->direct_spatial_mv_pred= get_bits1(&s->gb);
3899         num_ref_idx_active_override_flag= get_bits1(&s->gb);
3901         if(num_ref_idx_active_override_flag){
3902             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3903             if(h->slice_type_nos==FF_B_TYPE)
3904                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Unsigned-wrap trick: ref_count of 0 or > 32 both fail this check.
3906             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3907                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3908                 h->ref_count[0]= h->ref_count[1]= 1;
3912         if(h->slice_type_nos == FF_B_TYPE)
// --- Reference list construction and weighting ---
3919     if(!default_ref_list_done){
3920         fill_default_ref_list(h);
3923     if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3926     if(h->slice_type_nos!=FF_I_TYPE){
3927         s->last_picture_ptr= &h->ref_list[0][0];
3928         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3930     if(h->slice_type_nos==FF_B_TYPE){
3931         s->next_picture_ptr= &h->ref_list[1][0];
3932         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3935     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
3936        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3937         pred_weight_table(h);
3938     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3939         implicit_weight_table(h);
// Ref pic marking is stream-global state, so always run it on the master h0.
3944         decode_ref_pic_marking(h0, &s->gb);
3947         fill_mbaff_ref_list(h);
3949     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3950         direct_dist_scale_factor(h);
3951     direct_ref_list_init(h);
3953     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3954         tmp = get_ue_golomb_31(&s->gb);
3956             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3959         h->cabac_init_idc= tmp;
3962     h->last_qscale_diff = 0;
3963     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3965         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3969     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3970     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3971     //FIXME qscale / qp ... stuff
3972     if(h->slice_type == FF_SP_TYPE){
3973         get_bits1(&s->gb); /* sp_for_switch_flag */
3975     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3976         get_se_golomb(&s->gb); /* slice_qs_delta */
// --- Deblocking filter parameters ---
3979     h->deblocking_filter = 1;
3980     h->slice_alpha_c0_offset = 0;
3981     h->slice_beta_offset = 0;
3982     if( h->pps.deblocking_filter_parameters_present ) {
3983         tmp= get_ue_golomb_31(&s->gb);
3985             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3988         h->deblocking_filter= tmp;
// Bitstream encodes 0=on, 1=off; internal convention is 1=on, 0=off.
3989         if(h->deblocking_filter < 2)
3990             h->deblocking_filter^= 1; // 1<->0
3992         if( h->deblocking_filter ) {
3993             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3994             h->slice_beta_offset     = get_se_golomb(&s->gb) << 1;
// Honor the user's skip_loop_filter discard policy.
3998     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
3999        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4000        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
4001        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4002         h->deblocking_filter= 0;
// Type-1 deblocking crosses slice boundaries, which breaks slice threading.
4004     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4005         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4006             /* Cheat slightly for speed:
4007                Do not bother to deblock across slices. */
4008             h->deblocking_filter = 2;
4010             h0->max_contexts = 1;
4011             if(!h0->single_decode_warning) {
4012                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4013                 h0->single_decode_warning = 1;
4016                 return 1; // deblocking switched inside frame
// Disabled code path (presumably inside #if 0 in the full file): slice
// groups are not supported, hence the placeholder bit count.
4021     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4022         slice_group_change_cycle= get_bits(&s->gb, ?);
4025     h0->last_slice_type = slice_type;
4026     h->slice_num = ++h0->current_slice;
4027     if(h->slice_num >= MAX_SLICES){
4028         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
// Build the per-slice ref->frame mapping used by the loop filter:
// 4*frame_num + field reference bits identifies a field/frame uniquely.
4032         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4036             ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4037                           +(h->ref_list[j][i].reference&3);
4040         for(i=16; i<48; i++)
4041             ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4042                           +(h->ref_list[j][i].reference&3);
4045     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4046     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4048     s->avctx->refs= h->sps.ref_frame_count;
4050     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4051         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4053                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4055                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4056                pps_id, h->frame_num,
4057                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4058                h->ref_count[0], h->ref_count[1],
4060                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4062                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4063                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Read a CAVLC level_prefix: the number of leading zero bits before a 1.
 * Uses the raw bitstream-reader macros to peek a 32-bit window, count the
 * leading zeros via av_log2, then consume exactly prefix+1 bits.
 * NOTE(review): the return statement is missing from this excerpt.
 */
4073 static inline int get_level_prefix(GetBitContext *gb){
4077     OPEN_READER(re, gb);
4078     UPDATE_CACHE(re, gb);
4079     buf=GET_CACHE(re, gb);
// Number of bits up to and including the terminating 1.
4081     log= 32 - av_log2(buf);
4083     print_bin(buf>>(32-log), log);
4084     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4087     LAST_SKIP_BITS(re, gb, log);
4088     CLOSE_READER(re, gb);
/**
 * Check whether the current macroblock may use the 8x8 transform: none of
 * the four packed 16-bit sub_mb_type entries may contain a sub-8x8 split.
 * With direct_8x8_inference_flag, DIRECT sub-blocks are additionally allowed.
 * The 0x0001000100010001ULL factor replicates the type mask into all four
 * 16-bit lanes so one 64-bit AND tests all sub-blocks at once.
 */
4093 static inline int get_dct8x8_allowed(H264Context *h){
4094     if(h->sps.direct_8x8_inference_flag)
4095         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
4097         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4101 * decodes a residual block.
4102 * @param n block index
4103 * @param scantable scantable
4104 * @param max_coeff number of coefficients in the block
4105 * @return <0 if an error occurred
/* CAVLC residual block decoder (H.264 spec 9.2).
 * Decodes coeff_token, trailing-one signs, level values, total_zeros and
 * run_before, then scatters the levels into 'block' via 'scantable',
 * optionally dequantizing with 'qmul'.
 * NOTE(review): this chunk is elided (embedded line numbers jump), so
 * several else-branches, early returns and closing braces are not
 * visible; code is left byte-identical. */
4107 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4108 MpegEncContext * const s = &h->s;
/* maps total_coeff of the neighbours to one of 4 coeff_token VLC tables */
4109 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4111 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4113 //FIXME put trailing_onex into the context
/* ---- coeff_token: chroma DC uses its own VLC, luma picks a table
 *      from the predicted non-zero count of the neighbours ---- */
4115 if(n == CHROMA_DC_BLOCK_INDEX){
4116 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4117 total_coeff= coeff_token>>2;
4119 if(n == LUMA_DC_BLOCK_INDEX){
4120 total_coeff= pred_non_zero_count(h, 0);
4121 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4122 total_coeff= coeff_token>>2;
4124 total_coeff= pred_non_zero_count(h, n);
4125 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4126 total_coeff= coeff_token>>2;
4127 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4131 //FIXME set last_non_zero?
/* bitstream corruption guard */
4135 if(total_coeff > (unsigned)max_coeff) {
4136 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low two bits of coeff_token are the trailing-ones count (0..3) */
4140 trailing_ones= coeff_token&3;
4141 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4142 assert(total_coeff<=16);
/* trailing ones: one sign bit each; peek 3 bits and derive +/-1 levels */
4144 i = show_bits(gb, 3);
4145 skip_bits(gb, trailing_ones);
4146 level[0] = 1-((i&4)>>1);
4147 level[1] = 1-((i&2) );
4148 level[2] = 1-((i&1)<<1);
/* ---- remaining levels (beyond the trailing ones) ---- */
4150 if(trailing_ones<total_coeff) {
4152 int suffix_length = total_coeff > 10 && trailing_ones < 3;
/* fast path: LEVEL_TAB_BITS lookup table; >=100 means "prefix escape" */
4153 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4154 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4156 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4157 if(level_code >= 100){
4158 prefix= level_code - 100;
4159 if(prefix == LEVEL_TAB_BITS)
4160 prefix += get_level_prefix(gb);
4162 //first coefficient has suffix_length equal to 0 or 1
4163 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4165 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4167 level_code= (prefix<<suffix_length); //part
4168 }else if(prefix==14){
4170 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4172 level_code= prefix + get_bits(gb, 4); //part
4174 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4175 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4177 level_code += (1<<(prefix-3))-4096;
/* if fewer than 3 trailing ones, levels start at +/-2 */
4180 if(trailing_ones < 3) level_code += 2;
/* zig-zag map: even codes -> positive, odd -> negative */
4183 mask= -(level_code&1);
4184 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4186 if(trailing_ones < 3) level_code += (level_code>>31)|1;
/* suffix_length adaptation threshold for the first non-trailing level */
4189 if(level_code + 3U > 6U)
4191 level[trailing_ones]= level_code;
4194 //remaining coefficients have suffix_length > 0
4195 for(i=trailing_ones+1;i<total_coeff;i++) {
4196 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4197 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4198 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4200 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4201 if(level_code >= 100){
4202 prefix= level_code - 100;
4203 if(prefix == LEVEL_TAB_BITS){
4204 prefix += get_level_prefix(gb);
4207 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4209 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4211 level_code += (1<<(prefix-3))-4096;
4213 mask= -(level_code&1);
4214 level_code= (((2+level_code)>>1) ^ mask) - mask;
4216 level[i]= level_code;
/* grow suffix_length when |level| exceeds the per-length limit */
4218 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
/* ---- total_zeros: only present when the block is not already full ---- */
4223 if(total_coeff == max_coeff)
4226 if(n == CHROMA_DC_BLOCK_INDEX)
4227 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4229 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* ---- scatter levels into the block, reading run_before between them;
 *      two copies of the loop: raw (qmul==NULL path) and dequantizing ---- */
4232 coeff_num = zeros_left + total_coeff - 1;
4233 j = scantable[coeff_num];
4235 block[j] = level[0];
4236 for(i=1;i<total_coeff;i++) {
4239 else if(zeros_left < 7){
4240 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4242 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4244 zeros_left -= run_before;
4245 coeff_num -= 1 + run_before;
4246 j= scantable[ coeff_num ];
/* dequantizing copy: (level * qmul + 32) >> 6 */
4251 block[j] = (level[0] * qmul[j] + 32)>>6;
4252 for(i=1;i<total_coeff;i++) {
4255 else if(zeros_left < 7){
4256 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4258 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4260 zeros_left -= run_before;
4261 coeff_num -= 1 + run_before;
4262 j= scantable[ coeff_num ];
4264 block[j]= (level[i] * qmul[j] + 32)>>6;
/* negative zeros_left indicates a corrupt stream */
4269 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predicts mb_field_decoding_flag for a skipped MBAFF pair from the left
 * neighbour if it belongs to this slice, otherwise from the top neighbour.
 * NOTE(review): the fallback value when neither neighbour is in this
 * slice (presumably 0) is on an elided line. */
4276 static void predict_field_decoding_flag(H264Context *h){
4277 MpegEncContext * const s = &h->s;
4278 const int mb_xy= h->mb_xy;
4279 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4280 ? s->current_picture.mb_type[mb_xy-1]
4281 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4282 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4284 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4288 * decodes a P_SKIP or B_SKIP macroblock
/* Decodes a P_SKIP or B_SKIP macroblock: clears coefficient counts,
 * derives the motion via direct prediction (B) or P-skip prediction (P),
 * and writes the result back to the picture arrays.
 * NOTE(review): chunk is elided; mb_type initialisation and some braces
 * are not visible. */
4290 static void decode_mb_skip(H264Context *h){
4291 MpegEncContext * const s = &h->s;
4292 const int mb_xy= h->mb_xy;
/* skipped MB has no residual: zero all non-zero-count bookkeeping */
4295 memset(h->non_zero_count[mb_xy], 0, 16);
4296 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4299 mb_type|= MB_TYPE_INTERLACED;
4301 if( h->slice_type_nos == FF_B_TYPE )
4303 // just for fill_caches. pred_direct_motion will set the real mb_type
4304 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4306 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4307 pred_direct_motion(h, &mb_type);
4308 mb_type|= MB_TYPE_SKIP;
/* P-skip path: predict a single 16x16 MV with reference index 0 */
4313 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4315 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4316 pred_pskip_motion(h, &mx, &my);
4317 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4318 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4321 write_back_motion(h, mb_type);
4322 s->current_picture.mb_type[mb_xy]= mb_type;
4323 s->current_picture.qscale_table[mb_xy]= s->qscale;
4324 h->slice_table[ mb_xy ]= h->slice_num;
4325 h->prev_mb_skipped= 1;
4329 * decodes a macroblock
4330 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Decodes one macroblock from a CAVLC-coded slice: skip-run handling,
 * mb_type, intra prediction modes or inter motion/reference data, CBP,
 * qp delta and the residual blocks.
 * Returns 0 on success, negative on error.
 * NOTE(review): this chunk is heavily elided (embedded line numbers
 * jump); many else-branches, returns and closing braces are not visible.
 * Code is left byte-identical; comments only mark the visible sections. */
4332 static int decode_mb_cavlc(H264Context *h){
4333 MpegEncContext * const s = &h->s;
4335 int partition_count;
4336 unsigned int mb_type, cbp;
4337 int dct8x8_allowed= h->pps.transform_8x8_mode;
4339 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4341 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4342 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* ---- skip-run: P/B slices carry an mb_skip_run count ---- */
4344 if(h->slice_type_nos != FF_I_TYPE){
4345 if(s->mb_skip_run==-1)
4346 s->mb_skip_run= get_ue_golomb(&s->gb);
4348 if (s->mb_skip_run--) {
/* MBAFF: field flag is read on the top MB of a pair, or predicted */
4349 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4350 if(s->mb_skip_run==0)
4351 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4353 predict_field_decoding_flag(h);
4360 if( (s->mb_y&1) == 0 )
4361 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4364 h->prev_mb_skipped= 0;
/* ---- mb_type: table lookup per slice type; large values in P/B fall
 *      through to the intra tables (decode_intra_mb) ---- */
4366 mb_type= get_ue_golomb(&s->gb);
4367 if(h->slice_type_nos == FF_B_TYPE){
4369 partition_count= b_mb_type_info[mb_type].partition_count;
4370 mb_type= b_mb_type_info[mb_type].type;
4373 goto decode_intra_mb;
4375 }else if(h->slice_type_nos == FF_P_TYPE){
4377 partition_count= p_mb_type_info[mb_type].partition_count;
4378 mb_type= p_mb_type_info[mb_type].type;
4381 goto decode_intra_mb;
4384 assert(h->slice_type_nos == FF_I_TYPE);
4385 if(h->slice_type == FF_SI_TYPE && mb_type)
4389 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4393 cbp= i_mb_type_info[mb_type].cbp;
4394 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4395 mb_type= i_mb_type_info[mb_type].type;
4399 mb_type |= MB_TYPE_INTERLACED;
4401 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- IPCM: raw byte-aligned samples, no prediction or residual ---- */
4403 if(IS_INTRA_PCM(mb_type)){
4406 // We assume these blocks are very rare so we do not optimize it.
4407 align_get_bits(&s->gb);
4409 // The pixels are stored in the same order as levels in h->mb array.
4410 for(x=0; x < (CHROMA ? 384 : 256); x++){
4411 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4414 // In deblocking, the quantizer is 0
4415 s->current_picture.qscale_table[mb_xy]= 0;
4416 // All coeffs are present
4417 memset(h->non_zero_count[mb_xy], 16, 16);
4419 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MBs see doubled reference counts (one per field) */
4424 h->ref_count[0] <<= 1;
4425 h->ref_count[1] <<= 1;
4428 fill_caches(h, mb_type, 0);
/* ---- intra prediction mode parsing ---- */
4431 if(IS_INTRA(mb_type)){
4433 // init_top_left_availability(h);
4434 if(IS_INTRA4x4(mb_type)){
4437 if(dct8x8_allowed && get_bits1(&s->gb)){
4438 mb_type |= MB_TYPE_8x8DCT;
4442 // fill_intra4x4_pred_table(h);
4443 for(i=0; i<16; i+=di){
4444 int mode= pred_intra_mode(h, i);
/* one flag bit: use predicted mode, else 3-bit remaining mode */
4446 if(!get_bits1(&s->gb)){
4447 const int rem_mode= get_bits(&s->gb, 3);
4448 mode = rem_mode + (rem_mode >= mode);
4452 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4454 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4456 write_back_intra_pred_mode(h);
4457 if( check_intra4x4_pred_mode(h) < 0)
4460 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4461 if(h->intra16x16_pred_mode < 0)
4465 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4468 h->chroma_pred_mode= pred_mode;
/* ---- 8x8 partitions: sub_mb_type + per-sub-block refs and MVs ---- */
4470 }else if(partition_count==4){
4471 int i, j, sub_partition_count[4], list, ref[2][4];
4473 if(h->slice_type_nos == FF_B_TYPE){
4475 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4476 if(h->sub_mb_type[i] >=13){
4477 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4480 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4481 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4483 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4484 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4485 pred_direct_motion(h, &mb_type);
4486 h->ref_cache[0][scan8[4]] =
4487 h->ref_cache[1][scan8[4]] =
4488 h->ref_cache[0][scan8[12]] =
4489 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4492 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4494 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4495 if(h->sub_mb_type[i] >=4){
4496 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4499 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4500 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each 8x8 sub-block */
4504 for(list=0; list<h->list_count; list++){
4505 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4507 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4508 if(IS_DIR(h->sub_mb_type[i], 0, list)){
/* 2 refs: single inverted bit; otherwise ue(v) */
4512 }else if(ref_count == 2){
4513 tmp= get_bits1(&s->gb)^1;
4515 tmp= get_ue_golomb_31(&s->gb);
4517 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4530 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors for each sub-partition */
4532 for(list=0; list<h->list_count; list++){
4534 if(IS_DIRECT(h->sub_mb_type[i])) {
4535 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4538 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4539 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4541 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4542 const int sub_mb_type= h->sub_mb_type[i];
4543 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4544 for(j=0; j<sub_partition_count[i]; j++){
4546 const int index= 4*i + block_width*j;
4547 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4548 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4549 mx += get_se_golomb(&s->gb);
4550 my += get_se_golomb(&s->gb);
4551 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV into the cache cells covered by this sub-shape */
4553 if(IS_SUB_8X8(sub_mb_type)){
4555 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4557 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4558 }else if(IS_SUB_8X4(sub_mb_type)){
4559 mv_cache[ 1 ][0]= mx;
4560 mv_cache[ 1 ][1]= my;
4561 }else if(IS_SUB_4X8(sub_mb_type)){
4562 mv_cache[ 8 ][0]= mx;
4563 mv_cache[ 8 ][1]= my;
4565 mv_cache[ 0 ][0]= mx;
4566 mv_cache[ 0 ][1]= my;
4569 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4575 }else if(IS_DIRECT(mb_type)){
4576 pred_direct_motion(h, &mb_type);
4577 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16 / 16x8 / 8x16 inter partitions ---- */
4579 int list, mx, my, i;
4580 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4581 if(IS_16X16(mb_type)){
4582 for(list=0; list<h->list_count; list++){
4584 if(IS_DIR(mb_type, 0, list)){
4585 if(h->ref_count[list]==1){
4587 }else if(h->ref_count[list]==2){
4588 val= get_bits1(&s->gb)^1;
4590 val= get_ue_golomb_31(&s->gb);
4591 if(val >= h->ref_count[list]){
4592 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4597 val= LIST_NOT_USED&0xFF;
4598 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4600 for(list=0; list<h->list_count; list++){
4602 if(IS_DIR(mb_type, 0, list)){
4603 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4604 mx += get_se_golomb(&s->gb);
4605 my += get_se_golomb(&s->gb);
4606 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4608 val= pack16to32(mx,my);
4611 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4614 else if(IS_16X8(mb_type)){
4615 for(list=0; list<h->list_count; list++){
4618 if(IS_DIR(mb_type, i, list)){
4619 if(h->ref_count[list] == 1){
4621 }else if(h->ref_count[list] == 2){
4622 val= get_bits1(&s->gb)^1;
4624 val= get_ue_golomb_31(&s->gb);
4625 if(val >= h->ref_count[list]){
4626 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4631 val= LIST_NOT_USED&0xFF;
4632 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4635 for(list=0; list<h->list_count; list++){
4638 if(IS_DIR(mb_type, i, list)){
4639 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4640 mx += get_se_golomb(&s->gb);
4641 my += get_se_golomb(&s->gb);
4642 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4644 val= pack16to32(mx,my);
4647 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4651 assert(IS_8X16(mb_type));
4652 for(list=0; list<h->list_count; list++){
4655 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4656 if(h->ref_count[list]==1){
4658 }else if(h->ref_count[list]==2){
4659 val= get_bits1(&s->gb)^1;
4661 val= get_ue_golomb_31(&s->gb);
4662 if(val >= h->ref_count[list]){
4663 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4668 val= LIST_NOT_USED&0xFF;
4669 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4672 for(list=0; list<h->list_count; list++){
4675 if(IS_DIR(mb_type, i, list)){
4676 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4677 mx += get_se_golomb(&s->gb);
4678 my += get_se_golomb(&s->gb);
4679 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4681 val= pack16to32(mx,my);
4684 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4690 if(IS_INTER(mb_type))
4691 write_back_motion(h, mb_type);
/* ---- coded block pattern (not coded for Intra16x16) ---- */
4693 if(!IS_INTRA16x16(mb_type)){
4694 cbp= get_ue_golomb(&s->gb);
4696 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4701 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4702 else cbp= golomb_to_inter_cbp [cbp];
4704 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4705 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with luma residual */
4710 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4711 if(get_bits1(&s->gb)){
4712 mb_type |= MB_TYPE_8x8DCT;
4713 h->cbp_table[mb_xy]= cbp;
4716 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residual decoding ---- */
4718 if(cbp || IS_INTRA16x16(mb_type)){
4719 int i8x8, i4x4, chroma_idx;
4721 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4722 const uint8_t *scan, *scan8x8, *dc_scan;
4724 // fill_non_zero_count_cache(h);
4726 if(IS_INTERLACED(mb_type)){
4727 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4728 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4729 dc_scan= luma_dc_field_scan;
4731 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4732 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4733 dc_scan= luma_dc_zigzag_scan;
4736 dquant= get_se_golomb(&s->gb);
4738 if( dquant > 25 || dquant < -26 ){
4739 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qp wraps modulo 52 */
4743 s->qscale += dquant;
4744 if(((unsigned)s->qscale) > 51){
4745 if(s->qscale<0) s->qscale+= 52;
4746 else s->qscale-= 52;
4749 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4750 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4751 if(IS_INTRA16x16(mb_type)){
/* Intra16x16: separate luma DC block, then 15-coeff AC blocks */
4752 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4753 return -1; //FIXME continue if partitioned and other return -1 too
4756 assert((cbp&15) == 0 || (cbp&15) == 15);
4759 for(i8x8=0; i8x8<4; i8x8++){
4760 for(i4x4=0; i4x4<4; i4x4++){
4761 const int index= i4x4 + 4*i8x8;
4762 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4768 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* non-Intra16x16 luma: per-8x8 CBP bits, 4x4 or 8x8 transform */
4771 for(i8x8=0; i8x8<4; i8x8++){
4772 if(cbp & (1<<i8x8)){
4773 if(IS_8x8DCT(mb_type)){
4774 DCTELEM *buf = &h->mb[64*i8x8];
4776 for(i4x4=0; i4x4<4; i4x4++){
4777 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4778 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4781 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4782 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4784 for(i4x4=0; i4x4<4; i4x4++){
4785 const int index= i4x4 + 4*i8x8;
4787 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4793 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4794 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma: DC blocks first (cbp&0x30), then AC (cbp&0x20) */
4800 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4801 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4807 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4808 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4809 for(i4x4=0; i4x4<4; i4x4++){
4810 const int index= 16 + 4*chroma_idx + i4x4;
4811 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4817 uint8_t * const nnz= &h->non_zero_count_cache[0];
4818 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4819 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4822 uint8_t * const nnz= &h->non_zero_count_cache[0];
4823 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4824 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4825 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4827 s->current_picture.qscale_table[mb_xy]= s->qscale;
4828 write_back_non_zero_count(h);
/* undo the MBAFF doubling from above */
4831 h->ref_count[0] >>= 1;
4832 h->ref_count[1] >>= 1;
/* CABAC mb_field_decoding_flag: context (0..2) is the number of
 * interlaced neighbours (left, above) belonging to this slice; state
 * base 70 per the H.264 spec context tables. */
4838 static int decode_cabac_field_decoding_flag(H264Context *h) {
4839 MpegEncContext * const s = &h->s;
4840 const int mb_x = s->mb_x;
/* mb_y rounded down to the top MB of the MBAFF pair */
4841 const int mb_y = s->mb_y & ~1;
4842 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4843 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4845 unsigned int ctx = 0;
4847 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4850 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4854 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* CABAC intra mb_type: returns 0 for I_4x4, 25 for I_PCM, or 1..24 for
 * an I_16x16 type encoded from cbp_luma/cbp_chroma/pred-mode bins.
 * ctx_base selects the context offset (differs for I slices vs intra
 * MBs inside P/B slices, signalled by intra_slice). */
4857 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4858 uint8_t *state= &h->cabac_state[ctx_base];
4862 MpegEncContext * const s = &h->s;
4863 const int mba_xy = h->left_mb_xy[0];
4864 const int mbb_xy = h->top_mb_xy;
/* ctx = count of non-I4x4 neighbours in this slice */
4866 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4868 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4870 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4871 return 0; /* I4x4 */
4874 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4875 return 0; /* I4x4 */
/* terminate bin distinguishes I_PCM */
4878 if( get_cabac_terminate( &h->cabac ) )
4879 return 25; /* PCM */
4881 mb_type = 1; /* I16x16 */
4882 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4883 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4884 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4885 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4886 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* CABAC mb_type for B slices: 0 = B_Direct_16x16, 1..2 = B_L[01]_16x16,
 * higher values from a 4-bin (plus optional 5th bin) suffix; 13 escapes
 * to the intra mb_type decoder. Context for the first bin counts
 * non-direct neighbours. */
4890 static int decode_cabac_mb_type_b( H264Context *h ) {
4891 MpegEncContext * const s = &h->s;
4893 const int mba_xy = h->left_mb_xy[0];
4894 const int mbb_xy = h->top_mb_xy;
4897 assert(h->slice_type_nos == FF_B_TYPE);
4899 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4901 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4904 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4905 return 0; /* B_Direct_16x16 */
4907 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4908 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4911 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4912 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4913 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4914 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4916 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4917 else if( bits == 13 ) {
/* escape to intra types (I_4x4 etc. mapped to 23+) */
4918 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4919 } else if( bits == 14 )
4920 return 11; /* B_L1_L0_8x16 */
4921 else if( bits == 15 )
4922 return 22; /* B_8x8 */
/* remaining codes take one extra bin */
4924 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4925 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* CABAC mb_skip_flag: context counts non-skipped neighbours; base state
 * is 11 for P slices, +13 offset for B slices. MBAFF needs special
 * neighbour selection across field/frame pairs. */
4928 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4929 MpegEncContext * const s = &h->s;
4933 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4934 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4937 && h->slice_table[mba_xy] == h->slice_num
/* pick the bottom MB of the left pair when field parity matches */
4938 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4939 mba_xy += s->mb_stride;
4941 mbb_xy = mb_xy - s->mb_stride;
4943 && h->slice_table[mbb_xy] == h->slice_num
4944 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4945 mbb_xy -= s->mb_stride;
4947 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4949 int mb_xy = h->mb_xy;
4951 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4954 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4956 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4959 if( h->slice_type_nos == FF_B_TYPE )
4961 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* CABAC intra4x4 pred mode: one flag bin (use predicted mode), else a
 * 3-bin rem_intra4x4_pred_mode adjusted around the predicted mode.
 * NOTE(review): the returns are on elided lines. */
4964 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4967 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4970 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4971 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4972 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so all 9 modes stay reachable */
4974 if( mode >= pred_mode )
/* CABAC intra_chroma_pred_mode (0..3), truncated-unary with up to 3
 * bins; first-bin context counts neighbours with nonzero chroma mode. */
4980 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4981 const int mba_xy = h->left_mb_xy[0];
4982 const int mbb_xy = h->top_mb_xy;
4986 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4987 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4990 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4993 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4996 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4998 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* CABAC coded_block_pattern, luma part: one bin per 8x8 block, context
 * built from the already-decoded bits of this MB and the left/top
 * neighbour CBPs (-1 when the neighbour is outside the slice). */
5004 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5005 int cbp_b, cbp_a, ctx, cbp = 0;
5007 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5008 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5010 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5011 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5012 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5013 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5014 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5015 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5016 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5017 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* CABAC coded_block_pattern, chroma part: returns 0 (none), 1 (DC only)
 * or 2 (DC+AC); contexts from the neighbours' chroma CBP nibbles. */
5020 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5024 cbp_a = (h->left_cbp>>4)&0x03;
5025 cbp_b = (h-> top_cbp>>4)&0x03;
5028 if( cbp_a > 0 ) ctx++;
5029 if( cbp_b > 0 ) ctx += 2;
5030 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin decides DC-only vs DC+AC */
5034 if( cbp_a == 2 ) ctx++;
5035 if( cbp_b == 2 ) ctx += 2;
5036 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* CABAC mb_qp_delta: unary-coded magnitude mapped to a signed delta,
 * alternating sign (+1,-1,+2,-2,...); capped at 102 bins to prevent an
 * infinite loop on corrupt streams. */
5038 static int decode_cabac_mb_dqp( H264Context *h) {
5039 int ctx= h->last_qscale_diff != 0;
5042 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5045 if(val > 102) //prevent infinite loop
/* odd val -> positive delta, even -> negative */
5050 return (val + 1)>>1 ;
5052 return -((val + 1)>>1);
/* CABAC P-slice sub_mb_type (8x8, 8x4, 4x8 or 4x4) from up to 3 bins.
 * NOTE(review): the return statements are on elided lines. */
5054 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5055 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5057 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5059 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* CABAC B-slice sub_mb_type: 0 = B_Direct_8x8, 1..2 = B_L[01]_8x8,
 * larger values built from additional bins (spec 9.3.2.5 binarization). */
5063 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5065 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5066 return 0; /* B_Direct_8x8 */
5067 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5068 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5070 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5071 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5072 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5075 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5076 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC transform_size_8x8_flag; context = number of neighbouring MBs
 * already using the 8x8 transform (precomputed in neighbor_transform_size). */
5080 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5081 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* CABAC ref_idx for block n of 'list': unary code with contexts from the
 * left/above cached ref indices; in B slices direct-predicted neighbours
 * are treated as ref 0. Capped at 32 to bound corrupt streams. */
5084 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5085 int refa = h->ref_cache[list][scan8[n] - 1];
5086 int refb = h->ref_cache[list][scan8[n] - 8];
5090 if( h->slice_type_nos == FF_B_TYPE) {
5091 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5093 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5102 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5105 if(ref >= 32 /*h->ref_list[list]*/){
/* CABAC motion vector difference, component l (0=x, 1=y): UEG3
 * binarization — up to 9 context-coded unary bins, then an exp-Golomb
 * bypass suffix and a bypass sign bit. Context from the summed |mvd| of
 * the left/above neighbours. */
5112 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5113 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5114 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5115 int ctxbase = (l == 0) ? 40 : 47;
5117 int ctx = (amvd>2) + (amvd>32);
5119 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5124 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-Golomb suffix in bypass mode for mvd >= 9 */
5132 while( get_cabac_bypass( &h->cabac ) ) {
/* overflow guard against corrupt streams */
5136 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5141 if( get_cabac_bypass( &h->cabac ) )
5145 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Computes the coded_block_flag context: looks up whether the left and
 * above neighbouring blocks of the same category had nonzero
 * coefficients (from cbp bits for DC, from the nnz cache for AC), and
 * returns ctx + 4*cat as the offset into the state array. */
5148 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* luma DC flag is stashed in bit 8 of the neighbour cbp */
5154 nza = h->left_cbp&0x100;
5155 nzb = h-> top_cbp&0x100;
/* chroma DC flags live in bits 6+idx */
5157 nza = (h->left_cbp>>(6+idx))&0x01;
5158 nzb = (h-> top_cbp>>(6+idx))&0x01;
5161 assert(cat == 1 || cat == 2 || cat == 4);
5162 nza = h->non_zero_count_cache[scan8[idx] - 1];
5163 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5172 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the last_significant_coeff_flag
 * context offset (H.264 spec Table 9-43 grouping); byte-aligned so the
 * x86 asm significance decoder can index it directly. */
5175 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5176 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5177 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5178 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5179 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5182 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5183 static const int significant_coeff_flag_offset[2][6] = {
5184 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5185 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5187 static const int last_coeff_flag_offset[2][6] = {
5188 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5189 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5191 static const int coeff_abs_level_m1_offset[6] = {
5192 227+0, 227+10, 227+20, 227+30, 227+39, 426
5194 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5195 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5196 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5197 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5198 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5199 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5200 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5201 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5202 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5204 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5205 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5206 * map node ctx => cabac ctx for level=1 */
5207 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5208 /* map node ctx => cabac ctx for level>1 */
5209 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5210 static const uint8_t coeff_abs_level_transition[2][8] = {
5211 /* update node ctx after decoding a level=1 */
5212 { 1, 2, 3, 3, 4, 5, 6, 7 },
5213 /* update node ctx after decoding a level>1 */
5214 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Body fragment of decode_cabac_residual_internal(): decodes one block of
 * residual transform coefficients with CABAC.  Phases visible below:
 *   1. read the coded_block_flag (skip decode if the block is empty),
 *   2. decode the significance map (which coefficient positions are nonzero),
 *   3. decode the coefficient magnitudes and signs in reverse scan order.
 * NOTE(review): the function signature and several interior lines were lost
 * in extraction (the leading "52xx" tokens are listing artifacts); the code
 * below is kept byte-identical to the surviving lines. */
5220 int coeff_count = 0;
5223 uint8_t *significant_coeff_ctx_base;
5224 uint8_t *last_coeff_ctx_base;
5225 uint8_t *abs_level_m1_ctx_base;
/* Copy the CABAC state to a local struct so the hot loops work on
 * stack/register data instead of dereferencing h->cabac each bin. */
5228 #define CABAC_ON_STACK
5230 #ifdef CABAC_ON_STACK
5233 cc.range = h->cabac.range;
5234 cc.low = h->cabac.low;
5235 cc.bytestream= h->cabac.bytestream;
5237 #define CC &h->cabac
5241 /* cat: 0-> DC 16x16 n = 0
5242 * 1-> AC 16x16 n = luma4x4idx
5243 * 2-> Luma4x4 n = luma4x4idx
5244 * 3-> DC Chroma n = iCbCr
5245 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5246 * 5-> Luma8x8 n = 4 * luma8x8idx
5249 /* read coded block flag */
5250 if( is_dc || cat != 5 ) {
5251 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* coded_block_flag == 0: no coefficients, record the zero count and
 * write the (possibly locally cached) CABAC state back before returning */
5253 h->non_zero_count_cache[scan8[n]] = 0;
5255 #ifdef CABAC_ON_STACK
5256 h->cabac.range = cc.range ;
5257 h->cabac.low = cc.low ;
5258 h->cabac.bytestream= cc.bytestream;
/* Context pointers for the significance map and the level decoding;
 * the offsets depend on block category and field/frame coding. */
5264 significant_coeff_ctx_base = h->cabac_state
5265 + significant_coeff_flag_offset[MB_FIELD][cat];
5266 last_coeff_ctx_base = h->cabac_state
5267 + last_coeff_flag_offset[MB_FIELD][cat];
5268 abs_level_m1_ctx_base = h->cabac_state
5269 + coeff_abs_level_m1_offset[cat];
5271 if( !is_dc && cat == 5 ) {
/* Significance-map decoder: for each scan position read a
 * significant_coeff_flag; on 1, record the position and read
 * last_significant_coeff_flag to possibly terminate early. */
5272 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5273 for(last= 0; last < coefs; last++) { \
5274 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5275 if( get_cabac( CC, sig_ctx )) { \
5276 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5277 index[coeff_count++] = last; \
5278 if( get_cabac( CC, last_ctx ) ) { \
5284 if( last == max_coeff -1 ) {\
5285 index[coeff_count++] = last;\
/* 8x8 blocks use a position-dependent context mapping table */
5287 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5288 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5289 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5291 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5293 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5295 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5298 assert(coeff_count > 0);
/* Record nonzero-count bookkeeping for the deblocking filter / cbp. */
5302 h->cbp_table[h->mb_xy] |= 0x100;
5304 h->cbp_table[h->mb_xy] |= 0x40 << n;
5307 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5309 assert( cat == 1 || cat == 2 || cat == 4 );
5310 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level decoding, reverse scan order: first bin decides |level|==1
 * vs |level|>1; larger magnitudes continue with a truncated-unary
 * prefix and an exp-Golomb bypass suffix.  qmul (when non-NULL)
 * dequantizes in place with rounding (+32 >> 6). */
5315 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5317 int j= scantable[index[--coeff_count]];
5319 if( get_cabac( CC, ctx ) == 0 ) {
5320 node_ctx = coeff_abs_level_transition[0][node_ctx];
5322 block[j] = get_cabac_bypass_sign( CC, -1);
5324 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5328 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5329 node_ctx = coeff_abs_level_transition[1][node_ctx];
5331 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5335 if( coeff_abs >= 15 ) {
5337 while( get_cabac_bypass( CC ) ) {
5343 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5349 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5351 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5354 } while( coeff_count );
/* write the locally cached CABAC state back */
5355 #ifdef CABAC_ON_STACK
5356 h->cabac.range = cc.range ;
5357 h->cabac.low = cc.low ;
5358 h->cabac.bytestream= cc.bytestream;
5363 #ifndef CONFIG_SMALL
5364 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5365 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5368 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5369 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5373 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5375 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5377 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5378 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5382 static inline void compute_mb_neighbors(H264Context *h)
5384 MpegEncContext * const s = &h->s;
5385 const int mb_xy = h->mb_xy;
5386 h->top_mb_xy = mb_xy - s->mb_stride;
5387 h->left_mb_xy[0] = mb_xy - 1;
5389 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5390 const int top_pair_xy = pair_xy - s->mb_stride;
5391 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5392 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5393 const int curr_mb_field_flag = MB_FIELD;
5394 const int bottom = (s->mb_y & 1);
5396 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5397 h->top_mb_xy -= s->mb_stride;
5399 if (!left_mb_field_flag == curr_mb_field_flag) {
5400 h->left_mb_xy[0] = pair_xy - 1;
5402 } else if (FIELD_PICTURE) {
5403 h->top_mb_xy -= s->mb_stride;
/* decode_mb_cabac(): decodes one macroblock from a CABAC-coded slice.
 * Visible phases: skip-flag handling (with MBAFF pairing), mb_type decoding
 * per slice type, IPCM handling, intra prediction mode decoding, inter
 * reference/motion-vector decoding for all partition shapes, cbp and
 * transform-size flags, and finally residual coefficient decoding.
 * NOTE(review): this is a partially elided line-numbered listing; the
 * leading "54xx/5xxx" tokens and many missing lines (braces, else branches)
 * are extraction artifacts.  Code is kept byte-identical. */
5409 * decodes a macroblock
5410 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5412 static int decode_mb_cabac(H264Context *h) {
5413 MpegEncContext * const s = &h->s;
5415 int mb_type, partition_count, cbp = 0;
5416 int dct8x8_allowed= h->pps.transform_8x8_mode;
5418 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5420 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip flag (P/B slices only); MBAFF needs look-ahead to the pair mb --- */
5421 if( h->slice_type_nos != FF_I_TYPE ) {
5423 /* a skipped mb needs the aff flag from the following mb */
5424 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5425 predict_field_decoding_flag(h);
5426 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5427 skip = h->next_mb_skipped;
5429 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5430 /* read skip flags */
5432 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5433 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5434 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5435 if(!h->next_mb_skipped)
5436 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5441 h->cbp_table[mb_xy] = 0;
5442 h->chroma_pred_mode_table[mb_xy] = 0;
5443 h->last_qscale_diff = 0;
5450 if( (s->mb_y&1) == 0 )
5452 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5455 h->prev_mb_skipped = 0;
5457 compute_mb_neighbors(h);
/* --- mb_type decoding, per slice type --- */
5459 if( h->slice_type_nos == FF_B_TYPE ) {
5460 mb_type = decode_cabac_mb_type_b( h );
5462 partition_count= b_mb_type_info[mb_type].partition_count;
5463 mb_type= b_mb_type_info[mb_type].type;
5466 goto decode_intra_mb;
5468 } else if( h->slice_type_nos == FF_P_TYPE ) {
5469 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5471 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5472 /* P_L0_D16x16, P_8x8 */
5473 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5475 /* P_L0_D8x16, P_L0_D16x8 */
5476 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5478 partition_count= p_mb_type_info[mb_type].partition_count;
5479 mb_type= p_mb_type_info[mb_type].type;
5481 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5482 goto decode_intra_mb;
5485 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5486 if(h->slice_type == FF_SI_TYPE && mb_type)
5488 assert(h->slice_type_nos == FF_I_TYPE);
5490 partition_count = 0;
5491 cbp= i_mb_type_info[mb_type].cbp;
5492 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5493 mb_type= i_mb_type_info[mb_type].type;
5496 mb_type |= MB_TYPE_INTERLACED;
5498 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw samples follow; re-align the CABAC decoder afterwards --- */
5500 if(IS_INTRA_PCM(mb_type)) {
5503 // We assume these blocks are very rare so we do not optimize it.
5504 // FIXME The two following lines get the bitstream position in the cabac
5505 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5506 ptr= h->cabac.bytestream;
5507 if(h->cabac.low&0x1) ptr--;
5509 if(h->cabac.low&0x1FF) ptr--;
5512 // The pixels are stored in the same order as levels in h->mb array.
5513 memcpy(h->mb, ptr, 256); ptr+=256;
5515 memcpy(h->mb+128, ptr, 128); ptr+=128;
5518 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5520 // All blocks are present
5521 h->cbp_table[mb_xy] = 0x1ef;
5522 h->chroma_pred_mode_table[mb_xy] = 0;
5523 // In deblocking, the quantizer is 0
5524 s->current_picture.qscale_table[mb_xy]= 0;
5525 // All coeffs are present
5526 memset(h->non_zero_count[mb_xy], 16, 16);
5527 s->current_picture.mb_type[mb_xy]= mb_type;
5528 h->last_qscale_diff = 0;
5533 h->ref_count[0] <<= 1;
5534 h->ref_count[1] <<= 1;
5537 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5539 if( IS_INTRA( mb_type ) ) {
5541 if( IS_INTRA4x4( mb_type ) ) {
5542 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5543 mb_type |= MB_TYPE_8x8DCT;
5544 for( i = 0; i < 16; i+=4 ) {
5545 int pred = pred_intra_mode( h, i );
5546 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5547 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5550 for( i = 0; i < 16; i++ ) {
5551 int pred = pred_intra_mode( h, i );
5552 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5554 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5557 write_back_intra_pred_mode(h);
5558 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5560 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5561 if( h->intra16x16_pred_mode < 0 ) return -1;
5564 h->chroma_pred_mode_table[mb_xy] =
5565 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5567 pred_mode= check_intra_pred_mode( h, pred_mode );
5568 if( pred_mode < 0 ) return -1;
5569 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-macroblock partitions: sub_mb_type, refs, then mvd per part --- */
5571 } else if( partition_count == 4 ) {
5572 int i, j, sub_partition_count[4], list, ref[2][4];
5574 if( h->slice_type_nos == FF_B_TYPE ) {
5575 for( i = 0; i < 4; i++ ) {
5576 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5577 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5578 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5580 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5581 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5582 pred_direct_motion(h, &mb_type);
5583 h->ref_cache[0][scan8[4]] =
5584 h->ref_cache[1][scan8[4]] =
5585 h->ref_cache[0][scan8[12]] =
5586 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5587 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5588 for( i = 0; i < 4; i++ )
5589 if( IS_DIRECT(h->sub_mb_type[i]) )
5590 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5594 for( i = 0; i < 4; i++ ) {
5595 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5596 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5597 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5601 for( list = 0; list < h->list_count; list++ ) {
5602 for( i = 0; i < 4; i++ ) {
5603 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5604 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5605 if( h->ref_count[list] > 1 ){
5606 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5607 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5608 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5616 h->ref_cache[list][ scan8[4*i]+1 ]=
5617 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5622 dct8x8_allowed = get_dct8x8_allowed(h);
5624 for(list=0; list<h->list_count; list++){
5626 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5627 if(IS_DIRECT(h->sub_mb_type[i])){
5628 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5632 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5633 const int sub_mb_type= h->sub_mb_type[i];
5634 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5635 for(j=0; j<sub_partition_count[i]; j++){
5638 const int index= 4*i + block_width*j;
5639 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5640 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5641 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5643 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5644 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5645 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the decoded mv/mvd into every 4x4 cell of the sub-partition */
5647 if(IS_SUB_8X8(sub_mb_type)){
5649 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5651 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5654 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5656 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5657 }else if(IS_SUB_8X4(sub_mb_type)){
5658 mv_cache[ 1 ][0]= mx;
5659 mv_cache[ 1 ][1]= my;
5661 mvd_cache[ 1 ][0]= mx - mpx;
5662 mvd_cache[ 1 ][1]= my - mpy;
5663 }else if(IS_SUB_4X8(sub_mb_type)){
5664 mv_cache[ 8 ][0]= mx;
5665 mv_cache[ 8 ][1]= my;
5667 mvd_cache[ 8 ][0]= mx - mpx;
5668 mvd_cache[ 8 ][1]= my - mpy;
5670 mv_cache[ 0 ][0]= mx;
5671 mv_cache[ 0 ][1]= my;
5673 mvd_cache[ 0 ][0]= mx - mpx;
5674 mvd_cache[ 0 ][1]= my - mpy;
5677 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5678 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5679 p[0] = p[1] = p[8] = p[9] = 0;
5680 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- whole-mb inter modes: direct, 16x16, 16x8, 8x16 --- */
5684 } else if( IS_DIRECT(mb_type) ) {
5685 pred_direct_motion(h, &mb_type);
5686 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5687 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5688 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5690 int list, mx, my, i, mpx, mpy;
5691 if(IS_16X16(mb_type)){
5692 for(list=0; list<h->list_count; list++){
5693 if(IS_DIR(mb_type, 0, list)){
5695 if(h->ref_count[list] > 1){
5696 ref= decode_cabac_mb_ref(h, list, 0);
5697 if(ref >= (unsigned)h->ref_count[list]){
5698 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5703 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5705 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5707 for(list=0; list<h->list_count; list++){
5708 if(IS_DIR(mb_type, 0, list)){
5709 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5711 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5712 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5713 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5715 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5716 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5718 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5721 else if(IS_16X8(mb_type)){
5722 for(list=0; list<h->list_count; list++){
5724 if(IS_DIR(mb_type, i, list)){
5726 if(h->ref_count[list] > 1){
5727 ref= decode_cabac_mb_ref( h, list, 8*i );
5728 if(ref >= (unsigned)h->ref_count[list]){
5729 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5734 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5736 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5739 for(list=0; list<h->list_count; list++){
5741 if(IS_DIR(mb_type, i, list)){
5742 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5743 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5744 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5745 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5747 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5748 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5750 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5751 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5756 assert(IS_8X16(mb_type));
5757 for(list=0; list<h->list_count; list++){
5759 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5761 if(h->ref_count[list] > 1){
5762 ref= decode_cabac_mb_ref( h, list, 4*i );
5763 if(ref >= (unsigned)h->ref_count[list]){
5764 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5769 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5771 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5774 for(list=0; list<h->list_count; list++){
5776 if(IS_DIR(mb_type, i, list)){
5777 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5778 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5779 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5781 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5782 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5783 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5785 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5786 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5793 if( IS_INTER( mb_type ) ) {
5794 h->chroma_pred_mode_table[mb_xy] = 0;
5795 write_back_motion( h, mb_type );
/* --- coded block pattern and transform size --- */
5798 if( !IS_INTRA16x16( mb_type ) ) {
5799 cbp = decode_cabac_mb_cbp_luma( h );
5801 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5804 h->cbp_table[mb_xy] = h->cbp = cbp;
5806 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5807 if( decode_cabac_mb_transform_size( h ) )
5808 mb_type |= MB_TYPE_8x8DCT;
5810 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: dqp, then luma DC/AC, luma 4x4/8x8, chroma DC/AC --- */
5812 if( cbp || IS_INTRA16x16( mb_type ) ) {
5813 const uint8_t *scan, *scan8x8, *dc_scan;
5814 const uint32_t *qmul;
5817 if(IS_INTERLACED(mb_type)){
5818 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5819 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5820 dc_scan= luma_dc_field_scan;
5822 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5823 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5824 dc_scan= luma_dc_zigzag_scan;
5827 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5828 if( dqp == INT_MIN ){
5829 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5833 if(((unsigned)s->qscale) > 51){
5834 if(s->qscale<0) s->qscale+= 52;
5835 else s->qscale-= 52;
5837 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5838 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5840 if( IS_INTRA16x16( mb_type ) ) {
5842 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5843 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5846 qmul = h->dequant4_coeff[0][s->qscale];
5847 for( i = 0; i < 16; i++ ) {
5848 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5849 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5852 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5856 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5857 if( cbp & (1<<i8x8) ) {
5858 if( IS_8x8DCT(mb_type) ) {
5859 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5860 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5862 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5863 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5864 const int index = 4*i8x8 + i4x4;
5865 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5867 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5868 //STOP_TIMER("decode_residual")
5872 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5873 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5880 for( c = 0; c < 2; c++ ) {
5881 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5882 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5888 for( c = 0; c < 2; c++ ) {
5889 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5890 for( i = 0; i < 4; i++ ) {
5891 const int index = 16 + 4 * c + i;
5892 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5893 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5897 uint8_t * const nnz= &h->non_zero_count_cache[0];
5898 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5899 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5902 uint8_t * const nnz= &h->non_zero_count_cache[0];
5903 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5904 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5905 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5906 h->last_qscale_diff = 0;
5909 s->current_picture.qscale_table[mb_xy]= s->qscale;
5910 write_back_non_zero_count(h);
5913 h->ref_count[0] >>= 1;
5914 h->ref_count[1] >>= 1;
5921 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5922 const int index_a = qp + h->slice_alpha_c0_offset;
5923 const int alpha = (alpha_table+52)[index_a];
5924 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5928 tc[0] = (tc0_table+52)[index_a][bS[0]];
5929 tc[1] = (tc0_table+52)[index_a][bS[1]];
5930 tc[2] = (tc0_table+52)[index_a][bS[2]];
5931 tc[3] = (tc0_table+52)[index_a][bS[3]];
5932 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5934 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5937 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5938 const int index_a = qp + h->slice_alpha_c0_offset;
5939 const int alpha = (alpha_table+52)[index_a];
5940 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5944 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5945 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5946 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5947 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5948 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5950 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* filter_mb_mbaff_edgev(): scalar per-row deblocking of a vertical luma edge
 * for MBAFF macroblocks, where the two fields interleave and so bS (8 entries)
 * and qp (2 entries) vary row by row.  For bS < 4 the normal clipped filter is
 * applied; for bS == 4 the strong intra filter with the conditional 4-tap /
 * 5-tap smoothing is used.
 * NOTE(review): partially elided listing — several declarations (i, qp_index,
 * index_a, alpha, beta, tc, i_delta), some else branches and closing braces
 * are missing; lines kept byte-identical. */
5954 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5956 for( i = 0; i < 16; i++, pix += stride) {
5962 int bS_index = (i >> 1);
5965 bS_index |= (i & 1);
5968 if( bS[bS_index] == 0 ) {
/* per-row qp selection: fields alternate rows in frame coordinates */
5972 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5973 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5974 alpha = (alpha_table+52)[index_a];
5975 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5977 if( bS[bS_index] < 4 ) {
/* normal filtering; tc is derived from tc0 (derivation line elided) */
5978 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5979 const int p0 = pix[-1];
5980 const int p1 = pix[-2];
5981 const int p2 = pix[-3];
5982 const int q0 = pix[0];
5983 const int q1 = pix[1];
5984 const int q2 = pix[2];
5986 if( FFABS( p0 - q0 ) < alpha &&
5987 FFABS( p1 - p0 ) < beta &&
5988 FFABS( q1 - q0 ) < beta ) {
5992 if( FFABS( p2 - p0 ) < beta ) {
5993 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5996 if( FFABS( q2 - q0 ) < beta ) {
5997 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6001 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6002 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6003 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6004 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong intra filtering */
6007 const int p0 = pix[-1];
6008 const int p1 = pix[-2];
6009 const int p2 = pix[-3];
6011 const int q0 = pix[0];
6012 const int q1 = pix[1];
6013 const int q2 = pix[2];
6015 if( FFABS( p0 - q0 ) < alpha &&
6016 FFABS( p1 - p0 ) < beta &&
6017 FFABS( q1 - q0 ) < beta ) {
6019 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6020 if( FFABS( p2 - p0 ) < beta)
6022 const int p3 = pix[-4];
/* 5-tap smoothing of the p side */
6024 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6025 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6026 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6029 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6031 if( FFABS( q2 - q0 ) < beta)
6033 const int q3 = pix[3];
/* 5-tap smoothing of the q side */
6035 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6036 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6037 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6040 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6044 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6045 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6047 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* filter_mb_mbaff_edgecv(): scalar per-row deblocking of a vertical chroma
 * edge for MBAFF macroblocks (8 chroma rows).  Chroma filtering only touches
 * p0/q0; tc uses the tc0+1 convention.
 * NOTE(review): partially elided listing — declarations (i, bS_index,
 * qp_index, index_a, alpha, beta), some branches and closing braces are
 * missing; lines kept byte-identical. */
6052 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6054 for( i = 0; i < 8; i++, pix += stride) {
6062 if( bS[bS_index] == 0 ) {
6066 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6067 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6068 alpha = (alpha_table+52)[index_a];
6069 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6071 if( bS[bS_index] < 4 ) {
/* normal chroma filtering: delta clipped to +-tc, applied to p0/q0 only */
6072 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6073 const int p0 = pix[-1];
6074 const int p1 = pix[-2];
6075 const int q0 = pix[0];
6076 const int q1 = pix[1];
6078 if( FFABS( p0 - q0 ) < alpha &&
6079 FFABS( p1 - p0 ) < beta &&
6080 FFABS( q1 - q0 ) < beta ) {
6081 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6083 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6084 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6085 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filtering (no tc clipping) */
6088 const int p0 = pix[-1];
6089 const int p1 = pix[-2];
6090 const int q0 = pix[0];
6091 const int q1 = pix[1];
6093 if( FFABS( p0 - q0 ) < alpha &&
6094 FFABS( p1 - p0 ) < beta &&
6095 FFABS( q1 - q0 ) < beta ) {
6097 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6098 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6099 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6105 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6106 const int index_a = qp + h->slice_alpha_c0_offset;
6107 const int alpha = (alpha_table+52)[index_a];
6108 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6112 tc[0] = (tc0_table+52)[index_a][bS[0]];
6113 tc[1] = (tc0_table+52)[index_a][bS[1]];
6114 tc[2] = (tc0_table+52)[index_a][bS[2]];
6115 tc[3] = (tc0_table+52)[index_a][bS[3]];
6116 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6118 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6122 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6123 const int index_a = qp + h->slice_alpha_c0_offset;
6124 const int alpha = (alpha_table+52)[index_a];
6125 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6129 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6130 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6131 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6132 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6133 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6135 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* filter_mb_fast(): faster deblocking path for non-MBAFF frame macroblocks.
 * Falls back to the full filter_mb() for the cases it cannot handle (first
 * row/column, per-mb chroma qp deltas, slice-boundary-disabled filtering),
 * early-exits when all averaged qps are below the threshold, then either
 * filters with constant bS (intra mb) or computes bS via the DSP
 * h264_loop_filter_strength helper.
 * NOTE(review): partially elided listing — the tail (FILTER macro expansion
 * uses and remaining branches) is cut off mid-function; lines kept
 * byte-identical. */
6139 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6140 MpegEncContext * const s = &h->s;
6141 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6143 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* cases the fast path cannot handle -> full filter_mb() */
6147 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6148 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6149 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6150 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6151 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6154 assert(!FRAME_MBAFF);
/* average the current qp with the left/top neighbours' qps */
6156 mb_type = s->current_picture.mb_type[mb_xy];
6157 qp = s->current_picture.qscale_table[mb_xy];
6158 qp0 = s->current_picture.qscale_table[mb_xy-1];
6159 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6160 qpc = get_chroma_qp( h, 0, qp );
6161 qpc0 = get_chroma_qp( h, 0, qp0 );
6162 qpc1 = get_chroma_qp( h, 0, qp1 );
6163 qp0 = (qp + qp0 + 1) >> 1;
6164 qp1 = (qp + qp1 + 1) >> 1;
6165 qpc0 = (qpc + qpc0 + 1) >> 1;
6166 qpc1 = (qpc + qpc1 + 1) >> 1;
6167 qp_thresh = 15 - h->slice_alpha_c0_offset;
/* all qps below threshold -> alpha/beta are zero, nothing to filter */
6168 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6169 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra mb: constant boundary strengths (4 on mb edges, 3 inside) */
6172 if( IS_INTRA(mb_type) ) {
6173 int16_t bS4[4] = {4,4,4,4};
6174 int16_t bS3[4] = {3,3,3,3};
6175 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6176 if( IS_8x8DCT(mb_type) ) {
6177 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6178 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6179 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6180 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6182 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6183 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6184 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6185 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6186 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6187 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6188 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6189 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6191 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6192 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6193 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6194 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6195 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6196 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6197 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6198 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter mb: derive bS per edge via the DSP helper, then FILTER macro */
6201 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6202 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6204 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6206 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6208 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6209 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6210 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6211 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6213 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6214 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6215 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6216 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6218 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6219 bSv[0][0] = 0x0004000400040004ULL;
6220 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6221 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6223 #define FILTER(hv,dir,edge)\
6224 if(bSv[dir][edge]) {\
6225 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6227 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6228 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6234 } else if( IS_8x8DCT(mb_type) ) {
/* Apply the H.264 in-loop deblocking filter to one macroblock along one
 * direction: dir==0 filters vertical edges (left neighbour = mb_xy-1),
 * dir==1 filters horizontal edges (top neighbour = h->top_mb_xy).
 * Computes per-edge boundary strengths bS[0..3] and then calls the
 * filter_mb_edge*() primitives for luma and both chroma planes.
 * NOTE(review): this listing is a sampled excerpt -- intermediate source
 * lines (declarations, some closing braces) are missing, so the body below
 * is not contiguous. Comments only describe what the visible lines show. */
6254 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6255 MpegEncContext * const s = &h->s;
/* mbm_*: the neighbouring macroblock on the "minus" side of the first edge. */
6257 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6258 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers for this MB's slice and
 * for the neighbour's slice (they may differ across a slice boundary). */
6259 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6260 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* 0xFFFF slice table entry means the neighbour is outside the picture:
 * skip edge 0 in that case. */
6261 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* A skipped 16x16 MB has uniform motion: only the outer edge matters. */
6263 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6264 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6265 // how often to recheck mv-based bS when iterating between edges
6266 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6267 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6268 // how often to recheck mv-based bS when iterating along each edge
6269 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6271 if (first_vertical_edge_done) {
/* deblocking_filter==2: edges across slice boundaries are not filtered. */
6275 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* MBAFF special case: a frame MB above an interlaced (field) MB pair must
 * have its top edge filtered once per field. */
6278 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6279 && !IS_INTERLACED(mb_type)
6280 && IS_INTERLACED(mbm_type)
6282 // This is a special case in the norm where the filtering must
6283 // be done twice (one each of the field) even if we are in a
6284 // frame macroblock.
/* nnz_idx maps the 4 top-edge 4x4 blocks to non_zero_count entries. */
6286 static const int nnz_idx[4] = {4,5,6,3};
6287 unsigned int tmp_linesize = 2 * linesize;
6288 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6289 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* One pass per field of the MB pair above. */
6294 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6295 if( IS_INTRA(mb_type) ||
6296 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6297 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6299 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6300 for( i = 0; i < 4; i++ ) {
6301 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6302 mbn_nnz[nnz_idx[i]] != 0 )
6308 // Do not use s->qscale as luma quantizer because it has not the same
6309 // value in IPCM macroblocks.
6310 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6311 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6312 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6313 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6314 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6315 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6316 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6317 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main loop: one iteration per internal/external edge of the MB. */
6324 for( edge = start; edge < edges; edge++ ) {
6325 /* mbn_xy: neighbor macroblock */
6326 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6327 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6328 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* With an 8x8 transform, odd 4x4 edges fall inside a transform block
 * and are not filtered. */
6332 if( (edge&1) && IS_8x8DCT(mb_type) )
6335 if( IS_INTRA(mb_type) ||
6336 IS_INTRA(mbn_type) ) {
6339 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6340 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6349 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* mask_edge lets edges with known-uniform motion skip the mv check. */
6354 if( edge & mask_edge ) {
6355 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Mixed frame/field MB pair: spec mandates bS==1. */
6358 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6359 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Fast path: whole edge shares one partition, so one mv/ref compare
 * decides bS for all four 4x4 positions. */
6362 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6363 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6364 int bn_idx= b_idx - (dir ? 8:1);
6367 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6368 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6369 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6370 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare list0 against the neighbour's list1 and
 * vice versa (motion may match with the lists swapped). */
6373 if(h->slice_type_nos == FF_B_TYPE && v){
6375 for( l = 0; !v && l < 2; l++ ) {
6377 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6378 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6379 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6383 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Slow path: per-4x4-position bS computation. */
6389 for( i = 0; i < 4; i++ ) {
6390 int x = dir == 0 ? edge : i;
6391 int y = dir == 0 ? i : edge;
6392 int b_idx= 8 + 4 + x + 8*y;
6393 int bn_idx= b_idx - (dir ? 8:1);
/* Either side having coded residual forces bS==2 (handled in the
 * elided lines); otherwise fall through to the mv/ref test. */
6395 if( h->non_zero_count_cache[b_idx] |
6396 h->non_zero_count_cache[bn_idx] ) {
6402 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6403 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6404 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6405 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6411 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6413 for( l = 0; l < 2; l++ ) {
6415 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6416 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6417 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All four strengths zero: nothing to filter on this edge. */
6426 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6431 // Do not use s->qscale as luma quantizer because it has not the same
6432 // value in IPCM macroblocks.
6433 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6434 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6435 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6436 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Vertical-edge filtering; chroma is filtered only on even edges
 * (chroma is subsampled 2:1 relative to luma). */
6438 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6439 if( (edge&1) == 0 ) {
6440 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6441 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6442 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6443 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Horizontal-edge counterpart. */
6446 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6447 if( (edge&1) == 0 ) {
6448 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6449 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6450 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6451 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Full (non-fast-path) deblocking entry point for one macroblock.
 * Handles the low-QP early-skip, CAVLC 8x8-DCT NNZ fixups, the MBAFF
 * first-vertical-edge special case, then delegates both directions to
 * filter_mb_dir().
 * NOTE(review): sampled excerpt -- some lines between the visible ones are
 * missing from this listing. */
6457 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6458 MpegEncContext * const s = &h->s;
6459 const int mb_xy= mb_x + mb_y*s->mb_stride;
6460 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a tighter vertical mv threshold (2 instead of 4). */
6461 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6462 int first_vertical_edge_done = 0;
6465 //for sufficiently low qp, filtering wouldn't do anything
6466 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6468 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6469 int qp = s->current_picture.qscale_table[mb_xy];
/* Skip the whole MB when both this MB's and the averaged boundary QPs are
 * at or below the threshold (condition head is in an elided line). */
6471 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6472 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6477 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6478 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6479 int top_type, left_type[2];
6480 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6481 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6482 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the neighbour rows/columns of the NNZ cache from the cbp bits
 * when the neighbour used the 8x8 transform. */
6484 if(IS_8x8DCT(top_type)){
6485 h->non_zero_count_cache[4+8*0]=
6486 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6487 h->non_zero_count_cache[6+8*0]=
6488 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6490 if(IS_8x8DCT(left_type[0])){
6491 h->non_zero_count_cache[3+8*1]=
6492 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6494 if(IS_8x8DCT(left_type[1])){
6495 h->non_zero_count_cache[3+8*3]=
6496 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* For the current MB, spread each 8x8 cbp bit over its four 4x4 cells. */
6499 if(IS_8x8DCT(mb_type)){
6500 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6501 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6503 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6504 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6506 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6507 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6509 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6510 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
/* MBAFF: the first vertical edge needs special handling when the current
 * and left MB pairs have different frame/field coding. */
6515 // left mb is in picture
6516 && h->slice_table[mb_xy-1] != 0xFFFF
6517 // and current and left pair do not have the same interlaced type
6518 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6519 // and left mb is in the same slice if deblocking_filter == 2
6520 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6521 /* First vertical edge is different in MBAFF frames
6522 * There are 8 different bS to compute and 2 different Qp
6524 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6525 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6530 int mb_qp, mbn0_qp, mbn1_qp;
6532 first_vertical_edge_done = 1;
6534 if( IS_INTRA(mb_type) )
6535 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6537 for( i = 0; i < 8; i++ ) {
6538 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6540 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
/* Coded-coefficient test; CAVLC 8x8-DCT neighbours use cbp bits instead
 * of the per-4x4 NNZ array (which is wrong for the filter in that mode). */
6542 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6543 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6544 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6546 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Average QPs across the edge for luma and each chroma plane, once per
 * left-pair macroblock. */
6553 mb_qp = s->current_picture.qscale_table[mb_xy];
6554 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6555 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6556 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6557 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6558 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6559 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6560 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6561 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6562 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6563 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6564 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6565 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6568 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6569 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6570 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6571 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6572 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Both branches filter dir 0 then dir 1; first_vertical_edge_done only
 * suppresses the already-filtered MBAFF edge in direction 0. */
6576 for( dir = 0; dir < 2; dir++ )
6577 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6579 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6580 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode one slice: CABAC path, CAVLC path, or the generic decode_mb loop.
 * Iterates macroblocks, calling decode_mb_cabac()/decode_mb_cavlc() plus
 * hl_decode_mb(), and reports decoded/errored regions to the error
 * resilience layer via ff_er_add_slice().
 * Fix: the line reading "get_bits_count(s->?gb) >= s->gb?.size_in_bits" was
 * character-garbled; reconstructed from the identical intact expression on
 * the following line. No other code token changed.
 * NOTE(review): sampled excerpt -- intermediate lines are missing from this
 * listing; comments describe only the visible code. */
6584 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6585 H264Context *h = *(void**)arg;
6586 MpegEncContext * const s = &h->s;
6587 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
/* is_complex selects the slower, fully general decode path. */
6591 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6592 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6594 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header. */
6598 align_get_bits( &s->gb );
6601 ff_init_cabac_states( &h->cabac);
6602 ff_init_cabac_decoder( &h->cabac,
6603 s->gb.buffer + get_bits_count(&s->gb)/8,
6604 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6605 /* calculate pre-state */
6606 for( i= 0; i < 460; i++ ) {
6608 if( h->slice_type_nos == FF_I_TYPE )
6609 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6611 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte as the CABAC engine expects. */
6614 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6616 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6621 int ret = decode_mb_cabac(h);
6623 //STOP_TIMER("decode_mb_cabac")
6625 if(ret>=0) hl_decode_mb(h);
6627 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6630 ret = decode_mb_cabac(h);
6632 if(ret>=0) hl_decode_mb(h);
6635 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond 2 bytes means corrupt input. */
6637 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6638 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6639 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6643 if( ++s->mb_x >= s->mb_width ) {
6645 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6647 if(FIELD_OR_MBAFF_PICTURE) {
6652 if( eos || s->mb_y >= s->mb_height ) {
6653 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6654 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path (elided "else" head above). */
6661 int ret = decode_mb_cavlc(h);
6663 if(ret>=0) hl_decode_mb(h);
6665 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6667 ret = decode_mb_cavlc(h);
6669 if(ret>=0) hl_decode_mb(h);
6674 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6675 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6680 if(++s->mb_x >= s->mb_width){
6682 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6684 if(FIELD_OR_MBAFF_PICTURE) {
6687 if(s->mb_y >= s->mb_height){
6688 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Consuming exactly all bits = clean slice end; otherwise an error. */
6690 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6691 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6695 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6702 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6703 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6704 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6705 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6709 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Generic decode_mb loop (third branch). */
6718 for(;s->mb_y < s->mb_height; s->mb_y++){
6719 for(;s->mb_x < s->mb_width; s->mb_x++){
6720 int ret= decode_mb(h);
6725 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6726 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6731 if(++s->mb_x >= s->mb_width){
6733 if(++s->mb_y >= s->mb_height){
6734 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6735 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6739 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Fixed line: was garbled as "s->?gb" / "s->gb?." in the listing. */
6746 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6747 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6748 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6752 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6759 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6762 return -1; //not reached
/* Parse a picture-timing SEI message: optional CPB/DPB delays, then
 * pic_struct and up to num_clock_ts clock timestamps, all read from the
 * slice-level bitstream reader (s->gb).
 * NOTE(review): sampled excerpt -- some lines (incl. the final return and
 * closing braces) are missing from this listing. */
6765 static int decode_picture_timing(H264Context *h){
6766 MpegEncContext * const s = &h->s;
/* Delays are only present when either HRD parameter set was signalled. */
6767 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6768 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6769 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6771 if(h->sps.pic_struct_present_flag){
6772 unsigned int i, num_clock_ts;
6773 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Reject out-of-range pic_struct values. */
6775 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6778 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6780 for (i = 0 ; i < num_clock_ts ; i++){
6781 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6782 unsigned int full_timestamp_flag;
6783 skip_bits(&s->gb, 2); /* ct_type */
6784 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6785 skip_bits(&s->gb, 5); /* counting_type */
6786 full_timestamp_flag = get_bits(&s->gb, 1);
6787 skip_bits(&s->gb, 1); /* discontinuity_flag */
6788 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6789 skip_bits(&s->gb, 8); /* n_frames */
/* Full timestamp: fixed-width seconds/minutes/hours follow. */
6790 if(full_timestamp_flag){
6791 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6792 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6793 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* Otherwise each component is individually flagged. */
6795 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6796 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6797 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6798 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6799 if(get_bits(&s->gb, 1)) /* hours_flag */
6800 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6804 if(h->sps.time_offset_length > 0)
6805 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an unregistered user-data SEI: copy up to 16+256 bytes, then scan
 * for an x264 version banner to record h->x264_build (used elsewhere for
 * encoder bug workarounds).
 * NOTE(review): sampled excerpt -- trailing lines (skip of remaining bytes,
 * return) are missing from this listing. */
6812 static int decode_unregistered_user_data(H264Context *h, int size){
6813 MpegEncContext * const s = &h->s;
/* First 16 bytes are the UUID; payload text follows at user_data+16. */
6814 uint8_t user_data[16+256];
6820 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6821 user_data[i]= get_bits(&s->gb, 8);
6825 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6826 if(e==1 && build>=0)
6827 h->x264_build= build;
6829 if(s->avctx->debug & FF_DEBUG_BUGS)
6830 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond the local buffer. */
6833 skip_bits(&s->gb, 8);
/* Parse all SEI messages in the current NAL unit. Each message is a
 * (type, size) pair where both fields are coded as runs of 0xFF bytes plus
 * a terminator byte; known types are dispatched, others skipped.
 * NOTE(review): sampled excerpt -- switch head, default case and return are
 * in elided lines. */
6838 static int decode_sei(H264Context *h){
6839 MpegEncContext * const s = &h->s;
/* +16: need at least a type byte and a size byte remaining. */
6841 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* ff_byte accumulation: each 255 byte adds 255 and continues. */
6846 type+= show_bits(&s->gb, 8);
6847 }while(get_bits(&s->gb, 8) == 255);
6851 size+= show_bits(&s->gb, 8);
6852 }while(get_bits(&s->gb, 8) == 255);
6855 case 1: // Picture timing SEI
6856 if(decode_picture_timing(h) < 0)
6860 if(decode_unregistered_user_data(h, size) < 0)
/* Unhandled SEI types: skip the whole payload. */
6864 skip_bits(&s->gb, 8*size);
6867 //FIXME check bits here
6868 align_get_bits(&s->gb);
/* Parse HRD (hypothetical reference decoder) parameters into the SPS:
 * per-CPB bitrate/size entries are skipped, but the delay field lengths
 * needed later by decode_picture_timing() are stored.
 * NOTE(review): sampled excerpt -- the return statement and closing brace
 * are in elided lines. */
6874 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6875 MpegEncContext * const s = &h->s;
6877 cpb_count = get_ue_golomb_31(&s->gb) + 1;
/* cpb_cnt_minus1 must be 0..31 per the syntax; reject larger values. */
6879 if(cpb_count > 32U){
6880 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6884 get_bits(&s->gb, 4); /* bit_rate_scale */
6885 get_bits(&s->gb, 4); /* cpb_size_scale */
6886 for(i=0; i<cpb_count; i++){
6887 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6888 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6889 get_bits1(&s->gb); /* cbr_flag */
6891 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* These lengths are consumed by the picture-timing SEI parser. */
6892 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6893 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6894 sps->time_offset_length = get_bits(&s->gb, 5);
/* Parse VUI (video usability information) from the SPS: aspect ratio,
 * video signal description, timing info, HRD parameters and bitstream
 * restrictions. Fields not needed by the decoder are read and discarded.
 * NOTE(review): sampled excerpt -- some lines (returns, closing braces)
 * are missing from this listing. */
6898 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6899 MpegEncContext * const s = &h->s;
6900 int aspect_ratio_info_present_flag;
6901 unsigned int aspect_ratio_idc;
6903 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6905 if( aspect_ratio_info_present_flag ) {
6906 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow. */
6907 if( aspect_ratio_idc == EXTENDED_SAR ) {
6908 sps->sar.num= get_bits(&s->gb, 16);
6909 sps->sar.den= get_bits(&s->gb, 16);
6910 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6911 sps->sar= pixel_aspect[aspect_ratio_idc];
6913 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6920 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6922 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6923 get_bits1(&s->gb); /* overscan_appropriate_flag */
6926 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6927 get_bits(&s->gb, 3); /* video_format */
6928 get_bits1(&s->gb); /* video_full_range_flag */
6929 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6930 get_bits(&s->gb, 8); /* colour_primaries */
6931 get_bits(&s->gb, 8); /* transfer_characteristics */
6932 get_bits(&s->gb, 8); /* matrix_coefficients */
6936 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6937 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6938 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6941 sps->timing_info_present_flag = get_bits1(&s->gb);
6942 if(sps->timing_info_present_flag){
6943 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6944 sps->time_scale = get_bits_long(&s->gb, 32);
6945 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice (NAL and VCL variants). */
6948 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6949 if(sps->nal_hrd_parameters_present_flag)
6950 if(decode_hrd_parameters(h, sps) < 0)
6952 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6953 if(sps->vcl_hrd_parameters_present_flag)
6954 if(decode_hrd_parameters(h, sps) < 0)
6956 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6957 get_bits1(&s->gb); /* low_delay_hrd_flag */
6958 sps->pic_struct_present_flag = get_bits1(&s->gb);
6960 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6961 if(sps->bitstream_restriction_flag){
6962 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6963 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6964 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6965 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6966 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6967 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6968 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Cap num_reorder_frames to the DPB limit (16). */
6970 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6971 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one scaling list of `size` (16 or 64) entries into `factors`,
 * stored in zig-zag order. If the list is absent, `fallback_list` is used;
 * if present but signalling "use default", `jvt_list` is copied.
 * NOTE(review): sampled excerpt -- the `else`, `if(next)` and `break`
 * lines of the original are elided in this listing. */
6979 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6980 const uint8_t *jvt_list, const uint8_t *fallback_list){
6981 MpegEncContext * const s = &h->s;
6982 int i, last = 8, next = 8;
6983 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6984 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6985 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6987 for(i=0;i<size;i++){
/* Each delta is signed Exp-Golomb, accumulated modulo 256. */
6989 next = (last + get_se_golomb(&s->gb)) & 0xff;
6990 if(!i && !next){ /* matrix not written, we use the preset one */
6991 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* A zero `next` after the first entry repeats the previous value. */
6994 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices (six 4x4 lists, and two 8x8 lists
 * when the 8x8 transform is possible). Fallbacks chain: a PPS falls back to
 * the SPS matrices when the SPS signalled any, otherwise to the JVT
 * defaults; within the set, later lists fall back to the previous one.
 * NOTE(review): sampled excerpt -- closing braces are in elided lines. */
6998 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6999 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7000 MpegEncContext * const s = &h->s;
7001 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7002 const uint8_t *fallback[4] = {
7003 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7004 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7005 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7006 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag. */
7008 if(get_bits1(&s->gb)){
7009 sps->scaling_matrix_present |= is_sps;
7010 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7011 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7012 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7013 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7014 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7015 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7016 if(is_sps || pps->transform_8x8_mode){
7017 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7018 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL into a freshly allocated SPS and
 * install it into h->sps_buffers[sps_id], replacing any previous SPS with
 * that id. Validates id, POC type, reference count, dimensions, cropping.
 * NOTE(review): sampled excerpt -- error-path returns and some closing
 * braces are in elided lines. */
7023 static inline int decode_seq_parameter_set(H264Context *h){
7024 MpegEncContext * const s = &h->s;
7025 int profile_idc, level_idc;
7026 unsigned int sps_id;
7030 profile_idc= get_bits(&s->gb, 8);
7031 get_bits1(&s->gb); //constraint_set0_flag
7032 get_bits1(&s->gb); //constraint_set1_flag
7033 get_bits1(&s->gb); //constraint_set2_flag
7034 get_bits1(&s->gb); //constraint_set3_flag
7035 get_bits(&s->gb, 4); // reserved
7036 level_idc= get_bits(&s->gb, 8);
7037 sps_id= get_ue_golomb_31(&s->gb);
7039 if(sps_id >= MAX_SPS_COUNT) {
7040 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7043 sps= av_mallocz(sizeof(SPS));
7047 sps->profile_idc= profile_idc;
7048 sps->level_idc= level_idc;
/* Flat default scaling matrices (all 16) until/unless parsed below. */
7050 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7051 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7052 sps->scaling_matrix_present = 0;
7054 if(sps->profile_idc >= 100){ //high profile
7055 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7056 if(sps->chroma_format_idc == 3)
7057 get_bits1(&s->gb); //residual_color_transform_flag
7058 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7059 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7060 sps->transform_bypass = get_bits1(&s->gb);
7061 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are always 4:2:0. */
7063 sps->chroma_format_idc= 1;
7066 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7067 sps->poc_type= get_ue_golomb_31(&s->gb);
7069 if(sps->poc_type == 0){ //FIXME #define
7070 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7071 } else if(sps->poc_type == 1){//FIXME #define
7072 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7073 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7074 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7075 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7077 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7078 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7082 for(i=0; i<sps->poc_cycle_length; i++)
7083 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7084 }else if(sps->poc_type != 2){
7085 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7089 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7090 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7091 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7094 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7095 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7096 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb_width/height against integer overflow before the check. */
7097 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7098 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7099 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7103 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7104 if(!sps->frame_mbs_only_flag)
7105 sps->mb_aff= get_bits1(&s->gb);
7109 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7111 #ifndef ALLOW_INTERLACE
7113 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7115 sps->crop= get_bits1(&s->gb);
7117 sps->crop_left = get_ue_golomb(&s->gb);
7118 sps->crop_right = get_ue_golomb(&s->gb);
7119 sps->crop_top = get_ue_golomb(&s->gb);
7120 sps->crop_bottom= get_ue_golomb(&s->gb);
7121 if(sps->crop_left || sps->crop_top){
7122 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7124 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7125 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7131 sps->crop_bottom= 0;
7134 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7135 if( sps->vui_parameters_present_flag )
7136 decode_vui_parameters(h, sps);
7138 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7139 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7140 sps_id, sps->profile_idc, sps->level_idc,
7142 sps->ref_frame_count,
7143 sps->mb_width, sps->mb_height,
7144 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7145 sps->direct_8x8_inference_flag ? "8B8" : "",
7146 sps->crop_left, sps->crop_right,
7147 sps->crop_top, sps->crop_bottom,
7148 sps->vui_parameters_present_flag ? "VUI" : "",
7149 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Replace any previously stored SPS with this id. */
7152 av_free(h->sps_buffers[sps_id]);
7153 h->sps_buffers[sps_id]= sps;
/* Fill pps->chroma_qp_table[t]: for each luma QP 0..51, store the chroma
 * QP obtained by adding the chroma_qp_index_offset `index`, clipping to
 * the valid 0..51 range, and mapping through the chroma_qp[] table.
 * NOTE(review): the storage-class/return-type line and braces of this
 * definition are in elided lines of this sampled listing. */
7161 build_qp_table(PPS *pps, int t, int index)
7164 for(i = 0; i < 52; i++)
7165 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL into a freshly allocated PPS and
 * install it into h->pps_buffers[pps_id]. FMO (slice groups > 1) is
 * recognized but not supported. Also precomputes the chroma QP tables.
 * NOTE(review): sampled excerpt -- error-path returns, the comment
 * delimiters around the FMO syntax table, and some closing braces are in
 * elided lines of this listing. */
7168 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7169 MpegEncContext * const s = &h->s;
7170 unsigned int pps_id= get_ue_golomb(&s->gb);
7173 if(pps_id >= MAX_PPS_COUNT) {
7174 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7178 pps= av_mallocz(sizeof(PPS));
/* The referenced SPS must already have been decoded. */
7181 pps->sps_id= get_ue_golomb_31(&s->gb);
7182 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7183 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7187 pps->cabac= get_bits1(&s->gb);
7188 pps->pic_order_present= get_bits1(&s->gb);
7189 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7190 if(pps->slice_group_count > 1 ){
7191 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7192 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* The table rows below are part of an original C comment quoting the
 * spec's FMO syntax (delimiters are in elided lines). */
7193 switch(pps->mb_slice_group_map_type){
7196 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7197 | run_length[ i ] |1 |ue(v) |
7202 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7204 | top_left_mb[ i ] |1 |ue(v) |
7205 | bottom_right_mb[ i ] |1 |ue(v) |
7213 | slice_group_change_direction_flag |1 |u(1) |
7214 | slice_group_change_rate_minus1 |1 |ue(v) |
7219 | slice_group_id_cnt_minus1 |1 |ue(v) |
7220 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7222 | slice_group_id[ i ] |1 |u(v) |
7227 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7228 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7229 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7230 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7234 pps->weighted_pred= get_bits1(&s->gb);
7235 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7236 pps->init_qp= get_se_golomb(&s->gb) + 26;
7237 pps->init_qs= get_se_golomb(&s->gb) + 26;
7238 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7239 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7240 pps->constrained_intra_pred= get_bits1(&s->gb);
7241 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7243 pps->transform_8x8_mode= 0;
7244 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7245 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7246 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Optional trailing fields (PPS extensions) if bits remain. */
7248 if(get_bits_count(&s->gb) < bit_length){
7249 pps->transform_8x8_mode= get_bits1(&s->gb);
7250 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7251 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7253 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7256 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7257 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7258 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7259 h->pps.chroma_qp_diff= 1;
7261 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7262 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7263 pps_id, pps->sps_id,
7264 pps->cabac ? "CABAC" : "CAVLC",
7265 pps->slice_group_count,
7266 pps->ref_count[0], pps->ref_count[1],
7267 pps->weighted_pred ? "weighted" : "",
7268 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7269 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7270 pps->constrained_intra_pred ? "CONSTR" : "",
7271 pps->redundant_pic_cnt_present ? "REDU" : "",
7272 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with this id. */
7276 av_free(h->pps_buffers[pps_id]);
7277 h->pps_buffers[pps_id]= pps;
7285 * Call decode_slice() for each context.
7287 * @param h h264 master context
7288 * @param context_count number of contexts to execute
7290 static void execute_decode_slices(H264Context *h, int context_count){
7291 MpegEncContext * const s = &h->s;
7292 AVCodecContext * const avctx= s->avctx;
/* NOTE(review): this extraction is missing several original lines here
 * (local declarations of i/hx, and presumably an early return for the
 * VDPAU path after the codec_id check below) — verify against upstream. */
7296 if(avctx->codec_id == CODEC_ID_H264_VDPAU)
7298 if(context_count == 1) {
/* Single context: decode the slice directly on the master context. */
7299 decode_slice(avctx, &h);
7301 for(i = 1; i < context_count; i++) {
/* Propagate the error-recognition setting to each slave thread context
 * and reset its per-slice error counter before parallel execution. */
7302 hx = h->thread_context[i];
7303 hx->s.error_recognition = avctx->error_recognition;
7304 hx->s.error_count = 0;
/* Run decode_slice() over all thread contexts in parallel. */
7307 avctx->execute(avctx, (void *)decode_slice,
7308 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7310 /* pull back stuff from slices to master context */
7311 hx = h->thread_context[context_count - 1];
7312 s->mb_x = hx->s.mb_x;
7313 s->mb_y = hx->s.mb_y;
7314 s->dropable = hx->s.dropable;
7315 s->picture_structure = hx->s.picture_structure;
/* Accumulate the error counts from all slave contexts into the master. */
7316 for(i = 1; i < context_count; i++)
7317 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each one to the
 * appropriate parser (slice / DPA-DPB-DPC / SEI / SPS / PPS), batching
 * slices across thread contexts and flushing them via
 * execute_decode_slices() when max_contexts are queued.
 * NOTE(review): this extraction is missing many original lines (loop
 * heads, declarations of buf_index/nalsize/err, case labels, braces) —
 * comments below describe only what the visible lines establish.
 */
7322 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7323 MpegEncContext * const s = &h->s;
7324 AVCodecContext * const avctx= s->avctx;
7326 H264Context *hx; ///< thread context
7327 int context_count = 0;
7329 h->max_contexts = avctx->thread_count;
7332 for(i=0; i<50; i++){
7333 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Unless decoding in chunked mode, a new call starts a new picture. */
7336 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7337 h->current_slice = 0;
7338 if (!s->first_field)
7339 s->current_picture_ptr= NULL;
7351 if(buf_index >= buf_size) break;
/* AVC (length-prefixed) mode: read the big-endian NAL size prefix. */
7353 for(i = 0; i < h->nal_length_size; i++)
7354 nalsize = (nalsize << 8) | buf[buf_index++];
7355 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7360 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7365 // start code prefix search
7366 for(; buf_index + 3 < buf_size; buf_index++){
7367 // This should always succeed in the first iteration.
7368 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7372 if(buf_index+3 >= buf_size) break;
/* Parse the NAL into the next free thread context. */
7377 hx = h->thread_context[context_count];
7379 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7380 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes of the RBSP.
 * NOTE(review): operand order reads ptr[dst_length - 1] BEFORE checking
 * dst_length > 0, so dst_length==0 reads ptr[-1] — latent out-of-bounds
 * read; the conditions should be swapped (fixed in later upstream). */
7383 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7385 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7387 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7388 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7391 if (h->is_avc && (nalsize != consumed)){
7392 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7396 buf_index += consumed;
/* Skip non-reference NALs when hurry_up / skip_frame requests it. */
7398 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7399 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7404 switch(hx->nal_unit_type){
/* IDR slice: reject a mix of IDR and non-IDR, then flush references. */
7406 if (h->nal_unit_type != NAL_IDR_SLICE) {
7407 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7410 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular slice: single bitstream reader, no data partitioning. */
7412 init_get_bits(&hx->s.gb, ptr, bit_length);
7414 hx->inter_gb_ptr= &hx->s.gb;
7415 hx->s.data_partitioning = 0;
7417 if((err = decode_slice_header(hx, h)))
7420 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Queue the slice for decoding unless redundant or skipped by policy. */
7421 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7422 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7423 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7424 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7425 && avctx->skip_frame < AVDISCARD_ALL){
/* VDPAU path: hand the raw NAL (with a start code) to the hw decoder. */
7426 if(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU){
7427 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7428 ff_vdpau_h264_add_data_chunk(h, start_code, sizeof(start_code));
7429 ff_vdpau_h264_add_data_chunk(h, &buf[buf_index - consumed], consumed );
/* Data partition A: slice header, partitioning enabled. */
7435 init_get_bits(&hx->s.gb, ptr, bit_length);
7437 hx->inter_gb_ptr= NULL;
7438 hx->s.data_partitioning = 1;
7440 err = decode_slice_header(hx, h);
/* Data partition B: intra residual bitstream. */
7443 init_get_bits(&hx->intra_gb, ptr, bit_length);
7444 hx->intra_gb_ptr= &hx->intra_gb;
/* Data partition C: inter residual bitstream; with both partitions
 * present the slice becomes eligible for decoding (same skip policy). */
7447 init_get_bits(&hx->inter_gb, ptr, bit_length);
7448 hx->inter_gb_ptr= &hx->inter_gb;
7450 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7451 && s->context_initialized
7453 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7454 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7455 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7456 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI NAL (presumably — case label missing from extraction). */
7460 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set. */
7464 init_get_bits(&s->gb, ptr, bit_length);
7465 decode_seq_parameter_set(h);
7467 if(s->flags& CODEC_FLAG_LOW_DELAY)
7470 if(avctx->has_b_frames < 2)
7471 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set. */
7474 init_get_bits(&s->gb, ptr, bit_length);
7476 decode_picture_parameter_set(h, bit_length);
7480 case NAL_END_SEQUENCE:
7481 case NAL_END_STREAM:
7482 case NAL_FILLER_DATA:
7484 case NAL_AUXILIARY_SLICE:
7487 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush the batch of queued slices once all contexts are in use. */
7490 if(context_count == h->max_contexts) {
7491 execute_decode_slices(h, context_count);
7496 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7498 /* Slice could not be decoded in parallel mode, copy down
7499 * NAL unit stuff to context 0 and restart. Note that
7500 * rbsp_buffer is not transferred, but since we no longer
7501 * run in parallel mode this should not be an issue. */
7502 h->nal_unit_type = hx->nal_unit_type;
7503 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any slices still queued when the buffer is exhausted. */
7509 execute_decode_slices(h, context_count);
7514 * returns the number of bytes consumed for building the current frame
/**
 * Clamp/sanitize the parser position into a consumed-bytes count.
 * NOTE(review): the final return statement is missing from this
 * extraction — presumably "return pos;" — confirm against upstream.
 */
7516 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7517 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7518 if(pos+10>buf_size) pos=buf_size; // oops ;)
7523 static int decode_frame(AVCodecContext *avctx,
7524 void *data, int *data_size,
7525 const uint8_t *buf, int buf_size)
7527 H264Context *h = avctx->priv_data;
7528 MpegEncContext *s = &h->s;
7529 AVFrame *pict = data;
7532 s->flags= avctx->flags;
7533 s->flags2= avctx->flags2;
7535 /* end of stream, output what is still in the buffers */
7536 if (buf_size == 0) {
7540 //FIXME factorize this with the output code below
7541 out = h->delayed_pic[0];
7543 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7544 if(h->delayed_pic[i]->poc < out->poc){
7545 out = h->delayed_pic[i];
7549 for(i=out_idx; h->delayed_pic[i]; i++)
7550 h->delayed_pic[i] = h->delayed_pic[i+1];
7553 *data_size = sizeof(AVFrame);
7554 *pict= *(AVFrame*)out;
7560 if(h->is_avc && !h->got_avcC) {
7561 int i, cnt, nalsize;
7562 unsigned char *p = avctx->extradata;
7563 if(avctx->extradata_size < 7) {
7564 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7568 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7571 /* sps and pps in the avcC always have length coded with 2 bytes,
7572 so put a fake nal_length_size = 2 while parsing them */
7573 h->nal_length_size = 2;
7574 // Decode sps from avcC
7575 cnt = *(p+5) & 0x1f; // Number of sps
7577 for (i = 0; i < cnt; i++) {
7578 nalsize = AV_RB16(p) + 2;
7579 if(decode_nal_units(h, p, nalsize) < 0) {
7580 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7585 // Decode pps from avcC
7586 cnt = *(p++); // Number of pps
7587 for (i = 0; i < cnt; i++) {
7588 nalsize = AV_RB16(p) + 2;
7589 if(decode_nal_units(h, p, nalsize) != nalsize) {
7590 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7595 // Now store right nal length size, that will be use to parse all other nals
7596 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7597 // Do not reparse avcC
7601 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7602 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7607 buf_index=decode_nal_units(h, buf, buf_size);
7611 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7612 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7613 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7617 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7618 Picture *out = s->current_picture_ptr;
7619 Picture *cur = s->current_picture_ptr;
7620 int i, pics, cross_idr, out_of_order, out_idx;
7624 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7625 s->current_picture_ptr->pict_type= s->pict_type;
7627 if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
7628 ff_vdpau_h264_set_reference_frames(h);
7631 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7632 h->prev_poc_msb= h->poc_msb;
7633 h->prev_poc_lsb= h->poc_lsb;
7635 h->prev_frame_num_offset= h->frame_num_offset;
7636 h->prev_frame_num= h->frame_num;
7638 if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
7639 ff_vdpau_h264_picture_complete(h);
7642 * FIXME: Error handling code does not seem to support interlaced
7643 * when slices span multiple rows
7644 * The ff_er_add_slice calls don't work right for bottom
7645 * fields; they cause massive erroneous error concealing
7646 * Error marking covers both fields (top and bottom).
7647 * This causes a mismatched s->error_count
7648 * and a bad error table. Further, the error count goes to
7649 * INT_MAX when called for bottom field, because mb_y is
7650 * past end by one (callers fault) and resync_mb_y != 0
7651 * causes problems for the first MB line, too.
7658 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7659 /* Wait for second field. */
7663 cur->repeat_pict = 0;
7665 /* Signal interlacing information externally. */
7666 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7667 if(h->sps.pic_struct_present_flag){
7668 switch (h->sei_pic_struct)
7670 case SEI_PIC_STRUCT_FRAME:
7671 cur->interlaced_frame = 0;
7673 case SEI_PIC_STRUCT_TOP_FIELD:
7674 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7675 case SEI_PIC_STRUCT_TOP_BOTTOM:
7676 case SEI_PIC_STRUCT_BOTTOM_TOP:
7677 cur->interlaced_frame = 1;
7679 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7680 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7681 // Signal the possibility of telecined film externally (pic_struct 5,6)
7682 // From these hints, let the applications decide if they apply deinterlacing.
7683 cur->repeat_pict = 1;
7684 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7686 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7687 // Force progressive here, as doubling interlaced frame is a bad idea.
7688 cur->interlaced_frame = 0;
7689 cur->repeat_pict = 2;
7691 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7692 cur->interlaced_frame = 0;
7693 cur->repeat_pict = 4;
7697 /* Derive interlacing flag from used decoding process. */
7698 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7701 if (cur->field_poc[0] != cur->field_poc[1]){
7702 /* Derive top_field_first from field pocs. */
7703 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7705 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7706 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7707 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7708 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7709 cur->top_field_first = 1;
7711 cur->top_field_first = 0;
7713 /* Most likely progressive */
7714 cur->top_field_first = 0;
7718 //FIXME do something with unavailable reference frames
7720 /* Sort B-frames into display order */
7722 if(h->sps.bitstream_restriction_flag
7723 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7724 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7728 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7729 && !h->sps.bitstream_restriction_flag){
7730 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7735 while(h->delayed_pic[pics]) pics++;
7737 assert(pics <= MAX_DELAYED_PIC_COUNT);
7739 h->delayed_pic[pics++] = cur;
7740 if(cur->reference == 0)
7741 cur->reference = DELAYED_PIC_REF;
7743 out = h->delayed_pic[0];
7745 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7746 if(h->delayed_pic[i]->poc < out->poc){
7747 out = h->delayed_pic[i];
7750 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7752 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7754 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7756 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7758 ((!cross_idr && out->poc > h->outputed_poc + 2)
7759 || cur->pict_type == FF_B_TYPE)))
7762 s->avctx->has_b_frames++;
7765 if(out_of_order || pics > s->avctx->has_b_frames){
7766 out->reference &= ~DELAYED_PIC_REF;
7767 for(i=out_idx; h->delayed_pic[i]; i++)
7768 h->delayed_pic[i] = h->delayed_pic[i+1];
7770 if(!out_of_order && pics > s->avctx->has_b_frames){
7771 *data_size = sizeof(AVFrame);
7773 h->outputed_poc = out->poc;
7774 *pict= *(AVFrame*)out;
7776 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7781 assert(pict->data[0] || !*data_size);
7782 ff_print_debug_info(s, pict);
7783 //printf("out %d\n", (int)pict->data[0]);
7786 /* Return the Picture timestamp as the frame number */
7787 /* we subtract 1 because it is added on utils.c */
7788 avctx->frame_number = s->picture_number - 1;
7790 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: a neighbor counts as available only if it lies inside the
 * picture AND belongs to the same slice (slice_table match).
 * Index map (from the offsets used): 0=top-left, 1=top, 2=top-right,
 * 3=left; 4 and 5 are constants (see FIXMEs).
 * NOTE(review): lines are missing from this extraction between indices
 * 2 and 3 (presumably the s->mb_y guard for the top row) — verify.
 */
7793 static inline void fill_mb_avail(H264Context *h){
7794 MpegEncContext * const s = &h->s;
7795 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7798 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7799 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7800 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7806 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7807 h->mb_avail[4]= 1; //FIXME move out
7808 h->mb_avail[5]= 0; //FIXME move out
7816 #define SIZE (COUNT*40)
7822 // int int_temp[10000];
7824 AVCodecContext avctx;
7826 dsputil_init(&dsp, &avctx);
7828 init_put_bits(&pb, temp, SIZE);
7829 printf("testing unsigned exp golomb\n");
7830 for(i=0; i<COUNT; i++){
7832 set_ue_golomb(&pb, i);
7833 STOP_TIMER("set_ue_golomb");
7835 flush_put_bits(&pb);
7837 init_get_bits(&gb, temp, 8*SIZE);
7838 for(i=0; i<COUNT; i++){
7841 s= show_bits(&gb, 24);
7844 j= get_ue_golomb(&gb);
7846 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7849 STOP_TIMER("get_ue_golomb");
7853 init_put_bits(&pb, temp, SIZE);
7854 printf("testing signed exp golomb\n");
7855 for(i=0; i<COUNT; i++){
7857 set_se_golomb(&pb, i - COUNT/2);
7858 STOP_TIMER("set_se_golomb");
7860 flush_put_bits(&pb);
7862 init_get_bits(&gb, temp, 8*SIZE);
7863 for(i=0; i<COUNT; i++){
7866 s= show_bits(&gb, 24);
7869 j= get_se_golomb(&gb);
7870 if(j != i - COUNT/2){
7871 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7874 STOP_TIMER("get_se_golomb");
7878 printf("testing 4x4 (I)DCT\n");
7881 uint8_t src[16], ref[16];
7882 uint64_t error= 0, max_error=0;
7884 for(i=0; i<COUNT; i++){
7886 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7887 for(j=0; j<16; j++){
7888 ref[j]= random()%255;
7889 src[j]= random()%255;
7892 h264_diff_dct_c(block, src, ref, 4);
7895 for(j=0; j<16; j++){
7896 // printf("%d ", block[j]);
7897 block[j]= block[j]*4;
7898 if(j&1) block[j]= (block[j]*4 + 2)/5;
7899 if(j&4) block[j]= (block[j]*4 + 2)/5;
7903 s->dsp.h264_idct_add(ref, block, 4);
7904 /* for(j=0; j<16; j++){
7905 printf("%d ", ref[j]);
7909 for(j=0; j<16; j++){
7910 int diff= FFABS(src[j] - ref[j]);
7913 max_error= FFMAX(max_error, diff);
7916 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7917 printf("testing quantizer\n");
7918 for(qp=0; qp<52; qp++){
7920 src1_block[i]= src2_block[i]= random()%255;
7923 printf("Testing NAL layer\n");
7925 uint8_t bitstream[COUNT];
7926 uint8_t nal[COUNT*2];
7928 memset(&h, 0, sizeof(H264Context));
7930 for(i=0; i<COUNT; i++){
7938 for(j=0; j<COUNT; j++){
7939 bitstream[j]= (random() % 255) + 1;
7942 for(j=0; j<zeros; j++){
7943 int pos= random() % COUNT;
7944 while(bitstream[pos] == 0){
7953 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7955 printf("encoding failed\n");
7959 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7963 if(out_length != COUNT){
7964 printf("incorrect length %d %d\n", out_length, COUNT);
7968 if(consumed != nal_length){
7969 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7973 if(memcmp(bitstream, out, COUNT)){
7974 printf("mismatch\n");
7980 printf("Testing RBSP\n");
7988 static av_cold int decode_end(AVCodecContext *avctx)
7990 H264Context *h = avctx->priv_data;
7991 MpegEncContext *s = &h->s;
7994 av_freep(&h->rbsp_buffer[0]);
7995 av_freep(&h->rbsp_buffer[1]);
7996 free_tables(h); //FIXME cleanup init stuff perhaps
7998 for(i = 0; i < MAX_SPS_COUNT; i++)
7999 av_freep(h->sps_buffers + i);
8001 for(i = 0; i < MAX_PPS_COUNT; i++)
8002 av_freep(h->pps_buffers + i);
8006 // memset(h, 0, sizeof(H264Context));
/* Registration table for the software H.264 decoder.
 * NOTE(review): several initializer fields (name, type, codec id,
 * init/close/decode callbacks, closing brace) are missing from this
 * extraction — only size, capabilities and long_name are visible. */
8012 AVCodec h264_decoder = {
8016 sizeof(H264Context),
8021 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8023 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8026 #ifdef CONFIG_H264_VDPAU_DECODER
/* Registration table for the VDPAU-accelerated H.264 decoder; note the
 * extra CODEC_CAP_HWACCEL_VDPAU capability versus the software decoder.
 * NOTE(review): some initializer fields and the closing brace are
 * missing from this extraction. */
8027 AVCodec h264_vdpau_decoder = {
8030 CODEC_ID_H264_VDPAU,
8031 sizeof(H264Context),
8036 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8038 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8042 #ifdef CONFIG_SVQ3_DECODER