2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
/* CAVLC VLC tables: one coeff_token table per nC range, plus chroma-DC,
 * total_zeros, and run tables. The *_tables arrays are static storage that
 * the VLC structs are pointed into at init time (sizes listed alongside). */
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
/* Forward declarations for helpers defined later in this file. */
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into one 32-bit word.
 * NOTE(review): two unconditional return statements follow each other; the
 * original source selects between them with an endianness #if/#else/#endif
 * that is missing from this excerpt (line numbers 85/87/89 are absent) —
 * confirm against upstream before editing. */
84 static av_always_inline uint32_t pack16to32(int a, int b){
86 return (b&0xFFFF) + (a<<16);
88 return (a&0xFFFF) + (b<<16);
/* rem6[q] == q % 6 and div6[q] == q / 6 for the 52 luma QP values,
 * precomputed as lookup tables to avoid division. */
92 static const uint8_t rem6[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
96 static const uint8_t div6[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Index orderings for the left-neighbour blocks; variant selected in
 * fill_caches() depending on MBAFF field/frame pairing (initializers for
 * the four rows are not visible in this excerpt). */
100 static const uint8_t left_block_options[4][8]={
/* CAVLC level-code lookup table, indexed by suffix length and bitstream
 * prefix (LEVEL_TAB_BITS bits); [..][..][0]=level, [..][..][1]=bits used. */
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fills the per-macroblock neighbour caches from the top/left/topleft/
 * topright neighbours: intra4x4 prediction modes, non_zero_count, cbp,
 * motion vectors + reference indices (and mvd / direct flags for CABAC
 * B-frames). The cached values are what the prediction and deblocking
 * code reads instead of touching the frame-wide arrays directly.
 * @param h         decoder context
 * @param mb_type   type of the current macroblock
 * @param for_deblock  nonzero when caches are filled for the loop filter
 *                     (uses a relaxed slice-boundary test, < 0xFFFF, instead
 *                     of same-slice equality)
 * NOTE(review): this excerpt is missing interior lines (the embedded
 * original line numbers skip), so several if/else bodies and closing
 * braces shown here are incomplete — verify against upstream h264.c.
 */
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour addresses: straight up/left in mb units. */
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
/* MBAFF: neighbours are addressed via the frame-ordered MB *pair*, then
 * adjusted by the field/frame coding of each neighbour pair. */
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
164 left_block= left_block_options[2 - bottom];
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbour counts as available if its slice_table
 * entry is any valid slice (< 0xFFFF), not only the current slice. */
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: a neighbour is available only if it is in the same slice. */
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra: compute per-4x4-block sample-availability bitmasks, masking out
 * neighbours that are missing or (with constrained_intra_pred) inter. */
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: pull the neighbouring 4x4 prediction modes into the cache;
 * unavailable neighbours get a fallback value (pred; its computation is
 * not visible in this excerpt). */
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
254 if(!(top_type & type_mask))
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
270 if(!(left_type[i] & type_mask))
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non_zero_count cache: copy neighbour nnz values; missing neighbours get
 * 0 for CABAC inter, else 64. */
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of top/left neighbours (CABAC context derivation). */
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter / direct: fill mv_cache and ref_cache from the four neighbours. */
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
/* topleft/topright corners (single 4x4 block each). */
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: cache the motion vector differences of the neighbours. */
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flags of neighbours (per 8x8 block). */
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame/field rescale: MAP_MVS applies MAP_F2F to the ten cached
 * neighbour positions; the two MAP_F2F variants below convert a
 * field-coded neighbour's ref/mv to frame units and vice versa
 * (the surrounding #define MAP_MVS / #undef lines are not visible here). */
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for transform-size flag context. */
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the bottom-row and right-column intra4x4 prediction modes from the
 * cache back into the per-MB intra4x4_pred_mode array, where subsequent
 * macroblocks will read them as top/left neighbour modes.
 * (Closing brace, original line 559, is not visible in this excerpt.)
 */
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
/* [0..3]: right column (cache x==7, y 1..4); [4..6]: bottom row. */
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
/* Remap tables indexed by the current mode: -1 means the mode needs the
 * missing neighbour and is invalid; other entries are the substitute mode. */
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top row unavailable: remap each of the four top 4x4 modes.
 * NOTE(review): the surrounding for-loop and the error-return on
 * status<0 (original lines 571, 573, 575-576, 578-580) are missing
 * from this excerpt. */
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left column: one availability bit per 4x4 row (mask table below). */
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
/* Remap tables for 16x16/chroma DC modes; -1 marks an invalid combination.
 * NOTE(review): the mode range check, the remapping assignments and the
 * error returns (e.g. original lines 607-608, 610-612, 614-620, 625-631)
 * are missing from this excerpt. */
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/* 0x8080: availability bits for the two left halves (MBAFF can make only
 * one half available — the "mad cow" case below). */
621 if((h->left_samples_available&0x8080) != 0x8080){
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
/* Prediction is the minimum of the left and top neighbour modes. */
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
/* Negative means a neighbour was unavailable -> fall back to DC.
 * NOTE(review): the "return min;" for the normal case (original line 647)
 * is not visible in this excerpt. */
646 if(min<0) return DC_PRED;
/**
 * Writes the edge non-zero-count values from the cache back into the
 * per-MB non_zero_count array for use as neighbour data by later MBs.
 * Luma: [0..3] right column, [4..6] bottom row; chroma: [7..12].
 */
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
/* NOTE(review): the computation of i from left/top (original lines
 * 678-679) and the final return (683-684) are missing from this excerpt;
 * i<64 distinguishes "both neighbours available" (sum is averaged,
 * rounding up) — confirm against upstream. */
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "C" (top-right, falling back to top-left) motion vector used
 * as the diagonal candidate for MV prediction, returning its reference
 * index. Writes *C to point at the chosen cached MV. MBAFF needs special
 * handling because field/frame neighbours cannot be mapped consistently
 * into the cache, so the MV is fetched from the picture-wide arrays and
 * rescaled via SET_DIAG_MV.
 * NOTE(review): several lines are missing from this excerpt (e.g. the
 * FRAME_MBAFF guard around the MBAFF section, #undef SET_DIAG_MV, and some
 * returns/braces) — verify against upstream before modifying.
 */
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch cache slot scan8[0]-2 holds the fetched/rescaled MV. */
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV(MV_OP, REF_OP, X4, Y4): look up the MB at 4x4 position
 * (X4,Y4); if it uses 'list', copy its MV into the scratch slot with the
 * y component rescaled by MV_OP and return its ref index adjusted by
 * REF_OP; otherwise return LIST_NOT_USED. */
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* Frame MB, field neighbour above: field MV -> frame units (*2, ref>>1). */
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF fallback: use the cached top-right MV, else top-left. */
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
/* A = left neighbour, B = top neighbour, C = diagonal (via
 * fetch_diagonal_mv). Median prediction per H.264 8.4.1.3.
 * NOTE(review): interior lines are missing from this excerpt (the
 * declarations of C/mv, and the single-match/else branches around
 * original lines 761-769 and 777-795) — verify before editing. */
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
/* Two or three neighbours share the target ref: plain median. */
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
780 }else if(top_ref==ref){
/* Only the left neighbour exists at all: use A directly; otherwise median. */
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Top 16x8 partition: prefer the top neighbour (B) if it has the same ref;
 * bottom partition: prefer the left neighbour (A). The shortcut-return
 * bodies and the n==0 branch structure (original lines 807, 812-818,
 * 823-829) are missing from this excerpt. */
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Fallback: general median prediction over the full 16-wide partition. */
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Left 8x16 partition: prefer the left neighbour (A) if refs match;
 * right partition: prefer the diagonal candidate C. The shortcut-return
 * bodies (original lines 842, 847-856, 862-868) are missing from this
 * excerpt. */
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
/* Fallback: general median prediction (part_width 2 = 8 pixels). */
869 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV for a P-skip macroblock: zero MV when either the top or
 * left neighbour is unavailable, or when either has ref 0 with a zero MV
 * (H.264 P_Skip condition); otherwise falls back to the normal 16x16
 * median prediction with ref 0. (The zero-MV assignment and return between
 * the condition and the fallback, original lines 881-885, are not visible
 * in this excerpt.)
 */
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
886 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factor for ref-list-0 entry i
 * (H.264 8.4.1.2.3): tb/td ratio in fixed point, clipped to [-1024,1023].
 * NOTE(review): the branch body for td==0 / long-term refs (original lines
 * 895-896, presumably "return 256;" and the else) is missing from this
 * excerpt — confirm against upstream.
 */
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
897 int tb = av_clip(poc - poc0, -128, 127);
/* tx ~= 16384/td with rounding; result = clip((tb*tx+32)>>6). */
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills h->dist_scale_factor (and, for MBAFF, the per-field
 * dist_scale_factor_field tables) used by temporal direct prediction.
 * NOTE(review): the conditional that separates the field loop from the
 * frame path (original lines 907, 913-914) is missing from this excerpt,
 * which is why two declarations of poc/poc1 appear — the field loop is
 * guarded by FRAME_MBAFF upstream; confirm before editing.
 */
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
/* Field entries live at offset 16 in ref_list (see i+16 below). */
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the mapping from the co-located picture's (ref_list[1][0])
 * reference indices to the current slice's list-0/1 indices, matching
 * references by POC (frame_num*4 + reference field bits). Used by
 * temporal direct mode.
 * @param map      output table; entries [16..] are the MBAFF/field ones
 * @param field    current field parity (MBAFF path)
 * @param colfield field parity of the co-located picture
 * @param mbafi    nonzero for the MBAFF field-pair variant
 * NOTE(review): interior lines (e.g. the frame/field poc adjustment around
 * original lines 934-936 and loop-closing braces) are missing from this
 * excerpt.
 */
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
945 map[list][old_ref] = cur_ref;
/* Records the current picture's reference lists (count and the
 * 4*frame_num+parity keys) into the Picture itself, then — for B
 * slices using temporal direct mode — precomputes the colocated
 * reference maps via fill_colmap().
 * NOTE(review): numbered listing with dropped lines (loop variable
 * declarations, early return, closing braces); code kept as-is. */
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
/* sidx selects the field slot (top/bottom) being written */
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* progressive frames: both field slots share the same lists */
967 if(s->picture_structure == PICT_FRAME){
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
/* maps only needed for temporal direct in B slices */
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/* B-direct motion prediction for one macroblock: fills mv_cache,
 * ref_cache and sub_mb_type for direct-coded (sub-)blocks, and adjusts
 * *mb_type. Two modes: spatial direct (neighbor-median MVs with the
 * colocated-zero-MV shortcut) and temporal direct (colocated MVs scaled
 * by dist_scale_factor). Heavy interlace/MBAFF special-casing to locate
 * the correct colocated macroblock.
 * NOTE(review): this is a numbered listing with many interior lines
 * dropped (declarations, else-branches, closing braces); code text is
 * kept byte-identical, comments only added. */
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
996 assert(h->ref_list[1][0].reference&3);
998 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Locate the colocated MB in the list-1 reference, converting between
 * frame and field addressing as needed. */
1000 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1001 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1002 int cur_poc = s->current_picture_ptr->poc;
1003 int *col_poc = h->ref_list[1]->field_poc;
/* pick the colocated field temporally closest to the current picture */
1004 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1005 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1007 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1008 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1009 mb_xy += s->mb_stride*fieldoff;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1014 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1016 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1021 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1026 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1027 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1029 }else{ // AFR/FR -> AFR/FR
1032 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1033 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1037 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1038 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1042 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1043 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* pointers into the colocated picture's MVs and reference indices */
1048 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1049 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1050 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1051 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1054 l1ref0 += h->b8_stride;
1055 l1ref1 += h->b8_stride;
1056 l1mv0 += 2*b4_stride;
1057 l1mv1 += 2*b4_stride;
/* ---- spatial direct mode ---- */
1061 if(h->direct_spatial_mv_pred){
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1068 /* ref = min(neighbors) */
1069 for(list=0; list<2; list++){
1070 int refa = h->ref_cache[list][scan8[0] - 1];
1071 int refb = h->ref_cache[list][scan8[0] - 8];
1072 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1073 if(refc == PART_NOT_AVAILABLE)
1074 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned min treats negative (unavailable) refs as huge */
1075 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1080 if(ref[0] < 0 && ref[1] < 0){
1081 ref[0] = ref[1] = 0;
1082 mv[0][0] = mv[0][1] =
1083 mv[1][0] = mv[1][1] = 0;
1085 for(list=0; list<2; list++){
1087 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1089 mv[list][0] = mv[list][1] = 0;
/* drop the unused list from the (sub-)MB type */
1095 *mb_type &= ~MB_TYPE_L1;
1096 sub_mb_type &= ~MB_TYPE_L1;
1097 }else if(ref[0] < 0){
1099 *mb_type &= ~MB_TYPE_L0;
1100 sub_mb_type &= ~MB_TYPE_L0;
/* interlace mismatch with colocated: per-8x8 handling */
1103 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1104 for(i8=0; i8<4; i8++){
1107 int xy8 = x8+y8*b8_stride;
1108 int xy4 = 3*x8+y8*b4_stride;
1111 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1113 h->sub_mb_type[i8] = sub_mb_type;
1115 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1116 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated-zero shortcut: tiny colocated MV to ref 0 => MV forced 0 */
1117 if(!IS_INTRA(mb_type_col[y8])
1118 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1119 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1121 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1128 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1129 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1131 }else if(IS_16X16(*mb_type)){
1134 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1135 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1136 if(!IS_INTRA(mb_type_col[0])
1137 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1138 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1139 && (h->x264_build>33 || !h->x264_build)))){
1141 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 b= pack16to32(mv[1][0],mv[1][1]);
1148 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* generic 8x8 spatial-direct path */
1151 for(i8=0; i8<4; i8++){
1152 const int x8 = i8&1;
1153 const int y8 = i8>>1;
1155 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1157 h->sub_mb_type[i8] = sub_mb_type;
1159 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1160 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1161 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1162 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1165 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1166 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1167 && (h->x264_build>33 || !h->x264_build)))){
1168 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1169 if(IS_SUB_8X8(sub_mb_type)){
1170 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1171 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1173 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 for(i4=0; i4<4; i4++){
1179 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1180 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1182 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1184 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct mode: scale colocated MVs ---- */
1190 }else{ /* direct temporal mv pred */
1191 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1192 const int *dist_scale_factor = h->dist_scale_factor;
1195 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1196 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1197 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1198 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1200 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1203 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
1205 int y_shift = 2*!IS_INTERLACED(*mb_type);
1207 for(i8=0; i8<4; i8++){
1208 const int x8 = i8&1;
1209 const int y8 = i8>>1;
1211 const int16_t (*l1mv)[2]= l1mv0;
1213 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1215 h->sub_mb_type[i8] = sub_mb_type;
1217 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col[y8])){
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 ref0 = l1ref0[x8 + y8*b8_stride];
1227 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1229 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1232 scale = dist_scale_factor[ref0];
1233 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1236 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
/* vertical MV rescaled between field and frame units */
1237 int my_col = (mv_col[1]<<y_shift)/2;
1238 int mx = (scale * mv_col[0] + 128) >> 8;
1239 int my = (scale * my_col + 128) >> 8;
1240 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1241 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1247 /* one-to-one mv scaling */
1249 if(IS_16X16(*mb_type)){
1252 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1253 if(IS_INTRA(mb_type_col[0])){
1256 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1257 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1258 const int scale = dist_scale_factor[ref0];
1259 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1261 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1262 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
/* L1 MV is the L0 MV minus the colocated MV (spec eq. for mvL1) */
1264 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1265 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1267 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1271 for(i8=0; i8<4; i8++){
1272 const int x8 = i8&1;
1273 const int y8 = i8>>1;
1275 const int16_t (*l1mv)[2]= l1mv0;
1277 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1279 h->sub_mb_type[i8] = sub_mb_type;
1280 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 if(IS_INTRA(mb_type_col[0])){
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1283 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1290 ref0 = map_col_to_list0[0][ref0];
1292 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1295 scale = dist_scale_factor[ref0];
1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1298 if(IS_SUB_8X8(sub_mb_type)){
1299 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1300 int mx = (scale * mv_col[0] + 128) >> 8;
1301 int my = (scale * mv_col[1] + 128) >> 8;
1302 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1303 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1305 for(i4=0; i4<4; i4++){
1306 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1307 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1308 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1309 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1310 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1311 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the per-MB caches (mv_cache / ref_cache / mvd_cache /
 * sub_mb_type direct flags) back into the frame-wide arrays of
 * current_picture so later MBs and the loop filter can read them.
 * NOTE(review): numbered listing with dropped lines (inner y-loops,
 * continue statements, closing braces); code kept byte-identical. */
1318 static inline void write_back_motion(H264Context *h, int mb_type){
1319 MpegEncContext * const s = &h->s;
/* b_xy / b8_xy: this MB's origin in 4x4 and 8x8 block units */
1320 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1321 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1324 if(!USES_LIST(mb_type, 0))
1325 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1327 for(list=0; list<h->list_count; list++){
1329 if(!USES_LIST(mb_type, list))
/* two 64-bit stores copy a whole row of four 4x4-block MVs */
1333 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1336 if( h->pps.cabac ) {
1337 if(IS_SKIP(mb_type))
1338 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1341 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1347 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1348 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1349 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1350 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1351 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct flags needed by CABAC context of later B MBs */
1355 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1356 if(IS_8X8(mb_type)){
1357 uint8_t *direct_table = &h->direct_table[b8_xy];
1358 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1359 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1360 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/* Parses one NAL unit header and un-escapes its payload: removes the
 * Annex-B emulation-prevention bytes (00 00 03 -> 00 00) into an
 * internal rbsp_buffer. Returns a pointer to the clean RBSP (or src
 * itself when no escapes were found), sets *dst_length and *consumed.
 * NOTE(review): numbered listing with dropped lines (loop bodies,
 * returns, closing braces); code kept byte-identical. */
1365 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1370 // src[0]&0x80; //forbidden bit
1371 h->nal_ref_idc= src[0]>>5;
1372 h->nal_unit_type= src[0]&0x1F;
1376 for(i=0; i<length; i++)
1377 printf("%2X ", src[i]);
/* fast scan for a 00 00 (potential escape/startcode) using word loads */
1380 #if HAVE_FAST_UNALIGNED
1381 # if HAVE_FAST_64BIT
1383 for(i=0; i+1<length; i+=9){
1384 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1387 for(i=0; i+1<length; i+=5){
1388 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1391 if(i>0 && !src[i]) i--;
/* byte-wise fallback scan */
1395 for(i=0; i+1<length; i+=2){
1396 if(src[i]) continue;
1397 if(i>0 && src[i-1]==0) i--;
1399 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1401 /* startcode, so we must be past the end */
1409 if(i>=length-1){ //no escaped 0
1410 *dst_length= length;
1411 *consumed= length+1; //+1 for the header
1415 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1416 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1417 dst= h->rbsp_buffer[bufidx];
1423 //printf("decoding esc\n");
/* copy the escape-free prefix, then strip 00 00 03 sequences */
1424 memcpy(dst, src, i);
1427 //remove escapes (very rare 1:2^22)
1429 dst[di++]= src[si++];
1430 dst[di++]= src[si++];
1431 }else if(src[si]==0 && src[si+1]==0){
1432 if(src[si+2]==3){ //escape
1437 }else //next start code
1441 dst[di++]= src[si++];
1444 dst[di++]= src[si++];
1447 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1450 *consumed= si + 1;//+1 for the header
1451 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
/* Identifies the rbsp_stop_one_bit at the end of a NAL unit.
 * NOTE(review): body almost entirely dropped by the numbered listing;
 * only the signature and a trace line survive — kept byte-identical. */
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1469 * IDCT transforms the 16 dc values and dequantizes them.
1470 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform + dequant of the 16 luma DC values
 * (Intra16x16 mode). Two passes of butterflies: columns into temp[],
 * then rows written back with (x*qmul+128)>>8 dequant rounding.
 * The DC values sit at stride-16 spacing inside the residual block
 * array, hence the x_offset/y_offset tables.
 * NOTE(review): numbered listing with dropped lines (i/stride
 * declarations, temp[] stores, loop headers); code kept byte-identical. */
1472 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1479 //memset(block, 64, 2*256);
/* vertical butterflies */
1482 const int offset= y_offset[i];
1483 const int z0= block[offset+stride*0] + block[offset+stride*4];
1484 const int z1= block[offset+stride*0] - block[offset+stride*4];
1485 const int z2= block[offset+stride*1] - block[offset+stride*5];
1486 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* horizontal butterflies + dequant */
1495 const int offset= x_offset[i];
1496 const int z0= temp[4*0+i] + temp[4*2+i];
1497 const int z1= temp[4*0+i] - temp[4*2+i];
1498 const int z2= temp[4*1+i] - temp[4*3+i];
1499 const int z3= temp[4*1+i] + temp[4*3+i];
1501 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1502 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1503 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1504 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1510 * DCT transforms the 16 dc values.
1511 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder
 * counterpart of h264_luma_dc_dequant_idct_c); outputs halved (>>1).
 * NOTE(review): numbered listing with dropped lines (declarations,
 * temp[] stores, loop headers, closing braces); code kept as-is. */
1513 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1514 // const int qmul= dequant_coeff[qp][0];
1516 int temp[16]; //FIXME check if this is a good idea
1517 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1518 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* vertical butterflies */
1521 const int offset= y_offset[i];
1522 const int z0= block[offset+stride*0] + block[offset+stride*4];
1523 const int z1= block[offset+stride*0] - block[offset+stride*4];
1524 const int z2= block[offset+stride*1] - block[offset+stride*5];
1525 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* horizontal butterflies, result scaled by 1/2 */
1534 const int offset= x_offset[i];
1535 const int z0= temp[4*0+i] + temp[4*2+i];
1536 const int z1= temp[4*0+i] - temp[4*2+i];
1537 const int z2= temp[4*1+i] - temp[4*3+i];
1538 const int z3= temp[4*1+i] + temp[4*3+i];
1540 block[stride*0 +offset]= (z0 + z3)>>1;
1541 block[stride*2 +offset]= (z1 + z2)>>1;
1542 block[stride*8 +offset]= (z1 - z2)>>1;
1543 block[stride*10+offset]= (z0 - z3)>>1;
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1573 static void chroma_dc_dct_c(DCTELEM *block){
1574 const int stride= 16*2;
1575 const int xStride= 16;
1578 a= block[stride*0 + xStride*0];
1579 b= block[stride*0 + xStride*1];
1580 c= block[stride*1 + xStride*0];
1581 d= block[stride*1 + xStride*1];
1588 block[stride*0 + xStride*0]= (a+c);
1589 block[stride*0 + xStride*1]= (e+b);
1590 block[stride*1 + xStride*0]= (a-c);
1591 block[stride*1 + xStride*1]= (e-b);
1596 * gets the chroma qp.
1598 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1599 return h->pps.chroma_qp_table[t][qscale];
/* Motion compensation for one partition in one direction (list):
 * quarter-pel luma via qpix_op, eighth-pel chroma via chroma_op, with
 * edge emulation when the MV points (partly) outside the picture.
 * @param square nonzero when one qpix_op call covers the partition;
 *               otherwise a second call at +delta is made
 * NOTE(review): numbered listing with dropped lines (emu flag setup,
 * closing braces); code kept byte-identical. */
1602 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1603 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1604 int src_x_offset, int src_y_offset,
1605 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1606 MpegEncContext * const s = &h->s;
/* mx/my in quarter-pel units, including the partition offset */
1607 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1608 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1609 const int luma_xy= (mx&3) + ((my&3)<<2);
1610 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1611 uint8_t * src_cb, * src_cr;
1612 int extra_width= h->emu_edge_width;
1613 int extra_height= h->emu_edge_height;
1615 const int full_mx= mx>>2;
1616 const int full_my= my>>2;
1617 const int pic_width = 16*s->mb_width;
1618 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* sub-pel interpolation reads 3 extra pixels on each side */
1620 if(mx&7) extra_width -= 3;
1621 if(my&7) extra_height -= 3;
1623 if( full_mx < 0-extra_width
1624 || full_my < 0-extra_height
1625 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1626 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1627 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1628 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1632 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1634 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1637 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1640 // chroma offset when predicting from a field of opposite parity
1641 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1642 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1644 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1648 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1649 src_cb= s->edge_emu_buffer;
1651 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cr= s->edge_emu_buffer;
1657 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted (standard) MC for one partition: list-0 prediction with
 * the "put" ops; when list 1 is also used, the second prediction is
 * blended in by switching to the "avg" ops.
 * NOTE(review): numbered listing with dropped lines (if(list0/list1),
 * qpix_op= qpix_avg, closing braces); code kept byte-identical. */
1660 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1661 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1662 int x_offset, int y_offset,
1663 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1664 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1665 int list0, int list1){
1666 MpegEncContext * const s = &h->s;
1667 qpel_mc_func *qpix_op= qpix_put;
1668 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to the partition, offsets in chroma units */
1670 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1671 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1672 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1673 x_offset += 8*s->mb_x;
1674 y_offset += 8*(s->mb_y >> MB_FIELD);
1677 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
/* second list averages on top of the first prediction */
1683 chroma_op= chroma_avg;
1687 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1688 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1689 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1690 qpix_op, chroma_op);
/* Weighted-prediction MC for one partition. Bi-directional case:
 * predict each list into separate buffers (dest / obmc_scratchpad)
 * and combine with biweight ops — implicit weights (use_weight==2)
 * or explicit per-ref weights/offsets. Uni-directional case: predict
 * then apply the single-list weight in place.
 * NOTE(review): numbered listing with dropped lines (if(list0&&list1),
 * else branches, closing braces); code kept byte-identical. */
1694 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1699 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1700 int list0, int list1){
1701 MpegEncContext * const s = &h->s;
1703 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1704 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1705 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1706 x_offset += 8*s->mb_x;
1707 y_offset += 8*(s->mb_y >> MB_FIELD);
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb = s->obmc_scratchpad;
1713 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1714 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1715 int refn0 = h->ref_cache[0][ scan8[n] ];
1716 int refn1 = h->ref_cache[1][ scan8[n] ];
1718 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1719 dest_y, dest_cb, dest_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1721 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1722 tmp_y, tmp_cb, tmp_cr,
1723 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights from POC distances, denom fixed at 5 */
1725 if(h->use_weight == 2){
1726 int weight0 = h->implicit_weight[refn0][refn1];
1727 int weight1 = 64 - weight0;
1728 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-ref weight and summed offsets */
1732 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1734 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1737 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1739 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1740 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional weighted prediction */
1743 int list = list1 ? 1 : 0;
1744 int refn = h->ref_cache[list][ scan8[n] ];
1745 Picture *ref= &h->ref_list[list][refn];
1746 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1747 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1748 qpix_put, chroma_put);
1750 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1751 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1752 if(h->use_weight_chroma){
1753 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1755 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1756 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatches one partition to weighted or standard MC. Weighted path
 * taken for explicit weighting, or implicit weighting whenever the
 * implicit weight differs from the 32/32 even split (which plain
 * averaging already produces).
 * NOTE(review): numbered listing; code kept byte-identical. */
1761 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1762 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763 int x_offset, int y_offset,
1764 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1765 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1766 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1767 int list0, int list1){
1768 if((h->use_weight==2 && list0 && list1
1769 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1770 || h->use_weight==1)
1771 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put,
1773 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1775 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1776 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Issues cache prefetches into the reference picture at the position
 * this MB's MV points to, staggered by mb_x so consecutive MBs touch
 * different cache lines.
 * NOTE(review): numbered listing with dropped lines (refn>=0 guard,
 * closing braces); code kept byte-identical. */
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous: one prefetch covers cb and cr */
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
1805 prefetch_motion(h, 0);
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1833 assert(IS_8X8(mb_type));
1836 const int sub_mb_type= h->sub_mb_type[i];
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1866 assert(IS_SUB_4X4(sub_mb_type));
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1879 prefetch_motion(h, 1);
/* Builds cavlc_level_tab: for each suffix length and each possible
 * LEVEL_TAB_BITS-wide bit pattern, the decoded level and its bit
 * length — or a prefix+100 escape marker when the code does not fit
 * in the table and must be decoded the slow way.
 * NOTE(review): numbered listing with dropped lines (declarations,
 * closing braces); code kept byte-identical. */
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* prefix = number of leading zeros in the pattern */
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* map unsigned code to signed level: even -> +, odd -> - */
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run_before) into the preallocated
 * static arrays, using INIT_VLC_USE_NEW_STATIC.
 * NOTE(review): guard logic around `done` and some loop headers are
 * missing from this excerpt — extraction artifact.
 */
1907 static av_cold void decode_init_vlc(void){
/* guards against repeated initialization across decoder instances */
1908 static int done = 0;
/* chroma DC coeff_token: 4*5 possible (total_coeff, trailing_ones) codes */
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918 &chroma_dc_coeff_token_len [0], 1, 1,
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
/* luma coeff_token: 4 tables selected by nC, packed back-to-back at `offset` */
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927 &coeff_token_len [i][0], 1, 1,
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* chroma DC total_zeros: 3 tables (one per total_coeff-1) */
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
/* luma total_zeros: 15 tables (one per total_coeff-1) */
1948 for(i=0; i<15; i++){
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
1953 &total_zeros_len [i][0], 1, 1,
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
/* run_before: 6 small tables for zeros_left 1..6 */
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
/* separate, larger table for zeros_left > 6 */
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970 &run_len [6][0], 1, 1,
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 init_cavlc_level_tab();
/**
 * Free all per-picture decoding tables and per-thread buffers.
 * Counterpart to alloc_tables()/context_init(); av_freep() NULLs
 * each pointer so a repeated call is safe.
 */
1978 static void free_tables(H264Context *h){
1981 av_freep(&h->intra4x4_pred_mode);
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
1986 av_freep(&h->direct_table);
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
/* slice_table points inside slice_table_base, so just clear it */
1989 h->slice_table= NULL;
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
/* per-thread contexts own their own border/scratch/rbsp buffers */
1994 for(i = 0; i < MAX_THREADS; i++) {
1995 hx = h->thread_context[i];
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
2000 av_freep(&hx->rbsp_buffer[1]);
2001 av_freep(&hx->rbsp_buffer[0]);
2002 hx->rbsp_buffer_size[0] = 0;
2003 hx->rbsp_buffer_size[1] = 0;
/* context 0 is the main context itself; only free the clones */
2004 if (i) av_freep(&h->thread_context[i]);
/**
 * Build the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If both 8x8 matrices are identical, table 1
 * aliases table 0 to save work. Coefficients are stored transposed
 * when a non-C IDCT (with permuted input) is in use.
 */
2008 static void init_dequant8_coeff_table(H264Context *h){
2010 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2011 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2012 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2014 for(i=0; i<2; i++ ){
/* identical intra/inter matrices -> share one table */
2015 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2016 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2020 for(q=0; q<52; q++){
/* shift = qp/6; base coefficients repeat with period 6 */
2021 int shift = div6[q];
2024 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2025 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2026 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb, Cr)
 * for all 52 QP values from the PPS scaling matrices. Matrices with
 * identical scaling lists alias an earlier buffer instead of being
 * recomputed. Stored transposed when a permuting IDCT is in use.
 */
2031 static void init_dequant4_coeff_table(H264Context *h){
2033 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2034 for(i=0; i<6; i++ ){
2035 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier table when the scaling lists match */
2037 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2038 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2045 for(q=0; q<52; q++){
/* +2 scales the 4x4 base coefficients into the same range as 8x8 */
2046 int shift = div6[q] + 2;
2049 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2050 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2051 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initialize all dequant tables for the current PPS/SPS.
 * In lossless (transform bypass) mode, QP 0 entries are forced to the
 * neutral value 1<<6 so dequantization becomes an identity.
 */
2056 static void init_dequant_tables(H264Context *h){
2058 init_dequant4_coeff_table(h);
2059 if(h->pps.transform_8x8_mode)
2060 init_dequant8_coeff_table(h);
2061 if(h->sps.transform_bypass){
2064 h->dequant4_coeff[i][0][x] = 1<<6;
2065 if(h->pps.transform_8x8_mode)
2068 h->dequant8_coeff[i][0][x] = 1<<6;
2075 * needs width/height
/**
 * Allocate per-picture decoding tables; requires mb_width/mb_height
 * (and the derived strides) to be known. Returns 0 on success; on
 * allocation failure the FF_ALLOCZ_OR_GOTO macros jump to a `fail`
 * label (outside this excerpt) that cleans up via free_tables().
 */
2077 static int alloc_tables(H264Context *h){
2078 MpegEncContext * const s = &h->s;
/* +1 row so edge macroblocks have valid neighbours in the tables */
2079 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2082 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
2084 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t), fail)
2085 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
2086 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
2088 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
2089 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
2090 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
2091 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
/* -1 marks "no slice"; slice_table is offset so [-stride..] accesses stay in bounds */
2093 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2094 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* lookup tables: macroblock index -> 4x4 (b) and 8x8 (b8) block indices */
2096 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
2097 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
2098 for(y=0; y<s->mb_height; y++){
2099 for(x=0; x<s->mb_width; x++){
2100 const int mb_xy= x + y*s->mb_stride;
2101 const int b_xy = 4*x + 4*y*h->b_stride;
2102 const int b8_xy= 2*x + 2*y*h->b8_stride;
2104 h->mb2b_xy [mb_xy]= b_xy;
2105 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2109 s->obmc_scratchpad = NULL;
2111 if(!h->dequant4_coeff[0])
2112 init_dequant_tables(h);
2121 * Mimic alloc_tables(), but for every context thread.
/**
 * Mimic alloc_tables() for a per-thread context: share (not copy) the
 * table pointers owned by the source context. dst must not free these;
 * free_tables() only releases them via the main context.
 */
2123 static void clone_tables(H264Context *dst, H264Context *src){
2124 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2125 dst->non_zero_count = src->non_zero_count;
2126 dst->slice_table = src->slice_table;
2127 dst->cbp_table = src->cbp_table;
2128 dst->mb2b_xy = src->mb2b_xy;
2129 dst->mb2b8_xy = src->mb2b8_xy;
2130 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2131 dst->mvd_table[0] = src->mvd_table[0];
2132 dst->mvd_table[1] = src->mvd_table[1];
2133 dst->direct_table = src->direct_table;
/* scratchpad is per-thread; allocated lazily in frame_start() */
2135 dst->s.obmc_scratchpad = NULL;
2136 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2141 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate the per-thread (non-shared) buffers: two rows of top-border
 * samples (16 luma + 8 Cb + 8 Cr bytes per macroblock column).
 * Returns 0 on success, -1 on allocation failure.
 */
2143 static int context_init(H264Context *h){
2144 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2145 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2149 return -1; // free_tables will clean up for us
/**
 * Common one-time initialization shared by the H.264 and SVQ3 decoders:
 * copies dimensions from the AVCodecContext, initializes prediction and
 * DSP function pointers, and seeds flat (all-16) scaling matrices.
 */
2152 static av_cold void common_init(H264Context *h){
2153 MpegEncContext * const s = &h->s;
2155 s->width = s->avctx->width;
2156 s->height = s->avctx->height;
2157 s->codec_id= s->avctx->codec->id;
2159 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = no PPS-derived dequant tables computed yet */
2161 h->dequant_coeff_pps= -1;
2162 s->unrestricted_mv=1;
2163 s->decode=1; //FIXME
/* needed so that the IDCT permutation is known early */
2165 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* default "flat" scaling lists (value 16 = unity scaling) */
2167 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2168 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2172 * Reset SEI values at the beginning of the frame.
2174 * @param h H.264 context.
/**
 * Reset per-frame SEI state to "not present" defaults at the
 * beginning of each frame (-1 = absent/unknown).
 */
2176 static void reset_sei(H264Context *h) {
2177 h->sei_recovery_frame_cnt = -1;
2178 h->sei_dpb_output_delay = 0;
2179 h->sei_cpb_removal_delay = -1;
2180 h->sei_buffering_period_present = 0;
/**
 * AVCodec init callback: set up MpegEncContext defaults, common H.264
 * state, detect AVC (mp4-style) extradata, and normalize the time base
 * to field rate (ticks_per_frame = 2).
 */
2183 static av_cold int decode_init(AVCodecContext *avctx){
2184 H264Context *h= avctx->priv_data;
2185 MpegEncContext * const s = &h->s;
2187 MPV_decode_defaults(s);
2192 s->out_format = FMT_H264;
2193 s->workaround_bugs= avctx->workaround_bugs;
2196 // s->decode_mb= ff_h263_decode_mb;
2197 s->quarter_sample = 1;
2198 if(!avctx->has_b_frames)
2201 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
/* extradata starting with byte 1 => AVCDecoderConfigurationRecord (mp4/mkv) */
2205 if(avctx->extradata_size > 0 && avctx->extradata &&
2206 *(char *)avctx->extradata == 1){
2213 h->thread_context[0] = h;
2214 h->outputed_poc = INT_MIN;
/* large initial value so the first POC computation never wraps backwards */
2215 h->prev_poc_msb= 1<<16;
/* H.264 time base counts fields: double den once and mark 2 ticks/frame */
2217 if(avctx->codec_id == CODEC_ID_H264){
2218 if(avctx->ticks_per_frame == 1){
2219 s->avctx->time_base.den *=2;
2221 avctx->ticks_per_frame = 2;
/**
 * Begin decoding a new frame: run MPV/error-resilience frame setup,
 * precompute block offsets for both frame and field geometry, allocate
 * per-thread scratchpads (linesize only known here), and reset
 * per-picture state (key_frame, POCs, reference flag).
 * Returns a negative value if MPV_frame_start() fails.
 */
2226 static int frame_start(H264Context *h){
2227 MpegEncContext * const s = &h->s;
2230 if(MPV_frame_start(s, s->avctx) < 0)
2232 ff_er_frame_start(s);
2234 * MPV_frame_start uses pict_type to derive key_frame.
2235 * This is incorrect for H.264; IDR markings must be used.
2236 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2237 * See decode_nal_units().
2239 s->current_picture_ptr->key_frame= 0;
2240 s->current_picture_ptr->mmco_reset= 0;
2242 assert(s->linesize && s->uvlinesize);
/* block_offset[0..15]: frame geometry; [24..39]: field geometry (doubled row stride) */
2244 for(i=0; i<16; i++){
2245 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2246 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma block offsets (Cb at +16, Cr at +20; field variants at +24) */
2249 h->block_offset[16+i]=
2250 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2251 h->block_offset[24+16+i]=
2252 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2255 /* can't be in alloc_tables because linesize isn't known there.
2256 * FIXME: redo bipred weight to not require extra buffer? */
2257 for(i = 0; i < s->avctx->thread_count; i++)
2258 if(!h->thread_context[i]->s.obmc_scratchpad)
2259 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2261 /* some macroblocks will be accessed before they're available */
2262 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2263 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2265 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2267 // We mark the current picture as non-reference after allocating it, so
2268 // that if we break out due to an error it can be released automatically
2269 // in the next MPV_frame_start().
2270 // SVQ3 as well as most other codecs have only last/next/current and thus
2271 // get released even with set reference, besides SVQ3 and others do not
2272 // mark frames as reference later "naturally".
2273 if(s->codec_id != CODEC_ID_SVQ3)
2274 s->current_picture_ptr->reference= 0;
/* field POCs filled in per slice; INT_MAX = not yet decoded */
2276 s->current_picture_ptr->field_poc[0]=
2277 s->current_picture_ptr->field_poc[1]= INT_MAX;
2278 assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row and right-edge column of the just-decoded
 * macroblock into top_borders[]/left_border[] so the deblocking filter
 * of neighbouring macroblocks can read pre-filter samples. MBAFF mode
 * keeps two border rows (one per field parity) and strides by 2.
 * NOTE(review): several declarations/branches are missing from this
 * excerpt (skiplast, top_idx/step defaults) — extraction artifact.
 */
2283 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2284 MpegEncContext * const s = &h->s;
2293 src_cb -= uvlinesize;
2294 src_cr -= uvlinesize;
2296 if(!simple && FRAME_MBAFF){
2298 offset = MB_MBAFF ? 1 : 17;
2299 uvoffset= MB_MBAFF ? 1 : 9;
/* save the last luma row (two 8-byte halves) of the top field MB */
2301 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
/* chroma only when not decoding luma-only (CODEC_FLAG_GRAY) */
2303 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2304 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2305 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2310 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2311 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2312 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2313 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2319 top_idx = MB_MBAFF ? 0 : 1;
2321 step= MB_MBAFF ? 2 : 1;
2324 // There are two lines saved, the line above the the top macroblock of a pair,
2325 // and the line above the bottom macroblock
2326 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2327 for(i=1; i<17 - skiplast; i++){
2328 h->left_border[offset+i*step]= src_y[15+i* linesize];
2331 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2332 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2334 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma left borders live at offsets 34 (Cb) and 34+18 (Cr) */
2335 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2336 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2337 for(i=1; i<9 - skiplast; i++){
2338 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2339 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2341 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2342 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved border samples with the
 * picture edges around an intra macroblock, so intra prediction sees
 * unfiltered neighbour samples while the deblocking filter is active.
 * Deblocking across slice edges is only allowed when
 * deblocking_filter != 2 (2 = filter within slice only).
 * NOTE(review): variable declarations and some branch bodies are
 * missing from this excerpt — extraction artifact.
 */
2346 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2347 MpegEncContext * const s = &h->s;
2358 if(!simple && FRAME_MBAFF){
2360 offset = MB_MBAFF ? 1 : 17;
2361 uvoffset= MB_MBAFF ? 1 : 9;
2365 top_idx = MB_MBAFF ? 0 : 1;
2367 step= MB_MBAFF ? 2 : 1;
/* mode 2: only deblock against neighbours in the same slice */
2370 if(h->deblocking_filter == 2) {
2372 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2373 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2375 deblock_left = (s->mb_x > 0);
2376 deblock_top = (s->mb_y > !!MB_FIELD);
/* back up to the pixel above-left of the macroblock */
2379 src_y -= linesize + 1;
2380 src_cb -= uvlinesize + 1;
2381 src_cr -= uvlinesize + 1;
2383 #define XCHG(a,b,t,xchg)\
/* left luma column: conditionally skip row 0 when the top edge is not deblocked */
2390 for(i = !deblock_top; i<16; i++){
2391 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2393 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
/* top luma row (two 8-byte halves), plus first 8 bytes of the next MB's row */
2397 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2399 if(s->mb_x+1 < s->mb_width){
2400 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2404 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2406 for(i = !deblock_top; i<8; i++){
2407 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2408 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2410 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2411 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2414 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2415 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Decode (reconstruct) one macroblock: intra prediction or motion
 * compensation, inverse transform + residual add, and deblocking
 * bookkeeping. `simple`=1 compiles a fast path with MBAFF/PCM/gray/
 * lossless/SVQ3 handling removed (callers guarantee those cases do
 * not occur). Shared between H.264 and SVQ3 via the is_h264 flag.
 * NOTE(review): many lines (else-branches, loop headers, closing
 * braces) are missing from this excerpt — extraction artifact.
 */
2420 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2421 MpegEncContext * const s = &h->s;
2422 const int mb_x= s->mb_x;
2423 const int mb_y= s->mb_y;
2424 const int mb_xy= h->mb_xy;
2425 const int mb_type= s->current_picture.mb_type[mb_xy];
2426 uint8_t *dest_y, *dest_cb, *dest_cr;
2427 int linesize, uvlinesize /*dct_offset*/;
2429 int *block_offset = &h->block_offset[0];
/* lossless mode: qscale==0 with SPS transform_bypass set */
2430 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2431 /* is_h264 should always be true if SVQ3 is disabled. */
2432 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2433 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2434 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2436 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2437 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2438 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2440 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2441 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use field block offsets, and for the
 * bottom field rewind dest pointers to interleave with the top field */
2443 if (!simple && MB_FIELD) {
2444 linesize = h->mb_linesize = s->linesize * 2;
2445 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2446 block_offset = &h->block_offset[24];
2447 if(mb_y&1){ //FIXME move out of this function?
2448 dest_y -= s->linesize*15;
2449 dest_cb-= s->uvlinesize*7;
2450 dest_cr-= s->uvlinesize*7;
/* remap ref_cache indices to per-field reference numbering */
2454 for(list=0; list<h->list_count; list++){
2455 if(!USES_LIST(mb_type, list))
2457 if(IS_16X16(mb_type)){
2458 int8_t *ref = &h->ref_cache[list][scan8[0]];
2459 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2461 for(i=0; i<16; i+=4){
2462 int ref = h->ref_cache[list][scan8[i]];
2464 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2470 linesize = h->mb_linesize = s->linesize;
2471 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2472 // dct_offset = s->linesize * 16;
/* IPCM macroblock: raw samples were stored in h->mb, just copy them out */
2475 if (!simple && IS_INTRA_PCM(mb_type)) {
2476 for (i=0; i<16; i++) {
2477 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2479 for (i=0; i<8; i++) {
2480 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2481 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2484 if(IS_INTRA(mb_type)){
/* swap in unfiltered borders so intra prediction ignores deblocking */
2485 if(h->deblocking_filter)
2486 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2488 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2489 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2490 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2493 if(IS_INTRA4x4(mb_type)){
2494 if(simple || !s->encoding){
2495 if(IS_8x8DCT(mb_type)){
2496 if(transform_bypass){
2498 idct_add = s->dsp.add_pixels8;
2500 idct_dc_add = s->dsp.h264_idct8_dc_add;
2501 idct_add = s->dsp.h264_idct8_add;
/* 8x8 intra: predict each of the four 8x8 blocks, then add residual */
2503 for(i=0; i<16; i+=4){
2504 uint8_t * const ptr= dest_y + block_offset[i];
2505 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* High 4:4:4 (profile 244) lossless: fused predict+add variants */
2506 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2507 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2509 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2510 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2511 (h->topright_samples_available<<i)&0x4000, linesize);
/* DC-only block: use the cheaper dc_add */
2513 if(nnz == 1 && h->mb[i*16])
2514 idct_dc_add(ptr, h->mb + i*16, linesize);
2516 idct_add (ptr, h->mb + i*16, linesize);
2521 if(transform_bypass){
2523 idct_add = s->dsp.add_pixels4;
2525 idct_dc_add = s->dsp.h264_idct_dc_add;
2526 idct_add = s->dsp.h264_idct_add;
/* 4x4 intra: predict and add residual per 4x4 block in scan order */
2528 for(i=0; i<16; i++){
2529 uint8_t * const ptr= dest_y + block_offset[i];
2530 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2532 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2533 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* modes needing top-right samples: replicate edge pixel when unavailable */
2537 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2538 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2539 assert(mb_y || linesize <= block_offset[i]);
2540 if(!topright_avail){
2541 tr= ptr[3 - linesize]*0x01010101;
2542 topright= (uint8_t*) &tr;
2544 topright= ptr + 4 - linesize;
2548 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2549 nnz = h->non_zero_count_cache[ scan8[i] ];
2552 if(nnz == 1 && h->mb[i*16])
2553 idct_dc_add(ptr, h->mb + i*16, linesize);
2555 idct_add (ptr, h->mb + i*16, linesize);
/* SVQ3 path uses its own IDCT with per-block qscale */
2557 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* 16x16 intra: full-MB luma prediction plus DC transform */
2564 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2566 if(!transform_bypass)
2567 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2569 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2571 if(h->deblocking_filter)
2572 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation via the qpel/chroma MC tables */
2574 hl_motion(h, dest_y, dest_cb, dest_cr,
2575 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2576 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2577 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals (intra4x4 already added them above) */
2581 if(!IS_INTRA4x4(mb_type)){
2583 if(IS_INTRA16x16(mb_type)){
2584 if(transform_bypass){
2585 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2586 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2588 for(i=0; i<16; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2590 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2594 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* cbp&15: at least one luma 8x8 group has coefficients */
2596 }else if(h->cbp&15){
2597 if(transform_bypass){
2598 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2599 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2600 for(i=0; i<16; i+=di){
2601 if(h->non_zero_count_cache[ scan8[i] ]){
2602 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2606 if(IS_8x8DCT(mb_type)){
2607 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2609 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2614 for(i=0; i<16; i++){
2615 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2616 uint8_t * const ptr= dest_y + block_offset[i];
2617 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals (cbp&0x30: chroma coefficients present) */
2623 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2624 uint8_t *dest[2] = {dest_cb, dest_cr};
2625 if(transform_bypass){
2626 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2627 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2628 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2630 idct_add = s->dsp.add_pixels4;
2631 for(i=16; i<16+8; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2633 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC uses its own 2x2 transform with separate intra/inter dequant */
2637 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2638 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2640 idct_add = s->dsp.h264_idct_add;
2641 idct_dc_add = s->dsp.h264_idct_dc_add;
2642 for(i=16; i<16+8; i++){
2643 if(h->non_zero_count_cache[ scan8[i] ])
2644 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2645 else if(h->mb[i*16])
2646 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2649 for(i=16; i<16+8; i++){
2650 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2651 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2652 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2659 if(h->cbp || IS_INTRA(mb_type))
2660 s->dsp.clear_blocks(h->mb);
/* deblocking: back up pre-filter borders, refresh caches/QPs, then filter */
2662 if(h->deblocking_filter) {
2663 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2664 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2665 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2666 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2667 if (!simple && FRAME_MBAFF) {
2668 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2670 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2676 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/**
 * Fast-path macroblock reconstruction: specialization of
 * hl_decode_mb_internal() with simple=1 (no MBAFF/PCM/gray/SVQ3 checks).
 */
2678 static void hl_decode_mb_simple(H264Context *h){
2679 hl_decode_mb_internal(h, 1);
2683 * Process a macroblock; this handles edge cases, such as interlacing.
/**
 * Slow-path macroblock reconstruction handling all edge cases
 * (interlacing, PCM, gray, lossless, SVQ3); av_noinline keeps the
 * large specialization out of the fast path's code size.
 */
2685 static void av_noinline hl_decode_mb_complex(H264Context *h){
2686 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex path
 * depending on stream features (MBAFF/gray/etc.), PCM macroblocks,
 * and lossless mode (qscale==0). CONFIG_SMALL builds always take the
 * complex path to avoid duplicating the inlined internal function.
 */
2689 static void hl_decode_mb(H264Context *h){
2690 MpegEncContext * const s = &h->s;
2691 const int mb_xy= h->mb_xy;
2692 const int mb_type= s->current_picture.mb_type[mb_xy];
2693 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2696 hl_decode_mb_complex(h);
2697 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in place into a single-field view:
 * doubles linesizes, offsets data pointers for the bottom field,
 * and sets reference/poc to the requested field parity.
 */
2700 static void pic_as_field(Picture *pic, const int parity){
2702 for (i = 0; i < 4; ++i) {
/* bottom field starts one line down in the interleaved frame */
2703 if (parity == PICT_BOTTOM_FIELD)
2704 pic->data[i] += pic->linesize[i];
2705 pic->reference = parity;
2706 pic->linesize[i] *= 2;
2708 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is a reference of the requested parity;
 * field parities additionally convert dest to a field picture and
 * bump pic_id by id_add. Returns nonzero when a copy was made.
 * NOTE(review): the copy statement itself is missing from this
 * excerpt — extraction artifact.
 */
2711 static int split_field_copy(Picture *dest, Picture *src,
2712 int parity, int id_add){
2713 int match = !!(src->reference & parity);
2717 if(parity != PICT_FRAME){
2718 pic_as_field(dest, parity);
2720 dest->pic_id += id_add;
/**
 * Build a default reference list segment from `in`: interleave
 * references of the selected field parity (sel) with the opposite
 * parity (sel^3), assigning pic_id from long-term index or frame_num.
 * Returns the number of entries written.
 * NOTE(review): i[0]/i[1]/index initialization and the return are
 * missing from this excerpt — extraction artifact.
 */
2727 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2731 while(i[0]<len || i[1]<len){
/* advance each cursor to the next picture referencing the wanted parity */
2732 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2734 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2737 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2738 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2741 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2742 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Selection-sort short-term references by POC into `sorted`:
 * dir=0 picks POCs below `limit` in descending order, dir=1 picks
 * POCs above `limit` in ascending order (B-frame list ordering).
 * Returns the number of pictures appended.
 */
2749 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2754 best_poc= dir ? INT_MIN : INT_MAX;
2756 for(i=0; i<len; i++){
2757 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the ascending case */
2758 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2760 sorted[out_i]= src[i];
/* sentinel unchanged -> no candidate found this pass, done */
2763 if(best_poc == (dir ? INT_MIN : INT_MAX))
2765 limit= sorted[out_i++]->poc - dir;
2771 * fills the default_ref_list.
/**
 * Fill h->default_ref_list per the H.264 spec initialization process:
 * P slices order short-term refs by frame_num then long-term by index;
 * B slices order short-term refs by POC distance (past/future swapped
 * between L0 and L1), and swap L1's first two entries if the lists
 * would otherwise be identical.
 */
2773 static int fill_default_ref_list(H264Context *h){
2774 MpegEncContext * const s = &h->s;
2777 if(h->slice_type_nos==FF_B_TYPE){
2778 Picture *sorted[32];
/* current POC: field POC when decoding a field, frame POC otherwise */
2783 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2785 cur_poc= s->current_picture_ptr->poc;
2787 for(list= 0; list<2; list++){
/* L0: past refs first (descending), then future; L1: the reverse */
2788 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2789 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2791 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2792 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2795 if(len < h->ref_count[list])
2796 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if L0 and L1 are identical and have >1 entry, swap L1[0] and L1[1] */
2800 if(lens[0] == lens[1] && lens[1] > 1){
2801 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2803 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* P/SP slices: short-term by frame_num order, then long-term */
2806 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2807 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2809 if(len < h->ref_count[0])
2810 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2813 for (i=0; i<h->ref_count[0]; i++) {
2814 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2816 if(h->slice_type_nos==FF_B_TYPE){
2817 for (i=0; i<h->ref_count[1]; i++) {
2818 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2825 static void print_short_term(H264Context *h);
2826 static void print_long_term(H264Context *h);
2829 * Extract structure information about the picture described by pic_num in
2830 * the current decoding context (frame or field). Note that pic_num is
2831 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2832 * @param pic_num picture number for which to extract structure information
2833 * @param structure one of PICT_XXX describing structure of picture
2835 * @return frame number (short term) or long term index of picture
2836 * described by pic_num
/**
 * Extract structure information about the picture described by pic_num.
 * In field decoding the LSB of pic_num selects same/opposite field
 * parity; returns the frame number (short term) or long-term index.
 * NOTE(review): the pic_num>>=1 / condition lines are missing from
 * this excerpt — extraction artifact.
 */
2838 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2839 MpegEncContext * const s = &h->s;
2841 *structure = s->picture_structure;
2844 /* opposite field */
2845 *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * starting from the default lists, move the signalled short-term
 * (idc 0/1) or long-term (idc 2) pictures to the front of each list.
 * Ends on idc==3. Returns 0 on success, -1 on bitstream errors.
 * Missing references are replaced by default_ref_list[.][0] as a
 * best-effort recovery.
 */
2852 static int decode_ref_pic_list_reordering(H264Context *h){
2853 MpegEncContext * const s = &h->s;
2854 int list, index, pic_structure;
2856 print_short_term(h);
2859 for(list=0; list<h->list_count; list++){
/* start from the default list, then reorder in place */
2860 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_lX */
2862 if(get_bits1(&s->gb)){
2863 int pred= h->curr_pic_num;
2865 for(index=0; ; index++){
2866 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2867 unsigned int pic_id;
2869 Picture *ref = NULL;
/* idc 3 terminates the reordering commands */
2871 if(reordering_of_pic_nums_idc==3)
2874 if(index >= h->ref_count[list]){
2875 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2879 if(reordering_of_pic_nums_idc<3){
2880 if(reordering_of_pic_nums_idc<2){
/* short-term: idc 0 subtracts, idc 1 adds abs_diff_pic_num (mod max_pic_num) */
2881 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2884 if(abs_diff_pic_num > h->max_pic_num){
2885 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2889 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2890 else pred+= abs_diff_pic_num;
2891 pred &= h->max_pic_num - 1;
2893 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search short-term list newest-first for a matching frame_num/parity */
2895 for(i= h->short_ref_count-1; i>=0; i--){
2896 ref = h->short_ref[i];
2897 assert(ref->reference);
2898 assert(!ref->long_ref);
2900 ref->frame_num == frame_num &&
2901 (ref->reference & pic_structure)
/* long-term: idc 2, indexed directly by long_term_pic_idx */
2909 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2911 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2914 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2917 ref = h->long_ref[long_idx];
2918 assert(!(ref && !ref->reference));
2919 if(ref && (ref->reference & pic_structure)){
2920 ref->pic_id= pic_id;
2921 assert(ref->long_ref);
2929 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2930 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift list entries down and insert the found reference at `index` */
2932 for(i=index; i+1<h->ref_count[list]; i++){
2933 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2936 for(; i > index; i--){
2937 h->ref_list[list][i]= h->ref_list[list][i-1];
2939 h->ref_list[list][index]= *ref;
2941 pic_as_field(&h->ref_list[list][index], pic_structure);
2945 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* fill any holes with the first default reference so decoding can continue */
2951 for(list=0; list<h->list_count; list++){
2952 for(index= 0; index < h->ref_count[list]; index++){
2953 if(!h->ref_list[list][index].data[0]){
2954 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2955 if(h->default_ref_list[list][0].data[0])
2956 h->ref_list[list][index]= h->default_ref_list[list][0];
/**
 * For MBAFF frames, derive per-field reference entries: each frame
 * reference at ref_list[i] gets a top-field copy at [16+2*i] and a
 * bottom-field copy at [16+2*i+1], with matching weight/offset tables
 * duplicated for both fields.
 */
2966 static void fill_mbaff_ref_list(H264Context *h){
2968 for(list=0; list<2; list++){ //FIXME try list_count
2969 for(i=0; i<h->ref_count[list]; i++){
2970 Picture *frame = &h->ref_list[list][i];
2971 Picture *field = &h->ref_list[list][16+2*i];
/* top field: same data pointers, doubled linesize */
2974 field[0].linesize[j] <<= 1;
2975 field[0].reference = PICT_TOP_FIELD;
2976 field[0].poc= field[0].field_poc[0];
/* bottom field: clone top then offset data by one frame line */
2977 field[1] = field[0];
2979 field[1].data[j] += frame->linesize[j];
2980 field[1].reference = PICT_BOTTOM_FIELD;
2981 field[1].poc= field[1].field_poc[1];
/* both field entries reuse the frame's explicit weights/offsets */
2983 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2984 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2986 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2987 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit bipred weights duplicated likewise for the field rows */
2991 for(j=0; j<h->ref_count[1]; j++){
2992 for(i=0; i<h->ref_count[0]; i++)
2993 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2994 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2995 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2999 static int pred_weight_table(H264Context *h){
3000 MpegEncContext * const s = &h->s;
3002 int luma_def, chroma_def;
3005 h->use_weight_chroma= 0;
3006 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3007 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3008 luma_def = 1<<h->luma_log2_weight_denom;
3009 chroma_def = 1<<h->chroma_log2_weight_denom;
3011 for(list=0; list<2; list++){
3012 h->luma_weight_flag[list] = 0;
3013 h->chroma_weight_flag[list] = 0;
3014 for(i=0; i<h->ref_count[list]; i++){
3015 int luma_weight_flag, chroma_weight_flag;
3017 luma_weight_flag= get_bits1(&s->gb);
3018 if(luma_weight_flag){
3019 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3020 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3021 if( h->luma_weight[list][i] != luma_def
3022 || h->luma_offset[list][i] != 0) {
3024 h->luma_weight_flag[list]= 1;
3027 h->luma_weight[list][i]= luma_def;
3028 h->luma_offset[list][i]= 0;
3032 chroma_weight_flag= get_bits1(&s->gb);
3033 if(chroma_weight_flag){
3036 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3037 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3038 if( h->chroma_weight[list][i][j] != chroma_def
3039 || h->chroma_offset[list][i][j] != 0) {
3040 h->use_weight_chroma= 1;
3041 h->chroma_weight_flag[list]= 1;
3047 h->chroma_weight[list][i][j]= chroma_def;
3048 h->chroma_offset[list][i][j]= 0;
3053 if(h->slice_type_nos != FF_B_TYPE) break;
3055 h->use_weight= h->use_weight || h->use_weight_chroma;
3059 static void implicit_weight_table(H264Context *h){
3060 MpegEncContext * const s = &h->s;
3062 int cur_poc = s->current_picture_ptr->poc;
3064 for (i = 0; i < 2; i++) {
3065 h->luma_weight_flag[i] = 0;
3066 h->chroma_weight_flag[i] = 0;
3069 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3070 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3072 h->use_weight_chroma= 0;
3077 h->use_weight_chroma= 2;
3078 h->luma_log2_weight_denom= 5;
3079 h->chroma_log2_weight_denom= 5;
3081 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3082 int poc0 = h->ref_list[0][ref0].poc;
3083 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3084 int poc1 = h->ref_list[1][ref1].poc;
3085 int td = av_clip(poc1 - poc0, -128, 127);
3087 int tb = av_clip(cur_poc - poc0, -128, 127);
3088 int tx = (16384 + (FFABS(td) >> 1)) / td;
3089 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3090 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3091 h->implicit_weight[ref0][ref1] = 32;
3093 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3095 h->implicit_weight[ref0][ref1] = 32;
3101 * Mark a picture as no longer needed for reference. The refmask
3102 * argument allows unreferencing of individual fields or the whole frame.
3103 * If the picture becomes entirely unreferenced, but is being held for
3104 * display purposes, it is marked as such.
3105 * @param refmask mask of fields to unreference; the mask is bitwise
3106 * anded with the reference marking of pic
3107 * @return non-zero if pic becomes entirely unreferenced (except possibly
3108 * for display purposes) zero if one of the fields remains in
3111 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3113 if (pic->reference &= refmask) {
3116 for(i = 0; h->delayed_pic[i]; i++)
3117 if(pic == h->delayed_pic[i]){
3118 pic->reference=DELAYED_PIC_REF;
3126 * instantaneous decoder refresh.
3128 static void idr(H264Context *h){
3131 for(i=0; i<16; i++){
3132 remove_long(h, i, 0);
3134 assert(h->long_ref_count==0);
3136 for(i=0; i<h->short_ref_count; i++){
3137 unreference_pic(h, h->short_ref[i], 0);
3138 h->short_ref[i]= NULL;
3140 h->short_ref_count=0;
3141 h->prev_frame_num= 0;
3142 h->prev_frame_num_offset= 0;
3147 /* forget old pics after a seek */
3148 static void flush_dpb(AVCodecContext *avctx){
3149 H264Context *h= avctx->priv_data;
3151 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3152 if(h->delayed_pic[i])
3153 h->delayed_pic[i]->reference= 0;
3154 h->delayed_pic[i]= NULL;
3156 h->outputed_poc= INT_MIN;
3157 h->prev_interlaced_frame = 1;
3159 if(h->s.current_picture_ptr)
3160 h->s.current_picture_ptr->reference= 0;
3161 h->s.first_field= 0;
3163 ff_mpeg_flush(avctx);
3167 * Find a Picture in the short term reference list by frame number.
3168 * @param frame_num frame number to search for
3169 * @param idx the index into h->short_ref where returned picture is found
3170 * undefined if no picture found.
3171 * @return pointer to the found picture, or NULL if no pic with the provided
3172 * frame number is found
3174 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3175 MpegEncContext * const s = &h->s;
3178 for(i=0; i<h->short_ref_count; i++){
3179 Picture *pic= h->short_ref[i];
3180 if(s->avctx->debug&FF_DEBUG_MMCO)
3181 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3182 if(pic->frame_num == frame_num) {
3191 * Remove a picture from the short term reference list by its index in
3192 * that list. This does no checking on the provided index; it is assumed
3193 * to be valid. Other list entries are shifted down.
3194 * @param i index into h->short_ref of picture to remove.
3196 static void remove_short_at_index(H264Context *h, int i){
3197 assert(i >= 0 && i < h->short_ref_count);
3198 h->short_ref[i]= NULL;
3199 if (--h->short_ref_count)
3200 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3205 * @return the removed picture or NULL if an error occurs
3207 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3208 MpegEncContext * const s = &h->s;
3212 if(s->avctx->debug&FF_DEBUG_MMCO)
3213 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3215 pic = find_short(h, frame_num, &i);
3217 if(unreference_pic(h, pic, ref_mask))
3218 remove_short_at_index(h, i);
3225 * Remove a picture from the long term reference list by its index in
3227 * @return the removed picture or NULL if an error occurs
3229 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3232 pic= h->long_ref[i];
3234 if(unreference_pic(h, pic, ref_mask)){
3235 assert(h->long_ref[i]->long_ref == 1);
3236 h->long_ref[i]->long_ref= 0;
3237 h->long_ref[i]= NULL;
3238 h->long_ref_count--;
3246 * print short term list
3248 static void print_short_term(H264Context *h) {
3250 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3251 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3252 for(i=0; i<h->short_ref_count; i++){
3253 Picture *pic= h->short_ref[i];
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3260 * print long term list
3262 static void print_long_term(H264Context *h) {
3264 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3265 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3266 for(i = 0; i < 16; i++){
3267 Picture *pic= h->long_ref[i];
3269 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3276 * Executes the reference picture marking (memory management control operations).
3278 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3279 MpegEncContext * const s = &h->s;
3280 int i, av_uninit(j);
3281 int current_ref_assigned=0;
3282 Picture *av_uninit(pic);
3284 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3285 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3287 for(i=0; i<mmco_count; i++){
3288 int av_uninit(structure), av_uninit(frame_num);
3289 if(s->avctx->debug&FF_DEBUG_MMCO)
3290 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3292 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3293 || mmco[i].opcode == MMCO_SHORT2LONG){
3294 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3295 pic = find_short(h, frame_num, &j);
3297 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3298 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3299 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3304 switch(mmco[i].opcode){
3305 case MMCO_SHORT2UNUSED:
3306 if(s->avctx->debug&FF_DEBUG_MMCO)
3307 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3308 remove_short(h, frame_num, structure ^ PICT_FRAME);
3310 case MMCO_SHORT2LONG:
3311 if (h->long_ref[mmco[i].long_arg] != pic)
3312 remove_long(h, mmco[i].long_arg, 0);
3314 remove_short_at_index(h, j);
3315 h->long_ref[ mmco[i].long_arg ]= pic;
3316 if (h->long_ref[ mmco[i].long_arg ]){
3317 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3318 h->long_ref_count++;
3321 case MMCO_LONG2UNUSED:
3322 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3323 pic = h->long_ref[j];
3325 remove_long(h, j, structure ^ PICT_FRAME);
3326 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3327 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3330 // Comment below left from previous code as it is an interresting note.
3331 /* First field in pair is in short term list or
3332 * at a different long term index.
3333 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3334 * Report the problem and keep the pair where it is,
3335 * and mark this field valid.
3338 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3339 remove_long(h, mmco[i].long_arg, 0);
3341 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3342 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3343 h->long_ref_count++;
3346 s->current_picture_ptr->reference |= s->picture_structure;
3347 current_ref_assigned=1;
3349 case MMCO_SET_MAX_LONG:
3350 assert(mmco[i].long_arg <= 16);
3351 // just remove the long term which index is greater than new max
3352 for(j = mmco[i].long_arg; j<16; j++){
3353 remove_long(h, j, 0);
3357 while(h->short_ref_count){
3358 remove_short(h, h->short_ref[0]->frame_num, 0);
3360 for(j = 0; j < 16; j++) {
3361 remove_long(h, j, 0);
3363 s->current_picture_ptr->poc=
3364 s->current_picture_ptr->field_poc[0]=
3365 s->current_picture_ptr->field_poc[1]=
3369 s->current_picture_ptr->frame_num= 0;
3370 s->current_picture_ptr->mmco_reset=1;
3376 if (!current_ref_assigned) {
3377 /* Second field of complementary field pair; the first field of
3378 * which is already referenced. If short referenced, it
3379 * should be first entry in short_ref. If not, it must exist
3380 * in long_ref; trying to put it on the short list here is an
3381 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3383 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3384 /* Just mark the second field valid */
3385 s->current_picture_ptr->reference = PICT_FRAME;
3386 } else if (s->current_picture_ptr->long_ref) {
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3388 "assignment for second field "
3389 "in complementary field pair "
3390 "(first field is long term)\n");
3392 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3394 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3397 if(h->short_ref_count)
3398 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3400 h->short_ref[0]= s->current_picture_ptr;
3401 h->short_ref_count++;
3402 s->current_picture_ptr->reference |= s->picture_structure;
3406 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3408 /* We have too many reference frames, probably due to corrupted
3409 * stream. Need to discard one frame. Prevents overrun of the
3410 * short_ref and long_ref buffers.
3412 av_log(h->s.avctx, AV_LOG_ERROR,
3413 "number of reference frames exceeds max (probably "
3414 "corrupt input), discarding one\n");
3416 if (h->long_ref_count && !h->short_ref_count) {
3417 for (i = 0; i < 16; ++i)
3422 remove_long(h, i, 0);
3424 pic = h->short_ref[h->short_ref_count - 1];
3425 remove_short(h, pic->frame_num, 0);
3429 print_short_term(h);
3434 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3435 MpegEncContext * const s = &h->s;
3439 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3440 s->broken_link= get_bits1(gb) -1;
3442 h->mmco[0].opcode= MMCO_LONG;
3443 h->mmco[0].long_arg= 0;
3447 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3448 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3449 MMCOOpcode opcode= get_ue_golomb_31(gb);
3451 h->mmco[i].opcode= opcode;
3452 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3453 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3454 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3455 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3459 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3460 unsigned int long_arg= get_ue_golomb_31(gb);
3461 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3462 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3465 h->mmco[i].long_arg= long_arg;
3468 if(opcode > (unsigned)MMCO_LONG){
3469 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3472 if(opcode == MMCO_END)
3477 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3479 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3480 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3481 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3482 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3484 if (FIELD_PICTURE) {
3485 h->mmco[0].short_pic_num *= 2;
3486 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3487 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3497 static int init_poc(H264Context *h){
3498 MpegEncContext * const s = &h->s;
3499 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3501 Picture *cur = s->current_picture_ptr;
3503 h->frame_num_offset= h->prev_frame_num_offset;
3504 if(h->frame_num < h->prev_frame_num)
3505 h->frame_num_offset += max_frame_num;
3507 if(h->sps.poc_type==0){
3508 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3510 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3511 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3512 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3513 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3515 h->poc_msb = h->prev_poc_msb;
3516 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3518 field_poc[1] = h->poc_msb + h->poc_lsb;
3519 if(s->picture_structure == PICT_FRAME)
3520 field_poc[1] += h->delta_poc_bottom;
3521 }else if(h->sps.poc_type==1){
3522 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3525 if(h->sps.poc_cycle_length != 0)
3526 abs_frame_num = h->frame_num_offset + h->frame_num;
3530 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3533 expected_delta_per_poc_cycle = 0;
3534 for(i=0; i < h->sps.poc_cycle_length; i++)
3535 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3537 if(abs_frame_num > 0){
3538 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3539 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3541 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3542 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3543 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3547 if(h->nal_ref_idc == 0)
3548 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3550 field_poc[0] = expectedpoc + h->delta_poc[0];
3551 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3553 if(s->picture_structure == PICT_FRAME)
3554 field_poc[1] += h->delta_poc[1];
3556 int poc= 2*(h->frame_num_offset + h->frame_num);
3565 if(s->picture_structure != PICT_BOTTOM_FIELD)
3566 s->current_picture_ptr->field_poc[0]= field_poc[0];
3567 if(s->picture_structure != PICT_TOP_FIELD)
3568 s->current_picture_ptr->field_poc[1]= field_poc[1];
3569 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3576 * initialize scan tables
3578 static void init_scan_tables(H264Context *h){
3579 MpegEncContext * const s = &h->s;
3581 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3582 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3583 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3585 for(i=0; i<16; i++){
3586 #define T(x) (x>>2) | ((x<<2) & 0xF)
3587 h->zigzag_scan[i] = T(zigzag_scan[i]);
3588 h-> field_scan[i] = T( field_scan[i]);
3592 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3593 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3594 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3595 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3596 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3598 for(i=0; i<64; i++){
3599 #define T(x) (x>>3) | ((x&7)<<3)
3600 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3601 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3602 h->field_scan8x8[i] = T(field_scan8x8[i]);
3603 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3607 if(h->sps.transform_bypass){ //FIXME same ugly
3608 h->zigzag_scan_q0 = zigzag_scan;
3609 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3610 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3611 h->field_scan_q0 = field_scan;
3612 h->field_scan8x8_q0 = field_scan8x8;
3613 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3615 h->zigzag_scan_q0 = h->zigzag_scan;
3616 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3617 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3618 h->field_scan_q0 = h->field_scan;
3619 h->field_scan8x8_q0 = h->field_scan8x8;
3620 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3624 static void field_end(H264Context *h){
3625 MpegEncContext * const s = &h->s;
3626 AVCodecContext * const avctx= s->avctx;
3629 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
3630 s->current_picture_ptr->pict_type= s->pict_type;
3632 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3633 ff_vdpau_h264_set_reference_frames(s);
3636 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
3637 h->prev_poc_msb= h->poc_msb;
3638 h->prev_poc_lsb= h->poc_lsb;
3640 h->prev_frame_num_offset= h->frame_num_offset;
3641 h->prev_frame_num= h->frame_num;
3643 if (avctx->hwaccel) {
3644 if (avctx->hwaccel->end_frame(avctx) < 0)
3645 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
3648 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3649 ff_vdpau_h264_picture_complete(s);
3652 * FIXME: Error handling code does not seem to support interlaced
3653 * when slices span multiple rows
3654 * The ff_er_add_slice calls don't work right for bottom
3655 * fields; they cause massive erroneous error concealing
3656 * Error marking covers both fields (top and bottom).
3657 * This causes a mismatched s->error_count
3658 * and a bad error table. Further, the error count goes to
3659 * INT_MAX when called for bottom field, because mb_y is
3660 * past end by one (callers fault) and resync_mb_y != 0
3661 * causes problems for the first MB line, too.
3672 * Replicates H264 "master" context to thread contexts.
3674 static void clone_slice(H264Context *dst, H264Context *src)
3676 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3677 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3678 dst->s.current_picture = src->s.current_picture;
3679 dst->s.linesize = src->s.linesize;
3680 dst->s.uvlinesize = src->s.uvlinesize;
3681 dst->s.first_field = src->s.first_field;
3683 dst->prev_poc_msb = src->prev_poc_msb;
3684 dst->prev_poc_lsb = src->prev_poc_lsb;
3685 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3686 dst->prev_frame_num = src->prev_frame_num;
3687 dst->short_ref_count = src->short_ref_count;
3689 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3690 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3691 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3692 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3694 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3695 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3699 * decodes a slice header.
3700 * This will also call MPV_common_init() and frame_start() as needed.
3702 * @param h h264context
3703 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3705 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3707 static int decode_slice_header(H264Context *h, H264Context *h0){
3708 MpegEncContext * const s = &h->s;
3709 MpegEncContext * const s0 = &h0->s;
3710 unsigned int first_mb_in_slice;
3711 unsigned int pps_id;
3712 int num_ref_idx_active_override_flag;
3713 unsigned int slice_type, tmp, i, j;
3714 int default_ref_list_done = 0;
3715 int last_pic_structure;
3717 s->dropable= h->nal_ref_idc == 0;
3719 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3720 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3721 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3723 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3724 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3727 first_mb_in_slice= get_ue_golomb(&s->gb);
3729 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
3730 if(h0->current_slice && FIELD_PICTURE){
3734 h0->current_slice = 0;
3735 if (!s0->first_field)
3736 s->current_picture_ptr= NULL;
3739 slice_type= get_ue_golomb_31(&s->gb);
3741 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3746 h->slice_type_fixed=1;
3748 h->slice_type_fixed=0;
3750 slice_type= golomb_to_pict_type[ slice_type ];
3751 if (slice_type == FF_I_TYPE
3752 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3753 default_ref_list_done = 1;
3755 h->slice_type= slice_type;
3756 h->slice_type_nos= slice_type & 3;
3758 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3759 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3760 av_log(h->s.avctx, AV_LOG_ERROR,
3761 "B picture before any references, skipping\n");
3765 pps_id= get_ue_golomb(&s->gb);
3766 if(pps_id>=MAX_PPS_COUNT){
3767 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3770 if(!h0->pps_buffers[pps_id]) {
3771 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
3774 h->pps= *h0->pps_buffers[pps_id];
3776 if(!h0->sps_buffers[h->pps.sps_id]) {
3777 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
3780 h->sps = *h0->sps_buffers[h->pps.sps_id];
3782 if(h == h0 && h->dequant_coeff_pps != pps_id){
3783 h->dequant_coeff_pps = pps_id;
3784 init_dequant_tables(h);
3787 s->mb_width= h->sps.mb_width;
3788 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3790 h->b_stride= s->mb_width*4;
3791 h->b8_stride= s->mb_width*2;
3793 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3794 if(h->sps.frame_mbs_only_flag)
3795 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3797 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3799 if (s->context_initialized
3800 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3802 return -1; // width / height changed during parallelized decoding
3804 flush_dpb(s->avctx);
3807 if (!s->context_initialized) {
3809 return -1; // we cant (re-)initialize context during parallel decoding
3811 avcodec_set_dimensions(s->avctx, s->width, s->height);
3812 s->avctx->sample_aspect_ratio= h->sps.sar;
3813 if(!s->avctx->sample_aspect_ratio.den)
3814 s->avctx->sample_aspect_ratio.den = 1;
3816 if(h->sps.timing_info_present_flag){
3817 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
3818 if(h->x264_build > 0 && h->x264_build < 44)
3819 s->avctx->time_base.den *= 2;
3820 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3821 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3823 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
3824 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
3826 if (MPV_common_init(s) < 0)
3829 h->prev_interlaced_frame = 1;
3831 init_scan_tables(h);
3834 for(i = 1; i < s->avctx->thread_count; i++) {
3836 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3837 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3838 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3841 init_scan_tables(c);
3845 for(i = 0; i < s->avctx->thread_count; i++)
3846 if(context_init(h->thread_context[i]) < 0)
3850 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3853 h->mb_aff_frame = 0;
3854 last_pic_structure = s0->picture_structure;
3855 if(h->sps.frame_mbs_only_flag){
3856 s->picture_structure= PICT_FRAME;
3858 if(get_bits1(&s->gb)) { //field_pic_flag
3859 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3861 s->picture_structure= PICT_FRAME;
3862 h->mb_aff_frame = h->sps.mb_aff;
3865 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3867 if(h0->current_slice == 0){
3868 while(h->frame_num != h->prev_frame_num &&
3869 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3870 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3871 if (frame_start(h) < 0)
3873 h->prev_frame_num++;
3874 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3875 s->current_picture_ptr->frame_num= h->prev_frame_num;
3876 execute_ref_pic_marking(h, NULL, 0);
3879 /* See if we have a decoded first field looking for a pair... */
3880 if (s0->first_field) {
3881 assert(s0->current_picture_ptr);
3882 assert(s0->current_picture_ptr->data[0]);
3883 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3885 /* figure out if we have a complementary field pair */
3886 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3888 * Previous field is unmatched. Don't display it, but let it
3889 * remain for reference if marked as such.
3891 s0->current_picture_ptr = NULL;
3892 s0->first_field = FIELD_PICTURE;
3895 if (h->nal_ref_idc &&
3896 s0->current_picture_ptr->reference &&
3897 s0->current_picture_ptr->frame_num != h->frame_num) {
3899 * This and previous field were reference, but had
3900 * different frame_nums. Consider this field first in
3901 * pair. Throw away previous field except for reference
3904 s0->first_field = 1;
3905 s0->current_picture_ptr = NULL;
3908 /* Second field in complementary pair */
3909 s0->first_field = 0;
3914 /* Frame or first field in a potentially complementary pair */
3915 assert(!s0->current_picture_ptr);
3916 s0->first_field = FIELD_PICTURE;
3919 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3920 s0->first_field = 0;
3927 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3929 assert(s->mb_num == s->mb_width * s->mb_height);
3930 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3931 first_mb_in_slice >= s->mb_num){
3932 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3935 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3936 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3937 if (s->picture_structure == PICT_BOTTOM_FIELD)
3938 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3939 assert(s->mb_y < s->mb_height);
3941 if(s->picture_structure==PICT_FRAME){
3942 h->curr_pic_num= h->frame_num;
3943 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3945 h->curr_pic_num= 2*h->frame_num + 1;
3946 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3949 if(h->nal_unit_type == NAL_IDR_SLICE){
3950 get_ue_golomb(&s->gb); /* idr_pic_id */
3953 if(h->sps.poc_type==0){
3954 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3956 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3957 h->delta_poc_bottom= get_se_golomb(&s->gb);
3961 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3962 h->delta_poc[0]= get_se_golomb(&s->gb);
3964 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3965 h->delta_poc[1]= get_se_golomb(&s->gb);
3970 if(h->pps.redundant_pic_cnt_present){
3971 h->redundant_pic_count= get_ue_golomb(&s->gb);
3974 //set defaults, might be overridden a few lines later
3975 h->ref_count[0]= h->pps.ref_count[0];
3976 h->ref_count[1]= h->pps.ref_count[1];
3978 if(h->slice_type_nos != FF_I_TYPE){
3979 if(h->slice_type_nos == FF_B_TYPE){
3980 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3982 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3984 if(num_ref_idx_active_override_flag){
3985 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3986 if(h->slice_type_nos==FF_B_TYPE)
3987 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3989 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3990 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3991 h->ref_count[0]= h->ref_count[1]= 1;
3995 if(h->slice_type_nos == FF_B_TYPE)
4002 if(!default_ref_list_done){
4003 fill_default_ref_list(h);
4006 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
4009 if(h->slice_type_nos!=FF_I_TYPE){
4010 s->last_picture_ptr= &h->ref_list[0][0];
4011 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
4013 if(h->slice_type_nos==FF_B_TYPE){
4014 s->next_picture_ptr= &h->ref_list[1][0];
4015 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
4018 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4019 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4020 pred_weight_table(h);
4021 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4022 implicit_weight_table(h);
4025 for (i = 0; i < 2; i++) {
4026 h->luma_weight_flag[i] = 0;
4027 h->chroma_weight_flag[i] = 0;
4032 decode_ref_pic_marking(h0, &s->gb);
4035 fill_mbaff_ref_list(h);
4037 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
4038 direct_dist_scale_factor(h);
4039 direct_ref_list_init(h);
4041 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4042 tmp = get_ue_golomb_31(&s->gb);
4044 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4047 h->cabac_init_idc= tmp;
4050 h->last_qscale_diff = 0;
4051 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4053 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4057 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4058 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4059 //FIXME qscale / qp ... stuff
4060 if(h->slice_type == FF_SP_TYPE){
4061 get_bits1(&s->gb); /* sp_for_switch_flag */
4063 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4064 get_se_golomb(&s->gb); /* slice_qs_delta */
4067 h->deblocking_filter = 1;
4068 h->slice_alpha_c0_offset = 0;
4069 h->slice_beta_offset = 0;
4070 if( h->pps.deblocking_filter_parameters_present ) {
4071 tmp= get_ue_golomb_31(&s->gb);
4073 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4076 h->deblocking_filter= tmp;
4077 if(h->deblocking_filter < 2)
4078 h->deblocking_filter^= 1; // 1<->0
4080 if( h->deblocking_filter ) {
4081 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4082 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4086 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4087 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4088 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4089 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4090 h->deblocking_filter= 0;
4092 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4093 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4094 /* Cheat slightly for speed:
4095 Do not bother to deblock across slices. */
4096 h->deblocking_filter = 2;
4098 h0->max_contexts = 1;
4099 if(!h0->single_decode_warning) {
4100 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4101 h0->single_decode_warning = 1;
4104 return 1; // deblocking switched inside frame
4109 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4110 slice_group_change_cycle= get_bits(&s->gb, ?);
4113 h0->last_slice_type = slice_type;
4114 h->slice_num = ++h0->current_slice;
4115 if(h->slice_num >= MAX_SLICES){
4116 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4120 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4124 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4125 +(h->ref_list[j][i].reference&3);
4128 for(i=16; i<48; i++)
4129 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4130 +(h->ref_list[j][i].reference&3);
4133 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4134 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4136 s->avctx->refs= h->sps.ref_frame_count;
4138 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4139 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4141 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4143 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4144 pps_id, h->frame_num,
4145 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4146 h->ref_count[0], h->ref_count[1],
4148 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4150 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4151 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4161 static inline int get_level_prefix(GetBitContext *gb){
/* Reads the CAVLC level_prefix syntax element: the count of leading zero
 * bits before the next 1 bit in the bitstream.
 * NOTE(review): this excerpt is missing the local declarations (buf, log),
 * the TRACE conditional around the debug output, and the trailing
 * "return log-1;" — confirm against the complete file. */
4165 OPEN_READER(re, gb);
4166 UPDATE_CACHE(re, gb);
/* peek up to 32 bits without consuming them */
4167 buf=GET_CACHE(re, gb);
/* 32 - av_log2(buf) gives the 1-based position of the first set bit,
 * i.e. number of leading zeros + 1 */
4169 log= 32 - av_log2(buf);
/* debug trace of the bits about to be consumed */
4171 print_bin(buf>>(32-log), log);
4172 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating 1 bit */
4175 LAST_SKIP_BITS(re, gb, log);
4176 CLOSE_READER(re, gb);
4181 static inline int get_dct8x8_allowed(H264Context *h){
/* Returns nonzero if the 8x8 transform may be used for the current MB:
 * no sub-macroblock partition smaller than 8x8 is allowed.  Multiplying
 * the flag mask by 0x0001000100010001ULL replicates it into all four
 * 16-bit lanes, testing the four sub_mb_type entries in one 64-bit op. */
4182 if(h->sps.direct_8x8_inference_flag)
/* with direct_8x8_inference, DIRECT sub-MBs never split below 8x8,
 * so MB_TYPE_DIRECT2 need not be rejected */
4183 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
/* NOTE(review): the "else" keyword and closing brace appear to have been
 * dropped from this excerpt */
4185 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4189 * decodes a residual block (CAVLC entropy coding path).
4190 * @param n block index (may be CHROMA_DC_BLOCK_INDEX or LUMA_DC_BLOCK_INDEX for DC-only blocks)
4191 * @param scantable zigzag/field scan order used to place decoded coefficients
4192 * @param max_coeff number of coefficients in the block
4193 * @return <0 if an error occurred
4195 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4196 MpegEncContext * const s = &h->s;
/* maps a predicted nnz count (0..16) to one of the 4 coeff_token VLC tables */
4197 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4199 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4201 //FIXME put trailing_ones into the context
/* --- coeff_token: jointly codes total_coeff and trailing_ones --- */
4203 if(n == CHROMA_DC_BLOCK_INDEX){
4204 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4205 total_coeff= coeff_token>>2;
4207 if(n == LUMA_DC_BLOCK_INDEX){
/* VLC table choice is context-adaptive on the neighbours' nnz counts */
4208 total_coeff= pred_non_zero_count(h, 0);
4209 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4210 total_coeff= coeff_token>>2;
4212 total_coeff= pred_non_zero_count(h, n);
4213 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4214 total_coeff= coeff_token>>2;
4215 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4219 //FIXME set last_non_zero?
/* corrupted stream: more coefficients signalled than fit in the block */
4223 if(total_coeff > (unsigned)max_coeff) {
4224 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low 2 bits of coeff_token are the number of trailing +/-1 levels (0..3) */
4228 trailing_ones= coeff_token&3;
4229 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4230 assert(total_coeff<=16);
/* --- trailing ones: one sign bit each; decoded speculatively from 3 bits,
 *     then only trailing_ones bits are actually consumed --- */
4231 i = show_bits(gb, 3);
4233 skip_bits(gb, trailing_ones);
4234 level[0] = 1-((i&4)>>1);
4235 level[1] = 1-((i&2) );
4236 level[2] = 1-((i&1)<<1);
/* --- non-trivial levels follow the trailing ones --- */
4238 if(trailing_ones<total_coeff) {
/* first level uses suffix_length 0, or 1 when many coeffs / few T1s */
4240 int suffix_length = total_coeff > 10 && trailing_ones < 3;
/* fast path: look the whole prefix+suffix up in cavlc_level_tab */
4241 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4242 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4244 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
/* values >= 100 encode "escape": decode the long form explicitly */
4245 if(level_code >= 100){
4246 prefix= level_code - 100;
4247 if(prefix == LEVEL_TAB_BITS)
4248 prefix += get_level_prefix(gb);
4250 //first coefficient has suffix_length equal to 0 or 1
4251 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4253 level_code= (prefix<<1) + get_bits1(gb); //part
4255 level_code= prefix; //part
4256 }else if(prefix==14){
4258 level_code= (prefix<<1) + get_bits1(gb); //part
4260 level_code= prefix + get_bits(gb, 4); //part
/* prefix >= 15: explicit escape with (prefix-3)-bit suffix */
4262 level_code= 30 + get_bits(gb, prefix-3); //part
4264 level_code += (1<<(prefix-3))-4096;
/* if all three T1 slots were not used, |level| must be > 1 */
4267 if(trailing_ones < 3) level_code += 2;
/* zigzag-decode level_code into a signed level */
4270 mask= -(level_code&1);
4271 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4273 if(trailing_ones < 3) level_code += (level_code>>31)|1;
/* bump suffix_length once the first magnitude exceeds the threshold */
4276 if(level_code + 3U > 6U)
4278 level[trailing_ones]= level_code;
4281 //remaining coefficients have suffix_length > 0
4282 for(i=trailing_ones+1;i<total_coeff;i++) {
/* thresholds at which suffix_length is incremented */
4283 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4284 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4285 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4287 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4288 if(level_code >= 100){
4289 prefix= level_code - 100;
4290 if(prefix == LEVEL_TAB_BITS){
4291 prefix += get_level_prefix(gb);
4294 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4296 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4298 level_code += (1<<(prefix-3))-4096;
4300 mask= -(level_code&1);
4301 level_code= (((2+level_code)>>1) ^ mask) - mask;
4303 level[i]= level_code;
/* adapt suffix_length for the next level */
4305 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
/* --- total_zeros: zeros interleaved among the coefficients --- */
4310 if(total_coeff == max_coeff)
4313 if(n == CHROMA_DC_BLOCK_INDEX)
4314 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4316 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* --- write levels back in reverse scan order, reading run_before between
 *     them; two variants: raw store vs. dequantized store (qmul) --- */
4319 coeff_num = zeros_left + total_coeff - 1;
4320 j = scantable[coeff_num];
4322 block[j] = level[0];
4323 for(i=1;i<total_coeff;i++) {
4326 else if(zeros_left < 7){
4327 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4329 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4331 zeros_left -= run_before;
4332 coeff_num -= 1 + run_before;
4333 j= scantable[ coeff_num ];
/* dequantizing variant: level * qmul[j] with rounding, >>6 */
4338 block[j] = (level[0] * qmul[j] + 32)>>6;
4339 for(i=1;i<total_coeff;i++) {
4342 else if(zeros_left < 7){
4343 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4345 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4347 zeros_left -= run_before;
4348 coeff_num -= 1 + run_before;
4349 j= scantable[ coeff_num ];
4351 block[j]= (level[i] * qmul[j] + 32)>>6;
/* sanity check: run_before values must not exceed the signalled zeros */
4356 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4363 static void predict_field_decoding_flag(H264Context *h){
/* For a skipped MBAFF pair with no explicit mb_field_decoding_flag:
 * inherit the frame/field decoding mode from the left neighbour if it is
 * in the same slice, otherwise from the top neighbour.
 * NOTE(review): the final ": 0;" fallback of the ternary appears to be
 * missing from this excerpt. */
4364 MpegEncContext * const s = &h->s;
4365 const int mb_xy= h->mb_xy;
4366 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4367 ? s->current_picture.mb_type[mb_xy-1]
4368 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4369 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4371 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4375 * decodes a P_SKIP or B_SKIP macroblock: no residual, motion inferred.
4377 static void decode_mb_skip(H264Context *h){
4378 MpegEncContext * const s = &h->s;
4379 const int mb_xy= h->mb_xy;
/* a skipped MB has no coded coefficients anywhere */
4382 memset(h->non_zero_count[mb_xy], 0, 16);
4383 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4386 mb_type|= MB_TYPE_INTERLACED;
4388 if( h->slice_type_nos == FF_B_TYPE )
4390 // B_SKIP: direct prediction; flags below are just for fill_caches,
4391 // pred_direct_motion will set the real mb_type
4391 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4393 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4394 pred_direct_motion(h, &mb_type);
4395 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: 16x16 with list0 ref 0 and the predicted MV */
4400 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4402 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4403 pred_pskip_motion(h, &mx, &my);
4404 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4405 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit per-MB state to the picture-level arrays */
4408 write_back_motion(h, mb_type);
4409 s->current_picture.mb_type[mb_xy]= mb_type;
4410 s->current_picture.qscale_table[mb_xy]= s->qscale;
4411 h->slice_table[ mb_xy ]= h->slice_num;
4412 h->prev_mb_skipped= 1;
4416 * decodes a macroblock using CAVLC entropy coding.
4417 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4419 static int decode_mb_cavlc(H264Context *h){
4420 MpegEncContext * const s = &h->s;
4422 int partition_count;
4423 unsigned int mb_type, cbp;
4424 int dct8x8_allowed= h->pps.transform_8x8_mode;
4426 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4428 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4429 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- skip-run handling (P/B slices only) --- */
4431 if(h->slice_type_nos != FF_I_TYPE){
4432 if(s->mb_skip_run==-1)
4433 s->mb_skip_run= get_ue_golomb(&s->gb);
4435 if (s->mb_skip_run--) {
/* MBAFF: top MB of a pair must decide frame/field before skipping */
4436 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4437 if(s->mb_skip_run==0)
4438 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4440 predict_field_decoding_flag(h);
4447 if( (s->mb_y&1) == 0 )
4448 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4451 h->prev_mb_skipped= 0;
/* --- mb_type: table-mapped per slice type; intra types fall through --- */
4453 mb_type= get_ue_golomb(&s->gb);
4454 if(h->slice_type_nos == FF_B_TYPE){
4456 partition_count= b_mb_type_info[mb_type].partition_count;
4457 mb_type= b_mb_type_info[mb_type].type;
4460 goto decode_intra_mb;
4462 }else if(h->slice_type_nos == FF_P_TYPE){
4464 partition_count= p_mb_type_info[mb_type].partition_count;
4465 mb_type= p_mb_type_info[mb_type].type;
4468 goto decode_intra_mb;
4471 assert(h->slice_type_nos == FF_I_TYPE);
4472 if(h->slice_type == FF_SI_TYPE && mb_type)
4476 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
/* intra mb_type also fixes cbp and the 16x16 prediction mode */
4480 cbp= i_mb_type_info[mb_type].cbp;
4481 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4482 mb_type= i_mb_type_info[mb_type].type;
4486 mb_type |= MB_TYPE_INTERLACED;
4488 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw byte-aligned samples, no prediction or transform --- */
4490 if(IS_INTRA_PCM(mb_type)){
4493 // We assume these blocks are very rare so we do not optimize it.
4494 align_get_bits(&s->gb);
4496 // The pixels are stored in the same order as levels in h->mb array.
4497 for(x=0; x < (CHROMA ? 384 : 256); x++){
4498 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4501 // In deblocking, the quantizer is 0
4502 s->current_picture.qscale_table[mb_xy]= 0;
4503 // All coeffs are present
4504 memset(h->non_zero_count[mb_xy], 16, 16);
4506 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MB: each field uses twice the refs (top+bottom fields) */
4511 h->ref_count[0] <<= 1;
4512 h->ref_count[1] <<= 1;
4515 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
4518 if(IS_INTRA(mb_type)){
4520 // init_top_left_availability(h);
4521 if(IS_INTRA4x4(mb_type)){
4524 if(dct8x8_allowed && get_bits1(&s->gb)){
4525 mb_type |= MB_TYPE_8x8DCT;
4529 // fill_intra4x4_pred_table(h);
4530 for(i=0; i<16; i+=di){
4531 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag == 0: 3-bit rem_mode overrides prediction */
4533 if(!get_bits1(&s->gb)){
4534 const int rem_mode= get_bits(&s->gb, 3);
4535 mode = rem_mode + (rem_mode >= mode);
4539 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4541 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4543 write_back_intra_pred_mode(h);
4544 if( check_intra4x4_pred_mode(h) < 0)
4547 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4548 if(h->intra16x16_pred_mode < 0)
4552 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4555 h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitions: sub_mb_types, refs and MVs per sub-block --- */
4557 }else if(partition_count==4){
4558 int i, j, sub_partition_count[4], list, ref[2][4];
4560 if(h->slice_type_nos == FF_B_TYPE){
4562 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4563 if(h->sub_mb_type[i] >=13){
4564 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4567 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4568 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4570 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4571 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4572 pred_direct_motion(h, &mb_type);
/* mark interior positions unavailable so later prediction is not
 * contaminated by the direct-filled values */
4573 h->ref_cache[0][scan8[4]] =
4574 h->ref_cache[1][scan8[4]] =
4575 h->ref_cache[0][scan8[12]] =
4576 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4579 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4581 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4582 if(h->sub_mb_type[i] >=4){
4583 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4586 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4587 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 sub-block; 1-ref and 2-ref fast paths */
4591 for(list=0; list<h->list_count; list++){
4592 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4594 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4595 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4599 }else if(ref_count == 2){
4600 tmp= get_bits1(&s->gb)^1;
4602 tmp= get_ue_golomb_31(&s->gb);
4604 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4617 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences per sub-partition */
4619 for(list=0; list<h->list_count; list++){
4621 if(IS_DIRECT(h->sub_mb_type[i])) {
4622 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4625 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4626 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4628 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4629 const int sub_mb_type= h->sub_mb_type[i];
4630 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4631 for(j=0; j<sub_partition_count[i]; j++){
4633 const int index= 4*i + block_width*j;
4634 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4635 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4636 mx += get_se_golomb(&s->gb);
4637 my += get_se_golomb(&s->gb);
4638 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV into every 4x4 cell covered by the partition */
4640 if(IS_SUB_8X8(sub_mb_type)){
4642 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4644 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4645 }else if(IS_SUB_8X4(sub_mb_type)){
4646 mv_cache[ 1 ][0]= mx;
4647 mv_cache[ 1 ][1]= my;
4648 }else if(IS_SUB_4X8(sub_mb_type)){
4649 mv_cache[ 8 ][0]= mx;
4650 mv_cache[ 8 ][1]= my;
4652 mv_cache[ 0 ][0]= mx;
4653 mv_cache[ 0 ][1]= my;
4656 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* --- direct 16x16 --- */
4662 }else if(IS_DIRECT(mb_type)){
4663 pred_direct_motion(h, &mb_type);
4664 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
4666 int list, mx, my, i;
4667 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4668 if(IS_16X16(mb_type)){
4669 for(list=0; list<h->list_count; list++){
4671 if(IS_DIR(mb_type, 0, list)){
4672 if(h->ref_count[list]==1){
4674 }else if(h->ref_count[list]==2){
4675 val= get_bits1(&s->gb)^1;
4677 val= get_ue_golomb_31(&s->gb);
4678 if(val >= h->ref_count[list]){
4679 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4684 val= LIST_NOT_USED&0xFF;
4685 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4687 for(list=0; list<h->list_count; list++){
4689 if(IS_DIR(mb_type, 0, list)){
4690 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4691 mx += get_se_golomb(&s->gb);
4692 my += get_se_golomb(&s->gb);
4693 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4695 val= pack16to32(mx,my);
4698 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4701 else if(IS_16X8(mb_type)){
4702 for(list=0; list<h->list_count; list++){
4705 if(IS_DIR(mb_type, i, list)){
4706 if(h->ref_count[list] == 1){
4708 }else if(h->ref_count[list] == 2){
4709 val= get_bits1(&s->gb)^1;
4711 val= get_ue_golomb_31(&s->gb);
4712 if(val >= h->ref_count[list]){
4713 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4718 val= LIST_NOT_USED&0xFF;
4719 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4722 for(list=0; list<h->list_count; list++){
4725 if(IS_DIR(mb_type, i, list)){
4726 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4727 mx += get_se_golomb(&s->gb);
4728 my += get_se_golomb(&s->gb);
4729 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4731 val= pack16to32(mx,my);
4734 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4738 assert(IS_8X16(mb_type));
4739 for(list=0; list<h->list_count; list++){
4742 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4743 if(h->ref_count[list]==1){
4745 }else if(h->ref_count[list]==2){
4746 val= get_bits1(&s->gb)^1;
4748 val= get_ue_golomb_31(&s->gb);
4749 if(val >= h->ref_count[list]){
4750 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4755 val= LIST_NOT_USED&0xFF;
4756 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4759 for(list=0; list<h->list_count; list++){
4762 if(IS_DIR(mb_type, i, list)){
4763 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4764 mx += get_se_golomb(&s->gb);
4765 my += get_se_golomb(&s->gb);
4766 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4768 val= pack16to32(mx,my);
4771 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4777 if(IS_INTER(mb_type))
4778 write_back_motion(h, mb_type);
/* --- coded_block_pattern (skipped for I16x16, which carries it in mb_type) --- */
4780 if(!IS_INTRA16x16(mb_type)){
4781 cbp= get_ue_golomb(&s->gb);
4783 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4788 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4789 else cbp= golomb_to_inter_cbp [cbp];
/* gray (luma-only) mapping tables for the no-chroma case */
4791 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4792 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with coded luma */
4797 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4798 if(get_bits1(&s->gb)){
4799 mb_type |= MB_TYPE_8x8DCT;
4800 h->cbp_table[mb_xy]= cbp;
4803 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
4805 if(cbp || IS_INTRA16x16(mb_type)){
4806 int i8x8, i4x4, chroma_idx;
4808 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4809 const uint8_t *scan, *scan8x8, *dc_scan;
4811 // fill_non_zero_count_cache(h);
/* choose field vs frame scan order (qscale==0 uses the *_q0 variants) */
4813 if(IS_INTERLACED(mb_type)){
4814 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4815 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4816 dc_scan= luma_dc_field_scan;
4818 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4819 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4820 dc_scan= luma_dc_zigzag_scan;
/* mb_qp_delta, with wrap into [0,51] */
4823 dquant= get_se_golomb(&s->gb);
4825 if( dquant > 25 || dquant < -26 ){
4826 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4830 s->qscale += dquant;
4831 if(((unsigned)s->qscale) > 51){
4832 if(s->qscale<0) s->qscale+= 52;
4833 else s->qscale-= 52;
4836 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4837 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
/* I16x16: separate luma DC block, then 15-coefficient AC blocks */
4838 if(IS_INTRA16x16(mb_type)){
4839 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4840 return -1; //FIXME continue if partitioned and other return -1 too
4843 assert((cbp&15) == 0 || (cbp&15) == 15);
4846 for(i8x8=0; i8x8<4; i8x8++){
4847 for(i4x4=0; i4x4<4; i4x4++){
4848 const int index= i4x4 + 4*i8x8;
4849 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4855 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other MB types: per-8x8 luma, as 8x8 transform or four 4x4 blocks */
4858 for(i8x8=0; i8x8<4; i8x8++){
4859 if(cbp & (1<<i8x8)){
4860 if(IS_8x8DCT(mb_type)){
4861 DCTELEM *buf = &h->mb[64*i8x8];
4863 for(i4x4=0; i4x4<4; i4x4++){
4864 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4865 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4868 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4869 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4871 for(i4x4=0; i4x4<4; i4x4++){
4872 const int index= i4x4 + 4*i8x8;
4874 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4880 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4881 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (qmul==NULL: no dequant here), then chroma AC */
4887 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4888 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4894 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4895 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4896 for(i4x4=0; i4x4<4; i4x4++){
4897 const int index= 16 + 4*chroma_idx + i4x4;
4898 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4904 uint8_t * const nnz= &h->non_zero_count_cache[0];
4905 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4906 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual at all: zero every nnz entry */
4909 uint8_t * const nnz= &h->non_zero_count_cache[0];
4910 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4911 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4912 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4914 s->current_picture.qscale_table[mb_xy]= s->qscale;
4915 write_back_non_zero_count(h);
/* undo the MBAFF field-MB ref_count doubling from above */
4918 h->ref_count[0] >>= 1;
4919 h->ref_count[1] >>= 1;
4925 static int decode_cabac_field_decoding_flag(H264Context *h) {
/* CABAC mb_field_decoding_flag for an MBAFF pair: context 70..72 chosen
 * by how many available neighbours (left, above) are field-coded. */
4926 MpegEncContext * const s = &h->s;
4927 const int mb_x = s->mb_x;
/* address of the top MB of the current pair */
4928 const int mb_y = s->mb_y & ~1;
4929 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4930 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4932 unsigned int ctx = 0;
4934 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4937 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4941 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4944 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
/* Decodes an intra mb_type with CABAC: 0 = I4x4, 1..24 = I16x16 variants
 * (encoding cbp_luma/cbp_chroma and pred mode), 25 = I_PCM.
 * intra_slice selects the context increments used in I slices. */
4945 uint8_t *state= &h->cabac_state[ctx_base];
4949 MpegEncContext * const s = &h->s;
4950 const int mba_xy = h->left_mb_xy[0];
4951 const int mbb_xy = h->top_mb_xy;
/* context from neighbours that are intra but not I4x4 */
4953 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4955 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4957 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4958 return 0; /* I4x4 */
4961 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4962 return 0; /* I4x4 */
4965 if( get_cabac_terminate( &h->cabac ) )
4966 return 25; /* PCM */
/* assemble the I16x16 type index from cbp and pred-mode bits */
4968 mb_type = 1; /* I16x16 */
4969 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4970 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4971 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4972 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4973 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4977 static int decode_cabac_mb_type_b( H264Context *h ) {
/* Decodes mb_type for a B slice with CABAC (contexts 27..32):
 * 0 = B_Direct_16x16, 1/2 = B_L0/L1_16x16, 3.. = bi/mixed partitions,
 * 23.. = intra types (delegated to decode_cabac_intra_mb_type). */
4978 MpegEncContext * const s = &h->s;
4980 const int mba_xy = h->left_mb_xy[0];
4981 const int mbb_xy = h->top_mb_xy;
4984 assert(h->slice_type_nos == FF_B_TYPE);
/* context from neighbours that are not direct-coded */
4986 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4988 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4991 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4992 return 0; /* B_Direct_16x16 */
4994 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4995 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining partition types */
4998 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4999 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5000 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5001 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5003 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5004 else if( bits == 13 ) {
/* escape to the intra mb_type decoder, offset past the inter types */
5005 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5006 } else if( bits == 14 )
5007 return 11; /* B_L1_L0_8x16 */
5008 else if( bits == 15 )
5009 return 22; /* B_8x8 */
/* one more bit resolves the remaining two-way choices */
5011 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5012 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5015 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
/* Decodes mb_skip_flag with CABAC. Context (11..13 for P, 24.. for B via
 * the +13 offset below) counts available non-skipped neighbours.
 * The MBAFF branch adjusts neighbour addresses for frame/field pairing. */
5016 MpegEncContext * const s = &h->s;
5020 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5021 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* left neighbour: use the bottom MB of the pair when pairing matches */
5024 && h->slice_table[mba_xy] == h->slice_num
5025 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5026 mba_xy += s->mb_stride;
5028 mbb_xy = mb_xy - s->mb_stride;
5030 && h->slice_table[mbb_xy] == h->slice_num
5031 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5032 mbb_xy -= s->mb_stride;
5034 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5036 int mb_xy = h->mb_xy;
5038 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5041 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5043 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5046 if( h->slice_type_nos == FF_B_TYPE )
5048 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5051 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
/* CABAC intra4x4 pred mode: first bin (ctx 68) = "use predicted mode";
 * otherwise 3 bins (ctx 69) give rem_intra4x4_pred_mode, skipping the
 * predicted value. */
5054 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5057 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5058 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5059 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so all 9 modes stay reachable */
5061 if( mode >= pred_mode )
5067 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
/* CABAC intra_chroma_pred_mode (0..3), truncated-unary with contexts
 * 64..67; the first bin's context depends on the neighbours' modes. */
5068 const int mba_xy = h->left_mb_xy[0];
5069 const int mbb_xy = h->top_mb_xy;
5073 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5074 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5077 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5080 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5083 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5085 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5091 static int decode_cabac_mb_cbp_luma( H264Context *h) {
/* CABAC luma coded_block_pattern: one bin (contexts 73..76) per 8x8
 * block; each context depends on the corresponding left/top neighbour
 * bits (either from the adjacent MB's cbp or from bits already decoded
 * within this MB). -1 for an unavailable neighbour makes the !(...)
 * tests treat it as coded. */
5092 int cbp_b, cbp_a, ctx, cbp = 0;
5094 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5095 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5097 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5098 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5099 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5100 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5101 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5102 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5103 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5104 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5107 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
/* CABAC chroma cbp (0 = none, 1 = DC only, 2 = DC+AC), contexts 77..84;
 * neighbour chroma-cbp values are kept in bits 4..5 of left/top cbp. */
5111 cbp_a = (h->left_cbp>>4)&0x03;
5112 cbp_b = (h-> top_cbp>>4)&0x03;
/* first bin: any chroma coefficients at all? */
5115 if( cbp_a > 0 ) ctx++;
5116 if( cbp_b > 0 ) ctx += 2;
5117 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin (context offset +4 range): AC as well as DC? */
5121 if( cbp_a == 2 ) ctx++;
5122 if( cbp_b == 2 ) ctx += 2;
5123 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5125 static int decode_cabac_mb_dqp( H264Context *h) {
/* CABAC mb_qp_delta: unary-coded magnitude (contexts 60..63), then
 * mapped to a signed delta (odd counts positive, even negative). */
5126 int ctx= h->last_qscale_diff != 0;
5129 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5132 if(val > 102) //prevent infinite loop
5137 return (val + 1)>>1 ;
5139 return -((val + 1)>>1);
5141 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
/* CABAC sub_mb_type for P slices: small binary tree over contexts 21..23
 * selecting among P_L0_8x8 / 8x4 / 4x8 / 4x4. */
5142 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5144 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5146 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5150 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
/* CABAC sub_mb_type for B slices: binary tree over contexts 36..39,
 * 0 = B_Direct_8x8 up through the 4x4 bi-predictive types. */
5152 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5153 return 0; /* B_Direct_8x8 */
5154 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5155 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5157 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5158 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5159 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5162 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5163 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC transform_size_8x8_flag; context 399..401 chosen by how many
 * neighbours already use the 8x8 transform. */
5167 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5168 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5171 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
/* CABAC ref_idx: unary code over contexts 54..; the initial context
 * derives from the left/top cached ref indices (direct-coded B
 * neighbours are excluded). Capped at 32 to guard corrupted streams. */
5172 int refa = h->ref_cache[list][scan8[n] - 1];
5173 int refb = h->ref_cache[list][scan8[n] - 8];
5177 if( h->slice_type_nos == FF_B_TYPE) {
5178 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5180 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5189 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5192 if(ref >= 32 /*h->ref_list[list]*/){
5199 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* CABAC motion vector difference, component l (0=x ctx 40.., 1=y ctx
 * 47..): context from the neighbours' absolute mvd sum, then a unary
 * part (up to 9 bins), an exp-Golomb bypass suffix, and a bypass sign. */
5200 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5201 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5202 int ctxbase = (l == 0) ? 40 : 47;
5204 int ctx = (amvd>2) + (amvd>32);
5206 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5211 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-Golomb suffix in bypass mode for large magnitudes */
5219 while( get_cabac_bypass( &h->cabac ) ) {
5223 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5228 if( get_cabac_bypass( &h->cabac ) )
5232 return get_cabac_bypass_sign( &h->cabac, -mvd );
5235 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* Computes the coded_block_flag context for block category `cat`:
 * ctx = (left nz != 0) + 2*(top nz != 0), offset by 4*cat. DC-category
 * neighbour flags live in dedicated cbp bits; AC categories read the
 * non_zero_count cache. */
5241 nza = h->left_cbp&0x100;
5242 nzb = h-> top_cbp&0x100;
/* chroma DC flags are stored per-component at cbp bits 6+idx */
5244 nza = (h->left_cbp>>(6+idx))&0x01;
5245 nzb = (h-> top_cbp>>(6+idx))&0x01;
5248 assert(cat == 1 || cat == 2 || cat == 4);
5249 nza = h->non_zero_count_cache[scan8[idx] - 1];
5250 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5259 return ctx + 4 * cat;
/* Context offsets for last_significant_coeff_flag in 8x8 blocks: maps
 * each of the 63 non-final scan positions to one of 9 shared contexts
 * (per the H.264 spec's 8x8 context grouping). */
5262 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5263 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5264 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5265 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5266 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one CABAC-coded residual block into 'block'.
 *   cat       : block category (see table below), selects context families
 *   n         : block index within the macroblock
 *   scantable : zigzag/field scan mapping coefficient order -> position
 *   qmul      : dequant table (NULL for DC categories, which stay unscaled)
 *   max_coeff : number of coefficients in the block
 *   is_dc     : compile-time flag; together with cat it lets the compiler
 *               specialize the DC and non-DC paths (see the _dc/_nondc
 *               wrappers below)
 * Three phases: coded_block_flag, significance map (which coefficients are
 * nonzero), then coefficient levels+signs decoded in reverse scan order.
 * NOTE(review): this chunk is missing many intermediate lines. */
5269 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
     /* Context base offsets per category; row [0] is frame-coded, row [1]
      * field-coded (MB_FIELD selects the row). */
5270     static const int significant_coeff_flag_offset[2][6] = {
5271       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5272       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5274     static const int last_coeff_flag_offset[2][6] = {
5275       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5276       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5278     static const int coeff_abs_level_m1_offset[6] = {
5279         227+0, 227+10, 227+20, 227+30, 227+39, 426
     /* 8x8 blocks share significance contexts between scan positions;
      * [0] = frame scan, [1] = field scan. */
5281     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5282       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5283         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5284         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5285        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5286       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5287         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5288         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5289         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5291     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5292      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5293      * map node ctx => cabac ctx for level=1 */
5294     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5295     /* map node ctx => cabac ctx for level>1 */
5296     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5297     static const uint8_t coeff_abs_level_transition[2][8] = {
5298     /* update node ctx after decoding a level=1 */
5299         { 1, 2, 3, 3, 4, 5, 6, 7 },
5300     /* update node ctx after decoding a level>1 */
5301         { 4, 4, 4, 4, 5, 6, 7, 7 }
5307     int coeff_count = 0;
5310     uint8_t *significant_coeff_ctx_base;
5311     uint8_t *last_coeff_ctx_base;
5312     uint8_t *abs_level_m1_ctx_base;
     /* Work on a stack copy of the CABAC state so the compiler can keep it
      * in registers through this hot loop; written back before returning. */
5315 #define CABAC_ON_STACK
5317 #ifdef CABAC_ON_STACK
5320     cc.range     = h->cabac.range;
5321     cc.low       = h->cabac.low;
5322     cc.bytestream= h->cabac.bytestream;
5324 #define CC &h->cabac
5328     /* cat: 0-> DC 16x16  n = 0
5329      *      1-> AC 16x16  n = luma4x4idx
5330      *      2-> Luma4x4   n = luma4x4idx
5331      *      3-> DC Chroma n = iCbCr
5332      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5333      *      5-> Luma8x8   n = 4 * luma8x8idx
5336     /* read coded block flag */
5337     if( is_dc || cat != 5 ) {
5338         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
             /* No coefficients: record a zero count and restore CABAC state. */
5340                 h->non_zero_count_cache[scan8[n]] = 0;
5342 #ifdef CABAC_ON_STACK
5343             h->cabac.range     = cc.range     ;
5344             h->cabac.low       = cc.low       ;
5345             h->cabac.bytestream= cc.bytestream;
5351     significant_coeff_ctx_base = h->cabac_state
5352         + significant_coeff_flag_offset[MB_FIELD][cat];
5353     last_coeff_ctx_base = h->cabac_state
5354         + last_coeff_flag_offset[MB_FIELD][cat];
5355     abs_level_m1_ctx_base = h->cabac_state
5356         + coeff_abs_level_m1_offset[cat];
5358     if( !is_dc && cat == 5 ) {
     /* Significance map: for each scan position, decode significant_flag;
      * if set, record the index and decode last_flag to maybe terminate. */
5359 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5360         for(last= 0; last < coefs; last++) { \
5361             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5362             if( get_cabac( CC, sig_ctx )) { \
5363                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5364                 index[coeff_count++] = last; \
5365                 if( get_cabac( CC, last_ctx ) ) { \
             /* The final position has no explicit flag; it is implicitly
              * significant if the loop runs to the end. */
5371         if( last == max_coeff -1 ) {\
5372             index[coeff_count++] = last;\
5374         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5375 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5376         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5378         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5380         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5382         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5385     assert(coeff_count > 0);
     /* Record nonzero status: DC blocks flag cbp_table, 8x8 blocks fill a
      * 2x2 patch of the nnz cache, 4x4 blocks a single entry. */
5389             h->cbp_table[h->mb_xy] |= 0x100;
5391             h->cbp_table[h->mb_xy] |= 0x40 << n;
5394             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5396             assert( cat == 1 || cat == 2 || cat == 4 );
5397             h->non_zero_count_cache[scan8[n]] = coeff_count;
     /* Levels: walk the significant coefficients in reverse scan order;
      * node_ctx tracks how many level==1 / level>1 coeffs were seen and
      * selects the context via the tables above. */
5402         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5404         int j= scantable[index[--coeff_count]];
5406         if( get_cabac( CC, ctx ) == 0 ) {
5407             node_ctx = coeff_abs_level_transition[0][node_ctx];
             /* |level| == 1: sign bypass bin, dequantized unless DC. */
5409                 block[j] = get_cabac_bypass_sign( CC, -1);
5411                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5415             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5416             node_ctx = coeff_abs_level_transition[1][node_ctx];
             /* |level| > 1: unary prefix up to 14, then exp-Golomb suffix. */
5418             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5422             if( coeff_abs >= 15 ) {
5424                 while( get_cabac_bypass( CC ) ) {
5430                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5436                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5438                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5441     } while( coeff_count );
     /* Write the register-cached CABAC state back to the context. */
5442 #ifdef CABAC_ON_STACK
5443             h->cabac.range     = cc.range     ;
5444             h->cabac.low       = cc.low       ;
5445             h->cabac.bytestream= cc.bytestream;
/* DC specialization wrapper: forces is_dc=1 so the compiler emits a
 * DC-only variant of decode_cabac_residual_internal (no dequant path). */
5451 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5452     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-DC specialization wrapper: forces is_dc=0 so the compiler emits the
 * AC/luma variant of decode_cabac_residual_internal (dequant applied). */
5455 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5456     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatcher: categories 0 (luma DC) and 3 (chroma DC) take the DC path,
 * everything else the non-DC path.  NOTE(review): the two visible call
 * styles (direct _internal call vs. _dc/_nondc wrappers) are presumably
 * alternative branches of a preprocessor conditional whose #if/#else
 * lines are missing from this chunk — confirm against the full file. */
5460 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5462     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5464     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5465     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Compute h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * Defaults assume frame coding (top = one stride up, left = mb_xy-1); the
 * MBAFF branch re-derives them from the macroblock *pair* position because
 * field/frame flags of the neighbours change which MB of a pair is
 * adjacent.  NOTE(review): surrounding lines (e.g. the FRAME_MBAFF
 * conditional itself) are missing from this chunk. */
5469 static inline void compute_mb_neighbors(H264Context *h)
5471     MpegEncContext * const s = &h->s;
5472     const int mb_xy = h->mb_xy;
5473     h->top_mb_xy     = mb_xy - s->mb_stride;
5474     h->left_mb_xy[0] = mb_xy - 1;
         /* MBAFF: address of the MB pair containing the current MB, and of
          * the pair above it. */
5476         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5477         const int top_pair_xy      = pair_xy     - s->mb_stride;
5478         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5479         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5480         const int curr_mb_field_flag = MB_FIELD;
5481         const int bottom = (s->mb_y & 1);
5483         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5484             h->top_mb_xy -= s->mb_stride;
         /* When neighbour pair coding mode differs from ours, the left
          * neighbour is the top MB of the left pair. */
5486         if (!left_mb_field_flag == curr_mb_field_flag) {
5487             h->left_mb_xy[0] = pair_xy - 1;
5489     } else if (FIELD_PICTURE) {
         /* Field pictures: rows of one parity, so top is two strides up
          * in picture terms — here one extra stride. */
5490         h->top_mb_xy -= s->mb_stride;
5496  * decodes a macroblock
5497  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CABAC macroblock decode: skip flags, mb_type, intra prediction
 * modes or inter motion/reference data, CBP, dqp, and all residual blocks.
 * NOTE(review): this chunk is missing many intermediate lines; comments
 * below annotate only the visible structure. */
5499 static int decode_mb_cabac(H264Context *h) {
5500     MpegEncContext * const s = &h->s;
5502     int mb_type, partition_count, cbp = 0;
5503     int dct8x8_allowed= h->pps.transform_8x8_mode;
5505     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5507     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     /* ---- skip-flag handling (P/B slices only) ---- */
5508     if( h->slice_type_nos != FF_I_TYPE ) {
5510         /* a skipped mb needs the aff flag from the following mb */
5511         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5512             predict_field_decoding_flag(h);
5513         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5514             skip = h->next_mb_skipped;
5516             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5517         /* read skip flags */
5519             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5520                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5521                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5522                 if(!h->next_mb_skipped)
5523                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
             /* Skipped MB: clear the per-MB side tables and return early
              * (return statement not visible in this chunk). */
5528             h->cbp_table[mb_xy] = 0;
5529             h->chroma_pred_mode_table[mb_xy] = 0;
5530             h->last_qscale_diff = 0;
5537         if( (s->mb_y&1) == 0 )
5539                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5542     h->prev_mb_skipped = 0;
5544     compute_mb_neighbors(h);
     /* ---- mb_type decode, branching on slice type ---- */
5546     if( h->slice_type_nos == FF_B_TYPE ) {
5547         mb_type = decode_cabac_mb_type_b( h );
5549             partition_count= b_mb_type_info[mb_type].partition_count;
5550             mb_type=         b_mb_type_info[mb_type].type;
5553             goto decode_intra_mb;
5555     } else if( h->slice_type_nos == FF_P_TYPE ) {
5556         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5558             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5559                 /* P_L0_D16x16, P_8x8 */
5560                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5562                 /* P_L0_D8x16, P_L0_D16x8 */
5563                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5565             partition_count= p_mb_type_info[mb_type].partition_count;
5566             mb_type=         p_mb_type_info[mb_type].type;
5568             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5569             goto decode_intra_mb;
5572         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5573         if(h->slice_type == FF_SI_TYPE && mb_type)
5575         assert(h->slice_type_nos == FF_I_TYPE);
5577         partition_count = 0;
5578         cbp= i_mb_type_info[mb_type].cbp;
5579         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5580         mb_type= i_mb_type_info[mb_type].type;
5583         mb_type |= MB_TYPE_INTERLACED;
5585     h->slice_table[ mb_xy ]= h->slice_num;
     /* ---- IPCM: raw samples follow in the bytestream, bypassing CABAC ---- */
5587     if(IS_INTRA_PCM(mb_type)) {
5590         // We assume these blocks are very rare so we do not optimize it.
5591         // FIXME The two following lines get the bitstream position in the cabac
5592         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5593         ptr= h->cabac.bytestream;
5594         if(h->cabac.low&0x1) ptr--;
5596         if(h->cabac.low&0x1FF) ptr--;
5599         // The pixels are stored in the same order as levels in h->mb array.
5600         memcpy(h->mb, ptr, 256); ptr+=256;
5602             memcpy(h->mb+128, ptr, 128); ptr+=128;
         /* Re-initialize the CABAC decoder after the raw PCM bytes. */
5605         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5607         // All blocks are present
5608         h->cbp_table[mb_xy] = 0x1ef;
5609         h->chroma_pred_mode_table[mb_xy] = 0;
5610         // In deblocking, the quantizer is 0
5611         s->current_picture.qscale_table[mb_xy]= 0;
5612         // All coeffs are present
5613         memset(h->non_zero_count[mb_xy], 16, 16);
5614         s->current_picture.mb_type[mb_xy]= mb_type;
5615         h->last_qscale_diff = 0;
     /* MBAFF ref counts are doubled while decoding (fields double the
      * reference list); restored with >>1 at the end of the function. */
5620         h->ref_count[0] <<= 1;
5621         h->ref_count[1] <<= 1;
5624     fill_caches(h, mb_type, 0);
     /* ---- intra prediction modes ---- */
5626     if( IS_INTRA( mb_type ) ) {
5628         if( IS_INTRA4x4( mb_type ) ) {
5629             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5630                 mb_type |= MB_TYPE_8x8DCT;
5631                 for( i = 0; i < 16; i+=4 ) {
5632                     int pred = pred_intra_mode( h, i );
5633                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5634                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5637                 for( i = 0; i < 16; i++ ) {
5638                     int pred = pred_intra_mode( h, i );
5639                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5641                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5644             write_back_intra_pred_mode(h);
5645             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5647             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5648             if( h->intra16x16_pred_mode < 0 ) return -1;
5651         h->chroma_pred_mode_table[mb_xy] =
5652         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5654         pred_mode= check_intra_pred_mode( h, pred_mode );
5655         if( pred_mode < 0 ) return -1;
5656         h->chroma_pred_mode= pred_mode;
     /* ---- inter: 8x8 partitions with sub-partitions ---- */
5658     } else if( partition_count == 4 ) {
5659         int i, j, sub_partition_count[4], list, ref[2][4];
5661         if( h->slice_type_nos == FF_B_TYPE ) {
5662             for( i = 0; i < 4; i++ ) {
5663                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5664                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5665                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5667             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5668                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5669                 pred_direct_motion(h, &mb_type);
5670                 h->ref_cache[0][scan8[4]] =
5671                 h->ref_cache[1][scan8[4]] =
5672                 h->ref_cache[0][scan8[12]] =
5673                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5674                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5675                     for( i = 0; i < 4; i++ )
5676                         if( IS_DIRECT(h->sub_mb_type[i]) )
5677                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5681             for( i = 0; i < 4; i++ ) {
5682                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5683                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5684                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
         /* Reference indices for each non-direct 8x8 partition. */
5688         for( list = 0; list < h->list_count; list++ ) {
5689             for( i = 0; i < 4; i++ ) {
5690                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5691                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5692                     if( h->ref_count[list] > 1 ){
5693                         ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5694                         if(ref[list][i] >= (unsigned)h->ref_count[list]){
5695                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5703                                                        h->ref_cache[list][ scan8[4*i]+1 ]=
5704                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5709             dct8x8_allowed = get_dct8x8_allowed(h);
         /* Motion vectors + MVDs for each sub-partition. */
5711         for(list=0; list<h->list_count; list++){
5713                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5714                 if(IS_DIRECT(h->sub_mb_type[i])){
5715                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5719                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5720                     const int sub_mb_type= h->sub_mb_type[i];
5721                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5722                     for(j=0; j<sub_partition_count[i]; j++){
5725                         const int index= 4*i + block_width*j;
5726                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5727                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5728                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5730                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5731                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5732                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
                         /* Replicate the decoded MV/MVD into every 4x4 cell
                          * covered by this sub-block shape. */
5734                         if(IS_SUB_8X8(sub_mb_type)){
5736                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5738                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5741                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5743                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5744                         }else if(IS_SUB_8X4(sub_mb_type)){
5745                             mv_cache[ 1 ][0]= mx;
5746                             mv_cache[ 1 ][1]= my;
5748                             mvd_cache[ 1 ][0]= mx - mpx;
5749                             mvd_cache[ 1 ][1]= my - mpy;
5750                         }else if(IS_SUB_4X8(sub_mb_type)){
5751                             mv_cache[ 8 ][0]= mx;
5752                             mv_cache[ 8 ][1]= my;
5754                             mvd_cache[ 8 ][0]= mx - mpx;
5755                             mvd_cache[ 8 ][1]= my - mpy;
5757                         mv_cache[ 0 ][0]= mx;
5758                         mv_cache[ 0 ][1]= my;
5760                         mvd_cache[ 0 ][0]= mx - mpx;
5761                         mvd_cache[ 0 ][1]= my - mpy;
                     /* Partition not predicted from this list: zero it. */
5764                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5765                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5766                     p[0] = p[1] = p[8] = p[9] = 0;
5767                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
     /* ---- inter: direct 16x16 ---- */
5771     } else if( IS_DIRECT(mb_type) ) {
5772         pred_direct_motion(h, &mb_type);
5773         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5774         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5775         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     /* ---- inter: 16x16 / 16x8 / 8x16 partitions ---- */
5777         int list, mx, my, i, mpx, mpy;
5778         if(IS_16X16(mb_type)){
5779             for(list=0; list<h->list_count; list++){
5780                 if(IS_DIR(mb_type, 0, list)){
5782                     if(h->ref_count[list] > 1){
5783                         ref= decode_cabac_mb_ref(h, list, 0);
5784                         if(ref >= (unsigned)h->ref_count[list]){
5785                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5790                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5792                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5794             for(list=0; list<h->list_count; list++){
5795                 if(IS_DIR(mb_type, 0, list)){
5796                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5798                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5799                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5800                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5802                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5803                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5805                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5808         else if(IS_16X8(mb_type)){
5809             for(list=0; list<h->list_count; list++){
5811                     if(IS_DIR(mb_type, i, list)){
5813                         if(h->ref_count[list] > 1){
5814                             ref= decode_cabac_mb_ref( h, list, 8*i );
5815                             if(ref >= (unsigned)h->ref_count[list]){
5816                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5821                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5823                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5826             for(list=0; list<h->list_count; list++){
5828                     if(IS_DIR(mb_type, i, list)){
5829                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5830                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5831                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5832                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5834                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5835                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5837                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5838                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5843             assert(IS_8X16(mb_type));
5844             for(list=0; list<h->list_count; list++){
5846                     if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5848                         if(h->ref_count[list] > 1){
5849                             ref= decode_cabac_mb_ref( h, list, 4*i );
5850                             if(ref >= (unsigned)h->ref_count[list]){
5851                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5856                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5858                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5861             for(list=0; list<h->list_count; list++){
5863                     if(IS_DIR(mb_type, i, list)){
5864                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5865                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5866                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5868                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5869                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5870                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5872                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5873                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
     /* ---- write back motion, then CBP + residuals ---- */
5880     if( IS_INTER( mb_type ) ) {
5881         h->chroma_pred_mode_table[mb_xy] = 0;
5882         write_back_motion( h, mb_type );
5885     if( !IS_INTRA16x16( mb_type ) ) {
5886         cbp  = decode_cabac_mb_cbp_luma( h );
5888             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5891     h->cbp_table[mb_xy] = h->cbp = cbp;
5893     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5894         if( decode_cabac_mb_transform_size( h ) )
5895             mb_type |= MB_TYPE_8x8DCT;
5897     s->current_picture.mb_type[mb_xy]= mb_type;
5899     if( cbp || IS_INTRA16x16( mb_type ) ) {
5900         const uint8_t *scan, *scan8x8, *dc_scan;
5901         const uint32_t *qmul;
         /* Pick frame vs. field scan orders (q0 variants when qscale==0). */
5904         if(IS_INTERLACED(mb_type)){
5905             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5906             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5907             dc_scan= luma_dc_field_scan;
5909             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5910             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5911             dc_scan= luma_dc_zigzag_scan;
5914         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5915         if( dqp == INT_MIN ){
5916             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
         /* qscale wraps modulo 52 per the spec's mb_qp_delta semantics. */
5920         if(((unsigned)s->qscale) > 51){
5921             if(s->qscale<0) s->qscale+= 52;
5922             else            s->qscale-= 52;
5924         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5925         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5927         if( IS_INTRA16x16( mb_type ) ) {
5929             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5930             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5933                 qmul = h->dequant4_coeff[0][s->qscale];
5934                 for( i = 0; i < 16; i++ ) {
5935                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5936                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5939                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5943             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5944                 if( cbp & (1<<i8x8) ) {
5945                     if( IS_8x8DCT(mb_type) ) {
5946                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5947                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5949                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5950                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5951                             const int index = 4*i8x8 + i4x4;
5952                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5954                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5955 //STOP_TIMER("decode_residual")
5959                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5960                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5967             for( c = 0; c < 2; c++ ) {
5968                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5969                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5975             for( c = 0; c < 2; c++ ) {
5976                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5977                 for( i = 0; i < 4; i++ ) {
5978                     const int index = 16 + 4 * c + i;
5979                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5980                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5984             uint8_t * const nnz= &h->non_zero_count_cache[0];
5985             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5986             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
         /* No residual at all: clear the whole nnz cache. */
5989         uint8_t * const nnz= &h->non_zero_count_cache[0];
5990         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5991         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5992         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5993         h->last_qscale_diff = 0;
5996     s->current_picture.qscale_table[mb_xy]= s->qscale;
5997     write_back_non_zero_count(h);
     /* Undo the MBAFF doubling from above. */
6000         h->ref_count[0] >>= 1;
6001         h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge of 4 pixels-high segments with boundary
 * strengths bS[0..3].  alpha/beta thresholds are looked up from qp plus
 * the slice offsets (tables biased by +52 to allow negative indices).
 * bS<4 uses the normal tc0-clipped filter, bS==4 the strong intra filter.
 * NOTE(review): intermediate lines (the bS<4/==4 branch) are missing. */
6008 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6009     const int index_a = qp + h->slice_alpha_c0_offset;
6010     const int alpha = (alpha_table+52)[index_a];
6011     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
     /* Thresholds of zero disable filtering entirely for this edge. */
6012     if (alpha ==0 || beta == 0) return;
6016         tc[0] = (tc0_table+52)[index_a][bS[0]];
6017         tc[1] = (tc0_table+52)[index_a][bS[1]];
6018         tc[2] = (tc0_table+52)[index_a][bS[2]];
6019         tc[3] = (tc0_table+52)[index_a][bS[3]];
6020         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6022         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblock one vertical chroma edge.  Same structure as filter_mb_edgev,
 * but chroma passes tc0+1 to the DSP filter (the chroma filter subtracts
 * one internally per the spec's chroma clipping rule — TODO confirm). */
6025 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6026     const int index_a = qp + h->slice_alpha_c0_offset;
6027     const int alpha = (alpha_table+52)[index_a];
6028     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6029     if (alpha ==0 || beta == 0) return;
6033         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6034         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6035         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6036         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6037         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6039         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the vertical left edge of an MBAFF macroblock, row by row
 * (16 luma rows).  Unlike the non-MBAFF path this cannot use the 4-row
 * DSP filters because qp and bS can change per row: qp[2] holds the two
 * field QPs and qp_index selects between them depending on field/frame
 * coding.  Scalar re-implementation of the spec's luma filter.
 * NOTE(review): chunk is missing lines (e.g. tc computation near 6090). */
6043 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6045     for( i = 0; i < 16; i++, pix += stride) {
6051         int bS_index = (i >> 1);
6054             bS_index |= (i & 1);
6057         if( bS[bS_index] == 0 ) {
         /* Field MBs: rows 0..7 use qp[0], 8..15 qp[1]; frame MBs alternate. */
6061         qp_index = MB_FIELD ? (i >> 3) : (i&1);
6062         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6063         alpha = (alpha_table+52)[index_a];
6064         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6066         if( bS[bS_index] < 4 ) {
             /* Normal filter: clip the delta to ±tc0 (extended by p2/q2 terms). */
6067             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6068             const int p0 = pix[-1];
6069             const int p1 = pix[-2];
6070             const int p2 = pix[-3];
6071             const int q0 = pix[0];
6072             const int q1 = pix[1];
6073             const int q2 = pix[2];
6075             if( FFABS( p0 - q0 ) < alpha &&
6076                 FFABS( p1 - p0 ) < beta &&
6077                 FFABS( q1 - q0 ) < beta ) {
6081                 if( FFABS( p2 - p0 ) < beta ) {
6082                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6085                 if( FFABS( q2 - q0 ) < beta ) {
6086                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6090                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6091                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6092                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6093             tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
         /* bS == 4: strong (intra) filter, may rewrite up to 3 pixels/side. */
6096             const int p0 = pix[-1];
6097             const int p1 = pix[-2];
6098             const int p2 = pix[-3];
6100             const int q0 = pix[0];
6101             const int q1 = pix[1];
6102             const int q2 = pix[2];
6104             if( FFABS( p0 - q0 ) < alpha &&
6105                 FFABS( p1 - p0 ) < beta &&
6106                 FFABS( q1 - q0 ) < beta ) {
6108                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6109                     if( FFABS( p2 - p0 ) < beta)
6111                         const int p3 = pix[-4];
6113                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6114                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6115                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6118                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6120                     if( FFABS( q2 - q0 ) < beta)
6122                         const int q3 = pix[3];
6124                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6125                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6126                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6129                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6133                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6134                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6136                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: filters the vertical left
 * edge of an MBAFF macroblock row by row (8 chroma rows), choosing qp per
 * row.  Chroma only touches p0/q0; tc is tc0+1 per the chroma rule.
 * NOTE(review): chunk is missing lines (e.g. the bS_index setup). */
6141 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6143     for( i = 0; i < 8; i++, pix += stride) {
6151         if( bS[bS_index] == 0 ) {
6155         qp_index = MB_FIELD ? (i >> 2) : (i&1);
6156         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6157         alpha = (alpha_table+52)[index_a];
6158         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6160         if( bS[bS_index] < 4 ) {
6161             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6162             const int p0 = pix[-1];
6163             const int p1 = pix[-2];
6164             const int q0 = pix[0];
6165             const int q1 = pix[1];
6167             if( FFABS( p0 - q0 ) < alpha &&
6168                 FFABS( p1 - p0 ) < beta &&
6169                 FFABS( q1 - q0 ) < beta ) {
6170                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6172                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6173                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6174                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
         /* bS == 4: strong chroma filter (simple 2-tap smoothing). */
6177             const int p0 = pix[-1];
6178             const int p1 = pix[-2];
6179             const int q0 = pix[0];
6180             const int q1 = pix[1];
6182             if( FFABS( p0 - q0 ) < alpha &&
6183                 FFABS( p1 - p0 ) < beta &&
6184                 FFABS( q1 - q0 ) < beta ) {
6186                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6187                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6188                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge — identical in structure to
 * filter_mb_edgev but dispatching to the vertical-filtering (v_loop)
 * DSP functions, which operate across rows. */
6194 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6195     const int index_a = qp + h->slice_alpha_c0_offset;
6196     const int alpha = (alpha_table+52)[index_a];
6197     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6198     if (alpha ==0 || beta == 0) return;
6202         tc[0] = (tc0_table+52)[index_a][bS[0]];
6203         tc[1] = (tc0_table+52)[index_a][bS[1]];
6204         tc[2] = (tc0_table+52)[index_a][bS[2]];
6205         tc[3] = (tc0_table+52)[index_a][bS[3]];
6206         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6208         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblock one horizontal chroma edge — chroma variant of filter_mb_edgeh,
 * passing tc0+1 as the chroma filters expect (cf. filter_mb_edgecv). */
6212 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6213     const int index_a = qp + h->slice_alpha_c0_offset;
6214     const int alpha = (alpha_table+52)[index_a];
6215     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6216     if (alpha ==0 || beta == 0) return;
6220         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6221         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6222         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6223         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6224         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6226         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock.  Falls back to the full
 * filter_mb() for cases it cannot handle (picture edges, per-plane chroma
 * QP offsets, slice-boundary deblocking mode 2); skips filtering entirely
 * when all averaged QPs are below the threshold where alpha/beta are zero.
 * Intra MBs use fixed boundary strengths; inter MBs compute bS via the
 * DSP helper.  NOTE(review): many intermediate lines are missing,
 * including the tail of the FILTER() application. */
6230 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6231     MpegEncContext * const s = &h->s;
6232     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6234     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
     /* Unsupported cases -> full (slow) filter. */
6238     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6239        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6240        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6241                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6242         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6245     assert(!FRAME_MBAFF);
6247     mb_type = s->current_picture.mb_type[mb_xy];
6248     qp = s->current_picture.qscale_table[mb_xy];
6249     qp0 = s->current_picture.qscale_table[mb_xy-1];
6250     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6251     qpc = get_chroma_qp( h, 0, qp );
6252     qpc0 = get_chroma_qp( h, 0, qp0 );
6253     qpc1 = get_chroma_qp( h, 0, qp1 );
     /* Edge QPs are the average of the two adjacent MBs' QPs. */
6254     qp0 = (qp + qp0 + 1) >> 1;
6255     qp1 = (qp + qp1 + 1) >> 1;
6256     qpc0 = (qpc + qpc0 + 1) >> 1;
6257     qpc1 = (qpc + qpc1 + 1) >> 1;
     /* Below this threshold the alpha/beta tables are all zero, so the
      * filter would be a no-op — skip it. */
6258     qp_thresh = 15 - h->slice_alpha_c0_offset;
6259     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6260        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
     /* Intra MB: boundary strengths are constant (4 on MB edges, 3 inside;
      * 3 on the top edge of field pictures). */
6263     if( IS_INTRA(mb_type) ) {
6264         int16_t bS4[4] = {4,4,4,4};
6265         int16_t bS3[4] = {3,3,3,3};
6266         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6267         if( IS_8x8DCT(mb_type) ) {
6268             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6269             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6270             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6271             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6273             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6274             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6275             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6276             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6277             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6278             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6279             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6280             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6282         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6283         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6284         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6285         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6286         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6287         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6288         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6289         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
     /* Inter MB: compute bS per edge (aliased as uint64_t rows for fast
      * zero tests and constant fills). */
6292         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6293         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6295         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6297             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6299             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6300                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6301             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6302                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6304             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6305             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6306             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6307                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
         /* Intra neighbours force bS=4 (or 3 for field top edges). */
6309         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6310             bSv[0][0] = 0x0004000400040004ULL;
6311         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6312             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
     /* Apply filters per direction/edge; dir 0 = vertical edges,
      * dir 1 = horizontal; edge 0 uses the averaged cross-MB QP. */
6314 #define FILTER(hv,dir,edge)\
6315         if(bSv[dir][edge]) {\
6316             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6318                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6319                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6325         } else if( IS_8x8DCT(mb_type) ) {
/* Deblock one macroblock along a single direction.
 * dir == 0 filters vertical edges (the neighbour is the MB to the left,
 * mb_xy - 1); dir == 1 filters horizontal edges (neighbour is h->top_mb_xy).
 * For each 4-sample edge segment a boundary strength bS is derived from
 * intra-ness, non-zero coefficient flags, reference-frame identity and
 * motion-vector differences, then the per-edge luma/chroma filters are run.
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps), so brace structure below is incomplete. */
6345 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6346 MpegEncContext * const s = &h->s;
/* mbm_xy: the neighbouring MB across the first edge in this direction. */
6348 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6349 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers so references can be
 * compared across slices; the +20/+2 offset skips the "unused" entries
 * (larger offset when MBAFF doubles the list). */
6350 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6351 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* 0xFFFF slice_table entry marks "no neighbour" (picture border):
 * start at edge 1, skipping the MB-boundary edge. */
6352 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* 16x16 skip MBs only need the MB-boundary edge filtered. */
6354 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6355 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6356 // how often to recheck mv-based bS when iterating between edges
6357 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6358 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6359 // how often to recheck mv-based bS when iterating along each edge
6360 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* The caller may already have filtered the first vertical edge (MBAFF
 * special case in filter_mb()); skip it here in that case. */
6362 if (first_vertical_edge_done) {
/* deblocking_filter==2: edges between different slices are not filtered. */
6366 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6369 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6370 && !IS_INTERLACED(mb_type)
6371 && IS_INTERLACED(mbm_type)
6373 // This is a special case in the norm where the filtering must
6374 // be done twice (one each of the field) even if we are in a
6375 // frame macroblock.
6377 static const int nnz_idx[4] = {4,5,6,3};
6378 unsigned int tmp_linesize = 2 * linesize;
6379 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6380 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* Filter against both fields of the interlaced MB pair above. */
6385 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6386 if( IS_INTRA(mb_type) ||
6387 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6388 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6390 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6391 for( i = 0; i < 4; i++ ) {
6392 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6393 mbn_nnz[nnz_idx[i]] != 0 )
6399 // Do not use s->qscale as luma quantizer because it has not the same
6400 // value in IPCM macroblocks.
6401 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6402 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6403 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6404 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
/* Chroma QP is averaged between the two MBs sharing the edge. */
6405 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6406 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6407 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6408 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main loop over the (up to 4) internal/boundary edges in this direction. */
6415 for( edge = start; edge < edges; edge++ ) {
6416 /* mbn_xy: neighbor macroblock */
6417 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6418 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6419 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform: odd (inner 4x4) edges are not filtered. */
6423 if( (edge&1) && IS_8x8DCT(mb_type) )
6426 if( IS_INTRA(mb_type) ||
6427 IS_INTRA(mbn_type) ) {
6430 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6431 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6440 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter MBs: bS derived from nnz/ref/mv; mask_edge lets us reuse the
 * previous edge's result when the partitioning makes it identical. */
6445 if( edge & mask_edge ) {
6446 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6449 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6450 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6453 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6454 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6455 int bn_idx= b_idx - (dir ? 8:1);
/* v = 1 if refs differ or any mv component differs by >= 1 luma sample
 * (>= 4 in quarter-pel units; mvy_limit accounts for field coding). */
6458 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6459 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6460 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6461 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the opposite list (L0 vs L1). */
6464 if(h->slice_type_nos == FF_B_TYPE && v){
6466 for( l = 0; !v && l < 2; l++ ) {
6468 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6469 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6470 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6474 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: compute bS separately for each 4-sample segment. */
6480 for( i = 0; i < 4; i++ ) {
6481 int x = dir == 0 ? edge : i;
6482 int y = dir == 0 ? i : edge;
6483 int b_idx= 8 + 4 + x + 8*y;
6484 int bn_idx= b_idx - (dir ? 8:1);
6486 if( h->non_zero_count_cache[b_idx] |
6487 h->non_zero_count_cache[bn_idx] ) {
6493 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6494 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6495 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6496 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6502 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6504 for( l = 0; l < 2; l++ ) {
6506 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6507 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6508 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All-zero bS: nothing to filter on this edge. */
6517 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6522 // Do not use s->qscale as luma quantizer because it has not the same
6523 // value in IPCM macroblocks.
6524 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6525 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6526 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6527 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Vertical edges (dir==0): luma every edge, chroma only even edges
 * (chroma is subsampled 2:1). */
6529 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6530 if( (edge&1) == 0 ) {
6531 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6532 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6533 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6534 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Horizontal edges (dir==1): same scheme, row offsets instead of columns. */
6537 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6538 if( (edge&1) == 0 ) {
6539 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6540 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6541 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6542 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Full (non-fast) per-macroblock deblocking entry point.
 * Handles: an early-out for sufficiently low QP, a CAVLC+8x8dct fixup of
 * the non_zero_count cache, the MBAFF first-vertical-edge special case
 * (8 bS values, two QP pairs), and finally delegates the regular edges to
 * filter_mb_dir() for both directions.
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps), so brace structure below is incomplete. */
6548 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6549 MpegEncContext * const s = &h->s;
6550 const int mb_xy= mb_x + mb_y*s->mb_stride;
6551 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a tighter vertical mv threshold (field units). */
6552 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6553 int first_vertical_edge_done = 0;
6556 //for sufficiently low qp, filtering wouldn't do anything
6557 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6559 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6560 int qp = s->current_picture.qscale_table[mb_xy];
/* Skip the whole MB when this MB and both edge-sharing neighbours are
 * below the threshold. */
6562 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6563 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6568 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6569 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6570 int top_type, left_type[2];
6571 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6572 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6573 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the neighbour rows/columns of the nnz cache from the cbp of
 * the 8x8-transform neighbours. */
6575 if(IS_8x8DCT(top_type)){
6576 h->non_zero_count_cache[4+8*0]=
6577 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6578 h->non_zero_count_cache[6+8*0]=
6579 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6581 if(IS_8x8DCT(left_type[0])){
6582 h->non_zero_count_cache[3+8*1]=
6583 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6585 if(IS_8x8DCT(left_type[1])){
6586 h->non_zero_count_cache[3+8*3]=
6587 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* For the current MB each 8x8 block's nnz comes from one cbp bit. */
6590 if(IS_8x8DCT(mb_type)){
6591 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6592 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6594 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6595 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6597 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6598 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6600 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6601 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6606 // left mb is in picture
6607 && h->slice_table[mb_xy-1] != 0xFFFF
6608 // and current and left pair do not have the same interlaced type
6609 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6610 // and left mb is in the same slice if deblocking_filter == 2
6611 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6612 /* First vertical edge is different in MBAFF frames
6613 * There are 8 different bS to compute and 2 different Qp
6615 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6616 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6621 int mb_qp, mbn0_qp, mbn1_qp;
6623 first_vertical_edge_done = 1;
6625 if( IS_INTRA(mb_type) )
6626 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
/* Otherwise derive each of the 8 bS values from the matching field/frame
 * neighbour's intra-ness and coefficient flags. */
6628 for( i = 0; i < 8; i++ ) {
6629 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6631 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6633 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6634 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6635 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6637 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Two QP pairs: one per left-pair field/frame MB, for luma and both
 * chroma planes. */
6644 mb_qp = s->current_picture.qscale_table[mb_xy];
6645 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6646 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6647 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6648 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6649 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6650 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6651 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6652 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6653 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6654 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6655 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6656 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6659 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6660 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6661 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6662 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6663 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Regular edges: both directions via filter_mb_dir(). */
6667 for( dir = 0; dir < 2; dir++ )
6668 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6670 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6671 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode one slice's macroblocks (worker entry point; arg is H264Context**).
 * Three decode loops: CABAC (h->pps.cabac), CAVLC, and a plain decode_mb()
 * loop for partitioned frames. Each loop decodes MBs, advances s->mb_x /
 * s->mb_y, reports decoded ranges to the error resilience code via
 * ff_er_add_slice(), and draws completed MB rows.
 * Returns 0 on normal slice end, -1 on error.
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps), so brace structure below is incomplete. */
6675 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6676 H264Context *h = *(void**)arg;
6677 MpegEncContext * const s = &h->s;
/* Partitioned frames only report AC status to the error concealment. */
6678 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6682 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6683 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6685 if( h->pps.cabac ) {
/* CABAC payload starts byte-aligned after the slice header. */
6689 align_get_bits( &s->gb );
6692 ff_init_cabac_states( &h->cabac);
6693 ff_init_cabac_decoder( &h->cabac,
6694 s->gb.buffer + get_bits_count(&s->gb)/8,
6695 (get_bits_left(&s->gb) + 7)/8);
6696 /* calculate pre-state */
/* Initialize all 460 CABAC context states from qscale and the spec's
 * init tables (I vs P/B, selected by cabac_init_idc). */
6697 for( i= 0; i < 460; i++ ) {
6699 if( h->slice_type_nos == FF_I_TYPE )
6700 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6702 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6705 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6707 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6712 int ret = decode_mb_cabac(h);
6714 //STOP_TIMER("decode_mb_cabac")
6716 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair immediately. */
6718 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6721 ret = decode_mb_cabac(h);
6723 if(ret>=0) hl_decode_mb(h);
6726 eos = get_cabac_terminate( &h->cabac );
/* Overread by more than 2 bytes means corrupt bitstream. */
6728 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6729 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6734 if( ++s->mb_x >= s->mb_width ) {
6736 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6738 if(FIELD_OR_MBAFF_PICTURE) {
6743 if( eos || s->mb_y >= s->mb_height ) {
6744 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6745 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC loop: same structure, slice end detected via bit position. */
6752 int ret = decode_mb_cavlc(h);
6754 if(ret>=0) hl_decode_mb(h);
6756 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6758 ret = decode_mb_cavlc(h);
6760 if(ret>=0) hl_decode_mb(h);
6765 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6766 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6771 if(++s->mb_x >= s->mb_width){
6773 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6775 if(FIELD_OR_MBAFF_PICTURE) {
6778 if(s->mb_y >= s->mb_height){
6779 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6781 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6782 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6786 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mb_skip_run > 0 means skipped MBs are still pending past this point. */
6793 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6794 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6795 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6796 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6800 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Partitioned-frame loop: plain decode_mb() over remaining MBs. */
6809 for(;s->mb_y < s->mb_height; s->mb_y++){
6810 for(;s->mb_x < s->mb_width; s->mb_x++){
6811 int ret= decode_mb(h);
6816 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6817 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6822 if(++s->mb_x >= s->mb_width){
6824 if(++s->mb_y >= s->mb_height){
6825 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6826 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6830 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Fixed garbled tokens here (`s->?gb` / `s->gb?.`) to match the
 * surrounding accesses at internal lines 6825/6838. */
6837 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6838 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6839 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6843 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6850 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6853 return -1; //not reached
/* Parse a picture timing SEI message (H.264 Annex D.1.2).
 * Reads cpb_removal_delay/dpb_output_delay when HRD parameters are present,
 * then pic_struct and the optional per-clock-timestamp fields.
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps), including the final return. */
6856 static int decode_picture_timing(H264Context *h){
6857 MpegEncContext * const s = &h->s;
6858 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6859 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6860 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6862 if(h->sps.pic_struct_present_flag){
6863 unsigned int i, num_clock_ts;
6864 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Values above FRAME_TRIPLING are reserved -> reject. */
6867 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6870 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6872 for (i = 0 ; i < num_clock_ts ; i++){
6873 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6874 unsigned int full_timestamp_flag;
6875 h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
6876 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6877 skip_bits(&s->gb, 5); /* counting_type */
6878 full_timestamp_flag = get_bits(&s->gb, 1);
6879 skip_bits(&s->gb, 1); /* discontinuity_flag */
6880 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6881 skip_bits(&s->gb, 8); /* n_frames */
/* Full timestamp: fixed-size seconds/minutes/hours fields. */
6882 if(full_timestamp_flag){
6883 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6884 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6885 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* Otherwise each component is preceded by its own presence flag. */
6887 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6888 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6889 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6890 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6891 if(get_bits(&s->gb, 1)) /* hours_flag */
6892 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6896 if(h->sps.time_offset_length > 0)
6897 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6901 if(s->avctx->debug & FF_DEBUG_PICT_INFO)
6902 av_log(s->avctx, AV_LOG_DEBUG, "ct_type:%X pic_struct:%d\n", h->sei_ct_type, h->sei_pic_struct);
/* Parse an unregistered user-data SEI message: copy up to 16+255 payload
 * bytes, then scan for an x264 version banner to set h->x264_build
 * (used for bug workarounds elsewhere). Remaining payload bytes are skipped.
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps) — in particular the NUL-termination of user_data
 * before the sscanf must happen in one of the missing lines; verify. */
6907 static int decode_unregistered_user_data(H264Context *h, int size){
6908 MpegEncContext * const s = &h->s;
6909 uint8_t user_data[16+256];
6915 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6916 user_data[i]= get_bits(&s->gb, 8);
/* Payload starts with a 16-byte UUID; the text begins at offset 16. */
6920 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6921 if(e==1 && build>=0)
6922 h->x264_build= build;
6924 if(s->avctx->debug & FF_DEBUG_BUGS)
6925 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes that did not fit the local buffer. */
6928 skip_bits(&s->gb, 8);
/* Parse a recovery point SEI message: store the recovery frame count and
 * skip the four flag bits. (Return statement lies in lines missing from
 * this extract.) */
6933 static int decode_recovery_point(H264Context *h){
6934 MpegEncContext * const s = &h->s;
6936 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6937 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/* Parse a buffering period SEI message (H.264 Annex D.1.1): look up the
 * referenced SPS, then read initial_cpb_removal_delay for every scheduler
 * selection index under the NAL and/or VCL HRD, and mark the message as
 * present. Errors on a reference to a non-existing SPS. */
6942 static int decode_buffering_period(H264Context *h){
6943 MpegEncContext * const s = &h->s;
6944 unsigned int sps_id;
6948 sps_id = get_ue_golomb_31(&s->gb);
6949 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6950 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6953 sps = h->sps_buffers[sps_id];
6955 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6956 if (sps->nal_hrd_parameters_present_flag) {
6957 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6958 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6959 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6962 if (sps->vcl_hrd_parameters_present_flag) {
6963 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6964 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6965 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6969 h->sei_buffering_period_present = 1;
/* Top-level SEI NAL parser: iterate over SEI messages in the bitstream.
 * Each message's type and size are coded as sequences of 0xFF-extended
 * bytes; dispatch known types to their decoders and skip unknown payloads.
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps), so the switch/loop structure below is incomplete. */
6973 int ff_h264_decode_sei(H264Context *h){
6974 MpegEncContext * const s = &h->s;
6976 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* type/size accumulate 255 per 0xFF byte, plus the final byte. */
6981 type+= show_bits(&s->gb, 8);
6982 }while(get_bits(&s->gb, 8) == 255);
6986 size+= show_bits(&s->gb, 8);
6987 }while(get_bits(&s->gb, 8) == 255);
6990 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6991 if(decode_picture_timing(h) < 0)
6994 case SEI_TYPE_USER_DATA_UNREGISTERED:
6995 if(decode_unregistered_user_data(h, size) < 0)
6998 case SEI_TYPE_RECOVERY_POINT:
6999 if(decode_recovery_point(h) < 0)
7002 case SEI_BUFFERING_PERIOD:
7003 if(decode_buffering_period(h) < 0)
/* Unknown message type: skip the whole payload. */
7007 skip_bits(&s->gb, 8*size);
7010 //FIXME check bits here
7011 align_get_bits(&s->gb);
/* Parse HRD parameters (H.264 Annex E.1.2) into the SPS: cpb_cnt and the
 * bit lengths of the various removal/output delay fields. Per-CPB rate and
 * size values are read and discarded. Returns an error on cpb_cnt > 32. */
7017 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
7018 MpegEncContext * const s = &h->s;
7020 cpb_count = get_ue_golomb_31(&s->gb) + 1;
7022 if(cpb_count > 32U){
7023 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
7027 get_bits(&s->gb, 4); /* bit_rate_scale */
7028 get_bits(&s->gb, 4); /* cpb_size_scale */
7029 for(i=0; i<cpb_count; i++){
7030 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7031 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7032 get_bits1(&s->gb); /* cbr_flag */
/* Field lengths are coded minus one except time_offset_length. */
7034 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7035 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7036 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
7037 sps->time_offset_length = get_bits(&s->gb, 5);
7038 sps->cpb_cnt = cpb_count;
/* Parse VUI parameters (H.264 Annex E.1.1) into the SPS: sample aspect
 * ratio, video signal description, chroma sample location, timing info,
 * NAL/VCL HRD parameters, pic_struct flag and bitstream restrictions.
 * Fields the decoder does not use are read and discarded.
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps), so some branches/returns are not visible. */
7042 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7043 MpegEncContext * const s = &h->s;
7044 int aspect_ratio_info_present_flag;
7045 unsigned int aspect_ratio_idc;
7047 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7049 if( aspect_ratio_info_present_flag ) {
7050 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16+16 bit num/den pair; other idc
 * values index the predefined pixel_aspect table. */
7051 if( aspect_ratio_idc == EXTENDED_SAR ) {
7052 sps->sar.num= get_bits(&s->gb, 16);
7053 sps->sar.den= get_bits(&s->gb, 16);
7054 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
7055 sps->sar= pixel_aspect[aspect_ratio_idc];
7057 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7064 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7066 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7067 get_bits1(&s->gb); /* overscan_appropriate_flag */
7070 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7071 get_bits(&s->gb, 3); /* video_format */
7072 get_bits1(&s->gb); /* video_full_range_flag */
7073 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7074 get_bits(&s->gb, 8); /* colour_primaries */
7075 get_bits(&s->gb, 8); /* transfer_characteristics */
7076 get_bits(&s->gb, 8); /* matrix_coefficients */
7080 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7081 s->avctx->chroma_sample_location = get_ue_golomb(&s->gb)+1; /* chroma_sample_location_type_top_field */
7082 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7085 sps->timing_info_present_flag = get_bits1(&s->gb);
7086 if(sps->timing_info_present_flag){
7087 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7088 sps->time_scale = get_bits_long(&s->gb, 32);
/* Reject zero/overflowing timing values before they are used as a
 * time base elsewhere. */
7089 if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
7090 av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick invalid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
7093 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7096 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7097 if(sps->nal_hrd_parameters_present_flag)
7098 if(decode_hrd_parameters(h, sps) < 0)
7100 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7101 if(sps->vcl_hrd_parameters_present_flag)
7102 if(decode_hrd_parameters(h, sps) < 0)
7104 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7105 get_bits1(&s->gb); /* low_delay_hrd_flag */
7106 sps->pic_struct_present_flag = get_bits1(&s->gb);
7108 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7109 if(sps->bitstream_restriction_flag){
7110 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7111 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7112 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7113 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7114 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7115 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7116 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7118 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7119 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one quantization scaling list of `size` (16 or 64) entries.
 * If the present flag is 0, copy fallback_list (previous list or SPS/default
 * per the spec's fallback rules). Otherwise decode delta-coded values in
 * zigzag order; a first delta yielding 0 selects the JVT default list.
 * NOTE(review): the `else` branch and loop close are in lines missing from
 * this extract (the embedded numbering has gaps). */
7127 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7128 const uint8_t *jvt_list, const uint8_t *fallback_list){
7129 MpegEncContext * const s = &h->s;
7130 int i, last = 8, next = 8;
7131 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7132 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7133 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7135 for(i=0;i<size;i++){
/* Deltas are signed Exp-Golomb, accumulated modulo 256. */
7137 next = (last + get_se_golomb(&s->gb)) & 0xff;
7138 if(!i && !next){ /* matrix not written, we use the preset one */
7139 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat the previous value" for the rest of the list. */
7142 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS: six 4x4 lists
 * and (for 8x8 transform) two 8x8 lists. The fallback for each list is the
 * previously decoded list, or the SPS matrix / JVT default when decoding a
 * PPS whose SPS carried matrices (is_sps/fallback_sps logic). */
7146 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7147 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7148 MpegEncContext * const s = &h->s;
7149 int fallback_sps = !is_sps && sps->scaling_matrix_present;
/* Fallback sources for the first list of each group (Intra-Y 4x4,
 * Inter-Y 4x4, Intra-Y 8x8, Inter-Y 8x8). */
7150 const uint8_t *fallback[4] = {
7151 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7152 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7153 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7154 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7156 if(get_bits1(&s->gb)){
7157 sps->scaling_matrix_present |= is_sps;
7158 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7159 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7160 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7161 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7162 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7163 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists are only present when the 8x8 transform can be used. */
7164 if(is_sps || pps->transform_8x8_mode){
7165 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7166 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL: profile/level, chroma format and
 * scaling matrices (high profile), POC parameters, reference frame count,
 * picture dimensions, AFF/cropping flags and optional VUI parameters.
 * The decoded SPS is validated and stored in h->sps_buffers[sps_id].
 * NOTE(review): this extract is missing some original lines (the embedded
 * numbering has gaps), so some branches/returns are not visible. */
7171 int ff_h264_decode_seq_parameter_set(H264Context *h){
7172 MpegEncContext * const s = &h->s;
7173 int profile_idc, level_idc;
7174 unsigned int sps_id;
7178 profile_idc= get_bits(&s->gb, 8);
7179 get_bits1(&s->gb); //constraint_set0_flag
7180 get_bits1(&s->gb); //constraint_set1_flag
7181 get_bits1(&s->gb); //constraint_set2_flag
7182 get_bits1(&s->gb); //constraint_set3_flag
7183 get_bits(&s->gb, 4); // reserved
7184 level_idc= get_bits(&s->gb, 8);
7185 sps_id= get_ue_golomb_31(&s->gb);
7187 if(sps_id >= MAX_SPS_COUNT) {
7188 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7191 sps= av_mallocz(sizeof(SPS));
7195 sps->profile_idc= profile_idc;
7196 sps->level_idc= level_idc;
/* Flat default: all scaling factors 16 (i.e. no scaling). */
7198 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7199 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7200 sps->scaling_matrix_present = 0;
7202 if(sps->profile_idc >= 100){ //high profile
7203 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7204 if(sps->chroma_format_idc == 3)
7205 sps->residual_color_transform_flag = get_bits1(&s->gb);
7206 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7207 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7208 sps->transform_bypass = get_bits1(&s->gb);
7209 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are always 4:2:0. */
7211 sps->chroma_format_idc= 1;
7214 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7215 sps->poc_type= get_ue_golomb_31(&s->gb);
7217 if(sps->poc_type == 0){ //FIXME #define
7218 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7219 } else if(sps->poc_type == 1){//FIXME #define
7220 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7221 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7222 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7223 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7225 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7226 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7230 for(i=0; i<sps->poc_cycle_length; i++)
7231 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7232 }else if(sps->poc_type != 2){
7233 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7237 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7238 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7239 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7242 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* Dimensions are coded in macroblocks, minus one. */
7243 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7244 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7245 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7246 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7247 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7251 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7252 if(!sps->frame_mbs_only_flag)
7253 sps->mb_aff= get_bits1(&s->gb);
7257 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7259 #ifndef ALLOW_INTERLACE
7261 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7263 sps->crop= get_bits1(&s->gb);
7265 sps->crop_left = get_ue_golomb(&s->gb);
7266 sps->crop_right = get_ue_golomb(&s->gb);
7267 sps->crop_top = get_ue_golomb(&s->gb);
7268 sps->crop_bottom= get_ue_golomb(&s->gb);
7269 if(sps->crop_left || sps->crop_top){
7270 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7272 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7273 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7279 sps->crop_bottom= 0;
7282 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7283 if( sps->vui_parameters_present_flag )
7284 if (decode_vui_parameters(h, sps) < 0)
7287 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7288 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7289 sps_id, sps->profile_idc, sps->level_idc,
7291 sps->ref_frame_count,
7292 sps->mb_width, sps->mb_height,
7293 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7294 sps->direct_8x8_inference_flag ? "8B8" : "",
7295 sps->crop_left, sps->crop_right,
7296 sps->crop_top, sps->crop_bottom,
7297 sps->vui_parameters_present_flag ? "VUI" : "",
7298 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
7299 sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
7300 sps->timing_info_present_flag ? sps->time_scale : 0
/* Replace any previously stored SPS with the same id. */
7304 av_free(h->sps_buffers[sps_id]);
7305 h->sps_buffers[sps_id]= sps;
/**
 * Precompute the chroma QP lookup table for one chroma component.
 * For every luma QP value 0..51, stores the chroma QP obtained by applying
 * the per-component offset and clipping to [0,51] before indexing the
 * spec-defined chroma_qp[] mapping table.
 *
 * @param pps   PPS whose chroma_qp_table[t] is filled in
 * @param t     chroma component index (0 = Cb offset, 1 = Cr offset)
 * @param index chroma_qp_index_offset to add before the table lookup
 *
 * NOTE(review): this chunk is elided — the return type, local declarations
 * and braces of this function are not visible here.
 */
7314 build_qp_table(PPS *pps, int t, int index)
7317     for(i = 0; i < 52; i++)
7318         pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) NAL unit from s->gb.
 * Allocates a new PPS, parses its fields, inherits scaling matrices from the
 * referenced SPS, builds the chroma QP tables and stores the PPS in
 * h->pps_buffers[pps_id] (replacing any previous one).
 *
 * @param h          decoder context (bitstream is read from h->s.gb)
 * @param bit_length number of bits in the RBSP, used to detect the optional
 *                   trailing high-profile fields
 *
 * NOTE(review): this chunk is elided — several error-return paths and the
 * final return are not visible here.
 */
7321 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7322     MpegEncContext * const s = &h->s;
7323     unsigned int pps_id= get_ue_golomb(&s->gb);
/* reject out-of-range pps_id before allocating anything */
7326     if(pps_id >= MAX_PPS_COUNT) {
7327         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7331     pps= av_mallocz(sizeof(PPS));
/* the referenced SPS must already have been decoded */
7334     pps->sps_id= get_ue_golomb_31(&s->gb);
7335     if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7336         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7340     pps->cabac= get_bits1(&s->gb);
7341     pps->pic_order_present= get_bits1(&s->gb);
7342     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* slice groups (FMO) are parsed but not supported by this decoder;
 * the commented tables below are the relevant syntax from the H.264 spec */
7343     if(pps->slice_group_count > 1 ){
7344         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7345         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7346         switch(pps->mb_slice_group_map_type){
7349 |           for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7350 |               run_length[ i ]                             |1  |ue(v)   |
7355 |           for( i = 0; i < num_slice_groups_minus1; i++ ) |   |        |
7357 |               top_left_mb[ i ]                            |1  |ue(v)   |
7358 |               bottom_right_mb[ i ]                        |1  |ue(v)   |
7366 |           slice_group_change_direction_flag               |1  |u(1)    |
7367 |           slice_group_change_rate_minus1                  |1  |ue(v)   |
7372 |           slice_group_id_cnt_minus1                       |1  |ue(v)   |
7373 |           for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
7375 |               slice_group_id[ i ]                         |1  |u(v)    |
/* default L0/L1 reference counts, capped at 32 per list */
7380     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7381     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7382     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7383         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7387     pps->weighted_pred= get_bits1(&s->gb);
7388     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
/* QP fields are coded as signed offsets from 26 */
7389     pps->init_qp= get_se_golomb(&s->gb) + 26;
7390     pps->init_qs= get_se_golomb(&s->gb) + 26;
7391     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7392     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7393     pps->constrained_intra_pred= get_bits1(&s->gb);
7394     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7396     pps->transform_8x8_mode= 0;
7397     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* start from the SPS scaling matrices; may be overridden below */
7398     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7399     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* optional trailing fields (present only if bits remain in the RBSP) */
7401     if(get_bits_count(&s->gb) < bit_length){
7402         pps->transform_8x8_mode= get_bits1(&s->gb);
7403         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7404         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
/* no second offset coded: Cr reuses the Cb offset */
7406         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7409     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7410     build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7411     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7412         h->pps.chroma_qp_diff= 1;
7414     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7415         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7416                pps_id, pps->sps_id,
7417                pps->cabac ? "CABAC" : "CAVLC",
7418                pps->slice_group_count,
7419                pps->ref_count[0], pps->ref_count[1],
7420                pps->weighted_pred ? "weighted" : "",
7421                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7422                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7423                pps->constrained_intra_pred ? "CONSTR" : "",
7424                pps->redundant_pic_cnt_present ? "REDU" : "",
7425                pps->transform_8x8_mode ? "8x8DCT" : ""
/* replace any previously stored PPS with the same id */
7429     av_free(h->pps_buffers[pps_id]);
7430     h->pps_buffers[pps_id]= pps;
7438 * Call decode_slice() for each context.
7440 * @param h h264 master context
7441 * @param context_count number of contexts to execute
7443 static void execute_decode_slices(H264Context *h, int context_count){
7444     MpegEncContext * const s = &h->s;
7445     AVCodecContext * const avctx= s->avctx;
/* hwaccel / VDPAU paths bypass software slice decoding
 * (NOTE(review): their branch bodies are elided in this chunk) */
7449     if (s->avctx->hwaccel)
7451     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
/* single-context fast path: decode directly on the master context */
7453     if(context_count == 1) {
7454         decode_slice(avctx, &h);
/* multi-context: propagate error settings to every thread context... */
7456         for(i = 1; i < context_count; i++) {
7457             hx = h->thread_context[i];
7458             hx->s.error_recognition = avctx->error_recognition;
7459             hx->s.error_count = 0;
/* ...then run all slice decodes through the execute() callback */
7462         avctx->execute(avctx, (void *)decode_slice,
7463                        h->thread_context, NULL, context_count, sizeof(void*));
7465         /* pull back stuff from slices to master context */
7466         hx = h->thread_context[context_count - 1];
7467         s->mb_x = hx->s.mb_x;
7468         s->mb_y = hx->s.mb_y;
7469         s->dropable = hx->s.dropable;
7470         s->picture_structure = hx->s.picture_structure;
/* accumulate per-thread error counts into the master context */
7471         for(i = 1; i < context_count; i++)
7472             h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each by type
 * (slices, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...). Supports both
 * length-prefixed AVC ("avcC") and Annex-B start-code framing, selected by
 * h->is_avc. Slices are queued into thread contexts and executed in batches
 * of h->max_contexts via execute_decode_slices().
 *
 * NOTE(review): this chunk is elided — the main loop header, several error
 * paths and the final return are not visible here.
 */
7477 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7478     MpegEncContext * const s = &h->s;
7479     AVCodecContext * const avctx= s->avctx;
7481     H264Context *hx; ///< thread context
7482     int context_count = 0;
/* for Annex-B the whole buffer is one scan range; for AVC it is set per NAL */
7483     int next_avc= h->is_avc ? 0 : buf_size;
7485     h->max_contexts = avctx->thread_count;
/* debug hexdump of the buffer head (elided guard around this loop) */
7488     for(i=0; i<50; i++){
7489         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* unless decoding in chunks, start a fresh access unit */
7492     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7493         h->current_slice = 0;
7494         if (!s->first_field)
7495             s->current_picture_ptr= NULL;
/* --- per-NAL framing: read a big-endian length prefix (AVC) ... --- */
7507         if(buf_index >= next_avc) {
7508             if(buf_index >= buf_size) break;
7510             for(i = 0; i < h->nal_length_size; i++)
7511                 nalsize = (nalsize << 8) | buf[buf_index++];
7512             if(nalsize <= 1 || nalsize > buf_size - buf_index){
7517                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7521             next_avc= buf_index + nalsize;
7523             // start code prefix search
7524             for(; buf_index + 3 < next_avc; buf_index++){
7525                 // This should always succeed in the first iteration.
7526                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7530             if(buf_index+3 >= buf_size) break;
7533         if(buf_index >= next_avc) continue;
/* unescape the NAL (remove emulation-prevention bytes) into ptr */
7536         hx = h->thread_context[context_count];
7538         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
7539         if (ptr==NULL || dst_length < 0){
/* strip trailing zero bytes and compute the exact RBSP bit length */
7542         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7544         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7546         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7547             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7550         if (h->is_avc && (nalsize != consumed) && nalsize){
7551             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7554         buf_index += consumed;
/* optionally skip non-reference NALs when hurrying / frame skipping */
7556         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7557            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
/* --- dispatch by NAL type (case labels partly elided in this chunk) --- */
7562         switch(hx->nal_unit_type){
7564             if (h->nal_unit_type != NAL_IDR_SLICE) {
7565                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7568             idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* regular (non-partitioned) slice */
7570             init_get_bits(&hx->s.gb, ptr, bit_length);
7572             hx->inter_gb_ptr= &hx->s.gb;
7573             hx->s.data_partitioning = 0;
7575             if((err = decode_slice_header(hx, h)))
7578             if (s->avctx->hwaccel && h->current_slice == 1) {
7579                 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
7583             s->current_picture_ptr->key_frame |=
7584                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
7585                     (h->sei_recovery_frame_cnt >= 0);
/* queue the slice for decoding unless skip settings discard it */
7586             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7587                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7588                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7589                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7590                && avctx->skip_frame < AVDISCARD_ALL){
7591                 if(avctx->hwaccel) {
7592                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
/* VDPAU path: hand the raw slice (re-prefixed with a start code) to the hw */
7595                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7596                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7597                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7598                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* DPA: slice header of a partitioned slice */
7604             init_get_bits(&hx->s.gb, ptr, bit_length);
7606             hx->inter_gb_ptr= NULL;
7608             if ((err = decode_slice_header(hx, h)) < 0)
7611             hx->s.data_partitioning = 1;
/* DPB: intra residual partition */
7615             init_get_bits(&hx->intra_gb, ptr, bit_length);
7616             hx->intra_gb_ptr= &hx->intra_gb;
/* DPC: inter residual partition */
7619             init_get_bits(&hx->inter_gb, ptr, bit_length);
7620             hx->inter_gb_ptr= &hx->inter_gb;
7622             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7623                && s->context_initialized
7625                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7626                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7627                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7628                && avctx->skip_frame < AVDISCARD_ALL)
/* SEI messages */
7632             init_get_bits(&s->gb, ptr, bit_length);
7633             ff_h264_decode_sei(h);
/* SPS */
7636             init_get_bits(&s->gb, ptr, bit_length);
7637             ff_h264_decode_seq_parameter_set(h);
7639             if(s->flags& CODEC_FLAG_LOW_DELAY)
7642             if(avctx->has_b_frames < 2)
7643                 avctx->has_b_frames= !s->low_delay;
/* PPS */
7646             init_get_bits(&s->gb, ptr, bit_length);
7648             ff_h264_decode_picture_parameter_set(h, bit_length);
7652         case NAL_END_SEQUENCE:
7653         case NAL_END_STREAM:
7654         case NAL_FILLER_DATA:
7656         case NAL_AUXILIARY_SLICE:
7659             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* flush a full batch of queued slice contexts */
7662         if(context_count == h->max_contexts) {
7663             execute_decode_slices(h, context_count);
7668             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7670             /* Slice could not be decoded in parallel mode, copy down
7671              * NAL unit stuff to context 0 and restart. Note that
7672              * rbsp_buffer is not transferred, but since we no longer
7673              * run in parallel mode this should not be an issue. */
7674             h->nal_unit_type = hx->nal_unit_type;
7675             h->nal_ref_idc   = hx->nal_ref_idc;
/* decode any remaining queued slices before returning */
7681         execute_decode_slices(h, context_count);
7686 * returns the number of bytes consumed for building the current frame
/**
 * Return the number of input bytes consumed for the current frame,
 * clamped so the caller makes progress: a zero position is bumped to 1,
 * and a position within 10 bytes of the end is rounded up to buf_size.
 * NOTE(review): the final return statement is elided in this chunk.
 */
7688 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7689     if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7690     if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * AVCodec.decode callback: decode one packet of H.264 data and, when a
 * complete picture is available, output it in display order via *pict.
 * Handles end-of-stream flushing, one-time avcC extradata parsing,
 * interlacing signalling derived from picture-timing SEI, and B-frame
 * reordering through the delayed_pic[] queue.
 *
 * NOTE(review): this chunk is elided — several branches, error paths and
 * intermediate statements are not visible here.
 */
7695 static int decode_frame(AVCodecContext *avctx,
7696                         void *data, int *data_size,
7699     const uint8_t *buf = avpkt->data;
7700     int buf_size = avpkt->size;
7701     H264Context *h = avctx->priv_data;
7702     MpegEncContext *s = &h->s;
7703     AVFrame *pict = data;
7706     s->flags= avctx->flags;
7707     s->flags2= avctx->flags2;
7709    /* end of stream, output what is still in the buffers */
7710     if (buf_size == 0) {
7714 //FIXME factorize this with the output code below
/* pick the lowest-poc delayed picture up to the next keyframe/mmco reset */
7715         out = h->delayed_pic[0];
7717         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7718             if(h->delayed_pic[i]->poc < out->poc){
7719                 out = h->delayed_pic[i];
/* remove the chosen picture from the delayed queue */
7723             for(i=out_idx; h->delayed_pic[i]; i++)
7724                 h->delayed_pic[i] = h->delayed_pic[i+1];
7727             *data_size = sizeof(AVFrame);
7728             *pict= *(AVFrame*)out;
/* one-time parse of avcC extradata (SPS/PPS with 2-byte length prefixes) */
7734     if(h->is_avc && !h->got_avcC) {
7735         int i, cnt, nalsize;
7736         unsigned char *p = avctx->extradata;
7737         if(avctx->extradata_size < 7) {
7738             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7742             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7745         /* sps and pps in the avcC always have length coded with 2 bytes,
7746            so put a fake nal_length_size = 2 while parsing them */
7747         h->nal_length_size = 2;
7748         // Decode sps from avcC
7749         cnt = *(p+5) & 0x1f; // Number of sps
7751         for (i = 0; i < cnt; i++) {
7752             nalsize = AV_RB16(p) + 2;
7753             if(decode_nal_units(h, p, nalsize) < 0) {
7754                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7759         // Decode pps from avcC
7760         cnt = *(p++); // Number of pps
7761         for (i = 0; i < cnt; i++) {
7762             nalsize = AV_RB16(p) + 2;
7763             if(decode_nal_units(h, p, nalsize) != nalsize) {
7764                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7769         // Now store right nal length size, that will be use to parse all other nals
7770         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7771         // Do not reparse avcC
/* Annex-B extradata: feed it through the NAL parser once */
7775     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7776         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7781     buf_index=decode_nal_units(h, buf, buf_size);
7785     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7786         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7787         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* a picture is complete: derive output flags and reorder for display */
7791     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7792         Picture *out = s->current_picture_ptr;
7793         Picture *cur = s->current_picture_ptr;
7794         int i, pics, out_of_order, out_idx;
7798         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7799             /* Wait for second field. */
7803         cur->interlaced_frame = 0;
7804         cur->repeat_pict = 0;
7806         /* Signal interlacing information externally. */
7807         /* Prioritize picture timing SEI information over used decoding process if it exists. */
7809         if(h->sps.pic_struct_present_flag){
7810             switch (h->sei_pic_struct)
7812             case SEI_PIC_STRUCT_FRAME:
7814             case SEI_PIC_STRUCT_TOP_FIELD:
7815             case SEI_PIC_STRUCT_BOTTOM_FIELD:
7816                 cur->interlaced_frame = 1;
7818             case SEI_PIC_STRUCT_TOP_BOTTOM:
7819             case SEI_PIC_STRUCT_BOTTOM_TOP:
7820                 if (FIELD_OR_MBAFF_PICTURE)
7821                     cur->interlaced_frame = 1;
7823                     // try to flag soft telecine progressive
7824                     cur->interlaced_frame = h->prev_interlaced_frame;
7826             case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7827             case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7828                 // Signal the possibility of telecined film externally (pic_struct 5,6)
7829                 // From these hints, let the applications decide if they apply deinterlacing.
7830                 cur->repeat_pict = 1;
7832             case SEI_PIC_STRUCT_FRAME_DOUBLING:
7833                 // Force progressive here, as doubling interlaced frame is a bad idea.
7834                 cur->repeat_pict = 2;
7836             case SEI_PIC_STRUCT_FRAME_TRIPLING:
7837                 cur->repeat_pict = 4;
7841             if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
7842                 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7844             /* Derive interlacing flag from used decoding process. */
7845             cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7847         h->prev_interlaced_frame = cur->interlaced_frame;
7849         if (cur->field_poc[0] != cur->field_poc[1]){
7850             /* Derive top_field_first from field pocs. */
7851             cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7853             if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7854                 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7855                 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7856                   || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7857                     cur->top_field_first = 1;
7859                     cur->top_field_first = 0;
7861                 /* Most likely progressive */
7862                 cur->top_field_first = 0;
7866         //FIXME do something with unavailable reference frames
7868         /* Sort B-frames into display order */
7870         if(h->sps.bitstream_restriction_flag
7871            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7872             s->avctx->has_b_frames = h->sps.num_reorder_frames;
7876         if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7877            && !h->sps.bitstream_restriction_flag){
7878             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* append the current picture to the delayed-output queue */
7883         while(h->delayed_pic[pics]) pics++;
7885         assert(pics <= MAX_DELAYED_PIC_COUNT);
7887         h->delayed_pic[pics++] = cur;
7888         if(cur->reference == 0)
7889             cur->reference = DELAYED_PIC_REF;
/* select the lowest-poc candidate for output */
7891         out = h->delayed_pic[0];
7893         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7894             if(h->delayed_pic[i]->poc < out->poc){
7895                 out = h->delayed_pic[i];
7898         if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
7899             h->outputed_poc= INT_MIN;
7900         out_of_order = out->poc < h->outputed_poc;
/* grow has_b_frames if reordering evidence shows the delay is too small */
7902         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7904         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7906                  ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
7907                   || cur->pict_type == FF_B_TYPE)))
7910             s->avctx->has_b_frames++;
/* emit (or drop, if out of order) the selected picture */
7913         if(out_of_order || pics > s->avctx->has_b_frames){
7914             out->reference &= ~DELAYED_PIC_REF;
7915             for(i=out_idx; h->delayed_pic[i]; i++)
7916                 h->delayed_pic[i] = h->delayed_pic[i+1];
7918         if(!out_of_order && pics > s->avctx->has_b_frames){
7919             *data_size = sizeof(AVFrame);
7921             if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
7922                 h->outputed_poc = INT_MIN;
7924                 h->outputed_poc = out->poc;
7925             *pict= *(AVFrame*)out;
7927             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7932     assert(pict->data[0] || !*data_size);
7933     ff_print_debug_info(s, pict);
7934 //printf("out %d\n", (int)pict->data[0]);
7936     return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with availability flags for the macroblocks
 * neighbouring the current one (same slice required):
 * [0]=top-left, [1]=top, [2]=top-right, [3]=left; [4]/[5] are fixed.
 * NOTE(review): this chunk is elided — the guard around the top-row checks
 * and the closing brace are not visible here.
 */
7939 static inline void fill_mb_avail(H264Context *h){
7940     MpegEncContext * const s = &h->s;
7941     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7944         h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7945         h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7946         h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7952     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7953     h->mb_avail[4]= 1; //FIXME move out
7954     h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness (presumably compiled under a TEST-style ifdef).
 * Exercises unsigned/signed exp-Golomb coding round-trips, the 4x4
 * (I)DCT, the quantizer and the NAL escaping/unescaping layer.
 * NOTE(review): heavily elided in this chunk — the main() declaration,
 * many statements and all closing braces are not visible here. */
7962 #define SIZE (COUNT*40)
7968 //    int int_temp[10000];
7970     AVCodecContext avctx;
7972     dsputil_init(&dsp, &avctx);
/* round-trip test: write COUNT unsigned exp-Golomb codes... */
7974     init_put_bits(&pb, temp, SIZE);
7975     printf("testing unsigned exp golomb\n");
7976     for(i=0; i<COUNT; i++){
7978         set_ue_golomb(&pb, i);
7979         STOP_TIMER("set_ue_golomb");
7981     flush_put_bits(&pb);
/* ...then read them back and verify each value */
7983     init_get_bits(&gb, temp, 8*SIZE);
7984     for(i=0; i<COUNT; i++){
7987         s= show_bits(&gb, 24);
7990         j= get_ue_golomb(&gb);
7992             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7995         STOP_TIMER("get_ue_golomb");
/* same round-trip for signed exp-Golomb, centered around 0 */
7999     init_put_bits(&pb, temp, SIZE);
8000     printf("testing signed exp golomb\n");
8001     for(i=0; i<COUNT; i++){
8003         set_se_golomb(&pb, i - COUNT/2);
8004         STOP_TIMER("set_se_golomb");
8006     flush_put_bits(&pb);
8008     init_get_bits(&gb, temp, 8*SIZE);
8009     for(i=0; i<COUNT; i++){
8012         s= show_bits(&gb, 24);
8015         j= get_se_golomb(&gb);
8016         if(j != i - COUNT/2){
8017             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8020         STOP_TIMER("get_se_golomb");
/* forward DCT + quant/dequant + IDCT round-trip on random 4x4 blocks,
 * accumulating the reconstruction error */
8024     printf("testing 4x4 (I)DCT\n");
8027         uint8_t src[16], ref[16];
8028         uint64_t error= 0, max_error=0;
8030         for(i=0; i<COUNT; i++){
8032 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8033             for(j=0; j<16; j++){
8034                 ref[j]= random()%255;
8035                 src[j]= random()%255;
8038             h264_diff_dct_c(block, src, ref, 4);
8041             for(j=0; j<16; j++){
8042 //                printf("%d ", block[j]);
8043                 block[j]= block[j]*4;
8044                 if(j&1) block[j]= (block[j]*4 + 2)/5;
8045                 if(j&4) block[j]= (block[j]*4 + 2)/5;
8049             s->dsp.h264_idct_add(ref, block, 4);
8050 /*            for(j=0; j<16; j++){
8051                 printf("%d ", ref[j]);
8055             for(j=0; j<16; j++){
8056                 int diff= FFABS(src[j] - ref[j]);
8059                 max_error= FFMAX(max_error, diff);
8062         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8063         printf("testing quantizer\n");
8064         for(qp=0; qp<52; qp++){
8066                 src1_block[i]= src2_block[i]= random()%255;
/* NAL layer: encode a random bitstream with forced zero runs, then
 * decode it back and compare content, length and consumed bytes */
8069         printf("Testing NAL layer\n");
8071         uint8_t bitstream[COUNT];
8072         uint8_t nal[COUNT*2];
8074         memset(&h, 0, sizeof(H264Context));
8076         for(i=0; i<COUNT; i++){
8084             for(j=0; j<COUNT; j++){
8085                 bitstream[j]= (random() % 255) + 1;
8088             for(j=0; j<zeros; j++){
8089                 int pos= random() % COUNT;
8090                 while(bitstream[pos] == 0){
8099             nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8101                 printf("encoding failed\n");
8105             out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8109             if(out_length != COUNT){
8110                 printf("incorrect length %d %d\n", out_length, COUNT);
8114             if(consumed != nal_length){
8115                 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8119             if(memcmp(bitstream, out, COUNT)){
8120                 printf("mismatch\n");
8126     printf("Testing RBSP\n");
/**
 * Free all H264Context-owned allocations: the decoder tables plus every
 * stored SPS and PPS buffer (av_freep also NULLs the slots).
 * NOTE(review): the opening/closing braces and the loop index declaration
 * are elided in this chunk.
 */
8134 av_cold void ff_h264_free_context(H264Context *h)
8138     free_tables(h); //FIXME cleanup init stuff perhaps
8140     for(i = 0; i < MAX_SPS_COUNT; i++)
8141         av_freep(h->sps_buffers + i);
8143     for(i = 0; i < MAX_PPS_COUNT; i++)
8144         av_freep(h->pps_buffers + i);
/**
 * AVCodec.close callback: release the H.264 decoder state via
 * ff_h264_free_context().
 * NOTE(review): further teardown and the return are elided in this chunk.
 */
8147 static av_cold int decode_end(AVCodecContext *avctx)
8149     H264Context *h = avctx->priv_data;
8150     MpegEncContext *s = &h->s;
8152     ff_h264_free_context(h);
8156 //    memset(h, 0, sizeof(H264Context));
/* Registration entry for the software H.264 decoder.
 * NOTE(review): several positional fields (name, type, codec id, init/close/
 * decode callbacks) are elided in this chunk. */
8162 AVCodec h264_decoder = {
8166     sizeof(H264Context),
8171     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8173     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8174     .pix_fmts= ff_hwaccel_pixfmt_list_420,
/* Registration entry for the VDPAU-accelerated H.264 decoder variant;
 * only compiled in when the VDPAU decoder is configured.
 * NOTE(review): several positional fields are elided in this chunk. */
8177 #if CONFIG_H264_VDPAU_DECODER
8178 AVCodec h264_vdpau_decoder = {
8182     sizeof(H264Context),
8187     CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8189     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8190     .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
8194 #if CONFIG_SVQ3_DECODER