2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Pack two 16-bit values into one 32-bit word.
 * NOTE(review): the two return statements are the big-endian and the
 * little-endian packing respectively; the #if/#else/#endif endianness
 * guards selecting between them appear to be elided from this listing —
 * confirm against the complete file.
 */
84 static av_always_inline uint32_t pack16to32(int a, int b){
86 return (b&0xFFFF) + (a<<16);
88 return (a&0xFFFF) + (b<<16);
92 static const uint8_t rem6[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
96 static const uint8_t div6[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
100 static const uint8_t left_block_options[4][8]={
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fill the per-macroblock neighbor caches (intra pred modes, non-zero
 * counts, cbp, motion vectors, reference indices, mvd, direct flags) from
 * the neighboring macroblocks, handling MBAFF field/frame neighbor
 * remapping.
 * @param mb_type     macroblock type of the current MB
 * @param for_deblock nonzero when caches are filled for the loop filter
 *                    (slice-boundary rules differ from decode-time rules)
 * NOTE(review): this listing is elided — many interior lines (closing
 * braces, else-branches) are missing; comments below describe only the
 * visible code.
 */
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
// In field (PAFF) pictures the vertical neighbor is two rows up in MB units.
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
// Default (non-MBAFF) neighbor addresses: straight left/topleft/topright.
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
// MBAFF: neighbors depend on the field/frame coding of each MB pair,
// so recompute the neighbor addresses from the pair-level flags.
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
164 left_block= left_block_options[2 - bottom];
// Publish the resolved neighbor addresses for other code (e.g. the MV
// prediction helpers read h->left_mb_xy / h->top_mb_xy).
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
// Deblocking path: a neighbor is usable if its slice_table entry is valid
// (< 0xFFFF), regardless of whether it is in the same slice.
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// Decode path: a neighbor is usable only if it belongs to the same slice.
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra prediction sample-availability bitmasks. With constrained intra
// prediction only intra neighbors count as available (type_mask filters).
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
// Mixed field/frame left neighbor needs per-half availability handling.
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
// Cache the intra4x4 prediction modes of the top and left neighbor rows.
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
254 if(!(top_type & type_mask))
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
270 if(!(left_type[i] & type_mask))
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
// Non-zero-coefficient-count cache: top neighbor row (luma + chroma DC/AC).
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
// Unavailable top neighbor: CABAC+inter uses 0, otherwise 64 as sentinel.
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Non-zero-count cache: the two left neighbor halves (MBAFF-aware).
319 for (i=0; i<2; i++) {
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CBP of the top/left neighbors, used for CABAC context derivation.
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Motion vector / reference index caches for inter or direct MBs.
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h->mv_cache_clean[list]= 0;
// Top row of the MV/ref cache from the bottom row of the top neighbor MB.
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left column of the MV/ref cache from the two left neighbor halves.
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
// Topleft/topright corners (topleft may use topleft_partition for MBAFF).
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Mark the cache padding cells unavailable so prediction never reads them.
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
446 /* XXX beurk, Load mvd */
// Motion vector difference caches (CABAC only), same layout as mv_cache.
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B-slice direct flags per 8x8 block, from the top/left neighbors.
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF field<->frame remapping of cached neighbor MVs/refs:
// frame->field halves the vertical MV and doubles the ref index (and
// the inverse below); applied per cached neighbor cell via MAP_F2F.
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
// Record whether the top/left neighbors used the 8x8 DCT (for CABAC ctx).
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Write the right column and bottom row of the intra4x4 prediction mode
 * cache back into the per-macroblock intra4x4_pred_mode table, so later
 * macroblocks can read them as their left/top neighbors.
 */
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Check that the top/left samples required by each cached intra4x4 mode
 * are available, remapping modes to their DC fallbacks (via the top[]/
 * left[] tables) when possible; logs an error for modes that cannot be
 * remapped. NOTE(review): the return statements and loop headers are
 * elided from this listing — presumably returns <0 on error, 0 on
 * success; confirm against the original file.
 */
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Check that the neighbor samples required by a 16x16 luma / chroma intra
 * prediction mode are available and remap the mode to a DC fallback when
 * they are not; the MBAFF + constrained_intra_pred case gets the special
 * ALZHEIMER_DC_* half-available modes. Logs and errors out when no
 * fallback exists. NOTE(review): return statements are elided from this
 * listing — presumably returns the (possibly remapped) mode, or <0 on
 * error; confirm against the original file.
 */
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if((h->left_samples_available&0x8080) != 0x8080){
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
636 * gets the predicted intra4x4 prediction mode.
/**
 * Predict the intra4x4 mode for block n as the minimum of the left and
 * top neighbors' cached modes; falls back to DC_PRED when either neighbor
 * is unavailable (negative cache value).
 * NOTE(review): the final return (presumably `return min;`) is elided
 * from this listing.
 */
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
/**
 * Write the right column / bottom row of the non-zero-coefficient-count
 * cache back into the per-macroblock non_zero_count table (luma entries
 * 0-6, chroma entries 7-12) for use by later macroblocks.
 */
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
/**
 * Predict the number of non-zero coefficients for block n from the cached
 * left and top neighbor counts (used by CAVLC coeff_token decoding).
 * @param n block index
 * NOTE(review): the line computing `i` from left/top and the final return
 * are elided from this listing; the visible `(i+1)>>1` rounds the
 * left/top combination, and the trace masks with &31.
 */
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetch the "diagonal" (top-right, or top-left fallback) motion vector
 * neighbor used for MV prediction, handling the MBAFF field/frame
 * mismatch cases where a neighbor's MV must be rescaled (vertical MV
 * doubled/halved, ref index shifted) before use.
 * @param C          receives a pointer to the chosen neighbor MV
 * @param i          cache index of the current block (scan8-based)
 * @param part_width partition width in 4x4 units
 * @return the reference index of the chosen neighbor
 */
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Scratch cell scan8[0]-2 holds the rescaled MV so *C can point at it.
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV: load the MV at 4x4 coords (X4,Y4), apply MV_OP to the
// vertical component and REF_OP to the ref index, then return the ref.
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Top-right unavailable: try the left neighbor column instead.
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF path: top-right if available, otherwise top-left.
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
/**
 * Median motion vector prediction (H.264 8.4.1.3): predict (mx,my) for
 * block n from the left (A), top (B) and diagonal (C) neighbors.
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param ref the reference index being predicted for
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 * NOTE(review): the single-match and left-only special-case branches are
 * partially elided from this listing.
 */
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
// match_count: how many neighbors share the target ref index; >1 means
// plain median, ==1 means copy the matching neighbor.
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
780 }else if(top_ref==ref){
// Only the left neighbor exists: use A directly; otherwise median.
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 16x8 partitions (H.264 8.4.1.3): the top
 * partition prefers the top neighbor (B), the bottom partition prefers
 * the left neighbor (A); falls back to the median predictor otherwise.
 * @param n the block index
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 */
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Directional shortcut did not apply: use the generic median predictor.
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 8x16 partitions (H.264 8.4.1.3): the left
 * partition prefers the left neighbor (A), the right partition prefers
 * the diagonal neighbor (C); falls back to the median predictor.
 * @param n the block index
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 */
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
// Directional shortcut did not apply: use the generic median predictor.
869 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV prediction for P-skip macroblocks (H.264 8.4.1.1): (0,0) when either
 * the top or left neighbor is unavailable, or is ref 0 with a zero MV;
 * otherwise the regular median prediction for a 16x16 partition.
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 */
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
// ref|mv packed test: true only when ref==0 AND both MV components are 0.
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
886 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Compute the temporal-direct dist_scale_factor for list0 reference i
 * (H.264 8.4.1.2.3): clip td/tb POC distances to [-128,127], derive
 * tx = (16384 + |td|/2) / td and clip (tb*tx + 32) >> 6 to [-1024,1023].
 * NOTE(review): the branch returning the default (256) for td==0 or
 * long-term refs is elided from this listing — confirm against the
 * original file.
 */
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fill h->dist_scale_factor[] (and, for MBAFF, the per-field
 * dist_scale_factor_field[][]) with the temporal-direct scale factors for
 * every list0 reference, using the current picture's POC and the first
 * list1 reference's POC.
 */
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
// Field variant: field refs live at indices 16+ (hence i+16), and the
// i^field swizzle pairs same-parity fields first.
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Build the map from the co-located (list1[0]) picture's reference
 * indices to the current slice's list indices, for temporal direct mode.
 * Matching is done on 4*frame_num + (reference&3) "poc" keys.
 * @param map      output map; entries 16+ are the MBAFF/field variants
 * @param field    current field parity (only meaningful when mbafi)
 * @param colfield field parity of the co-located picture
 * @param mbafi    nonzero when building the MBAFF field map
 */
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
934 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
// Find the current-list entry whose frame_num/reference key matches.
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
945 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and POC codes) into
 * the Picture itself so future B-frames can resolve temporal-direct
 * colocated references, then builds the col->list0 maps via fill_colmap().
 * The POC code stored is 4*frame_num + (reference&3), matching the lookup in
 * fill_colmap().
 * NOTE(review): elided lines (957, 960, 965-966, 970-976) hide the j/list/
 * field declarations and the early return before the map filling.
 */
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
/* sidx: which field slot of cur to fill (opposite of picture_structure low bit) */
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
967 if(s->picture_structure == PICT_FRAME){
/* a frame picture fills both field slots identically */
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
/* spatial direct or non-B pictures do not need the colocated maps */
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors, reference indices and (sub-)macroblock types for a
 * B-direct macroblock, handling both spatial and temporal direct prediction
 * and all frame/field (PAFF/MBAFF) combinations between the current MB and
 * the colocated MB in ref_list[1][0].
 * Results are written into h->ref_cache / h->mv_cache / h->sub_mb_type and
 * *mb_type is updated with the derived partition flags.
 * NOTE(review): this listing elides many lines (declarations of mb_type_col,
 * ref/mv/scale locals, several else branches and closing braces); comments
 * below describe only the visible code.
 */
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
/* direct mode requires a valid (short- or long-term) list1 reference */
996 assert(h->ref_list[1][0].reference&3);
998 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- locate the colocated MB; comments name the cur->col structure transition --- */
1000 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1001 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1002 int cur_poc = s->current_picture_ptr->poc;
1003 int *col_poc = h->ref_list[1]->field_poc;
/* pick the colocated field whose POC is closer to the current picture */
1004 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1005 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1007 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
/* step one MB row up or down to reach the opposite-parity field */
1008 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1009 mb_xy += s->mb_stride*fieldoff;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1014 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1016 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1021 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1026 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1027 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1029 }else{ // AFR/FR -> AFR/FR
1032 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1033 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1037 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1038 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1042 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1043 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* pointers into the colocated picture's motion vectors / reference indices */
1048 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1049 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1050 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1051 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* presumably advances to the bottom half of the colocated MB pair — condition elided, confirm */
1054 l1ref0 += h->b8_stride;
1055 l1ref1 += h->b8_stride;
1056 l1mv0 += 2*b4_stride;
1057 l1mv1 += 2*b4_stride;
/* ================= spatial direct prediction ================= */
1061 if(h->direct_spatial_mv_pred){
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1068 /* ref = min(neighbors) */
1069 for(list=0; list<2; list++){
1070 int refa = h->ref_cache[list][scan8[0] - 1];
1071 int refb = h->ref_cache[list][scan8[0] - 8];
1072 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1073 if(refc == PART_NOT_AVAILABLE)
1074 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare makes negative (unavailable) refs sort last */
1075 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1080 if(ref[0] < 0 && ref[1] < 0){
/* no neighbor references at all: direct MVs default to ref 0 / zero MV */
1081 ref[0] = ref[1] = 0;
1082 mv[0][0] = mv[0][1] =
1083 mv[1][0] = mv[1][1] = 0;
1085 for(list=0; list<2; list++){
1087 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1089 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction list when only one ref is valid */
1095 *mb_type &= ~MB_TYPE_L1;
1096 sub_mb_type &= ~MB_TYPE_L1;
1097 }else if(ref[0] < 0){
1099 *mb_type &= ~MB_TYPE_L0;
1100 sub_mb_type &= ~MB_TYPE_L0;
/* field/frame mismatch with the colocated MB: per-8x8 handling */
1103 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1104 for(i8=0; i8<4; i8++){
1107 int xy8 = x8+y8*b8_stride;
1108 int xy4 = 3*x8+y8*b4_stride;
1111 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1113 h->sub_mb_type[i8] = sub_mb_type;
1115 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1116 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated block (nearly) static and referencing ref0 => zero out the matching MV */
1117 if(!IS_INTRA(mb_type_col[y8])
1118 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1119 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1121 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1128 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1129 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1131 }else if(IS_16X16(*mb_type)){
1134 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1135 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1136 if(!IS_INTRA(mb_type_col[0])
1137 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1138 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* workaround for a bug in x264 builds <= 33 (l1ref1 path) */
1139 && (h->x264_build>33 || !h->x264_build)))){
1141 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 b= pack16to32(mv[1][0],mv[1][1]);
1148 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1151 for(i8=0; i8<4; i8++){
1152 const int x8 = i8&1;
1153 const int y8 = i8>>1;
1155 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1157 h->sub_mb_type[i8] = sub_mb_type;
1159 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1160 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1161 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1162 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* col_zero_flag per 8x8/4x4: zero the MV where the colocated block is static */
1165 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1166 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1167 && (h->x264_build>33 || !h->x264_build)))){
1168 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1169 if(IS_SUB_8X8(sub_mb_type)){
1170 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1171 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1173 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 for(i4=0; i4<4; i4++){
1179 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1180 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1182 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1184 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ================= temporal direct prediction ================= */
1190 }else{ /* direct temporal mv pred */
1191 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1192 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MBs use the per-field maps/scale factors */
1195 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1196 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1197 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1198 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1200 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1203 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
/* vertical MV must be scaled when crossing frame<->field coding */
1205 int y_shift = 2*!IS_INTERLACED(*mb_type);
1207 for(i8=0; i8<4; i8++){
1208 const int x8 = i8&1;
1209 const int y8 = i8>>1;
1211 const int16_t (*l1mv)[2]= l1mv0;
1213 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1215 h->sub_mb_type[i8] = sub_mb_type;
1217 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col[y8])){
/* intra colocated block: ref 0, zero MVs for both lists */
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 ref0 = l1ref0[x8 + y8*b8_stride];
1227 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1229 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1232 scale = dist_scale_factor[ref0];
1233 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1236 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1237 int my_col = (mv_col[1]<<y_shift)/2;
/* L0 MV = scale*col MV (rounded); L1 MV = L0 MV - col MV */
1238 int mx = (scale * mv_col[0] + 128) >> 8;
1239 int my = (scale * my_col + 128) >> 8;
1240 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1241 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1247 /* one-to-one mv scaling */
1249 if(IS_16X16(*mb_type)){
1252 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1253 if(IS_INTRA(mb_type_col[0])){
1256 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1257 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1258 const int scale = dist_scale_factor[ref0];
1259 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1261 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1262 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1264 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1265 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1267 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1271 for(i8=0; i8<4; i8++){
1272 const int x8 = i8&1;
1273 const int y8 = i8>>1;
1275 const int16_t (*l1mv)[2]= l1mv0;
1277 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1279 h->sub_mb_type[i8] = sub_mb_type;
1280 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 if(IS_INTRA(mb_type_col[0])){
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1283 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1290 ref0 = map_col_to_list0[0][ref0];
1292 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1295 scale = dist_scale_factor[ref0];
1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1298 if(IS_SUB_8X8(sub_mb_type)){
1299 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1300 int mx = (scale * mv_col[0] + 128) >> 8;
1301 int my = (scale * mv_col[1] + 128) >> 8;
1302 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1303 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1305 for(i4=0; i4<4; i4++){
1306 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1307 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1308 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1309 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1310 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1311 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion data from the decode caches (mv_cache, ref_cache,
 * mvd_cache, sub_mb_type) back into the frame-wide arrays of the current
 * picture (motion_val, ref_index, mvd_table, direct_table).
 * NOTE(review): listing elides the y-loop headers and several closing braces
 * (numbering jumps 1325->1327, 1342->1347).
 */
1318 static inline void write_back_motion(H264Context *h, int mb_type){
1319 MpegEncContext * const s = &h->s;
/* b_xy / b8_xy: top-left 4x4 / 8x8 block index of this MB in frame arrays */
1320 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1321 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1324 if(!USES_LIST(mb_type, 0))
1325 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1327 for(list=0; list<h->list_count; list++){
1329 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row from the cache */
1333 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1336 if( h->pps.cabac ) {
1337 if(IS_SKIP(mb_type))
1338 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1341 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1347 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1348 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1349 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1350 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1351 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B-slices also record which 8x8 partitions are direct-coded */
1355 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1356 if(IS_8X8(mb_type)){
1357 uint8_t *direct_table = &h->direct_table[b8_xy];
1358 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1359 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1360 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Unescapes a NAL unit: parses the NAL header byte (nal_ref_idc, nal_unit_type),
 * scans for 00 00 0x sequences, and removes emulation-prevention 0x03 bytes
 * into h->rbsp_buffer. Returns the unescaped payload; *dst_length is its size
 * and *consumed the number of input bytes eaten (payload + 1 header byte).
 * If no escape bytes are present the input buffer is returned unchanged
 * (zero-copy fast path).
 * NOTE(review): listing elides several lines (loop exits, the zero-copy
 * return, dst NULL check) — numbering jumps e.g. 1399->1409, 1411->1415.
 */
1365 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1370 // src[0]&0x80; //forbidden bit
1371 h->nal_ref_idc= src[0]>>5;
1372 h->nal_unit_type= src[0]&0x1F;
1376 for(i=0; i<length; i++)
1377 printf("%2X ", src[i]);
/* vectorized scan for a 00 byte: the bit tricks detect any zero byte in a word */
1380 #if HAVE_FAST_UNALIGNED
1381 # if HAVE_FAST_64BIT
1383 for(i=0; i+1<length; i+=9){
1384 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1387 for(i=0; i+1<length; i+=5){
1388 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1391 if(i>0 && !src[i]) i--;
1395 for(i=0; i+1<length; i+=2){
1396 if(src[i]) continue;
1397 if(i>0 && src[i-1]==0) i--;
1399 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1401 /* startcode, so we must be past the end */
1409 if(i>=length-1){ //no escaped 0
1410 *dst_length= length;
1411 *consumed= length+1; //+1 for the header
/* DPC NAL units get their own escape buffer so partition A data survives */
1415 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1416 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1417 dst= h->rbsp_buffer[bufidx];
1423 //printf("decoding esc\n");
/* everything before the first 00 pair is escape-free: bulk copy */
1424 memcpy(dst, src, i);
1427 //remove escapes (very rare 1:2^22)
1429 dst[di++]= src[si++];
1430 dst[di++]= src[si++];
1431 }else if(src[si]==0 && src[si+1]==0){
1432 if(src[si+2]==3){ //escape
1437 }else //next start code
1441 dst[di++]= src[si++];
1444 dst[di++]= src[si++];
1447 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1450 *consumed= si + 1;//+1 for the header
1451 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
/**
 * Identifies the rbsp_stop_one_bit in the last byte of an RBSP so trailing
 * padding can be excluded from parsing.
 * NOTE(review): the body is almost entirely elided in this listing (only the
 * trace printout of the last byte v is visible) — consult the full file for
 * the actual bit scan and return value.
 */
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1469 * IDCT transforms the 16 dc values and dequantizes them.
1470 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform of the 16 luma DC coefficients followed by
 * dequantization by qmul (with rounding, >>8). Operates in place on the DC
 * positions of the macroblock coefficient array; x_offset/y_offset map the
 * 4x4 DC grid onto the strided block layout.
 * NOTE(review): listing elides the stride constant, loop headers and the
 * temp[] store lines of the column pass (1487-1493).
 */
1472 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1479 //memset(block, 64, 2*256);
/* first (vertical) butterfly pass into temp[] */
1482 const int offset= y_offset[i];
1483 const int z0= block[offset+stride*0] + block[offset+stride*4];
1484 const int z1= block[offset+stride*0] - block[offset+stride*4];
1485 const int z2= block[offset+stride*1] - block[offset+stride*5];
1486 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second (horizontal) butterfly pass, dequantize and write back */
1495 const int offset= x_offset[i];
1496 const int z0= temp[4*0+i] + temp[4*2+i];
1497 const int z1= temp[4*0+i] - temp[4*2+i];
1498 const int z2= temp[4*1+i] - temp[4*3+i];
1499 const int z3= temp[4*1+i] + temp[4*3+i];
1501 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1502 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1503 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1504 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1510 * DCT transforms the 16 dc values.
1511 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side),
 * mirror of h264_luma_dc_dequant_idct_c: two butterfly passes over the
 * strided DC positions, final results halved (>>1).
 * NOTE(review): same elisions as the inverse function — stride constant,
 * loop headers and the temp[] stores are not visible in this listing.
 */
1513 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1514 // const int qmul= dequant_coeff[qp][0];
1516 int temp[16]; //FIXME check if this is a good idea
1517 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1518 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first butterfly pass into temp[] */
1521 const int offset= y_offset[i];
1522 const int z0= block[offset+stride*0] + block[offset+stride*4];
1523 const int z1= block[offset+stride*0] - block[offset+stride*4];
1524 const int z2= block[offset+stride*1] - block[offset+stride*5];
1525 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass, halve and write back */
1534 const int offset= x_offset[i];
1535 const int z0= temp[4*0+i] + temp[4*2+i];
1536 const int z1= temp[4*0+i] - temp[4*2+i];
1537 const int z2= temp[4*1+i] - temp[4*3+i];
1538 const int z3= temp[4*1+i] + temp[4*3+i];
1540 block[stride*0 +offset]= (z0 + z3)>>1;
1541 block[stride*2 +offset]= (z1 + z2)>>1;
1542 block[stride*8 +offset]= (z1 - z2)>>1;
1543 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, in place at the four strided DC positions.
 * NOTE(review): the intermediate butterfly (computing e and updating a,b,c
 * from a,b,c,d) is elided from this listing — lines 1560-1565 are missing,
 * which is why e/d appear without visible definitions/uses.
 */
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
/* load the four DC values */
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
/* butterfly results dequantized by qmul, >>7 */
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC values (encoder side),
 * mirror of chroma_dc_dequant_idct_c but without quantization scaling.
 * NOTE(review): as in the inverse, the intermediate butterfly lines
 * (1582-1587) are elided from this listing.
 */
1573 static void chroma_dc_dct_c(DCTELEM *block){
1574 const int stride= 16*2;
1575 const int xStride= 16;
/* load the four DC values */
1578 a= block[stride*0 + xStride*0];
1579 b= block[stride*0 + xStride*1];
1580 c= block[stride*1 + xStride*0];
1581 d= block[stride*1 + xStride*1];
/* write butterfly results back unscaled */
1588 block[stride*0 + xStride*0]= (a+c);
1589 block[stride*0 + xStride*1]= (e+b);
1590 block[stride*1 + xStride*0]= (a-c);
1591 block[stride*1 + xStride*1]= (e-b);
1596 * gets the chroma qp.
/**
 * Maps a luma qscale to the chroma QP via the PPS lookup table.
 * @param t      chroma component index into the per-PPS table
 * @param qscale luma quantizer
 */
1598 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1599 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one partition in one direction (one list):
 * quarter-pel luma interpolation via qpix_op and eighth-pel chroma via
 * chroma_op, with edge emulation when the MV points outside the picture.
 * @param square if true a single square qpix_op call covers the partition,
 *               otherwise a second call at +delta completes it
 * @param delta  offset (in bytes) of the second luma half when !square
 * NOTE(review): elided lines hide the emu flag declaration/assignment and
 * the chroma emu if-headers (numbering jumps 1645->1648, 1651->1654).
 */
1602 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1603 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1604 int src_x_offset, int src_y_offset,
1605 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1606 MpegEncContext * const s = &h->s;
/* mx/my: quarter-pel MV plus the partition's position (offsets are in 8-pel units *8) */
1607 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1608 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1609 const int luma_xy= (mx&3) + ((my&3)<<2);
1610 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1611 uint8_t * src_cb, * src_cr;
1612 int extra_width= h->emu_edge_width;
1613 int extra_height= h->emu_edge_height;
1615 const int full_mx= mx>>2;
1616 const int full_my= my>>2;
1617 const int pic_width = 16*s->mb_width;
1618 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* sub-pel filtering reads 3 extra pixels on each side */
1620 if(mx&7) extra_width -= 3;
1621 if(my&7) extra_height -= 3;
1623 if( full_mx < 0-extra_width
1624 || full_my < 0-extra_height
1625 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1626 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1627 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1628 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1632 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1634 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* gray-only decoding skips chroma entirely */
1637 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1640 // chroma offset when predicting from a field of opposite parity
1641 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1642 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1644 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1648 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1649 src_cb= s->edge_emu_buffer;
1651 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cr= s->edge_emu_buffer;
1657 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: performs list0
 * prediction with the "put" functions, then (for bidirectional blocks)
 * averages in the list1 prediction using the "avg" functions.
 * NOTE(review): the list0/list1 guard if-headers and the qpix_op=qpix_avg
 * switch are partially elided (numbering jumps 1674->1677, 1683->1687).
 */
1660 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1661 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1662 int x_offset, int y_offset,
1663 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1664 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1665 int list0, int list1){
1666 MpegEncContext * const s = &h->s;
1667 qpel_mc_func *qpix_op= qpix_put;
1668 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to this partition within the MB */
1670 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1671 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1672 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* convert to picture-absolute chroma coordinates for mc_dir_part */
1673 x_offset += 8*s->mb_x;
1674 y_offset += 8*(s->mb_y >> MB_FIELD);
1677 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
/* after list0, switch to averaging so list1 blends with it */
1683 chroma_op= chroma_avg;
1687 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1688 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1689 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1690 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bidirectional blocks
 * predict each list into separate buffers (dest and the obmc scratchpad)
 * and combine them with biweight functions — implicit (use_weight==2,
 * weights summing to 64) or explicit per-ref weights/offsets. Unidirectional
 * blocks predict normally then apply single-list weighting in place.
 * NOTE(review): the list0&&list1 if-header and some closing braces are
 * elided from this listing (numbering jumps 1707->1710).
 */
1694 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1699 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1700 int list0, int list1){
1701 MpegEncContext * const s = &h->s;
/* advance dest pointers to this partition within the MB */
1703 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1704 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1705 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1706 x_offset += 8*s->mb_x;
1707 y_offset += 8*(s->mb_y >> MB_FIELD);
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb = s->obmc_scratchpad;
1713 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1714 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1715 int refn0 = h->ref_cache[0][ scan8[n] ];
1716 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 into the real destination, list1 into scratch; blended below */
1718 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1719 dest_y, dest_cb, dest_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1721 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1722 tmp_y, tmp_cb, tmp_cr,
1723 x_offset, y_offset, qpix_put, chroma_put);
1725 if(h->use_weight == 2){
/* implicit weighting: weights per ref pair, summing to 64, zero offset */
1726 int weight0 = h->implicit_weight[refn0][refn1];
1727 int weight1 = 64 - weight0;
1728 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1732 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1734 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1737 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1739 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1740 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* unidirectional: predict, then weight the single list in place */
1743 int list = list1 ? 1 : 0;
1744 int refn = h->ref_cache[list][ scan8[n] ];
1745 Picture *ref= &h->ref_list[list][refn];
1746 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1747 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1748 qpix_put, chroma_put);
1750 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1751 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1752 if(h->use_weight_chroma){
1753 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1755 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1756 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatcher for one partition's motion compensation: routes to
 * mc_part_weighted when explicit weighting is on (use_weight==1) or when
 * implicit bidirectional weights differ from the trivial 32/32 average;
 * otherwise uses the cheaper unweighted mc_part_std.
 */
1761 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1762 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763 int x_offset, int y_offset,
1764 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1765 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1766 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1767 int list0, int list1){
/* implicit weight of exactly 32 means plain averaging — std path suffices */
1768 if((h->use_weight==2 && list0 && list1
1769 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1770 || h->use_weight==1)
1771 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put,
1773 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1775 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1776 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches for the reference pixels this MB's 16x16 MV points
 * at, staggered by mb_x so consecutive MBs touch different cache lines.
 * NOTE(review): the refn>=0 guard around the body is elided from this
 * listing (numbering jumps 1783->1785).
 */
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* cb and cr planes are contiguous: one prefetch covers both via their pitch */
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs all inter prediction for one macroblock: dispatches mc_part()
 * per partition according to the MB type (16x16 / 16x8 / 8x16 / 8x8 with
 * sub-partitions down to 4x4), selecting the qpix/chroma function and
 * weight-table variants sized for each partition shape.
 * NOTE(review): the 8x8 sub-partition loop headers and the i/j/n locals are
 * elided from this listing (numbering jumps 1829->1833, 1836->1838).
 */
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
/* prefetch list0 reference data before starting the actual MC */
1805 prefetch_motion(h, 0);
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
/* delta = 8*mb_linesize: second half is 8 rows below the first */
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1833 assert(IS_8X8(mb_type));
/* 8x8: each quadrant has its own sub-partitioning */
1836 const int sub_mb_type= h->sub_mb_type[i];
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1866 assert(IS_SUB_4X4(sub_mb_type));
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* prefetch list1 reference data for the upcoming MB */
1879 prefetch_motion(h, 1);
/**
 * Precomputes the CAVLC level decode table: for every suffix length and
 * every LEVEL_TAB_BITS-bit bitstream prefix, stores the decoded (zigzagged)
 * level and the number of bits consumed. Codes longer than the table width
 * store an escape marker (value+100 / LEVEL_TAB_BITS+100) so the slow path
 * can take over.
 */
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* prefix = number of leading zeros of the candidate bit pattern */
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* map unsigned code to signed level: even -> positive, odd -> negative */
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
/* code exceeds table width: flag with +100 so the caller re-reads slowly */
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run_before and their chroma-DC variants).
 * Each VLC is pointed at a preallocated static table and built with
 * INIT_VLC_USE_NEW_STATIC, so no dynamic allocation happens here.
 */
1907 static av_cold void decode_init_vlc(void){
// guards against repeated initialization across multiple decoder instances
1908 static int done = 0;
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918 &chroma_dc_coeff_token_len [0], 1, 1,
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
// the four coeff_token tables are packed back-to-back in one static array;
// "offset" walks through it using the per-table sizes
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927 &coeff_token_len [i][0], 1, 1,
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
// one total_zeros table per possible total_coeff value (1..15)
1948 for(i=0; i<15; i++){
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
1953 &total_zeros_len [i][0], 1, 1,
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
// run7_vlc covers the zeros_left > 6 case, which shares the len/bits row 6
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970 &run_len [6][0], 1, 1,
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 init_cavlc_level_tab();
/**
 * Free all per-context tables allocated by alloc_tables()/context_init().
 * Shared tables are freed once from the master context; per-thread buffers
 * (top_borders, scratchpad, rbsp buffers) are freed for every slice-thread
 * context, and the thread contexts themselves are freed except index 0,
 * which is the H264Context itself.
 */
1978 static void free_tables(H264Context *h){
1981 av_freep(&h->intra4x4_pred_mode);
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
1986 av_freep(&h->direct_table);
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
// slice_table is an offset pointer into slice_table_base; just NULL it
1989 h->slice_table= NULL;
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
1994 for(i = 0; i < MAX_THREADS; i++) {
1995 hx = h->thread_context[i];
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
2000 av_freep(&hx->rbsp_buffer[1]);
2001 av_freep(&hx->rbsp_buffer[0]);
// thread_context[0] == h, so only free the extra per-thread contexts
2002 if (i) av_freep(&h->thread_context[i]);
/**
 * Build the 8x8 dequantization tables for both scaling matrices,
 * one 64-entry table per QP (0..51). If the two PPS 8x8 scaling
 * matrices are identical, the second table aliases the first to
 * save work and memory.
 */
2006 static void init_dequant8_coeff_table(H264Context *h){
// the C idct8 uses a different coefficient layout; transpose when the
// active idct implementation is not the C reference one. FIXME ugly
2008 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2009 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2010 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2012 for(i=0; i<2; i++ ){
// identical matrices -> share table [0] and skip recomputation
2013 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2014 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2018 for(q=0; q<52; q++){
// shift = q/6; the per-QP scale repeats every 6 QP steps, doubled per period
2019 int shift = div6[q];
2022 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2023 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2024 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables for all six scaling matrices
 * (intra/inter x Y/Cb/Cr), one 16-entry table per QP (0..51).
 * Matrices equal to an earlier one alias that earlier table.
 */
2029 static void init_dequant4_coeff_table(H264Context *h){
// transpose for non-C idct implementations that use a swapped layout. FIXME ugly
2031 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2032 for(i=0; i<6; i++ ){
2033 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse the table of any earlier identical scaling matrix
2035 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2036 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2043 for(q=0; q<52; q++){
// +2 compensates the smaller transform size relative to the 8x8 case
2044 int shift = div6[q] + 2;
2047 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2048 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2049 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS. With lossless
 * transform bypass, QP 0 must dequantize to the identity, so those
 * entries are forced to the fixed-point unit value 1<<6.
 */
2054 static void init_dequant_tables(H264Context *h){
2056 init_dequant4_coeff_table(h);
// 8x8 tables are only needed when the PPS enables 8x8 transforms
2057 if(h->pps.transform_8x8_mode)
2058 init_dequant8_coeff_table(h);
2059 if(h->sps.transform_bypass){
// QP 0 + bypass: coefficients pass through unscaled (1.0 in 26.6 fixed point)
2062 h->dequant4_coeff[i][0][x] = 1<<6;
2063 if(h->pps.transform_8x8_mode)
2066 h->dequant8_coeff[i][0][x] = 1<<6;
2073 * needs width/height
2075 static int alloc_tables(H264Context *h){
2076 MpegEncContext * const s = &h->s;
// +1 row of padding so neighbor lookups above the first row stay in-bounds
2077 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2080 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
2082 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t), fail)
2083 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
2084 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
2086 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
2087 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
2088 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
2089 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
// -1 marks "no slice"; slice_table points past the padding row so index
// arithmetic with negative neighbor offsets is safe
2091 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2092 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2094 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
2095 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
// precompute macroblock-index -> 4x4-block / 8x8-block index mappings
2096 for(y=0; y<s->mb_height; y++){
2097 for(x=0; x<s->mb_width; x++){
2098 const int mb_xy= x + y*s->mb_stride;
2099 const int b_xy = 4*x + 4*y*h->b_stride;
2100 const int b8_xy= 2*x + 2*y*h->b8_stride;
2102 h->mb2b_xy [mb_xy]= b_xy;
2103 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated lazily in frame_start() because linesize is unknown here
2107 s->obmc_scratchpad = NULL;
2109 if(!h->dequant4_coeff[0])
2110 init_dequant_tables(h);
2119 * Mimic alloc_tables(), but for every context thread.
2121 static void clone_tables(H264Context *dst, H264Context *src){
// share the read-mostly tables between slice-thread contexts instead of
// reallocating them per thread
2122 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2123 dst->non_zero_count = src->non_zero_count;
2124 dst->slice_table = src->slice_table;
2125 dst->cbp_table = src->cbp_table;
2126 dst->mb2b_xy = src->mb2b_xy;
2127 dst->mb2b8_xy = src->mb2b8_xy;
2128 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2129 dst->mvd_table[0] = src->mvd_table[0];
2130 dst->mvd_table[1] = src->mvd_table[1];
2131 dst->direct_table = src->direct_table;
// per-thread scratchpad must NOT be shared; allocated later in frame_start()
2133 dst->s.obmc_scratchpad = NULL;
2134 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2139 * Allocate buffers which are not shared amongst multiple threads.
2141 static int context_init(H264Context *h){
// one row of luma(16) + cb(8) + cr(8) border samples per macroblock column
2142 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2143 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2147 return -1; // free_tables will clean up for us
/**
 * Initialization shared by the H.264 decoder and SVQ3:
 * copies dimensions/codec id from the AVCodecContext, sets up the
 * intra prediction contexts and default (flat) scaling matrices.
 */
2150 static av_cold void common_init(H264Context *h){
2151 MpegEncContext * const s = &h->s;
2153 s->width = s->avctx->width;
2154 s->height = s->avctx->height;
2155 s->codec_id= s->avctx->codec->id;
2157 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 forces init_dequant_tables() to run on the first PPS
2159 h->dequant_coeff_pps= -1;
2160 s->unrestricted_mv=1;
2161 s->decode=1; //FIXME
2163 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
// default: flat 16 scaling matrices (no custom scaling lists yet)
2165 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2166 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2170 * Reset SEI values at the beginning of the frame.
2172 * @param h H.264 context.
2174 static void reset_sei(H264Context *h) {
// -1 / 0 mean "not present in this access unit"
2175 h->sei_recovery_frame_cnt = -1;
2176 h->sei_dpb_output_delay = 0;
2177 h->sei_cpb_removal_delay = -1;
2178 h->sei_buffering_period_present = 0;
/**
 * AVCodec.init for the H.264 decoder: sets MpegEncContext defaults,
 * negotiates the pixel format / hwaccel, detects AVC ("mp4-style")
 * extradata, and normalizes the time base to two ticks per frame.
 */
2181 static av_cold int decode_init(AVCodecContext *avctx){
2182 H264Context *h= avctx->priv_data;
2183 MpegEncContext * const s = &h->s;
2185 MPV_decode_defaults(s);
2190 s->out_format = FMT_H264;
2191 s->workaround_bugs= avctx->workaround_bugs;
2194 //    s->decode_mb= ff_h263_decode_mb;
2195 s->quarter_sample = 1;
2196 if(!avctx->has_b_frames)
2199 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2200 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
2201 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
// leading byte 1 marks AVCDecoderConfigurationRecord (length-prefixed NALs)
// rather than Annex-B start codes
2205 if(avctx->extradata_size > 0 && avctx->extradata &&
2206 *(char *)avctx->extradata == 1){
2213 h->thread_context[0] = h;
2214 h->outputed_poc = INT_MIN;
// sentinel larger than any real POC MSB so the first-frame POC math works
2215 h->prev_poc_msb= 1<<16;
2217 if(avctx->codec_id == CODEC_ID_H264){
// H.264 time base counts field-rate ticks: ensure 2 ticks per frame
2218 if(avctx->ticks_per_frame == 1){
2219 s->avctx->time_base.den *=2;
2221 avctx->ticks_per_frame = 2;
/**
 * Per-frame setup: starts the MPV frame and error resilience, clears the
 * IDR/key-frame markings, precomputes the block_offset tables for both
 * frame (4*linesize) and field (8*linesize) rendering, and allocates the
 * per-thread scratchpads that need the now-known linesize.
 * @return 0 on success, negative on MPV_frame_start failure.
 */
2226 static int frame_start(H264Context *h){
2227 MpegEncContext * const s = &h->s;
2230 if(MPV_frame_start(s, s->avctx) < 0)
2232 ff_er_frame_start(s);
2234 * MPV_frame_start uses pict_type to derive key_frame.
2235 * This is incorrect for H.264; IDR markings must be used.
2236 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2237 * See decode_nal_units().
2239 s->current_picture_ptr->key_frame= 0;
2240 s->current_picture_ptr->mmco_reset= 0;
2242 assert(s->linesize && s->uvlinesize);
// block_offset[0..23]: frame-mode offsets; [24..47]: field-mode offsets
// (doubled vertical stride), for luma then the two chroma planes
2244 for(i=0; i<16; i++){
2245 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2246 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2249 h->block_offset[16+i]=
2250 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2251 h->block_offset[24+16+i]=
2252 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2255 /* can't be in alloc_tables because linesize isn't known there.
2256 * FIXME: redo bipred weight to not require extra buffer? */
2257 for(i = 0; i < s->avctx->thread_count; i++)
2258 if(!h->thread_context[i]->s.obmc_scratchpad)
2259 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2261 /* some macroblocks will be accessed before they're available */
2262 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2263 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2265 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2267 // We mark the current picture as non-reference after allocating it, so
2268 // that if we break out due to an error it can be released automatically
2269 // in the next MPV_frame_start().
2270 // SVQ3 as well as most other codecs have only last/next/current and thus
2271 // get released even with set reference, besides SVQ3 and others do not
2272 // mark frames as reference later "naturally".
2273 if(s->codec_id != CODEC_ID_SVQ3)
2274 s->current_picture_ptr->reference= 0;
// field POCs default to INT_MAX until the slice header fills them in
2276 s->current_picture_ptr->field_poc[0]=
2277 s->current_picture_ptr->field_poc[1]= INT_MAX;
2278 assert(s->current_picture_ptr->long_ref==0);
2283 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2284 MpegEncContext * const s = &h->s;
2293 src_cb -= uvlinesize;
2294 src_cr -= uvlinesize;
2296 if(!simple && FRAME_MBAFF){
2298 offset = MB_MBAFF ? 1 : 17;
2299 uvoffset= MB_MBAFF ? 1 : 9;
2301 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2303 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2304 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2305 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2310 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2311 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2312 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2313 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2319 top_idx = MB_MBAFF ? 0 : 1;
2321 step= MB_MBAFF ? 2 : 1;
2324 // There are two lines saved, the line above the the top macroblock of a pair,
2325 // and the line above the bottom macroblock
2326 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2327 for(i=1; i<17 - skiplast; i++){
2328 h->left_border[offset+i*step]= src_y[15+i* linesize];
2331 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2332 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2334 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2335 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2336 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2337 for(i=1; i<9 - skiplast; i++){
2338 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2339 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2341 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2342 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2346 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2347 MpegEncContext * const s = &h->s;
2358 if(!simple && FRAME_MBAFF){
2360 offset = MB_MBAFF ? 1 : 17;
2361 uvoffset= MB_MBAFF ? 1 : 9;
2365 top_idx = MB_MBAFF ? 0 : 1;
2367 step= MB_MBAFF ? 2 : 1;
2370 if(h->deblocking_filter == 2) {
2372 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2373 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2375 deblock_left = (s->mb_x > 0);
2376 deblock_top = (s->mb_y > !!MB_FIELD);
2379 src_y -= linesize + 1;
2380 src_cb -= uvlinesize + 1;
2381 src_cr -= uvlinesize + 1;
2383 #define XCHG(a,b,t,xchg)\
2390 for(i = !deblock_top; i<16; i++){
2391 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2393 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2397 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2399 if(s->mb_x+1 < s->mb_width){
2400 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2404 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2406 for(i = !deblock_top; i<8; i++){
2407 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2408 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2410 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2411 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2414 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2415 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * inverse transform + residual add for luma and chroma, then deblocking
 * border bookkeeping. "simple" is a compile-time flag (always_inline)
 * that strips the MBAFF/interlaced, gray, SVQ3 and PCM special cases to
 * produce a fast path for plain progressive H.264.
 */
2420 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2421 MpegEncContext * const s = &h->s;
2422 const int mb_x= s->mb_x;
2423 const int mb_y= s->mb_y;
2424 const int mb_xy= h->mb_xy;
2425 const int mb_type= s->current_picture.mb_type[mb_xy];
2426 uint8_t *dest_y, *dest_cb, *dest_cr;
2427 int linesize, uvlinesize /*dct_offset*/;
2429 int *block_offset = &h->block_offset[0];
// lossless bypass is only active at qscale 0 with the SPS flag set
2430 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2431 /* is_h264 should always be true if SVQ3 is disabled. */
2432 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2433 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2434 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2436 dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2437 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2438 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2440 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2441 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double strides and use the field block_offset table
2443 if (!simple && MB_FIELD) {
2444 linesize   = h->mb_linesize   = s->linesize * 2;
2445 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2446 block_offset = &h->block_offset[24];
2447 if(mb_y&1){ //FIXME move out of this function?
2448 dest_y -= s->linesize*15;
2449 dest_cb-= s->uvlinesize*7;
2450 dest_cr-= s->uvlinesize*7;
// re-encode ref indices with field parity so the deblock filter can
// compare references across the field pair
2454 for(list=0; list<h->list_count; list++){
2455 if(!USES_LIST(mb_type, list))
2457 if(IS_16X16(mb_type)){
2458 int8_t *ref = &h->ref_cache[list][scan8[0]];
2459 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2461 for(i=0; i<16; i+=4){
2462 int ref = h->ref_cache[list][scan8[i]];
2464 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2470 linesize   = h->mb_linesize   = s->linesize;
2471 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2472 //        dct_offset = s->linesize * 16;
// I_PCM: raw samples were parsed into h->mb; copy them straight out
2475 if (!simple && IS_INTRA_PCM(mb_type)) {
2476 for (i=0; i<16; i++) {
2477 memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2479 for (i=0; i<8; i++) {
2480 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2481 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2484 if(IS_INTRA(mb_type)){
// temporarily expose unfiltered neighbor samples for intra prediction
2485 if(h->deblocking_filter)
2486 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2488 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2489 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2490 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2493 if(IS_INTRA4x4(mb_type)){
2494 if(simple || !s->encoding){
2495 if(IS_8x8DCT(mb_type)){
2496 if(transform_bypass){
2498 idct_add = s->dsp.add_pixels8;
2500 idct_dc_add = s->dsp.h264_idct8_dc_add;
2501 idct_add    = s->dsp.h264_idct8_add;
// 8x8 intra: predict and add residual per 8x8 block (i steps by 4)
2503 for(i=0; i<16; i+=4){
2504 uint8_t * const ptr= dest_y + block_offset[i];
2505 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// profile 244 (High 4:4:4) lossless: special prediction+add in one step
2506 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2507 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2509 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2510 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2511 (h->topright_samples_available<<i)&0x4000, linesize);
// DC-only blocks use the cheaper dc_add
2513 if(nnz == 1 && h->mb[i*16])
2514 idct_dc_add(ptr, h->mb + i*16, linesize);
2516 idct_add   (ptr, h->mb + i*16, linesize);
2521 if(transform_bypass){
2523 idct_add = s->dsp.add_pixels4;
2525 idct_dc_add = s->dsp.h264_idct_dc_add;
2526 idct_add    = s->dsp.h264_idct_add;
2528 for(i=0; i<16; i++){
2529 uint8_t * const ptr= dest_y + block_offset[i];
2530 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2532 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2533 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
// diagonal predictors need top-right samples; replicate the last
// available top sample when the real top-right is unavailable
2537 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2538 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2539 assert(mb_y || linesize <= block_offset[i]);
2540 if(!topright_avail){
2541 tr= ptr[3 - linesize]*0x01010101;
2542 topright= (uint8_t*) &tr;
2544 topright= ptr + 4 - linesize;
2548 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2549 nnz = h->non_zero_count_cache[ scan8[i] ];
2552 if(nnz == 1 && h->mb[i*16])
2553 idct_dc_add(ptr, h->mb + i*16, linesize);
2555 idct_add   (ptr, h->mb + i*16, linesize);
2557 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// 16x16 intra: full-MB prediction, then the luma DC transform
2564 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2566 if(!transform_bypass)
2567 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2569 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2571 if(h->deblocking_filter)
2572 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter macroblock: motion compensation fills the destination planes
2574 hl_motion(h, dest_y, dest_cb, dest_cr,
2575 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2576 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2577 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// luma residual for non-I4x4 macroblocks
2581 if(!IS_INTRA4x4(mb_type)){
2583 if(IS_INTRA16x16(mb_type)){
2584 if(transform_bypass){
2585 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2586 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2588 for(i=0; i<16; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2590 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2594 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2596 }else if(h->cbp&15){
2597 if(transform_bypass){
2598 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2599 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2600 for(i=0; i<16; i+=di){
2601 if(h->non_zero_count_cache[ scan8[i] ]){
2602 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2606 if(IS_8x8DCT(mb_type)){
2607 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2609 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2614 for(i=0; i<16; i++){
2615 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2616 uint8_t * const ptr= dest_y + block_offset[i];
2617 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual (cbp bits 4-5 indicate coded chroma)
2623 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2624 uint8_t *dest[2] = {dest_cb, dest_cr};
2625 if(transform_bypass){
2626 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2627 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2628 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2630 idct_add = s->dsp.add_pixels4;
2631 for(i=16; i<16+8; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2633 idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2637 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2638 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2640 idct_add = s->dsp.h264_idct_add;
2641 idct_dc_add = s->dsp.h264_idct_dc_add;
2642 for(i=16; i<16+8; i++){
2643 if(h->non_zero_count_cache[ scan8[i] ])
2644 idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2645 else if(h->mb[i*16])
2646 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2649 for(i=16; i<16+8; i++){
2650 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2651 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2652 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2659 if(h->cbp || IS_INTRA(mb_type))
2660 s->dsp.clear_blocks(h->mb);
// per-MB deblocking: save borders, refresh caches and run the filter
2662 if(h->deblocking_filter) {
2663 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2664 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2665 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2666 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2667 if (!simple && FRAME_MBAFF) {
2668 filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2670 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2676 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2678 static void hl_decode_mb_simple(H264Context *h){
// simple=1: the always_inline body is specialized with the rare paths removed
2679 hl_decode_mb_internal(h, 1);
2683 * Process a macroblock; this handles edge cases, such as interlacing.
// av_noinline keeps this rarely-taken specialization out of the hot path
2685 static void av_noinline hl_decode_mb_complex(H264Context *h){
2686 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple (fast) or complex
 * specialization depending on stream features of the current MB.
 */
2689 static void hl_decode_mb(H264Context *h){
2690 MpegEncContext * const s = &h->s;
2691 const int mb_xy= h->mb_xy;
2692 const int mb_type= s->current_picture.mb_type[mb_xy];
// CONFIG_SMALL builds only one code path; otherwise MBAFF/PCM/lossless
// streams need the complex variant
2693 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2696 hl_decode_mb_complex(h);
2697 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture into a single-field view in place:
 * doubles the line strides, offsets the data pointers by one line for
 * the bottom field, and selects the matching field POC.
 */
2700 static void pic_as_field(Picture *pic, const int parity){
2702 for (i = 0; i < 4; ++i) {
// bottom field starts one line down in the interleaved frame buffer
2703 if (parity == PICT_BOTTOM_FIELD)
2704 pic->data[i] += pic->linesize[i];
2705 pic->reference = parity;
2706 pic->linesize[i] *= 2;
2708 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is a reference with the requested parity,
 * converting to a field picture when parity is a single field.
 * id_add adjusts pic_id so fields of a pair get distinct ids.
 */
2711 static int split_field_copy(Picture *dest, Picture *src,
2712 int parity, int id_add){
// match is nonzero only when src is usable as a reference for this parity
2713 int match = !!(src->reference & parity);
2717 if(parity != PICT_FRAME){
2718 pic_as_field(dest, parity);
2720 dest->pic_id += id_add;
/**
 * Build (part of) a default reference list from "in", alternating
 * pictures of the selected field parity (sel) and its opposite (sel^3),
 * as required for field decoding. is_long selects long-term pic_id
 * numbering (index) vs short-term (frame_num).
 * @return number of entries written to def
 */
2727 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
// i[0]/i[1] scan independently for same-parity and opposite-parity pictures
2731 while(i[0]<len || i[1]<len){
2732 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2734 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2737 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2738 split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2741 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2742 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append pictures from src to sorted, ordered by POC relative to "limit":
 * dir=0 selects POCs below the limit in descending order, dir=1 selects
 * POCs above the limit in ascending order (selection-sort style).
 * @return number of pictures appended
 */
2749 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2754 best_poc= dir ? INT_MIN : INT_MAX;
2756 for(i=0; i<len; i++){
2757 const int poc= src[i]->poc;
// XOR with dir flips both comparisons for the ascending case
2758 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2760 sorted[out_i]= src[i];
// sentinel unchanged -> no candidate found, the list is exhausted
2763 if(best_poc == (dir ? INT_MIN : INT_MAX))
2765 limit= sorted[out_i++]->poc - dir;
2771 * fills the default_ref_list.
2773 static int fill_default_ref_list(H264Context *h){
2774 MpegEncContext * const s = &h->s;
2777 if(h->slice_type_nos==FF_B_TYPE){
2778 Picture *sorted[32];
// field decoding orders by the POC of the current field
2783 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2785 cur_poc= s->current_picture_ptr->poc;
// B slices: L0 prefers past pictures (POC < cur), L1 prefers future ones;
// the 1^list / 0^list direction flip gives the mirrored order for L1
2787 for(list= 0; list<2; list++){
2788 len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2789 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2791 len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2792 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2795 if(len < h->ref_count[list])
2796 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
// spec: if L1 would equal L0 (and has >1 entry), swap its first two entries
2800 if(lens[0] == lens[1] && lens[1] > 1){
2801 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2803 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
// P slices: single list, short-term refs first, then long-term
2806 len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2807 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2809 if(len < h->ref_count[0])
2810 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2813 for (i=0; i<h->ref_count[0]; i++) {
2814 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2816 if(h->slice_type_nos==FF_B_TYPE){
2817 for (i=0; i<h->ref_count[1]; i++) {
2818 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2825 static void print_short_term(H264Context *h);
2826 static void print_long_term(H264Context *h);
2829 * Extract structure information about the picture described by pic_num in
2830 * the current decoding context (frame or field). Note that pic_num is
2831 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2832 * @param pic_num picture number for which to extract structure information
2833 * @param structure one of PICT_XXX describing structure of picture
2835 * @return frame number (short term) or long term index of picture
2836 * described by pic_num
2838 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2839 MpegEncContext * const s = &h->s;
2841 *structure = s->picture_structure;
2844 /* opposite field */
2845 *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_modification from the slice header and apply it:
 * starts from the default lists, then for each reordering command finds
 * the referenced short-term (by frame_num delta) or long-term (by index)
 * picture and moves it to the current position, shifting the rest down.
 * Missing references are reported and patched with a default entry.
 * @return 0 on success, -1 on bitstream error
 */
2852 static int decode_ref_pic_list_reordering(H264Context *h){
2853 MpegEncContext * const s = &h->s;
2854 int list, index, pic_structure;
2856 print_short_term(h);
2859 for(list=0; list<h->list_count; list++){
// start from the default list; reordering permutes from there
2860 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2862 if(get_bits1(&s->gb)){
// pred: running predicted picture number for abs_diff deltas
2863 int pred= h->curr_pic_num;
2865 for(index=0; ; index++){
2866 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2867 unsigned int pic_id;
2869 Picture *ref = NULL;
// idc 3 terminates the reordering command list
2871 if(reordering_of_pic_nums_idc==3)
2874 if(index >= h->ref_count[list]){
2875 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2879 if(reordering_of_pic_nums_idc<3){
2880 if(reordering_of_pic_nums_idc<2){
// idc 0/1: short-term, delta below/above the running prediction
2881 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2884 if(abs_diff_pic_num > h->max_pic_num){
2885 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2889 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2890 else                                pred+= abs_diff_pic_num;
2891 pred &= h->max_pic_num - 1;
2893 frame_num = pic_num_extract(h, pred, &pic_structure);
// search the short-term list newest-first for the matching frame_num
2895 for(i= h->short_ref_count-1; i>=0; i--){
2896 ref = h->short_ref[i];
2897 assert(ref->reference);
2898 assert(!ref->long_ref);
2900 ref->frame_num == frame_num &&
2901 (ref->reference & pic_structure)
// idc 2: long-term reference addressed by long_term_pic_idx
2909 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2911 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2914 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2917 ref = h->long_ref[long_idx];
2918 assert(!(ref && !ref->reference));
2919 if(ref && (ref->reference & pic_structure)){
2920 ref->pic_id= pic_id;
2921 assert(ref->long_ref);
2929 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2930 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// insert ref at "index": shift intermediate entries down by one
2932 for(i=index; i+1<h->ref_count[list]; i++){
2933 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2936 for(; i > index; i--){
2937 h->ref_list[list][i]= h->ref_list[list][i-1];
2939 h->ref_list[list][index]= *ref;
2941 pic_as_field(&h->ref_list[list][index], pic_structure);
2945 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// final pass: fill any still-missing entries so decoding can continue
2951 for(list=0; list<h->list_count; list++){
2952 for(index= 0; index < h->ref_count[list]; index++){
2953 if(!h->ref_list[list][index].data[0]){
2954 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2955 if(h->default_ref_list[list][0].data[0])
2956 h->ref_list[list][index]= h->default_ref_list[list][0];
/**
 * For MBAFF decoding, derive per-field reference entries from each frame
 * reference: ref_list slots [16+2*i] / [16+2*i+1] become the top/bottom
 * field views of frame reference i, and the weighted-prediction tables
 * are duplicated for those field entries.
 */
2966 static void fill_mbaff_ref_list(H264Context *h){
2968 for(list=0; list<2; list++){ //FIXME try list_count
2969 for(i=0; i<h->ref_count[list]; i++){
2970 Picture *frame = &h->ref_list[list][i];
2971 Picture *field = &h->ref_list[list][16+2*i];
// field views use doubled strides; bottom field is offset by one line
2974 field[0].linesize[j] <<= 1;
2975 field[0].reference = PICT_TOP_FIELD;
2976 field[0].poc= field[0].field_poc[0];
2977 field[1] = field[0];
2979 field[1].data[j] += frame->linesize[j];
2980 field[1].reference = PICT_BOTTOM_FIELD;
2981 field[1].poc= field[1].field_poc[1];
// both fields inherit the frame's explicit weights/offsets
2983 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2984 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2986 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2987 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// implicit weights are indexed by both lists; replicate rows and columns
2991 for(j=0; j<h->ref_count[1]; j++){
2992 for(i=0; i<h->ref_count[0]; i++)
2993 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2994 memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2995 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the explicit weighted-prediction table from the slice header:
 * luma/chroma log2 weight denominators, then per-reference weight and
 * offset pairs for list 0 (and list 1 for B slices — see the break at the
 * bottom). Entries without a weight flag fall back to the defaults
 * (weight == 1<<denom, offset == 0); h->use_weight(_chroma) and the
 * per-list weight flags are raised only for non-default entries.
 * @return presumably 0 on success — the return statement is not visible
 *         in this excerpt; confirm against the full file.
 * NOTE(review): several interior lines/braces are missing from this excerpt.
 */
2999 static int pred_weight_table(H264Context *h){
3000 MpegEncContext * const s = &h->s;
3002 int luma_def, chroma_def;
3005 h->use_weight_chroma= 0;
3006 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3007 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is exactly 1.0 in fixed point: 1 << log2_denom.
3008 luma_def = 1<<h->luma_log2_weight_denom;
3009 chroma_def = 1<<h->chroma_log2_weight_denom;
3011 for(list=0; list<2; list++){
3012 h->luma_weight_flag[list] = 0;
3013 h->chroma_weight_flag[list] = 0;
3014 for(i=0; i<h->ref_count[list]; i++){
3015 int luma_weight_flag, chroma_weight_flag;
3017 luma_weight_flag= get_bits1(&s->gb);
3018 if(luma_weight_flag){
3019 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3020 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Only a non-default pair actually enables weighting for this list.
3021 if( h->luma_weight[list][i] != luma_def
3022 || h->luma_offset[list][i] != 0) {
3024 h->luma_weight_flag[list]= 1;
3027 h->luma_weight[list][i]= luma_def;
3028 h->luma_offset[list][i]= 0;
3032 chroma_weight_flag= get_bits1(&s->gb);
3033 if(chroma_weight_flag){
3036 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3037 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3038 if( h->chroma_weight[list][i][j] != chroma_def
3039 || h->chroma_offset[list][i][j] != 0) {
3040 h->use_weight_chroma= 1;
3041 h->chroma_weight_flag[list]= 1;
3047 h->chroma_weight[list][i][j]= chroma_def;
3048 h->chroma_offset[list][i][j]= 0;
// Only B slices carry a second (list 1) weight table.
3053 if(h->slice_type_nos != FF_B_TYPE) break;
3055 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Compute implicit (POC-distance based) bi-prediction weights, used when
 * pps.weighted_bipred_idc == 2. Weights derive from td (poc1 - poc0) and
 * tb (cur_poc - poc0) per the H.264 temporal scaling formula; values
 * outside the legal range fall back to the 32/32 default.
 * NOTE(review): some interior lines (e.g. the early-return/else structure
 * around 3070-3081) are missing from this excerpt.
 */
3059 static void implicit_weight_table(H264Context *h){
3060 MpegEncContext * const s = &h->s;
3062 int cur_poc = s->current_picture_ptr->poc;
3064 for (i = 0; i < 2; i++) {
3065 h->luma_weight_flag[i] = 0;
3066 h->chroma_weight_flag[i] = 0;
// Fast path: one ref per list with POCs symmetric around cur_poc
// means the default equal weighting applies.
3069 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3070 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3072 h->use_weight_chroma= 0;
3077 h->use_weight_chroma= 2;
3078 h->luma_log2_weight_denom= 5;
3079 h->chroma_log2_weight_denom= 5;
3081 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3082 int poc0 = h->ref_list[0][ref0].poc;
3083 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3084 int poc1 = h->ref_list[1][ref1].poc;
// td/tb clipped to [-128,127] per spec; tx is the fixed-point inverse of td.
3085 int td = av_clip(poc1 - poc0, -128, 127);
3087 int tb = av_clip(cur_poc - poc0, -128, 127);
3088 int tx = (16384 + (FFABS(td) >> 1)) / td;
3089 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Out-of-range scale factors fall back to equal weighting (32/32).
3090 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3091 h->implicit_weight[ref0][ref1] = 32;
3093 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3095 h->implicit_weight[ref0][ref1] = 32;
3101 * Mark a picture as no longer needed for reference. The refmask
3102 * argument allows unreferencing of individual fields or the whole frame.
3103 * If the picture becomes entirely unreferenced, but is being held for
3104 * display purposes, it is marked as such.
3105 * @param refmask mask of fields to unreference; the mask is bitwise
3106 * anded with the reference marking of pic
3107 * @return non-zero if pic becomes entirely unreferenced (except possibly
3108 * for display purposes) zero if one of the fields remains in
3111 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// At least one field is still referenced after masking: not fully released.
3113 if (pic->reference &= refmask) {
// Fully unreferenced; if queued for delayed output, keep it alive by
// marking it with the special DELAYED_PIC_REF value.
3116 for(i = 0; h->delayed_pic[i]; i++)
3117 if(pic == h->delayed_pic[i]){
3118 pic->reference=DELAYED_PIC_REF;
3126 * instantaneous decoder refresh.
/*
 * Drop all long-term and short-term references and reset the frame_num
 * bookkeeping, as required at an IDR access unit.
 */
3128 static void idr(H264Context *h){
// Release every long-term slot (refmask 0 = unreference all fields).
3131 for(i=0; i<16; i++){
3132 remove_long(h, i, 0);
3134 assert(h->long_ref_count==0);
// Release the short-term list in place rather than via remove_short():
// unreference each entry and clear the slot directly.
3136 for(i=0; i<h->short_ref_count; i++){
3137 unreference_pic(h, h->short_ref[i], 0);
3138 h->short_ref[i]= NULL;
3140 h->short_ref_count=0;
3141 h->prev_frame_num= 0;
3142 h->prev_frame_num_offset= 0;
3147 /* forget old pics after a seek */
3148 static void flush_dpb(AVCodecContext *avctx){
3149 H264Context *h= avctx->priv_data;
// Drop every picture queued for delayed output.
3151 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3152 if(h->delayed_pic[i])
3153 h->delayed_pic[i]->reference= 0;
3154 h->delayed_pic[i]= NULL;
// Reset output ordering so any POC value is accepted after the seek.
3156 h->outputed_poc= INT_MIN;
3157 h->prev_interlaced_frame = 1;
// NOTE(review): lines between 3157 and 3159 are missing from this excerpt;
// presumably the idr()/reference-list reset happens there — confirm in file.
3159 if(h->s.current_picture_ptr)
3160 h->s.current_picture_ptr->reference= 0;
3161 h->s.first_field= 0;
3163 ff_mpeg_flush(avctx);
3167 * Find a Picture in the short term reference list by frame number.
3168 * @param frame_num frame number to search for
3169 * @param idx the index into h->short_ref where returned picture is found
3170 * undefined if no picture found.
3171 * @return pointer to the found picture, or NULL if no pic with the provided
3172 * frame number is found
3174 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3175 MpegEncContext * const s = &h->s;
// Linear scan; the short-term list is small (<= 16 entries).
3178 for(i=0; i<h->short_ref_count; i++){
3179 Picture *pic= h->short_ref[i];
3180 if(s->avctx->debug&FF_DEBUG_MMCO)
3181 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3182 if(pic->frame_num == frame_num) {
3191 * Remove a picture from the short term reference list by its index in
3192 * that list. This does no checking on the provided index; it is assumed
3193 * to be valid. Other list entries are shifted down.
3194 * @param i index into h->short_ref of picture to remove.
3196 static void remove_short_at_index(H264Context *h, int i){
3197 assert(i >= 0 && i < h->short_ref_count);
3198 h->short_ref[i]= NULL;
// Compact the list: after the decrement, short_ref_count - i entries
// remain above slot i and are moved down by one.
3199 if (--h->short_ref_count)
3200 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/*
 * Remove a picture from the short-term reference list by frame number,
 * unreferencing the fields selected by ref_mask. The list entry is only
 * dropped if the picture becomes entirely unreferenced.
 */
3205 * @return the removed picture or NULL if an error occurs
3207 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3208 MpegEncContext * const s = &h->s;
3212 if(s->avctx->debug&FF_DEBUG_MMCO)
3213 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3215 pic = find_short(h, frame_num, &i);
// Only drop the list slot when no field of the picture remains referenced.
3217 if(unreference_pic(h, pic, ref_mask))
3218 remove_short_at_index(h, i);
3225 * Remove a picture from the long term reference list by its index in
3227 * @return the removed picture or NULL if an error occurs
3229 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3232 pic= h->long_ref[i];
// Clear the slot only when the picture is fully unreferenced by the mask.
3234 if(unreference_pic(h, pic, ref_mask)){
3235 assert(h->long_ref[i]->long_ref == 1);
3236 h->long_ref[i]->long_ref= 0;
3237 h->long_ref[i]= NULL;
3238 h->long_ref_count--;
3246 * print short term list
3248 static void print_short_term(H264Context *h) {
// Debug-only dump; active when FF_DEBUG_MMCO is set on the codec context.
3250 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3251 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3252 for(i=0; i<h->short_ref_count; i++){
3253 Picture *pic= h->short_ref[i];
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3260 * print long term list
3262 static void print_long_term(H264Context *h) {
// Debug-only dump; iterates all 16 long-term slots (some may be NULL —
// the guard for that is among the lines missing from this excerpt).
3264 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3265 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3266 for(i = 0; i < 16; i++){
3267 Picture *pic= h->long_ref[i];
3269 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3276 * Executes the reference picture marking (memory management control operations).
/*
 * Applies the slice's MMCO list to the short/long reference lists, then
 * handles the current picture's own reference marking (including the
 * second-field-of-a-pair cases) and enforces sps.ref_frame_count.
 * NOTE(review): interior lines (several case/brace closures, the sliding
 * window path, MMCO_LONG body header) are missing from this excerpt.
 */
3278 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3279 MpegEncContext * const s = &h->s;
3280 int i, av_uninit(j);
3281 int current_ref_assigned=0;
3282 Picture *av_uninit(pic);
3284 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3285 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3287 for(i=0; i<mmco_count; i++){
3288 int av_uninit(structure), av_uninit(frame_num);
3289 if(s->avctx->debug&FF_DEBUG_MMCO)
3290 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// Opcodes referencing a short-term picture: resolve pic_num to frame_num
// and structure, then look the picture up; a miss is only an error if the
// SHORT2LONG re-assignment target does not already hold that frame.
3292 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3293 || mmco[i].opcode == MMCO_SHORT2LONG){
3294 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3295 pic = find_short(h, frame_num, &j);
3297 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3298 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3299 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3304 switch(mmco[i].opcode){
3305 case MMCO_SHORT2UNUSED:
3306 if(s->avctx->debug&FF_DEBUG_MMCO)
3307 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// XOR with PICT_FRAME unreferences only the complementary field (or the
// whole frame when structure == PICT_FRAME makes the mask 0).
3308 remove_short(h, frame_num, structure ^ PICT_FRAME);
3310 case MMCO_SHORT2LONG:
// Evict any different picture occupying the target long-term slot first.
3311 if (h->long_ref[mmco[i].long_arg] != pic)
3312 remove_long(h, mmco[i].long_arg, 0);
3314 remove_short_at_index(h, j);
3315 h->long_ref[ mmco[i].long_arg ]= pic;
3316 if (h->long_ref[ mmco[i].long_arg ]){
3317 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3318 h->long_ref_count++;
3321 case MMCO_LONG2UNUSED:
3322 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3323 pic = h->long_ref[j];
3325 remove_long(h, j, structure ^ PICT_FRAME);
3326 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3327 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3330 // Comment below left from previous code as it is an interesting note.
3331 /* First field in pair is in short term list or
3332 * at a different long term index.
3333 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3334 * Report the problem and keep the pair where it is,
3335 * and mark this field valid.
// (MMCO_LONG path:) assign the current picture to the long-term slot,
// evicting a different occupant first.
3338 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3339 remove_long(h, mmco[i].long_arg, 0);
3341 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3342 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3343 h->long_ref_count++;
3346 s->current_picture_ptr->reference |= s->picture_structure;
3347 current_ref_assigned=1;
3349 case MMCO_SET_MAX_LONG:
3350 assert(mmco[i].long_arg <= 16);
3351 // just remove the long term which index is greater than new max
3352 for(j = mmco[i].long_arg; j<16; j++){
3353 remove_long(h, j, 0);
// (MMCO_RESET path:) drop every short- and long-term reference and reset
// the current picture's POC/frame_num state.
3357 while(h->short_ref_count){
3358 remove_short(h, h->short_ref[0]->frame_num, 0);
3360 for(j = 0; j < 16; j++) {
3361 remove_long(h, j, 0);
3363 s->current_picture_ptr->poc=
3364 s->current_picture_ptr->field_poc[0]=
3365 s->current_picture_ptr->field_poc[1]=
3369 s->current_picture_ptr->frame_num= 0;
3370 s->current_picture_ptr->mmco_reset=1;
3376 if (!current_ref_assigned) {
3377 /* Second field of complementary field pair; the first field of
3378 * which is already referenced. If short referenced, it
3379 * should be first entry in short_ref. If not, it must exist
3380 * in long_ref; trying to put it on the short list here is an
3381 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3383 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3384 /* Just mark the second field valid */
3385 s->current_picture_ptr->reference = PICT_FRAME;
3386 } else if (s->current_picture_ptr->long_ref) {
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3388 "assignment for second field "
3389 "in complementary field pair "
3390 "(first field is long term)\n");
// (Normal path:) insert the current picture at the head of short_ref;
// a pre-existing entry with the same frame_num indicates a corrupt stream.
3392 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3394 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3397 if(h->short_ref_count)
3398 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3400 h->short_ref[0]= s->current_picture_ptr;
3401 h->short_ref_count++;
3402 s->current_picture_ptr->reference |= s->picture_structure;
3406 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3408 /* We have too many reference frames, probably due to corrupted
3409 * stream. Need to discard one frame. Prevents overrun of the
3410 * short_ref and long_ref buffers.
3412 av_log(h->s.avctx, AV_LOG_ERROR,
3413 "number of reference frames exceeds max (probably "
3414 "corrupt input), discarding one\n");
// Prefer discarding a long-term ref only when no short-term ref exists;
// otherwise drop the oldest (last) short-term entry.
3416 if (h->long_ref_count && !h->short_ref_count) {
3417 for (i = 0; i < 16; ++i)
3422 remove_long(h, i, 0);
3424 pic = h->short_ref[h->short_ref_count - 1];
3425 remove_short(h, pic->frame_num, 0);
3429 print_short_term(h);
/*
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get a synthetic MMCO_LONG when long_term_reference_flag is
 * set; otherwise the adaptive MMCO list is read, or a sliding-window
 * MMCO_SHORT2UNUSED is synthesized when the DPB is full.
 * NOTE(review): interior lines (the no_output_of_prior_pics handling,
 * error returns, mmco_index updates) are missing from this excerpt.
 */
3434 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3435 MpegEncContext * const s = &h->s;
3439 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag: 1 -> broken_link 0, 0 -> -1.
3440 s->broken_link= get_bits1(gb) -1;
// long_term_reference_flag set: mark the IDR picture long-term at index 0.
3442 h->mmco[0].opcode= MMCO_LONG;
3443 h->mmco[0].long_arg= 0;
3447 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3448 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3449 MMCOOpcode opcode= get_ue_golomb_31(gb);
3451 h->mmco[i].opcode= opcode;
3452 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute pic num, wrapped to max_pic_num.
3453 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3454 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3455 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3459 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3460 unsigned int long_arg= get_ue_golomb_31(gb);
// Long-term index limit is 16 per frame, 32 only for per-field
// LONG2UNUSED in field pictures.
3461 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3462 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3465 h->mmco[i].long_arg= long_arg;
3468 if(opcode > (unsigned)MMCO_LONG){
3469 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3472 if(opcode == MMCO_END)
// Sliding window: when the DPB is full of references, synthesize an
// unreference of the oldest short-term picture (two for field pictures —
// one MMCO per field).
3477 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3479 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3480 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3481 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3482 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3484 if (FIELD_PICTURE) {
3485 h->mmco[0].short_pic_num *= 2;
3486 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3487 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/*
 * Derive the picture order count (POC) for the current picture per the
 * SPS poc_type (0: lsb/msb wraparound, 1: expected-delta cycle,
 * 2: implied by frame_num/frame order), and store the per-field POCs.
 * NOTE(review): several interior lines (field_poc[] declaration, the
 * poc_type==0 field_poc[0] assignment, parts of the type-1/2 branches)
 * are missing from this excerpt; comments cover visible code only.
 */
3497 static int init_poc(H264Context *h){
3498 MpegEncContext * const s = &h->s;
3499 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3501 Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture -> advance the offset.
3503 h->frame_num_offset= h->prev_frame_num_offset;
3504 if(h->frame_num < h->prev_frame_num)
3505 h->frame_num_offset += max_frame_num;
3507 if(h->sps.poc_type==0){
3508 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// Standard poc_msb wraparound detection (8.2.1.1): large lsb jumps in
// either direction adjust msb by one period.
3510 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3511 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3512 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3513 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3515 h->poc_msb = h->prev_poc_msb;
3516 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3518 field_poc[1] = h->poc_msb + h->poc_lsb;
3519 if(s->picture_structure == PICT_FRAME)
3520 field_poc[1] += h->delta_poc_bottom;
3521 }else if(h->sps.poc_type==1){
3522 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3525 if(h->sps.poc_cycle_length != 0)
3526 abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures use abs_frame_num - 1 (adjustment line missing
// from this excerpt).
3530 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3533 expected_delta_per_poc_cycle = 0;
3534 for(i=0; i < h->sps.poc_cycle_length; i++)
3535 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3537 if(abs_frame_num > 0){
3538 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3539 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3541 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3542 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3543 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3547 if(h->nal_ref_idc == 0)
3548 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3550 field_poc[0] = expectedpoc + h->delta_poc[0];
3551 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3553 if(s->picture_structure == PICT_FRAME)
3554 field_poc[1] += h->delta_poc[1];
// poc_type == 2: POC follows decoding order directly.
3556 int poc= 2*(h->frame_num_offset + h->frame_num);
// Only store the POC for the field(s) actually present in this picture.
3565 if(s->picture_structure != PICT_BOTTOM_FIELD)
3566 s->current_picture_ptr->field_poc[0]= field_poc[0];
3567 if(s->picture_structure != PICT_TOP_FIELD)
3568 s->current_picture_ptr->field_poc[1]= field_poc[1];
3569 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3576 * initialize scan tables
3578 static void init_scan_tables(H264Context *h){
3579 MpegEncContext * const s = &h->s;
// When the C IDCT is in use, coefficients are stored in the canonical
// order; otherwise a permuted order is used and the scans are remapped
// through T() to match the IDCT's expected layout.
3581 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3582 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3583 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3585 for(i=0; i<16; i++){
// Swap the two 2-bit halves of a 4-bit index (row/column transpose).
3586 #define T(x) (x>>2) | ((x<<2) & 0xF)
3587 h->zigzag_scan[i] = T(zigzag_scan[i]);
3588 h-> field_scan[i] = T( field_scan[i]);
3592 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3593 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3594 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3595 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3596 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3598 for(i=0; i<64; i++){
// Swap the two 3-bit halves of a 6-bit index (8x8 transpose).
3599 #define T(x) (x>>3) | ((x&7)<<3)
3600 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3601 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3602 h->field_scan8x8[i] = T(field_scan8x8[i]);
3603 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// qp==0 (lossless transform-bypass) blocks always use the unpermuted
// scan tables; otherwise the _q0 pointers alias the (possibly permuted)
// tables above.
3607 if(h->sps.transform_bypass){ //FIXME same ugly
3608 h->zigzag_scan_q0 = zigzag_scan;
3609 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3610 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3611 h->field_scan_q0 = field_scan;
3612 h->field_scan8x8_q0 = field_scan8x8;
3613 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3615 h->zigzag_scan_q0 = h->zigzag_scan;
3616 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3617 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3618 h->field_scan_q0 = h->field_scan;
3619 h->field_scan8x8_q0 = h->field_scan8x8;
3620 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
/*
 * Finish decoding of the current field/frame: run reference marking,
 * roll the POC/frame_num state forward, and notify hwaccel/VDPAU that
 * the picture is complete.
 * NOTE(review): interior lines (MPV_frame_end / error-resilience calls)
 * are missing from this excerpt; comments cover visible code only.
 */
3624 static void field_end(H264Context *h){
3625 MpegEncContext * const s = &h->s;
3626 AVCodecContext * const avctx= s->avctx;
3629 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
3630 s->current_picture_ptr->pict_type= s->pict_type;
3632 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3633 ff_vdpau_h264_set_reference_frames(s);
// Apply the slice's MMCOs, then carry POC/frame_num state to the next pic.
3636 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
3637 h->prev_poc_msb= h->poc_msb;
3638 h->prev_poc_lsb= h->poc_lsb;
3640 h->prev_frame_num_offset= h->frame_num_offset;
3641 h->prev_frame_num= h->frame_num;
3643 if (avctx->hwaccel) {
3644 if (avctx->hwaccel->end_frame(avctx) < 0)
3645 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
3648 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3649 ff_vdpau_h264_picture_complete(s);
3652 * FIXME: Error handling code does not seem to support interlaced
3653 * when slices span multiple rows
3654 * The ff_er_add_slice calls don't work right for bottom
3655 * fields; they cause massive erroneous error concealing
3656 * Error marking covers both fields (top and bottom).
3657 * This causes a mismatched s->error_count
3658 * and a bad error table. Further, the error count goes to
3659 * INT_MAX when called for bottom field, because mb_y is
3660 * past end by one (callers fault) and resync_mb_y != 0
3661 * causes problems for the first MB line, too.
3672 * Replicates H264 "master" context to thread contexts.
/*
 * Shallow-copies the per-frame state a slice thread needs: current
 * picture pointers, line sizes, POC/frame_num carry-over, and the full
 * reference-list / dequant tables. Pointer members are shared (shallow),
 * so the master context owns the underlying storage.
 */
3674 static void clone_slice(H264Context *dst, H264Context *src)
3676 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3677 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3678 dst->s.current_picture = src->s.current_picture;
3679 dst->s.linesize = src->s.linesize;
3680 dst->s.uvlinesize = src->s.uvlinesize;
3681 dst->s.first_field = src->s.first_field;
3683 dst->prev_poc_msb = src->prev_poc_msb;
3684 dst->prev_poc_lsb = src->prev_poc_lsb;
3685 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3686 dst->prev_frame_num = src->prev_frame_num;
3687 dst->short_ref_count = src->short_ref_count;
3689 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3690 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3691 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3692 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3694 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3695 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3699 * decodes a slice header.
3700 * This will also call MPV_common_init() and frame_start() as needed.
3702 * @param h h264context
3703 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3705 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3707 static int decode_slice_header(H264Context *h, H264Context *h0){
3708 MpegEncContext * const s = &h->s;
3709 MpegEncContext * const s0 = &h0->s;
3710 unsigned int first_mb_in_slice;
3711 unsigned int pps_id;
3712 int num_ref_idx_active_override_flag;
3713 unsigned int slice_type, tmp, i, j;
3714 int default_ref_list_done = 0;
3715 int last_pic_structure;
3717 s->dropable= h->nal_ref_idc == 0;
3719 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3720 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3721 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3723 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3724 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3727 first_mb_in_slice= get_ue_golomb(&s->gb);
3729 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
3730 if(h0->current_slice && FIELD_PICTURE){
3734 h0->current_slice = 0;
3735 if (!s0->first_field)
3736 s->current_picture_ptr= NULL;
3739 slice_type= get_ue_golomb_31(&s->gb);
3741 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3746 h->slice_type_fixed=1;
3748 h->slice_type_fixed=0;
3750 slice_type= golomb_to_pict_type[ slice_type ];
3751 if (slice_type == FF_I_TYPE
3752 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3753 default_ref_list_done = 1;
3755 h->slice_type= slice_type;
3756 h->slice_type_nos= slice_type & 3;
3758 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3759 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3760 av_log(h->s.avctx, AV_LOG_ERROR,
3761 "B picture before any references, skipping\n");
3765 pps_id= get_ue_golomb(&s->gb);
3766 if(pps_id>=MAX_PPS_COUNT){
3767 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3770 if(!h0->pps_buffers[pps_id]) {
3771 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
3774 h->pps= *h0->pps_buffers[pps_id];
3776 if(!h0->sps_buffers[h->pps.sps_id]) {
3777 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
3780 h->sps = *h0->sps_buffers[h->pps.sps_id];
3782 if(h == h0 && h->dequant_coeff_pps != pps_id){
3783 h->dequant_coeff_pps = pps_id;
3784 init_dequant_tables(h);
3787 s->mb_width= h->sps.mb_width;
3788 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3790 h->b_stride= s->mb_width*4;
3791 h->b8_stride= s->mb_width*2;
3793 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3794 if(h->sps.frame_mbs_only_flag)
3795 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3797 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3799 if (s->context_initialized
3800 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3802 return -1; // width / height changed during parallelized decoding
3804 flush_dpb(s->avctx);
3807 if (!s->context_initialized) {
3809 return -1; // we cant (re-)initialize context during parallel decoding
3810 if (MPV_common_init(s) < 0)
3813 h->prev_interlaced_frame = 1;
3815 init_scan_tables(h);
3818 for(i = 1; i < s->avctx->thread_count; i++) {
3820 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3821 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3822 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3825 init_scan_tables(c);
3829 for(i = 0; i < s->avctx->thread_count; i++)
3830 if(context_init(h->thread_context[i]) < 0)
3833 s->avctx->width = s->width;
3834 s->avctx->height = s->height;
3835 s->avctx->sample_aspect_ratio= h->sps.sar;
3836 if(!s->avctx->sample_aspect_ratio.den)
3837 s->avctx->sample_aspect_ratio.den = 1;
3839 if(h->sps.timing_info_present_flag){
3840 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
3841 if(h->x264_build > 0 && h->x264_build < 44)
3842 s->avctx->time_base.den *= 2;
3843 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3844 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3848 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3851 h->mb_aff_frame = 0;
3852 last_pic_structure = s0->picture_structure;
3853 if(h->sps.frame_mbs_only_flag){
3854 s->picture_structure= PICT_FRAME;
3856 if(get_bits1(&s->gb)) { //field_pic_flag
3857 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3859 s->picture_structure= PICT_FRAME;
3860 h->mb_aff_frame = h->sps.mb_aff;
3863 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3865 if(h0->current_slice == 0){
3866 while(h->frame_num != h->prev_frame_num &&
3867 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3868 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3869 if (frame_start(h) < 0)
3871 h->prev_frame_num++;
3872 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3873 s->current_picture_ptr->frame_num= h->prev_frame_num;
3874 execute_ref_pic_marking(h, NULL, 0);
3877 /* See if we have a decoded first field looking for a pair... */
3878 if (s0->first_field) {
3879 assert(s0->current_picture_ptr);
3880 assert(s0->current_picture_ptr->data[0]);
3881 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3883 /* figure out if we have a complementary field pair */
3884 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3886 * Previous field is unmatched. Don't display it, but let it
3887 * remain for reference if marked as such.
3889 s0->current_picture_ptr = NULL;
3890 s0->first_field = FIELD_PICTURE;
3893 if (h->nal_ref_idc &&
3894 s0->current_picture_ptr->reference &&
3895 s0->current_picture_ptr->frame_num != h->frame_num) {
3897 * This and previous field were reference, but had
3898 * different frame_nums. Consider this field first in
3899 * pair. Throw away previous field except for reference
3902 s0->first_field = 1;
3903 s0->current_picture_ptr = NULL;
3906 /* Second field in complementary pair */
3907 s0->first_field = 0;
3912 /* Frame or first field in a potentially complementary pair */
3913 assert(!s0->current_picture_ptr);
3914 s0->first_field = FIELD_PICTURE;
3917 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3918 s0->first_field = 0;
3925 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3927 assert(s->mb_num == s->mb_width * s->mb_height);
3928 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3929 first_mb_in_slice >= s->mb_num){
3930 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3933 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3934 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3935 if (s->picture_structure == PICT_BOTTOM_FIELD)
3936 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3937 assert(s->mb_y < s->mb_height);
3939 if(s->picture_structure==PICT_FRAME){
3940 h->curr_pic_num= h->frame_num;
3941 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3943 h->curr_pic_num= 2*h->frame_num + 1;
3944 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3947 if(h->nal_unit_type == NAL_IDR_SLICE){
3948 get_ue_golomb(&s->gb); /* idr_pic_id */
3951 if(h->sps.poc_type==0){
3952 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3954 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3955 h->delta_poc_bottom= get_se_golomb(&s->gb);
3959 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3960 h->delta_poc[0]= get_se_golomb(&s->gb);
3962 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3963 h->delta_poc[1]= get_se_golomb(&s->gb);
3968 if(h->pps.redundant_pic_cnt_present){
3969 h->redundant_pic_count= get_ue_golomb(&s->gb);
3972 //set defaults, might be overridden a few lines later
3973 h->ref_count[0]= h->pps.ref_count[0];
3974 h->ref_count[1]= h->pps.ref_count[1];
3976 if(h->slice_type_nos != FF_I_TYPE){
3977 if(h->slice_type_nos == FF_B_TYPE){
3978 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3980 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3982 if(num_ref_idx_active_override_flag){
3983 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3984 if(h->slice_type_nos==FF_B_TYPE)
3985 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3987 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3988 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3989 h->ref_count[0]= h->ref_count[1]= 1;
3993 if(h->slice_type_nos == FF_B_TYPE)
4000 if(!default_ref_list_done){
4001 fill_default_ref_list(h);
4004 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
4007 if(h->slice_type_nos!=FF_I_TYPE){
4008 s->last_picture_ptr= &h->ref_list[0][0];
4009 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
4011 if(h->slice_type_nos==FF_B_TYPE){
4012 s->next_picture_ptr= &h->ref_list[1][0];
4013 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
4016 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4017 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4018 pred_weight_table(h);
4019 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4020 implicit_weight_table(h);
4023 for (i = 0; i < 2; i++) {
4024 h->luma_weight_flag[i] = 0;
4025 h->chroma_weight_flag[i] = 0;
4030 decode_ref_pic_marking(h0, &s->gb);
4033 fill_mbaff_ref_list(h);
4035 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
4036 direct_dist_scale_factor(h);
4037 direct_ref_list_init(h);
4039 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4040 tmp = get_ue_golomb_31(&s->gb);
4042 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4045 h->cabac_init_idc= tmp;
4048 h->last_qscale_diff = 0;
4049 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4051 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4055 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4056 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4057 //FIXME qscale / qp ... stuff
4058 if(h->slice_type == FF_SP_TYPE){
4059 get_bits1(&s->gb); /* sp_for_switch_flag */
4061 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4062 get_se_golomb(&s->gb); /* slice_qs_delta */
4065 h->deblocking_filter = 1;
4066 h->slice_alpha_c0_offset = 0;
4067 h->slice_beta_offset = 0;
4068 if( h->pps.deblocking_filter_parameters_present ) {
4069 tmp= get_ue_golomb_31(&s->gb);
4071 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4074 h->deblocking_filter= tmp;
4075 if(h->deblocking_filter < 2)
4076 h->deblocking_filter^= 1; // 1<->0
4078 if( h->deblocking_filter ) {
4079 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4080 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4084 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4085 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4086 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4087 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4088 h->deblocking_filter= 0;
4090 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4091 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4092 /* Cheat slightly for speed:
4093 Do not bother to deblock across slices. */
4094 h->deblocking_filter = 2;
4096 h0->max_contexts = 1;
4097 if(!h0->single_decode_warning) {
4098 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4099 h0->single_decode_warning = 1;
4102 return 1; // deblocking switched inside frame
4107 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4108 slice_group_change_cycle= get_bits(&s->gb, ?);
4111 h0->last_slice_type = slice_type;
4112 h->slice_num = ++h0->current_slice;
4113 if(h->slice_num >= MAX_SLICES){
4114 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4118 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4122 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4123 +(h->ref_list[j][i].reference&3);
4126 for(i=16; i<48; i++)
4127 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4128 +(h->ref_list[j][i].reference&3);
4131 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4132 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4134 s->avctx->refs= h->sps.ref_frame_count;
4136 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4137 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4139 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4141 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4142 pps_id, h->frame_num,
4143 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4144 h->ref_count[0], h->ref_count[1],
4146 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4148 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4149 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
// Reads a CAVLC level_prefix: a run of leading zero bits terminated by a 1.
// NOTE(review): this listing is elided (original line numbers jump) — the
// declarations of buf/log and the trailing "return log-1;" plus closing
// brace are not visible here; confirm against the full source.
4159 static inline int get_level_prefix(GetBitContext *gb){
4163 OPEN_READER(re, gb);
4164 UPDATE_CACHE(re, gb);
4165 buf=GET_CACHE(re, gb);
// av_log2 finds the highest set bit of the 32-bit cache; log is the number
// of bits consumed, i.e. the zero-run length plus the terminating 1 bit.
4167 log= 32 - av_log2(buf);
// Debug-only trace of the consumed prefix (presumably compiled out unless
// TRACE is enabled — the guarding #ifdef is elided here).
4169 print_bin(buf>>(32-log), log);
4170 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4173 LAST_SKIP_BITS(re, gb, log);
4174 CLOSE_READER(re, gb);
// Returns nonzero when the four packed 16-bit sub_mb_type lanes (tested in
// parallel via the 0x0001000100010001 lane mask) all allow an 8x8 transform.
// NOTE(review): the "else" line between the two returns and the closing
// brace are elided in this listing.
4179 static inline int get_dct8x8_allowed(H264Context *h){
4180 if(h->sps.direct_8x8_inference_flag)
// With direct_8x8_inference, direct partitions are implicitly 8x8, so
// MB_TYPE_DIRECT2 does not need to be excluded.
4181 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4183 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
// NOTE(review): this whole function is an elided listing (original line
// numbers jump): several else branches, error returns, closing braces and
// the declarations of level[16]/prefix/suffix_length/mask are missing.
// Comments below describe only what the visible lines show.
4187 * decodes a residual block.
4188 * @param n block index
4189 * @param scantable scantable
4190 * @param max_coeff number of coefficients in the block
4191 * @return <0 if an error occurred
4193 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4194 MpegEncContext * const s = &h->s;
// Maps the predicted nonzero-coefficient count (0..16) to one of the four
// coeff_token VLC tables declared at the top of the file.
4195 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4197 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4199 //FIXME put trailing_onex into the context
// --- coeff_token: picks the VLC by block kind (chroma DC / luma DC / AC) ---
4201 if(n == CHROMA_DC_BLOCK_INDEX){
4202 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
// coeff_token packs total_coeff in the high bits and trailing_ones in the
// low two bits.
4203 total_coeff= coeff_token>>2;
4205 if(n == LUMA_DC_BLOCK_INDEX){
4206 total_coeff= pred_non_zero_count(h, 0);
4207 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4208 total_coeff= coeff_token>>2;
4210 total_coeff= pred_non_zero_count(h, n);
4211 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4212 total_coeff= coeff_token>>2;
4213 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4217 //FIXME set last_non_zero?
// Bitstream sanity check: more coefficients than the block can hold means
// corrupted data.
4221 if(total_coeff > (unsigned)max_coeff) {
4222 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4226 trailing_ones= coeff_token&3;
4227 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4228 assert(total_coeff<=16);
// --- trailing ones: up to 3 sign bits, peeked at once and mapped to +/-1 ---
4230 i = show_bits(gb, 3);
4231 skip_bits(gb, trailing_ones);
4232 level[0] = 1-((i&4)>>1);
4233 level[1] = 1-((i&2) );
4234 level[2] = 1-((i&1)<<1);
// --- remaining levels: table-accelerated prefix/suffix decode ---
4236 if(trailing_ones<total_coeff) {
4238 int suffix_length = total_coeff > 10 && trailing_ones < 3;
// cavlc_level_tab fast path: a LEVEL_TAB_BITS-wide peek resolves short
// codes directly; values >= 100 encode "prefix only, finish manually".
4239 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4240 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4242 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4243 if(level_code >= 100){
4244 prefix= level_code - 100;
4245 if(prefix == LEVEL_TAB_BITS)
4246 prefix += get_level_prefix(gb);
4248 //first coefficient has suffix_length equal to 0 or 1
4249 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4251 level_code= (prefix<<1) + get_bits1(gb); //part
4253 level_code= prefix; //part
4254 }else if(prefix==14){
4256 level_code= (prefix<<1) + get_bits1(gb); //part
4258 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape code with a (prefix-3)-bit suffix.
4260 level_code= 30 + get_bits(gb, prefix-3); //part
4262 level_code += (1<<(prefix-3))-4096;
4265 if(trailing_ones < 3) level_code += 2;
// Zigzag decode: even codes are positive, odd negative (branchless via mask).
4268 mask= -(level_code&1);
4269 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4271 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4274 if(level_code + 3U > 6U)
4276 level[trailing_ones]= level_code;
4279 //remaining coefficients have suffix_length > 0
4280 for(i=trailing_ones+1;i<total_coeff;i++) {
// suffix_length grows once |level| exceeds this threshold (checked below).
4281 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4282 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4283 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4285 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4286 if(level_code >= 100){
4287 prefix= level_code - 100;
4288 if(prefix == LEVEL_TAB_BITS){
4289 prefix += get_level_prefix(gb);
4292 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4294 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4296 level_code += (1<<(prefix-3))-4096;
4298 mask= -(level_code&1);
4299 level_code= (((2+level_code)>>1) ^ mask) - mask;
4301 level[i]= level_code;
4303 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// --- total_zeros: skipped entirely when the block is already full ---
4308 if(total_coeff == max_coeff)
4311 if(n == CHROMA_DC_BLOCK_INDEX)
4312 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4314 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- scatter levels into the block via run_before; two copies of the loop:
// one without dequant (qmul unused) and one applying (x*qmul[j]+32)>>6 ---
4317 coeff_num = zeros_left + total_coeff - 1;
4318 j = scantable[coeff_num];
4320 block[j] = level[0];
4321 for(i=1;i<total_coeff;i++) {
4324 else if(zeros_left < 7){
4325 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4327 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4329 zeros_left -= run_before;
4330 coeff_num -= 1 + run_before;
4331 j= scantable[ coeff_num ];
4336 block[j] = (level[0] * qmul[j] + 32)>>6;
4337 for(i=1;i<total_coeff;i++) {
4340 else if(zeros_left < 7){
4341 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4343 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4345 zeros_left -= run_before;
4346 coeff_num -= 1 + run_before;
4347 j= scantable[ coeff_num ];
4349 block[j]= (level[i] * qmul[j] + 32)>>6;
// Final consistency check: a negative zeros_left means the runs overran.
4354 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Predicts the MBAFF field-decoding flag for a skipped macroblock pair from
// the left neighbour, falling back to the top neighbour (the final ": 0"
// fallback of the conditional chain is elided in this listing).
4361 static void predict_field_decoding_flag(H264Context *h){
4362 MpegEncContext * const s = &h->s;
4363 const int mb_xy= h->mb_xy;
// Neighbours only count if they belong to the same slice.
4364 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4365 ? s->current_picture.mb_type[mb_xy-1]
4366 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4367 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4369 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
// NOTE(review): elided listing — the function opener, the mb_type
// declaration, the MBAFF condition guarding line 4384 and several closing
// braces are missing between the visible lines.
4373 * decodes a P_SKIP or B_SKIP macroblock
4375 static void decode_mb_skip(H264Context *h){
4376 MpegEncContext * const s = &h->s;
4377 const int mb_xy= h->mb_xy;
// A skipped MB carries no residual: clear all nonzero-count state.
4380 memset(h->non_zero_count[mb_xy], 0, 16);
4381 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4384 mb_type|= MB_TYPE_INTERLACED;
4386 if( h->slice_type_nos == FF_B_TYPE )
4388 // just for fill_caches. pred_direct_motion will set the real mb_type
4389 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4391 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
// B_SKIP: motion comes from direct prediction.
4392 pred_direct_motion(h, &mb_type);
4393 mb_type|= MB_TYPE_SKIP;
4398 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4400 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
// P_SKIP: predicted MV with ref index 0 replicated over the whole 16x16 MB.
4401 pred_pskip_motion(h, &mx, &my);
4402 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4403 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4406 write_back_motion(h, mb_type);
// Commit per-MB bookkeeping for deblocking and neighbour prediction.
4407 s->current_picture.mb_type[mb_xy]= mb_type;
4408 s->current_picture.qscale_table[mb_xy]= s->qscale;
4409 h->slice_table[ mb_xy ]= h->slice_num;
4410 h->prev_mb_skipped= 1;
// NOTE(review): heavily elided listing — else branches, error returns,
// loop headers (e.g. the for(i=0;i<4;i++) loops over sub_mb_type), the
// decode_intra_mb label and many closing braces are missing. Comments mark
// the visible decode phases only; verify details against the full source.
4414 * decodes a macroblock
4415 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4417 static int decode_mb_cavlc(H264Context *h){
4418 MpegEncContext * const s = &h->s;
4420 int partition_count;
4421 unsigned int mb_type, cbp;
4422 int dct8x8_allowed= h->pps.transform_8x8_mode;
4424 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4426 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4427 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- phase 1: mb_skip_run handling for P/B slices ---
4429 if(h->slice_type_nos != FF_I_TYPE){
4430 if(s->mb_skip_run==-1)
4431 s->mb_skip_run= get_ue_golomb(&s->gb);
4433 if (s->mb_skip_run--) {
// MBAFF: the field flag is read on the last skipped MB of a pair,
// otherwise predicted from neighbours.
4434 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4435 if(s->mb_skip_run==0)
4436 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4438 predict_field_decoding_flag(h);
4445 if( (s->mb_y&1) == 0 )
4446 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4449 h->prev_mb_skipped= 0;
// --- phase 2: mb_type, remapped through the per-slice-type info tables ---
4451 mb_type= get_ue_golomb(&s->gb);
4452 if(h->slice_type_nos == FF_B_TYPE){
4454 partition_count= b_mb_type_info[mb_type].partition_count;
4455 mb_type= b_mb_type_info[mb_type].type;
4458 goto decode_intra_mb;
4460 }else if(h->slice_type_nos == FF_P_TYPE){
4462 partition_count= p_mb_type_info[mb_type].partition_count;
4463 mb_type= p_mb_type_info[mb_type].type;
4466 goto decode_intra_mb;
4469 assert(h->slice_type_nos == FF_I_TYPE);
4470 if(h->slice_type == FF_SI_TYPE && mb_type)
4474 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4478 cbp= i_mb_type_info[mb_type].cbp;
4479 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4480 mb_type= i_mb_type_info[mb_type].type;
4484 mb_type |= MB_TYPE_INTERLACED;
4486 h->slice_table[ mb_xy ]= h->slice_num;
// --- phase 3: I_PCM — raw samples, byte-aligned, stored straight into h->mb ---
4488 if(IS_INTRA_PCM(mb_type)){
4491 // We assume these blocks are very rare so we do not optimize it.
4492 align_get_bits(&s->gb);
4494 // The pixels are stored in the same order as levels in h->mb array.
4495 for(x=0; x < (CHROMA ? 384 : 256); x++){
4496 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4499 // In deblocking, the quantizer is 0
4500 s->current_picture.qscale_table[mb_xy]= 0;
4501 // All coeffs are present
4502 memset(h->non_zero_count[mb_xy], 16, 16);
4504 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF: ref counts are doubled while decoding a field MB of a pair
// (undone at lines 4916-4917 below).
4509 h->ref_count[0] <<= 1;
4510 h->ref_count[1] <<= 1;
4513 fill_caches(h, mb_type, 0);
// --- phase 4a: intra prediction modes ---
4516 if(IS_INTRA(mb_type)){
4518 // init_top_left_availability(h);
4519 if(IS_INTRA4x4(mb_type)){
4522 if(dct8x8_allowed && get_bits1(&s->gb)){
4523 mb_type |= MB_TYPE_8x8DCT;
4527 // fill_intra4x4_pred_table(h);
// di is presumably 4 for 8x8-DCT MBs, 1 otherwise (declaration elided).
4528 for(i=0; i<16; i+=di){
4529 int mode= pred_intra_mode(h, i);
4531 if(!get_bits1(&s->gb)){
// rem_intra_pred_mode: skip over the predicted mode.
4532 const int rem_mode= get_bits(&s->gb, 3);
4533 mode = rem_mode + (rem_mode >= mode);
4537 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4539 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4541 write_back_intra_pred_mode(h);
4542 if( check_intra4x4_pred_mode(h) < 0)
4545 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4546 if(h->intra16x16_pred_mode < 0)
4550 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4553 h->chroma_pred_mode= pred_mode;
// --- phase 4b: 8x8 sub-partitioned inter MB ---
4555 }else if(partition_count==4){
4556 int i, j, sub_partition_count[4], list, ref[2][4];
4558 if(h->slice_type_nos == FF_B_TYPE){
4560 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4561 if(h->sub_mb_type[i] >=13){
4562 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4565 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4566 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4568 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4569 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4570 pred_direct_motion(h, &mb_type);
// Mark centre positions unavailable so the MV prediction below does
// not use direct-filled values as neighbours.
4571 h->ref_cache[0][scan8[4]] =
4572 h->ref_cache[1][scan8[4]] =
4573 h->ref_cache[0][scan8[12]] =
4574 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4577 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4579 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4580 if(h->sub_mb_type[i] >=4){
4581 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4584 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4585 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list per 8x8 block (ref_idx coding: 1 bit when
// exactly two refs, ue(v) otherwise).
4589 for(list=0; list<h->list_count; list++){
4590 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4592 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4593 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4597 }else if(ref_count == 2){
4598 tmp= get_bits1(&s->gb)^1;
4600 tmp= get_ue_golomb_31(&s->gb);
4602 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4615 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per sub-partition; each shape replicates mx/my over the
// cache cells it covers.
4617 for(list=0; list<h->list_count; list++){
4619 if(IS_DIRECT(h->sub_mb_type[i])) {
4620 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4623 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4624 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4626 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4627 const int sub_mb_type= h->sub_mb_type[i];
4628 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4629 for(j=0; j<sub_partition_count[i]; j++){
4631 const int index= 4*i + block_width*j;
4632 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4633 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4634 mx += get_se_golomb(&s->gb);
4635 my += get_se_golomb(&s->gb);
4636 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4638 if(IS_SUB_8X8(sub_mb_type)){
4640 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4642 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4643 }else if(IS_SUB_8X4(sub_mb_type)){
4644 mv_cache[ 1 ][0]= mx;
4645 mv_cache[ 1 ][1]= my;
4646 }else if(IS_SUB_4X8(sub_mb_type)){
4647 mv_cache[ 8 ][0]= mx;
4648 mv_cache[ 8 ][1]= my;
4650 mv_cache[ 0 ][0]= mx;
4651 mv_cache[ 0 ][1]= my;
// Unused-list case: zero-fill via 32-bit stores (body elided).
4654 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- phase 4c: direct and non-partitioned inter MBs ---
4660 }else if(IS_DIRECT(mb_type)){
4661 pred_direct_motion(h, &mb_type);
4662 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4664 int list, mx, my, i;
4665 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4666 if(IS_16X16(mb_type)){
4667 for(list=0; list<h->list_count; list++){
4669 if(IS_DIR(mb_type, 0, list)){
4670 if(h->ref_count[list]==1){
4672 }else if(h->ref_count[list]==2){
4673 val= get_bits1(&s->gb)^1;
4675 val= get_ue_golomb_31(&s->gb);
4676 if(val >= h->ref_count[list]){
4677 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4682 val= LIST_NOT_USED&0xFF;
4683 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4685 for(list=0; list<h->list_count; list++){
4687 if(IS_DIR(mb_type, 0, list)){
4688 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4689 mx += get_se_golomb(&s->gb);
4690 my += get_se_golomb(&s->gb);
4691 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4693 val= pack16to32(mx,my);
4696 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8: two horizontal halves (i presumably iterates 0..1; loop elided).
4699 else if(IS_16X8(mb_type)){
4700 for(list=0; list<h->list_count; list++){
4703 if(IS_DIR(mb_type, i, list)){
4704 if(h->ref_count[list] == 1){
4706 }else if(h->ref_count[list] == 2){
4707 val= get_bits1(&s->gb)^1;
4709 val= get_ue_golomb_31(&s->gb);
4710 if(val >= h->ref_count[list]){
4711 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4716 val= LIST_NOT_USED&0xFF;
4717 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4720 for(list=0; list<h->list_count; list++){
4723 if(IS_DIR(mb_type, i, list)){
4724 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4725 mx += get_se_golomb(&s->gb);
4726 my += get_se_golomb(&s->gb);
4727 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4729 val= pack16to32(mx,my);
4732 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: two vertical halves.
4736 assert(IS_8X16(mb_type));
4737 for(list=0; list<h->list_count; list++){
4740 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4741 if(h->ref_count[list]==1){
4743 }else if(h->ref_count[list]==2){
4744 val= get_bits1(&s->gb)^1;
4746 val= get_ue_golomb_31(&s->gb);
4747 if(val >= h->ref_count[list]){
4748 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4753 val= LIST_NOT_USED&0xFF;
4754 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4757 for(list=0; list<h->list_count; list++){
4760 if(IS_DIR(mb_type, i, list)){
4761 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4762 mx += get_se_golomb(&s->gb);
4763 my += get_se_golomb(&s->gb);
4764 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4766 val= pack16to32(mx,my);
4769 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4775 if(IS_INTER(mb_type))
4776 write_back_motion(h, mb_type);
// --- phase 5: coded_block_pattern (not coded for Intra16x16 MBs) ---
4778 if(!IS_INTRA16x16(mb_type)){
4779 cbp= get_ue_golomb(&s->gb);
4781 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4786 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4787 else cbp= golomb_to_inter_cbp [cbp];
// The _gray tables are presumably the monochrome (no-chroma) variants.
4789 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4790 else cbp= golomb_to_inter_cbp_gray[cbp];
4795 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4796 if(get_bits1(&s->gb)){
4797 mb_type |= MB_TYPE_8x8DCT;
4798 h->cbp_table[mb_xy]= cbp;
4801 s->current_picture.mb_type[mb_xy]= mb_type;
// --- phase 6: residuals (mb_qp_delta + luma/chroma coefficient blocks) ---
4803 if(cbp || IS_INTRA16x16(mb_type)){
4804 int i8x8, i4x4, chroma_idx;
4806 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4807 const uint8_t *scan, *scan8x8, *dc_scan;
4809 // fill_non_zero_count_cache(h);
// Scan tables depend on field/frame coding; the *_q0 variants appear to
// be the qscale==0 special case.
4811 if(IS_INTERLACED(mb_type)){
4812 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4813 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4814 dc_scan= luma_dc_field_scan;
4816 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4817 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4818 dc_scan= luma_dc_zigzag_scan;
4821 dquant= get_se_golomb(&s->gb);
4823 if( dquant > 25 || dquant < -26 ){
4824 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec's mb_qp_delta semantics.
4828 s->qscale += dquant;
4829 if(((unsigned)s->qscale) > 51){
4830 if(s->qscale<0) s->qscale+= 52;
4831 else s->qscale-= 52;
4834 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4835 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4836 if(IS_INTRA16x16(mb_type)){
// Intra16x16: separate luma DC block, then 15-coefficient AC blocks.
4837 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4838 return -1; //FIXME continue if partitioned and other return -1 too
4841 assert((cbp&15) == 0 || (cbp&15) == 15);
4844 for(i8x8=0; i8x8<4; i8x8++){
4845 for(i4x4=0; i4x4<4; i4x4++){
4846 const int index= i4x4 + 4*i8x8;
4847 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4853 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-Intra16x16 luma: per-8x8 blocks gated by the low 4 cbp bits.
4856 for(i8x8=0; i8x8<4; i8x8++){
4857 if(cbp & (1<<i8x8)){
4858 if(IS_8x8DCT(mb_type)){
4859 DCTELEM *buf = &h->mb[64*i8x8];
4861 for(i4x4=0; i4x4<4; i4x4++){
4862 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4863 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4866 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4867 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4869 for(i4x4=0; i4x4<4; i4x4++){
4870 const int index= i4x4 + 4*i8x8;
4872 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4878 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4879 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: DC blocks (cbp bit 4/5 gating elided), then AC blocks.
4885 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4886 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4892 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4893 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4894 for(i4x4=0; i4x4<4; i4x4++){
4895 const int index= 16 + 4*chroma_idx + i4x4;
4896 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4902 uint8_t * const nnz= &h->non_zero_count_cache[0];
4903 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4904 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4907 uint8_t * const nnz= &h->non_zero_count_cache[0];
4908 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4909 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4910 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4912 s->current_picture.qscale_table[mb_xy]= s->qscale;
4913 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling from lines 4509-4510.
4916 h->ref_count[0] >>= 1;
4917 h->ref_count[1] >>= 1;
// Decodes mb_field_decoding_flag with CABAC; ctx (0..2) counts how many of
// the left/top neighbouring MB pairs are field-coded. The ctx++ increments
// inside both ifs and the closing braces are elided in this listing.
4923 static int decode_cabac_field_decoding_flag(H264Context *h) {
4924 MpegEncContext * const s = &h->s;
4925 const int mb_x = s->mb_x;
// MBAFF addresses the top MB of the pair, hence &~1 and the -2 row offset.
4926 const int mb_y = s->mb_y & ~1;
4927 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4928 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4930 unsigned int ctx = 0;
4932 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4935 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4939 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes an intra mb_type (0=I4x4, 1..24=I16x16 variants, 25=I_PCM) from
// CABAC state starting at ctx_base. intra_slice selects the neighbour-based
// context for the I4x4/I16x16 decision; in inter slices a fixed context is
// used. NOTE(review): elided listing — the mb_type declaration and several
// braces between the visible lines are missing.
4942 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4943 uint8_t *state= &h->cabac_state[ctx_base];
4947 MpegEncContext * const s = &h->s;
4948 const int mba_xy = h->left_mb_xy[0];
4949 const int mbb_xy = h->top_mb_xy;
// ctx counts non-I4x4 same-slice neighbours (ctx++ lines elided).
4951 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4953 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4955 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4956 return 0; /* I4x4 */
4959 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4960 return 0; /* I4x4 */
4963 if( get_cabac_terminate( &h->cabac ) )
4964 return 25; /* PCM */
// I16x16: fold cbp_luma, cbp_chroma and the prediction mode into mb_type.
4966 mb_type = 1; /* I16x16 */
4967 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4968 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4969 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4970 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4971 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// Decodes mb_type for a B slice (CABAC contexts 27..31). Returns the raw
// B mb_type index, dispatching to decode_cabac_intra_mb_type for the intra
// escape. NOTE(review): elided listing — ctx/bits declarations, the ctx++
// increments and the "if( bits < 8 )" guard before line 5001 are missing.
4975 static int decode_cabac_mb_type_b( H264Context *h ) {
4976 MpegEncContext * const s = &h->s;
4978 const int mba_xy = h->left_mb_xy[0];
4979 const int mbb_xy = h->top_mb_xy;
4982 assert(h->slice_type_nos == FF_B_TYPE);
// ctx counts same-slice neighbours that are not direct-coded.
4984 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4986 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4989 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4990 return 0; /* B_Direct_16x16 */
4992 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4993 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four-bit suffix distinguishing the remaining B types.
4996 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4997 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4998 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4999 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5001 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5002 else if( bits == 13 ) {
// Intra escape: intra mb_types offset by 23 in the B-slice numbering.
5003 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5004 } else if( bits == 14 )
5005 return 11; /* B_L1_L0_8x16 */
5006 else if( bits == 15 )
5007 return 22; /* B_8x8 */
5009 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5010 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
// Decodes mb_skip_flag; ctx (0..2) counts non-skipped same-slice left/top
// neighbours. In B slices the context base shifts from 11 to 24 (line 5045,
// elided — only the "+13" adjustment path at 5044 is visible).
// NOTE(review): elided listing — mba_xy/mbb_xy/ctx declarations, several
// condition lines and the ctx++ increments are missing.
5013 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5014 MpegEncContext * const s = &h->s;
5018 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
// MBAFF: neighbours are resolved at MB-pair granularity, adjusting for
// field/frame mismatch between the current and neighbour pairs.
5019 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5022 && h->slice_table[mba_xy] == h->slice_num
5023 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5024 mba_xy += s->mb_stride;
5026 mbb_xy = mb_xy - s->mb_stride;
5028 && h->slice_table[mbb_xy] == h->slice_num
5029 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5030 mbb_xy -= s->mb_stride;
5032 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5034 int mb_xy = h->mb_xy;
5036 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5039 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5041 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5044 if( h->slice_type_nos == FF_B_TYPE )
5046 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// Decodes an intra4x4 prediction mode: one "use predicted mode" bit, else a
// 3-bit rem_intra_pred_mode that skips over pred_mode. NOTE(review): the
// mode declaration, the "return pred_mode;" for the first branch and the
// final returns are elided here.
5049 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5052 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// Three fixed-context bits form rem_intra_pred_mode (LSB first).
5055 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5056 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5057 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5059 if( mode >= pred_mode )
// Decodes intra_chroma_pred_mode (0..3) as a truncated unary code; the
// first bin is context-selected by which neighbours use a nonzero mode.
// NOTE(review): ctx declaration, ctx++ lines and the return statements for
// each unary level are elided in this listing.
5065 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5066 const int mba_xy = h->left_mb_xy[0];
5067 const int mbb_xy = h->top_mb_xy;
5071 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5072 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5075 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5078 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5081 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5083 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Decodes the 4-bit luma coded_block_pattern. Each bit's context depends on
// the corresponding 8x8 block of the left/top neighbour cbp (or the bits
// already decoded for this MB). A neighbour outside the slice yields -1,
// whose set bits make the !(... & mask) tests evaluate to 0.
// NOTE(review): the final "return cbp;" and closing brace are elided.
5089 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5090 int cbp_b, cbp_a, ctx, cbp = 0;
5092 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5093 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5095 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5096 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5097 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5098 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5099 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5100 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5101 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5102 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// Decodes the chroma cbp (0 = none, 1 = DC only, 2 = DC+AC) as two bins,
// each context-conditioned on the neighbours' chroma cbp (bits 4-5 of the
// stored cbp). NOTE(review): the cbp_a/cbp_b/ctx declarations, the early
// "return 0" and a "ctx = 4" reset before the second bin are elided.
5105 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5109 cbp_a = (h->left_cbp>>4)&0x03;
5110 cbp_b = (h-> top_cbp>>4)&0x03;
5113 if( cbp_a > 0 ) ctx++;
5114 if( cbp_b > 0 ) ctx += 2;
5115 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5119 if( cbp_a == 2 ) ctx++;
5120 if( cbp_b == 2 ) ctx += 2;
5121 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// Decodes mb_qp_delta as a unary CABAC code, then maps the magnitude to a
// signed delta (odd counts positive, even negative). NOTE(review): the val
// declaration, the in-loop ctx updates (2 then 3), the error return for
// val > 102 and the even/odd branch structure are elided in this listing.
5123 static int decode_cabac_mb_dqp( H264Context *h) {
// Context 0/1 chosen by whether the previous MB had a nonzero delta.
5124 int ctx= h->last_qscale_diff != 0;
5127 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5130 if(val > 102) //prevent infinite loop
5135 return (val + 1)>>1 ;
5137 return -((val + 1)>>1);
// Decodes a P-slice sub_mb_type (0..3) from contexts 21-23; the return
// statements between the visible bins are elided in this listing.
5139 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5140 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5142 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5144 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// Decodes a B-slice sub_mb_type (0..12) via a prefix tree over contexts
// 36-39. NOTE(review): the type declaration/initialization between lines
// 5155 and 5160 and the final "return type;" are elided in this listing.
5148 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5150 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5151 return 0; /* B_Direct_8x8 */
5152 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5153 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5155 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5156 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5157 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5160 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5161 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// Decodes transform_size_8x8_flag; the context (399..401) is offset by how
// many neighbouring MBs already use the 8x8 transform
// (h->neighbor_transform_size, maintained elsewhere).
5165 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5166 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// Decodes ref_idx for partition n of the given list as a unary CABAC code.
// NOTE(review): elided listing — the initial ctx computation from refa/refb,
// the in-loop ctx updates, the error path body for ref >= 32 and the final
// "return ref;" are missing between the visible lines.
5169 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5170 int refa = h->ref_cache[list][scan8[n] - 1];
5171 int refb = h->ref_cache[list][scan8[n] - 8];
5175 if( h->slice_type_nos == FF_B_TYPE) {
// In B slices, direct-predicted neighbours do not raise the context.
5176 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5178 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5187 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Hard cap to protect against corrupt streams looping forever.
5190 if(ref >= 32 /*h->ref_list[list]*/){
// Decodes one motion-vector-difference component (l=0 horizontal ctx base
// 40, l=1 vertical ctx base 47): context bins up to 8, then exp-Golomb
// style bypass bins for the remainder, then a bypass sign. NOTE(review):
// elided listing — mvd/k declarations, the in-loop ctx/k updates, the
// overflow recovery after line 5221 and the k-bit suffix read are missing.
5197 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
// Context selected by the sum of the neighbouring |mvd| values.
5198 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5199 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5200 int ctxbase = (l == 0) ? 40 : 47;
5202 int ctx = (amvd>2) + (amvd>32);
5204 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Unary part with adaptive contexts, capped at 9 bins.
5209 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb tail in bypass mode.
5217 while( get_cabac_bypass( &h->cabac ) ) {
5221 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5226 if( get_cabac_bypass( &h->cabac ) )
5230 return get_cabac_bypass_sign( &h->cabac, -mvd );
// Computes the coded_block_flag context for block category cat: picks the
// left/top "neighbour has coefficients" indicators (nza/nzb) from cbp bits
// for DC/chroma-DC cases or from the nonzero-count cache for AC blocks,
// then returns ctx + 4*cat. NOTE(review): elided listing — the branch
// structure around the is_dc/cat cases and the ctx computation from
// nza/nzb are missing between the visible lines.
5233 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Luma DC: flag stored in bit 8 of the neighbour cbp.
5239 nza = h->left_cbp&0x100;
5240 nzb = h-> top_cbp&0x100;
// Chroma DC: per-component flags at bits 6+idx.
5242 nza = (h->left_cbp>>(6+idx))&0x01;
5243 nzb = (h-> top_cbp>>(6+idx))&0x01;
// AC blocks: use the cached per-4x4 nonzero counts of the neighbours.
5246 assert(cat == 1 || cat == 2 || cat == 4);
5247 nza = h->non_zero_count_cache[scan8[idx] - 1];
5248 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5257 return ctx + 4 * cat;
// Maps an 8x8-block scan position (0..62) to the context offset used for
// the CABAC last_significant_coeff_flag, per the H.264 spec's 8x8 context
// grouping. DECLARE_ASM_CONST(1, ...) byte-aligns it for asm access.
// NOTE(review): the closing "};" line is elided in this listing.
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5261 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5262 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5263 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5264 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
// Core CABAC residual decoder: coded_block_flag, significance map, last-coeff
// map, then coefficient levels/signs (decoded in reverse scan order).
// 'cat' selects the block category (see table below), 'qmul' the dequant
// table (NULL for DC, where dequant happens later), 'is_dc' is a compile-time
// constant so the _dc/_nondc wrappers each get a specialized copy.
5267 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
// Context-base offsets per [MB_FIELD][cat] (frame vs field coding).
5268 static const int significant_coeff_flag_offset[2][6] = {
5269 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5270 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5272 static const int last_coeff_flag_offset[2][6] = {
5273 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5274 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5276 static const int coeff_abs_level_m1_offset[6] = {
5277 227+0, 227+10, 227+20, 227+30, 227+39, 426
// Per-scan-position significance context increments for 8x8 blocks,
// [0] frame-coded, [1] field-coded (spec Table 9-43).
5279 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5280 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5281 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5282 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5283 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5284 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5285 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5286 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5287 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5289 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5290 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5291 * map node ctx => cabac ctx for level=1 */
5292 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5293 /* map node ctx => cabac ctx for level>1 */
5294 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5295 static const uint8_t coeff_abs_level_transition[2][8] = {
5296 /* update node ctx after decoding a level=1 */
5297 { 1, 2, 3, 3, 4, 5, 6, 7 },
5298 /* update node ctx after decoding a level>1 */
5299 { 4, 4, 4, 4, 5, 6, 7, 7 }
5305 int coeff_count = 0;
5308 uint8_t *significant_coeff_ctx_base;
5309 uint8_t *last_coeff_ctx_base;
5310 uint8_t *abs_level_m1_ctx_base;
// Copy the CABAC engine state into a local struct so the compiler can keep
// it in registers through this hot loop; written back before every return.
5313 #define CABAC_ON_STACK
5315 #ifdef CABAC_ON_STACK
5318 cc.range = h->cabac.range;
5319 cc.low = h->cabac.low;
5320 cc.bytestream= h->cabac.bytestream;
5322 #define CC &h->cabac
5326 /* cat: 0-> DC 16x16 n = 0
5327 * 1-> AC 16x16 n = luma4x4idx
5328 * 2-> Luma4x4 n = luma4x4idx
5329 * 3-> DC Chroma n = iCbCr
5330 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5331 * 5-> Luma8x8 n = 4 * luma8x8idx
5334 /* read coded block flag */
5335 if( is_dc || cat != 5 ) {
5336 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
// cbf == 0: no coefficients in this block; record that and bail out.
5338 h->non_zero_count_cache[scan8[n]] = 0;
5340 #ifdef CABAC_ON_STACK
5341 h->cabac.range = cc.range ;
5342 h->cabac.low = cc.low ;
5343 h->cabac.bytestream= cc.bytestream;
5349 significant_coeff_ctx_base = h->cabac_state
5350 + significant_coeff_flag_offset[MB_FIELD][cat];
5351 last_coeff_ctx_base = h->cabac_state
5352 + last_coeff_flag_offset[MB_FIELD][cat];
5353 abs_level_m1_ctx_base = h->cabac_state
5354 + coeff_abs_level_m1_offset[cat];
5356 if( !is_dc && cat == 5 ) {
// Significance map: for each scan position, decode significant_coeff_flag;
// if set, record the position and decode last_significant_coeff_flag.
5357 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5358 for(last= 0; last < coefs; last++) { \
5359 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5360 if( get_cabac( CC, sig_ctx )) { \
5361 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5362 index[coeff_count++] = last; \
5363 if( get_cabac( CC, last_ctx ) ) { \
// The last scan position is implicitly significant if reached.
5369 if( last == max_coeff -1 ) {\
5370 index[coeff_count++] = last;\
5372 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5373 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5374 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5376 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5378 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5380 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5383 assert(coeff_count > 0);
// Propagate "block has coefficients" into the per-MB cbp / nnz caches
// (used later by deblocking and neighbour context derivation).
5387 h->cbp_table[h->mb_xy] |= 0x100;
5389 h->cbp_table[h->mb_xy] |= 0x40 << n;
5392 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5394 assert( cat == 1 || cat == 2 || cat == 4 );
5395 h->non_zero_count_cache[scan8[n]] = coeff_count;
// Level decode, reverse scan order: per-coefficient state machine (node_ctx)
// selects the contexts for |level|==1 vs |level|>1 bins.
5400 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5402 int j= scantable[index[--coeff_count]];
5404 if( get_cabac( CC, ctx ) == 0 ) {
5405 node_ctx = coeff_abs_level_transition[0][node_ctx];
5407 block[j] = get_cabac_bypass_sign( CC, -1);
// Dequantize while storing: (level * qmul + 32) >> 6.
5409 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5413 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5414 node_ctx = coeff_abs_level_transition[1][node_ctx];
// Truncated-unary up to 15, then bypass-coded exp-Golomb suffix.
5416 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5420 if( coeff_abs >= 15 ) {
5422 while( get_cabac_bypass( CC ) ) {
5428 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5434 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5436 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5439 } while( coeff_count );
// Write the locally-cached CABAC state back to the context.
5440 #ifdef CABAC_ON_STACK
5441 h->cabac.range = cc.range ;
5442 h->cabac.low = cc.low ;
5443 h->cabac.bytestream= cc.bytestream;
// DC specialization: forces the always-inline internal decoder to emit a
// copy with is_dc==1 folded in as a compile-time constant.
5449 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5450 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
// Non-DC specialization: is_dc==0 folded in at compile time (AC/4x4/8x8 blocks).
5453 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5454 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
// Dispatcher: cat 0 (luma DC) and cat 3 (chroma DC) are DC categories, all
// others are not. One branch calls the inlined internal directly with the
// is_dc expression, the other routes through the two specialized wrappers
// (which path is compiled is selected outside this view — presumably by a
// size/speed #if; confirm against the full file).
5458 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5460 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5462 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5463 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
// Compute top/left neighbour macroblock indices for the current MB.
// Default is the simple raster layout (top = mb_xy - stride, left = mb_xy - 1);
// MBAFF pairs and field pictures then adjust these, since a field MB's
// spatial neighbour may live one extra row up or in the other field of a pair.
5467 static inline void compute_mb_neighbors(H264Context *h)
5469 MpegEncContext * const s = &h->s;
5470 const int mb_xy = h->mb_xy;
5471 h->top_mb_xy = mb_xy - s->mb_stride;
5472 h->left_mb_xy[0] = mb_xy - 1;
// MBAFF: work in terms of the MB pair (top MB of the pair at even mb_y).
5474 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5475 const int top_pair_xy = pair_xy - s->mb_stride;
5476 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5477 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5478 const int curr_mb_field_flag = MB_FIELD;
5479 const int bottom = (s->mb_y & 1);
5481 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5482 h->top_mb_xy -= s->mb_stride;
// Same-parity left neighbour: when left pair's field flag differs, point
// at the top MB of the left pair instead of the raster left MB.
5484 if (!left_mb_field_flag == curr_mb_field_flag) {
5485 h->left_mb_xy[0] = pair_xy - 1;
5487 } else if (FIELD_PICTURE) {
// Field pictures store the two fields interleaved; same-field vertical
// neighbour is two rows up in the combined array.
5488 h->top_mb_xy -= s->mb_stride;
5494 * decodes a macroblock
5495 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5497 static int decode_mb_cabac(H264Context *h) {
5498 MpegEncContext * const s = &h->s;
5500 int mb_type, partition_count, cbp = 0;
5501 int dct8x8_allowed= h->pps.transform_8x8_mode;
5503 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5505 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// --- mb_skip_flag (P/B slices only; I slices have no skip) ---
5506 if( h->slice_type_nos != FF_I_TYPE ) {
5508 /* a skipped mb needs the aff flag from the following mb */
5509 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5510 predict_field_decoding_flag(h);
5511 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5512 skip = h->next_mb_skipped;
5514 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5515 /* read skip flags */
5517 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
// Top MB of an MBAFF pair skipped: peek at the bottom MB's skip flag,
// because the pair's field flag is only coded on the first non-skipped MB.
5518 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5519 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5520 if(!h->next_mb_skipped)
5521 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5526 h->cbp_table[mb_xy] = 0;
5527 h->chroma_pred_mode_table[mb_xy] = 0;
5528 h->last_qscale_diff = 0;
5535 if( (s->mb_y&1) == 0 )
5537 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5540 h->prev_mb_skipped = 0;
5542 compute_mb_neighbors(h);
// --- mb_type (per slice type), mapped through the *_mb_type_info tables ---
5544 if( h->slice_type_nos == FF_B_TYPE ) {
5545 mb_type = decode_cabac_mb_type_b( h );
5547 partition_count= b_mb_type_info[mb_type].partition_count;
5548 mb_type= b_mb_type_info[mb_type].type;
5551 goto decode_intra_mb;
5553 } else if( h->slice_type_nos == FF_P_TYPE ) {
5554 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5556 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5557 /* P_L0_D16x16, P_8x8 */
5558 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5560 /* P_L0_D8x16, P_L0_D16x8 */
5561 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5563 partition_count= p_mb_type_info[mb_type].partition_count;
5564 mb_type= p_mb_type_info[mb_type].type;
5566 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5567 goto decode_intra_mb;
5570 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5571 if(h->slice_type == FF_SI_TYPE && mb_type)
5573 assert(h->slice_type_nos == FF_I_TYPE);
5575 partition_count = 0;
5576 cbp= i_mb_type_info[mb_type].cbp;
5577 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5578 mb_type= i_mb_type_info[mb_type].type;
5581 mb_type |= MB_TYPE_INTERLACED;
5583 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples follow, bypassing the arithmetic coder ---
5585 if(IS_INTRA_PCM(mb_type)) {
5588 // We assume these blocks are very rare so we do not optimize it.
5589 // FIXME The two following lines get the bitstream position in the cabac
5590 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5591 ptr= h->cabac.bytestream;
5592 if(h->cabac.low&0x1) ptr--;
5594 if(h->cabac.low&0x1FF) ptr--;
5597 // The pixels are stored in the same order as levels in h->mb array.
5598 memcpy(h->mb, ptr, 256); ptr+=256;
5600 memcpy(h->mb+128, ptr, 128); ptr+=128;
// Restart the CABAC engine after the raw bytes.
5603 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5605 // All blocks are present
5606 h->cbp_table[mb_xy] = 0x1ef;
5607 h->chroma_pred_mode_table[mb_xy] = 0;
5608 // In deblocking, the quantizer is 0
5609 s->current_picture.qscale_table[mb_xy]= 0;
5610 // All coeffs are present
5611 memset(h->non_zero_count[mb_xy], 16, 16);
5612 s->current_picture.mb_type[mb_xy]= mb_type;
5613 h->last_qscale_diff = 0;
// MBAFF field MBs address references per-field: double the counts while
// decoding this MB (restored with >>= 1 at the end).
5618 h->ref_count[0] <<= 1;
5619 h->ref_count[1] <<= 1;
5622 fill_caches(h, mb_type, 0);
// --- Intra prediction modes ---
5624 if( IS_INTRA( mb_type ) ) {
5626 if( IS_INTRA4x4( mb_type ) ) {
5627 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5628 mb_type |= MB_TYPE_8x8DCT;
// 8x8 transform: one pred mode per 8x8 block, replicated to 4 cells.
5629 for( i = 0; i < 16; i+=4 ) {
5630 int pred = pred_intra_mode( h, i );
5631 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5632 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5635 for( i = 0; i < 16; i++ ) {
5636 int pred = pred_intra_mode( h, i );
5637 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5639 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5642 write_back_intra_pred_mode(h);
5643 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5645 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5646 if( h->intra16x16_pred_mode < 0 ) return -1;
5649 h->chroma_pred_mode_table[mb_xy] =
5650 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5652 pred_mode= check_intra_pred_mode( h, pred_mode );
5653 if( pred_mode < 0 ) return -1;
5654 h->chroma_pred_mode= pred_mode;
// --- Inter, 8x8 partitions (sub-macroblock types + per-sub-part mv/ref) ---
5656 } else if( partition_count == 4 ) {
5657 int i, j, sub_partition_count[4], list, ref[2][4];
5659 if( h->slice_type_nos == FF_B_TYPE ) {
5660 for( i = 0; i < 4; i++ ) {
5661 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5662 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5663 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5665 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5666 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5667 pred_direct_motion(h, &mb_type);
5668 h->ref_cache[0][scan8[4]] =
5669 h->ref_cache[1][scan8[4]] =
5670 h->ref_cache[0][scan8[12]] =
5671 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5672 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5673 for( i = 0; i < 4; i++ )
5674 if( IS_DIRECT(h->sub_mb_type[i]) )
5675 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5679 for( i = 0; i < 4; i++ ) {
5680 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5681 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5682 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices for each non-direct 8x8 block, then replicate into cache.
5686 for( list = 0; list < h->list_count; list++ ) {
5687 for( i = 0; i < 4; i++ ) {
5688 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5689 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5690 if( h->ref_count[list] > 1 ){
5691 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5692 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5693 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5701 h->ref_cache[list][ scan8[4*i]+1 ]=
5702 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5707 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per sub-partition; mvd is decoded relative to the
// spatial predictor from pred_motion().
5709 for(list=0; list<h->list_count; list++){
5711 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5712 if(IS_DIRECT(h->sub_mb_type[i])){
5713 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5717 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5718 const int sub_mb_type= h->sub_mb_type[i];
5719 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5720 for(j=0; j<sub_partition_count[i]; j++){
5723 const int index= 4*i + block_width*j;
5724 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5725 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5726 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5728 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5729 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5730 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the decoded mv/mvd into every 4x4 cell the sub-partition covers.
5732 if(IS_SUB_8X8(sub_mb_type)){
5734 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5736 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5739 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5741 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5742 }else if(IS_SUB_8X4(sub_mb_type)){
5743 mv_cache[ 1 ][0]= mx;
5744 mv_cache[ 1 ][1]= my;
5746 mvd_cache[ 1 ][0]= mx - mpx;
5747 mvd_cache[ 1 ][1]= my - mpy;
5748 }else if(IS_SUB_4X8(sub_mb_type)){
5749 mv_cache[ 8 ][0]= mx;
5750 mv_cache[ 8 ][1]= my;
5752 mvd_cache[ 8 ][0]= mx - mpx;
5753 mvd_cache[ 8 ][1]= my - mpy;
5755 mv_cache[ 0 ][0]= mx;
5756 mv_cache[ 0 ][1]= my;
5758 mvd_cache[ 0 ][0]= mx - mpx;
5759 mvd_cache[ 0 ][1]= my - mpy;
5762 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5763 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5764 p[0] = p[1] = p[8] = p[9] = 0;
5765 pd[0]= pd[1]= pd[8]= pd[9]= 0;
// --- B direct 16x16: motion inferred, no mvd in the stream ---
5769 } else if( IS_DIRECT(mb_type) ) {
5770 pred_direct_motion(h, &mb_type);
5771 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5772 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5773 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- Inter 16x16 / 16x8 / 8x16 partitions ---
5775 int list, mx, my, i, mpx, mpy;
5776 if(IS_16X16(mb_type)){
5777 for(list=0; list<h->list_count; list++){
5778 if(IS_DIR(mb_type, 0, list)){
5780 if(h->ref_count[list] > 1){
5781 ref= decode_cabac_mb_ref(h, list, 0);
5782 if(ref >= (unsigned)h->ref_count[list]){
5783 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5788 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5790 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5792 for(list=0; list<h->list_count; list++){
5793 if(IS_DIR(mb_type, 0, list)){
5794 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5796 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5797 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5798 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5800 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5801 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5803 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5806 else if(IS_16X8(mb_type)){
5807 for(list=0; list<h->list_count; list++){
5809 if(IS_DIR(mb_type, i, list)){
5811 if(h->ref_count[list] > 1){
5812 ref= decode_cabac_mb_ref( h, list, 8*i );
5813 if(ref >= (unsigned)h->ref_count[list]){
5814 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5819 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5821 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5824 for(list=0; list<h->list_count; list++){
5826 if(IS_DIR(mb_type, i, list)){
5827 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5828 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5829 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5830 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5832 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5833 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5835 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5836 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5841 assert(IS_8X16(mb_type));
5842 for(list=0; list<h->list_count; list++){
5844 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5846 if(h->ref_count[list] > 1){
5847 ref= decode_cabac_mb_ref( h, list, 4*i );
5848 if(ref >= (unsigned)h->ref_count[list]){
5849 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5854 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5856 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5859 for(list=0; list<h->list_count; list++){
5861 if(IS_DIR(mb_type, i, list)){
5862 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5863 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5864 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5866 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5867 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5868 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5870 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5871 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5878 if( IS_INTER( mb_type ) ) {
5879 h->chroma_pred_mode_table[mb_xy] = 0;
5880 write_back_motion( h, mb_type );
// --- coded_block_pattern (intra16x16 carries cbp in its mb_type) ---
5883 if( !IS_INTRA16x16( mb_type ) ) {
5884 cbp = decode_cabac_mb_cbp_luma( h );
5886 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5889 h->cbp_table[mb_xy] = h->cbp = cbp;
5891 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5892 if( decode_cabac_mb_transform_size( h ) )
5893 mb_type |= MB_TYPE_8x8DCT;
5895 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residuals: dqp, then DC/AC luma and chroma coefficients ---
5897 if( cbp || IS_INTRA16x16( mb_type ) ) {
5898 const uint8_t *scan, *scan8x8, *dc_scan;
5899 const uint32_t *qmul;
5902 if(IS_INTERLACED(mb_type)){
5903 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5904 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5905 dc_scan= luma_dc_field_scan;
5907 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5908 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5909 dc_scan= luma_dc_zigzag_scan;
5912 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5913 if( dqp == INT_MIN ){
5914 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// Wrap qscale into 0..51 as the spec's modular qp arithmetic requires.
5918 if(((unsigned)s->qscale) > 51){
5919 if(s->qscale<0) s->qscale+= 52;
5920 else s->qscale-= 52;
5922 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5923 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5925 if( IS_INTRA16x16( mb_type ) ) {
5927 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5928 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5931 qmul = h->dequant4_coeff[0][s->qscale];
5932 for( i = 0; i < 16; i++ ) {
5933 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5934 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5937 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5941 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5942 if( cbp & (1<<i8x8) ) {
5943 if( IS_8x8DCT(mb_type) ) {
5944 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5945 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5947 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5948 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5949 const int index = 4*i8x8 + i4x4;
5950 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5952 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5953 //STOP_TIMER("decode_residual")
5957 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5958 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5965 for( c = 0; c < 2; c++ ) {
5966 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5967 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5973 for( c = 0; c < 2; c++ ) {
5974 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5975 for( i = 0; i < 4; i++ ) {
5976 const int index = 16 + 4 * c + i;
5977 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5978 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5982 uint8_t * const nnz= &h->non_zero_count_cache[0];
5983 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5984 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residuals at all: clear the whole nnz cache.
5987 uint8_t * const nnz= &h->non_zero_count_cache[0];
5988 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5989 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5990 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5991 h->last_qscale_diff = 0;
5994 s->current_picture.qscale_table[mb_xy]= s->qscale;
5995 write_back_non_zero_count(h);
// Undo the MBAFF ref_count doubling done before fill_caches().
5998 h->ref_count[0] >>= 1;
5999 h->ref_count[1] >>= 1;
// Deblock one vertical luma edge (4 rows of bS values). alpha/beta tables are
// biased by +52 so negative qp+offset indices stay in range; alpha==0 or
// beta==0 means filtering is disabled for this qp.
6006 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6007 const int index_a = qp + h->slice_alpha_c0_offset;
6008 const int alpha = (alpha_table+52)[index_a];
6009 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6010 if (alpha ==0 || beta == 0) return;
// bS < 4 path: clipped filter with per-edge tc0; the intra path below
// uses the strong (unclipped) filter.
6014 tc[0] = (tc0_table+52)[index_a][bS[0]];
6015 tc[1] = (tc0_table+52)[index_a][bS[1]];
6016 tc[2] = (tc0_table+52)[index_a][bS[2]];
6017 tc[3] = (tc0_table+52)[index_a][bS[3]];
6018 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6020 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
// Deblock one vertical chroma edge. Same structure as filter_mb_edgev, but
// the DSP chroma filter expects tc0+1 (its internal clip is tc-1).
6023 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6024 const int index_a = qp + h->slice_alpha_c0_offset;
6025 const int alpha = (alpha_table+52)[index_a];
6026 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6027 if (alpha ==0 || beta == 0) return;
6031 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6032 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6033 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6034 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6035 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6037 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
// MBAFF vertical luma edge filter, scalar per-row C implementation: with
// mixed field/frame MB pairs each of the 16 rows can have its own bS and qp,
// so the SIMD whole-edge filters cannot be used here.
6041 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6043 for( i = 0; i < 16; i++, pix += stride) {
6049 int bS_index = (i >> 1);
6052 bS_index |= (i & 1);
6055 if( bS[bS_index] == 0 ) {
// qp may differ between the two MBs of the pair; pick per field (MB_FIELD)
// or per row parity (frame-coded pair).
6059 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6060 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6061 alpha = (alpha_table+52)[index_a];
6062 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// Normal (clipped) filter for bS 1..3; spec section 8.7.2.3.
6064 if( bS[bS_index] < 4 ) {
6065 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6066 const int p0 = pix[-1];
6067 const int p1 = pix[-2];
6068 const int p2 = pix[-3];
6069 const int q0 = pix[0];
6070 const int q1 = pix[1];
6071 const int q2 = pix[2];
6073 if( FFABS( p0 - q0 ) < alpha &&
6074 FFABS( p1 - p0 ) < beta &&
6075 FFABS( q1 - q0 ) < beta ) {
// Optional p1/q1 adjustment when the secondary gradient is small.
6079 if( FFABS( p2 - p0 ) < beta ) {
6080 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6083 if( FFABS( q2 - q0 ) < beta ) {
6084 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6088 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6089 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6090 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6091 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Strong filter for bS == 4 (intra edges); spec section 8.7.2.4.
6094 const int p0 = pix[-1];
6095 const int p1 = pix[-2];
6096 const int p2 = pix[-3];
6098 const int q0 = pix[0];
6099 const int q1 = pix[1];
6100 const int q2 = pix[2];
6102 if( FFABS( p0 - q0 ) < alpha &&
6103 FFABS( p1 - p0 ) < beta &&
6104 FFABS( q1 - q0 ) < beta ) {
6106 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6107 if( FFABS( p2 - p0 ) < beta)
6109 const int p3 = pix[-4];
6111 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6112 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6113 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6116 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6118 if( FFABS( q2 - q0 ) < beta)
6120 const int q3 = pix[3];
6122 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6123 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6124 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6127 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak fallback when the edge is too sharp for the full strong filter.
6131 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6132 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6134 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// MBAFF vertical chroma edge filter, scalar per-row (8 chroma rows).
// Chroma uses the 2-tap clipped filter only; no p2/q2 refinement.
6139 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6141 for( i = 0; i < 8; i++, pix += stride) {
6149 if( bS[bS_index] == 0 ) {
6153 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6154 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6155 alpha = (alpha_table+52)[index_a];
6156 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6158 if( bS[bS_index] < 4 ) {
// Chroma clip is tc0+1 (same convention as filter_mb_edgecv).
6159 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6160 const int p0 = pix[-1];
6161 const int p1 = pix[-2];
6162 const int q0 = pix[0];
6163 const int q1 = pix[1];
6165 if( FFABS( p0 - q0 ) < alpha &&
6166 FFABS( p1 - p0 ) < beta &&
6167 FFABS( q1 - q0 ) < beta ) {
6168 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6170 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6171 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6172 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// bS == 4: strong chroma filter (simple averaging, no clip).
6175 const int p0 = pix[-1];
6176 const int p1 = pix[-2];
6177 const int q0 = pix[0];
6178 const int q1 = pix[1];
6180 if( FFABS( p0 - q0 ) < alpha &&
6181 FFABS( p1 - p0 ) < beta &&
6182 FFABS( q1 - q0 ) < beta ) {
6184 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6185 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6186 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// Deblock one horizontal luma edge: mirror of filter_mb_edgev using the
// vertical-direction DSP filters (v_loop_filter operates across rows).
6192 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6193 const int index_a = qp + h->slice_alpha_c0_offset;
6194 const int alpha = (alpha_table+52)[index_a];
6195 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6196 if (alpha ==0 || beta == 0) return;
6200 tc[0] = (tc0_table+52)[index_a][bS[0]];
6201 tc[1] = (tc0_table+52)[index_a][bS[1]];
6202 tc[2] = (tc0_table+52)[index_a][bS[2]];
6203 tc[3] = (tc0_table+52)[index_a][bS[3]];
6204 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6206 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
// Deblock one horizontal chroma edge; tc is tc0+1 per the chroma DSP
// filter convention (see filter_mb_edgecv).
6210 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6211 const int index_a = qp + h->slice_alpha_c0_offset;
6212 const int alpha = (alpha_table+52)[index_a];
6213 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6214 if (alpha ==0 || beta == 0) return;
6218 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6219 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6220 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6221 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6222 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6224 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
// Fast-path deblocking for one macroblock: computes boundary strengths with
// the DSP helper and simplified qp averaging, skipping work when all qp's are
// below the threshold. Falls back to the full filter_mb() for edge-of-frame
// MBs, MBAFF, chroma qp offsets, or slice-boundary deblocking mode 2.
6228 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6229 MpegEncContext * const s = &h->s;
6230 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6232 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6236 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6237 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6238 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6239 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6240 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6243 assert(!FRAME_MBAFF);
// Edge qp's are the average of this MB's qp and the neighbour's (qp0: left,
// qp1: top), as the spec requires for MB-boundary edges.
6245 mb_type = s->current_picture.mb_type[mb_xy];
6246 qp = s->current_picture.qscale_table[mb_xy];
6247 qp0 = s->current_picture.qscale_table[mb_xy-1];
6248 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6249 qpc = get_chroma_qp( h, 0, qp );
6250 qpc0 = get_chroma_qp( h, 0, qp0 );
6251 qpc1 = get_chroma_qp( h, 0, qp1 );
6252 qp0 = (qp + qp0 + 1) >> 1;
6253 qp1 = (qp + qp1 + 1) >> 1;
6254 qpc0 = (qpc + qpc0 + 1) >> 1;
6255 qpc1 = (qpc + qpc1 + 1) >> 1;
6256 qp_thresh = 15 - h->slice_alpha_c0_offset;
// All qp's below threshold => alpha/beta would be 0 everywhere; nothing to do.
6257 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6258 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// Intra MB: fixed strengths (4 at MB boundaries, 3 inside; 3 at the top
// boundary in field pictures), no per-edge bS computation needed.
6261 if( IS_INTRA(mb_type) ) {
6262 int16_t bS4[4] = {4,4,4,4};
6263 int16_t bS3[4] = {3,3,3,3};
6264 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6265 if( IS_8x8DCT(mb_type) ) {
6266 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6267 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6268 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6269 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6271 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6272 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6273 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6274 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6275 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6276 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6277 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6278 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6280 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6281 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6282 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6283 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6284 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6285 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6286 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6287 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// Inter MB: compute bS per edge. bS[0] = vertical edges, bS[1] = horizontal;
// the uint64_t view writes 4 int16 strengths per edge at once.
6290 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6291 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6293 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6295 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6297 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6298 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6299 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6300 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6302 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6303 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6304 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6305 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
// Intra neighbours force bS 4 (3 vertically in field pictures) on the
// shared MB boundary regardless of the computed strengths.
6307 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6308 bSv[0][0] = 0x0004000400040004ULL;
6309 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6310 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
// dir 0 = vertical edges, dir 1 = horizontal; edge 0 uses the averaged
// boundary qp (qp0/qp1), interior edges use this MB's qp.
6312 #define FILTER(hv,dir,edge)\
6313 if(bSv[dir][edge]) {\
6314 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6316 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6317 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6323 } else if( IS_8x8DCT(mb_type) ) {
/* Apply one direction of the H.264 in-loop deblocking filter to a single
 * macroblock: dir==0 filters vertical edges, dir==1 horizontal edges.
 * Boundary strengths (bS) are derived per edge from intra flags, non-zero
 * coefficient counts, reference frames and motion-vector differences, then
 * the per-edge luma/chroma filter helpers are invoked.
 * NOTE(review): the embedded original line numbers are non-contiguous in this
 * excerpt; some statements (else branches, closing braces) are missing here. */
6343 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6344 MpegEncContext * const s = &h->s;
/* neighbouring macroblock across edge 0: left for dir==0, top for dir==1 */
6346 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6347 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers, separately for this
 * slice and the neighbour's slice (they may belong to different slices) */
6348 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6349 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* skip edge 0 when the neighbour is outside the picture (slice table unset) */
6350 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6352 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6353 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6354 // how often to recheck mv-based bS when iterating between edges
6355 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6356 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6357 // how often to recheck mv-based bS when iterating along each edge
6358 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6360 if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries */
6364 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6367 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6368 && !IS_INTERLACED(mb_type)
6369 && IS_INTERLACED(mbm_type)
6371 // This is a special case in the norm where the filtering must
6372 // be done twice (one each of the field) even if we are in a
6373 // frame macroblock.
6375 static const int nnz_idx[4] = {4,5,6,3};
6376 unsigned int tmp_linesize = 2 * linesize;
6377 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6378 int mbn_xy = mb_xy - 2 * s->mb_stride;
6383 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6384 if( IS_INTRA(mb_type) ||
6385 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6386 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6388 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6389 for( i = 0; i < 4; i++ ) {
6390 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6391 mbn_nnz[nnz_idx[i]] != 0 )
6397 // Do not use s->qscale as luma quantizer because it has not the same
6398 // value in IPCM macroblocks.
6399 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6400 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6401 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6402 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6403 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6404 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6405 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6406 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main loop: one iteration per external/internal edge in this direction */
6413 for( edge = start; edge < edges; edge++ ) {
6414 /* mbn_xy: neighbor macroblock */
6415 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6416 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6417 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* with the 8x8 transform the odd internal 4x4 edges are not filtered */
6421 if( (edge&1) && IS_8x8DCT(mb_type) )
6424 if( IS_INTRA(mb_type) ||
6425 IS_INTRA(mbn_type) ) {
6428 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6429 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6438 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* non-intra: derive bS from coded residuals and motion differences */
6443 if( edge & mask_edge ) {
6444 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6447 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6448 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6451 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6452 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6453 int bn_idx= b_idx - (dir ? 8:1);
/* bS=1 if references differ or an mv component differs by >= 4 quarter-pel
 * units (mvy_limit accounts for field coding on the vertical component) */
6456 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6457 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6458 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6459 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the opposite reference list */
6462 if(h->slice_type_nos == FF_B_TYPE && v){
6464 for( l = 0; !v && l < 2; l++ ) {
6466 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6467 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6468 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6472 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4 path: compute bS separately for each of the four edge positions */
6478 for( i = 0; i < 4; i++ ) {
6479 int x = dir == 0 ? edge : i;
6480 int y = dir == 0 ? i : edge;
6481 int b_idx= 8 + 4 + x + 8*y;
6482 int bn_idx= b_idx - (dir ? 8:1);
6484 if( h->non_zero_count_cache[b_idx] |
6485 h->non_zero_count_cache[bn_idx] ) {
6491 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6492 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6493 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6494 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6500 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6502 for( l = 0; l < 2; l++ ) {
6504 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6505 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6506 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all four strengths zero: nothing to filter on this edge */
6515 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6520 // Do not use s->qscale as luma quantizer because it has not the same
6521 // value in IPCM macroblocks.
6522 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6523 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6524 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6525 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6527 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
/* chroma is subsampled 2:1, so only even luma edges have a chroma edge */
6528 if( (edge&1) == 0 ) {
6529 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6530 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6531 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6532 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6535 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6536 if( (edge&1) == 0 ) {
6537 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6538 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6539 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6540 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Full (non-fast-path) per-macroblock deblocking entry point.
 * Handles the low-QP skip shortcut, fixes up NNZ values for CAVLC 8x8dct,
 * runs the MBAFF first-vertical-edge special case, then delegates the
 * regular filtering to filter_mb_dir() for both directions.
 * NOTE(review): the embedded original line numbers are non-contiguous in this
 * excerpt; some statements (conditions, closing braces) are missing here. */
6546 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6547 MpegEncContext * const s = &h->s;
6548 const int mb_xy= mb_x + mb_y*s->mb_stride;
6549 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical mv threshold (field units) */
6550 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6551 int first_vertical_edge_done = 0;
6554 //for sufficiently low qp, filtering wouldn't do anything
6555 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6557 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6558 int qp = s->current_picture.qscale_table[mb_xy];
/* the averaged QP with each neighbour must also be under the threshold */
6560 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6561 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6566 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6567 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6568 int top_type, left_type[2];
6569 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6570 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6571 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the NNZ cache entries of 8x8-transformed neighbours from cbp bits */
6573 if(IS_8x8DCT(top_type)){
6574 h->non_zero_count_cache[4+8*0]=
6575 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6576 h->non_zero_count_cache[6+8*0]=
6577 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6579 if(IS_8x8DCT(left_type[0])){
6580 h->non_zero_count_cache[3+8*1]=
6581 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6583 if(IS_8x8DCT(left_type[1])){
6584 h->non_zero_count_cache[3+8*3]=
6585 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* and the current MB's own NNZ entries, one cbp bit per 8x8 block */
6588 if(IS_8x8DCT(mb_type)){
6589 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6590 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6592 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6593 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6595 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6596 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6598 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6599 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6604 // left mb is in picture
6605 && h->slice_table[mb_xy-1] != 0xFFFF
6606 // and current and left pair do not have the same interlaced type
6607 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6608 // and left mb is in the same slice if deblocking_filter == 2
6609 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6610 /* First vertical edge is different in MBAFF frames
6611 * There are 8 different bS to compute and 2 different Qp
6613 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6614 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6619 int mb_qp, mbn0_qp, mbn1_qp;
6621 first_vertical_edge_done = 1;
6623 if( IS_INTRA(mb_type) )
6624 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6626 for( i = 0; i < 8; i++ ) {
6627 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6629 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6631 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6632 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6633 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6635 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* two QP pairs: one for each of the two left neighbours of the MB pair */
6642 mb_qp = s->current_picture.qscale_table[mb_xy];
6643 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6644 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6645 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6646 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6647 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6648 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6649 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6650 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6651 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6652 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6653 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6654 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6657 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6658 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6659 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6660 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6661 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* regular path: filter vertical (dir 0) then horizontal (dir 1) edges */
6665 for( dir = 0; dir < 2; dir++ )
6666 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6668 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6669 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Per-slice worker: decodes every macroblock of one slice, choosing the
 * CABAC, CAVLC or (dead) partitioned-frame code path, and reports decoded /
 * damaged regions to the error-resilience layer via ff_er_add_slice().
 * Returns 0 on normal slice end, -1 on error (final return is unreachable).
 * NOTE(review): the embedded original line numbers are non-contiguous in this
 * excerpt; some statements (loop heads, closing braces) are missing here.
 * Fix applied: line 6835 contained garbled tokens "s->?gb" / "s->gb?." —
 * restored to match the adjacent line 6836. */
6673 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6674 H264Context *h = *(void**)arg;
6675 MpegEncContext * const s = &h->s;
6676 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6680 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6681 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6683 if( h->pps.cabac ) {
/* CABAC path: byte-align, then hand the rest of the buffer to the decoder */
6687 align_get_bits( &s->gb );
6690 ff_init_cabac_states( &h->cabac);
6691 ff_init_cabac_decoder( &h->cabac,
6692 s->gb.buffer + get_bits_count(&s->gb)/8,
6693 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6694 /* calculate pre-state */
6695 for( i= 0; i < 460; i++ ) {
6697 if( h->slice_type_nos == FF_I_TYPE )
6698 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6700 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6703 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6705 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6710 int ret = decode_mb_cabac(h);
6712 //STOP_TIMER("decode_mb_cabac")
6714 if(ret>=0) hl_decode_mb(h);
6716 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6719 ret = decode_mb_cabac(h);
6721 if(ret>=0) hl_decode_mb(h);
6724 eos = get_cabac_terminate( &h->cabac );
/* small overread past bytestream_end is tolerated (up to 2 bytes) */
6726 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6727 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6728 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6732 if( ++s->mb_x >= s->mb_width ) {
6734 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6736 if(FIELD_OR_MBAFF_PICTURE) {
6741 if( eos || s->mb_y >= s->mb_height ) {
6742 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6743 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6750 int ret = decode_mb_cavlc(h);
6752 if(ret>=0) hl_decode_mb(h);
6754 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6756 ret = decode_mb_cavlc(h);
6758 if(ret>=0) hl_decode_mb(h);
6763 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6764 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6769 if(++s->mb_x >= s->mb_width){
6771 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6773 if(FIELD_OR_MBAFF_PICTURE) {
6776 if(s->mb_y >= s->mb_height){
6777 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact end of bitstream => clean slice end, otherwise mark as error */
6779 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6780 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6784 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6791 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6792 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6793 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6794 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6798 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* partitioned-frame path (historically disabled code) */
6807 for(;s->mb_y < s->mb_height; s->mb_y++){
6808 for(;s->mb_x < s->mb_width; s->mb_x++){
6809 int ret= decode_mb(h);
6814 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6815 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6820 if(++s->mb_x >= s->mb_width){
6822 if(++s->mb_y >= s->mb_height){
6823 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6824 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6828 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6835 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6836 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6837 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6841 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6848 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6851 return -1; //not reached
/* Parse a picture-timing SEI message (H.264 Annex D): CPB/DPB delays when
 * HRD parameters are present, and pic_struct plus the optional per-field
 * clock timestamps when pic_struct_present_flag is set. Unused timestamp
 * fields are read and discarded to keep the bit reader in sync.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; some statements (closing braces, return) are missing. */
6854 static int decode_picture_timing(H264Context *h){
6855 MpegEncContext * const s = &h->s;
6856 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
/* delay field widths come from the active SPS HRD parameters */
6857 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6858 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6860 if(h->sps.pic_struct_present_flag){
6861 unsigned int i, num_clock_ts;
6862 h->sei_pic_struct = get_bits(&s->gb, 4);
6865 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
/* pic_struct determines how many clock timestamps follow */
6868 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6870 for (i = 0 ; i < num_clock_ts ; i++){
6871 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6872 unsigned int full_timestamp_flag;
6873 h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
6874 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6875 skip_bits(&s->gb, 5); /* counting_type */
6876 full_timestamp_flag = get_bits(&s->gb, 1);
6877 skip_bits(&s->gb, 1); /* discontinuity_flag */
6878 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6879 skip_bits(&s->gb, 8); /* n_frames */
6880 if(full_timestamp_flag){
6881 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6882 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6883 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* without full_timestamp_flag each component is individually optional */
6885 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6886 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6887 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6888 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6889 if(get_bits(&s->gb, 1)) /* hours_flag */
6890 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6894 if(h->sps.time_offset_length > 0)
6895 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6899 if(s->avctx->debug & FF_DEBUG_PICT_INFO)
6900 av_log(s->avctx, AV_LOG_DEBUG, "ct_type:%X pic_struct:%d\n", h->sei_ct_type, h->sei_pic_struct);
/* Parse an unregistered user-data SEI payload: copy up to 16+256 bytes,
 * scan it for an x264 version banner (stored in h->x264_build, used for
 * encoder-bug workarounds), and skip any remaining payload bytes.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; the NUL termination and trailing skip loop head are missing. */
6905 static int decode_unregistered_user_data(H264Context *h, int size){
6906 MpegEncContext * const s = &h->s;
/* first 16 bytes are the payload UUID, the rest is free-form text */
6907 uint8_t user_data[16+256];
6913 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6914 user_data[i]= get_bits(&s->gb, 8);
6918 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6919 if(e==1 && build>=0)
6920 h->x264_build= build;
6922 if(s->avctx->debug & FF_DEBUG_BUGS)
6923 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond the local buffer */
6926 skip_bits(&s->gb, 8);
/* Parse a recovery-point SEI message: store recovery_frame_cnt (frames
 * until the output is fully correct after a random access) and skip the
 * three remaining flag fields. */
6931 static int decode_recovery_point(H264Context *h){
6932 MpegEncContext * const s = &h->s;
6934 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6935 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/* Parse a buffering-period SEI message: validate the referenced SPS, then
 * read initial_cpb_removal_delay for each CPB of the NAL and/or VCL HRD,
 * skipping the paired offset fields. Sets sei_buffering_period_present.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; error returns and closing braces are missing. */
6940 static int decode_buffering_period(H264Context *h){
6941 MpegEncContext * const s = &h->s;
6942 unsigned int sps_id;
6946 sps_id = get_ue_golomb_31(&s->gb);
6947 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6948 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6951 sps = h->sps_buffers[sps_id];
6953 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6954 if (sps->nal_hrd_parameters_present_flag) {
6955 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6956 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6957 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6960 if (sps->vcl_hrd_parameters_present_flag) {
6961 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6962 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6963 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6967 h->sei_buffering_period_present = 1;
/* Top-level SEI NAL parser: loop over SEI messages, decoding the variable-
 * length type and size fields (runs of 0xFF bytes accumulate), dispatch the
 * payload to the matching decoder, skip unknown types, and byte-align after
 * each message.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; the switch head, error returns and braces are missing. */
6971 int ff_h264_decode_sei(H264Context *h){
6972 MpegEncContext * const s = &h->s;
6974 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payload type: each 0xFF byte adds 255 until a non-0xFF terminator */
6979 type+= show_bits(&s->gb, 8);
6980 }while(get_bits(&s->gb, 8) == 255);
/* payload size, same accumulation scheme */
6984 size+= show_bits(&s->gb, 8);
6985 }while(get_bits(&s->gb, 8) == 255);
6988 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6989 if(decode_picture_timing(h) < 0)
6992 case SEI_TYPE_USER_DATA_UNREGISTERED:
6993 if(decode_unregistered_user_data(h, size) < 0)
6996 case SEI_TYPE_RECOVERY_POINT:
6997 if(decode_recovery_point(h) < 0)
7000 case SEI_BUFFERING_PERIOD:
7001 if(decode_buffering_period(h) < 0)
/* unknown SEI type: skip the whole payload */
7005 skip_bits(&s->gb, 8*size);
7008 //FIXME check bits here
7009 align_get_bits(&s->gb);
/* Parse HRD (hypothetical reference decoder) parameters from the VUI into
 * the SPS: CPB count, the per-CPB rate/size/cbr fields (values discarded),
 * and the delay-field bit lengths used later by the timing SEI parsers.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; the error return and final return are missing. */
7015 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
7016 MpegEncContext * const s = &h->s;
7018 cpb_count = get_ue_golomb_31(&s->gb) + 1;
7020 if(cpb_count > 32U){
7021 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
7025 get_bits(&s->gb, 4); /* bit_rate_scale */
7026 get_bits(&s->gb, 4); /* cpb_size_scale */
7027 for(i=0; i<cpb_count; i++){
7028 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7029 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7030 get_bits1(&s->gb); /* cbr_flag */
/* field widths (in bits) consumed by decode_picture_timing() and
 * decode_buffering_period() */
7032 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7033 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7034 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
7035 sps->time_offset_length = get_bits(&s->gb, 5);
7036 sps->cpb_cnt = cpb_count;
/* Parse the VUI (video usability information) section of an SPS: sample
 * aspect ratio, video signal description, chroma sample location, timing
 * info, NAL/VCL HRD parameters, pic_struct flag and bitstream restrictions.
 * Fields the decoder does not need are read and discarded to stay in sync.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; error returns and closing braces are missing. */
7040 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7041 MpegEncContext * const s = &h->s;
7042 int aspect_ratio_info_present_flag;
7043 unsigned int aspect_ratio_idc;
7045 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7047 if( aspect_ratio_info_present_flag ) {
7048 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7049 if( aspect_ratio_idc == EXTENDED_SAR ) {
7050 sps->sar.num= get_bits(&s->gb, 16);
7051 sps->sar.den= get_bits(&s->gb, 16);
7052 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
7053 sps->sar= pixel_aspect[aspect_ratio_idc];
7055 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7062 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7064 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7065 get_bits1(&s->gb); /* overscan_appropriate_flag */
7068 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7069 get_bits(&s->gb, 3); /* video_format */
7070 get_bits1(&s->gb); /* video_full_range_flag */
7071 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7072 get_bits(&s->gb, 8); /* colour_primaries */
7073 get_bits(&s->gb, 8); /* transfer_characteristics */
7074 get_bits(&s->gb, 8); /* matrix_coefficients */
7078 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7079 s->avctx->chroma_sample_location = get_ue_golomb(&s->gb)+1; /* chroma_sample_location_type_top_field */
7080 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7083 sps->timing_info_present_flag = get_bits1(&s->gb);
7084 if(sps->timing_info_present_flag){
7085 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7086 sps->time_scale = get_bits_long(&s->gb, 32);
/* reject 0 and values that would overflow later arithmetic */
7087 if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
7088 av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick invalid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
7091 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7094 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7095 if(sps->nal_hrd_parameters_present_flag)
7096 if(decode_hrd_parameters(h, sps) < 0)
7098 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7099 if(sps->vcl_hrd_parameters_present_flag)
7100 if(decode_hrd_parameters(h, sps) < 0)
7102 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7103 get_bits1(&s->gb); /* low_delay_hrd_flag */
7104 sps->pic_struct_present_flag = get_bits1(&s->gb);
7106 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7107 if(sps->bitstream_restriction_flag){
7108 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7109 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7110 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7111 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7112 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7113 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7114 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7116 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7117 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Read one quantisation scaling list (16 or 64 entries) from the bitstream.
 * If the list is absent, copy fallback_list; if the first delta yields zero,
 * use the JVT default list; otherwise decode delta-coded values in zigzag
 * order, repeating the last value once the deltas stop.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; the else branch, the if(next) guard and break are missing. */
7125 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7126 const uint8_t *jvt_list, const uint8_t *fallback_list){
7127 MpegEncContext * const s = &h->s;
7128 int i, last = 8, next = 8;
/* 4x4 lists use the H.264 zigzag, 8x8 lists the generic one */
7129 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7130 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7131 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7133 for(i=0;i<size;i++){
/* values are signed deltas from the previous entry, modulo 256 */
7135 next = (last + get_se_golomb(&s->gb)) & 0xff;
7136 if(!i && !next){ /* matrix not written, we use the preset one */
7137 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7140 last = factors[scan[i]] = next ? next : last;
/* Read the full set of scaling matrices (six 4x4 lists and, when the 8x8
 * transform is enabled, two 8x8 lists). For a PPS, absent lists fall back
 * to the corresponding SPS lists; otherwise to the JVT defaults. Each chroma
 * list falls back to the previously decoded list of the same intra/inter
 * class, per the H.264 fallback rule.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; an initializer terminator and closing braces are missing. */
7144 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7145 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7146 MpegEncContext * const s = &h->s;
/* PPS-level parsing falls back to the SPS matrices when they exist */
7147 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7148 const uint8_t *fallback[4] = {
7149 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7150 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7151 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7152 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7154 if(get_bits1(&s->gb)){
7155 sps->scaling_matrix_present |= is_sps;
7156 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7157 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7158 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7159 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7160 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7161 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7162 if(is_sps || pps->transform_8x8_mode){
7163 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7164 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set (SPS) NAL unit: profile/level, chroma
 * format and bit depths (high profile), POC configuration, reference-frame
 * count, picture dimensions, interlace flags, cropping and optional VUI.
 * On success the newly allocated SPS replaces any previous entry with the
 * same sps_id in h->sps_buffers.
 * NOTE(review): the embedded original line numbers are non-contiguous in
 * this excerpt; error paths, some assignments and the returns are missing. */
7169 int ff_h264_decode_seq_parameter_set(H264Context *h){
7170 MpegEncContext * const s = &h->s;
7171 int profile_idc, level_idc;
7172 unsigned int sps_id;
7176 profile_idc= get_bits(&s->gb, 8);
7177 get_bits1(&s->gb); //constraint_set0_flag
7178 get_bits1(&s->gb); //constraint_set1_flag
7179 get_bits1(&s->gb); //constraint_set2_flag
7180 get_bits1(&s->gb); //constraint_set3_flag
7181 get_bits(&s->gb, 4); // reserved
7182 level_idc= get_bits(&s->gb, 8);
7183 sps_id= get_ue_golomb_31(&s->gb);
7185 if(sps_id >= MAX_SPS_COUNT) {
7186 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7189 sps= av_mallocz(sizeof(SPS));
7193 sps->profile_idc= profile_idc;
7194 sps->level_idc= level_idc;
/* flat default scaling matrices (all 16) until the bitstream overrides them */
7196 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7197 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7198 sps->scaling_matrix_present = 0;
7200 if(sps->profile_idc >= 100){ //high profile
7201 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7202 if(sps->chroma_format_idc == 3)
7203 sps->residual_color_transform_flag = get_bits1(&s->gb);
7204 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7205 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7206 sps->transform_bypass = get_bits1(&s->gb);
7207 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles are always 4:2:0 */
7209 sps->chroma_format_idc= 1;
7212 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7213 sps->poc_type= get_ue_golomb_31(&s->gb);
7215 if(sps->poc_type == 0){ //FIXME #define
7216 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7217 } else if(sps->poc_type == 1){//FIXME #define
7218 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7219 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7220 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7221 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7223 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7224 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7228 for(i=0; i<sps->poc_cycle_length; i++)
7229 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7230 }else if(sps->poc_type != 2){
7231 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7235 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7236 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7237 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7240 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7241 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7242 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7243 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7244 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7245 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7249 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7250 if(!sps->frame_mbs_only_flag)
7251 sps->mb_aff= get_bits1(&s->gb);
7255 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7257 #ifndef ALLOW_INTERLACE
7259 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7261 sps->crop= get_bits1(&s->gb);
7263 sps->crop_left = get_ue_golomb(&s->gb);
7264 sps->crop_right = get_ue_golomb(&s->gb);
7265 sps->crop_top = get_ue_golomb(&s->gb);
7266 sps->crop_bottom= get_ue_golomb(&s->gb);
7267 if(sps->crop_left || sps->crop_top){
7268 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7270 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7271 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7277 sps->crop_bottom= 0;
7280 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7281 if( sps->vui_parameters_present_flag )
7282 if (decode_vui_parameters(h, sps) < 0)
7285 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7286 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7287 sps_id, sps->profile_idc, sps->level_idc,
7289 sps->ref_frame_count,
7290 sps->mb_width, sps->mb_height,
7291 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7292 sps->direct_8x8_inference_flag ? "8B8" : "",
7293 sps->crop_left, sps->crop_right,
7294 sps->crop_top, sps->crop_bottom,
7295 sps->vui_parameters_present_flag ? "VUI" : "",
7296 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
7297 sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
7298 sps->timing_info_present_flag ? sps->time_scale : 0
/* replace any previous SPS with the same id */
7302 av_free(h->sps_buffers[sps_id]);
7303 h->sps_buffers[sps_id]= sps;
/* Precompute the chroma QP lookup table for PPS table slot t (0 or 1).
 * index is the chroma_qp_index_offset for that component: every luma QP
 * value 0..51 maps to chroma_qp[av_clip(qp + index, 0, 51)]. */
7312 build_qp_table(PPS *pps, int t, int index)
7315 for(i = 0; i < 52; i++)
7316 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Decode a picture parameter set (PPS) NAL unit from s->gb.
 * bit_length is the RBSP length in bits (used to detect the optional
 * high-profile trailing fields). On success the newly allocated PPS
 * replaces any previous entry in h->pps_buffers[pps_id]. */
7319 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7320 MpegEncContext * const s = &h->s;
7321 unsigned int pps_id= get_ue_golomb(&s->gb);
7324 if(pps_id >= MAX_PPS_COUNT) {
7325 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
/* NOTE(review): confirm the av_mallocz() result is NULL-checked before use. */
7329 pps= av_mallocz(sizeof(PPS));
/* The SPS this PPS refers to must already have been decoded. */
7332 pps->sps_id= get_ue_golomb_31(&s->gb);
7333 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7334 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7338 pps->cabac= get_bits1(&s->gb);
7339 pps->pic_order_present= get_bits1(&s->gb);
7340 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (more than one slice group) is parsed but rejected as unsupported;
 * the spec syntax for the map types is kept below as reference. */
7341 if(pps->slice_group_count > 1 ){
7342 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7343 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7344 switch(pps->mb_slice_group_map_type){
7347 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7348 | run_length[ i ] |1 |ue(v) |
7353 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7355 | top_left_mb[ i ] |1 |ue(v) |
7356 | bottom_right_mb[ i ] |1 |ue(v) |
7364 | slice_group_change_direction_flag |1 |u(1) |
7365 | slice_group_change_rate_minus1 |1 |ue(v) |
7370 | slice_group_id_cnt_minus1 |1 |ue(v) |
7371 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7373 | slice_group_id[ i ] |1 |u(v) |
7378 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7379 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7380 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7381 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7385 pps->weighted_pred= get_bits1(&s->gb);
7386 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7387 pps->init_qp= get_se_golomb(&s->gb) + 26;
7388 pps->init_qs= get_se_golomb(&s->gb) + 26;
7389 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7390 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7391 pps->constrained_intra_pred= get_bits1(&s->gb);
7392 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7394 pps->transform_8x8_mode= 0;
7395 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Start from the SPS scaling matrices; the PPS may override them below. */
7396 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7397 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* more_rbsp_data(): optional trailing high-profile fields are present
 * only if bits remain before bit_length. */
7399 if(get_bits_count(&s->gb) < bit_length){
7400 pps->transform_8x8_mode= get_bits1(&s->gb);
7401 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7402 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7404 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7407 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7408 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7409 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7410 h->pps.chroma_qp_diff= 1;
7412 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7413 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7414 pps_id, pps->sps_id,
7415 pps->cabac ? "CABAC" : "CAVLC",
7416 pps->slice_group_count,
7417 pps->ref_count[0], pps->ref_count[1],
7418 pps->weighted_pred ? "weighted" : "",
7419 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7420 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7421 pps->constrained_intra_pred ? "CONSTR" : "",
7422 pps->redundant_pic_cnt_present ? "REDU" : "",
7423 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with the same id. */
7427 av_free(h->pps_buffers[pps_id]);
7428 h->pps_buffers[pps_id]= pps;
7436 * Call decode_slice() for each context.
7438 * @param h h264 master context
7439 * @param context_count number of contexts to execute
7441 static void execute_decode_slices(H264Context *h, int context_count){
7442 MpegEncContext * const s = &h->s;
7443 AVCodecContext * const avctx= s->avctx;
/* Hardware acceleration paths (hwaccel / VDPAU) do the slice work
 * elsewhere, so software slice decoding is skipped for them. */
7447 if (s->avctx->hwaccel)
7449 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
/* Single context: decode directly on the calling thread. */
7451 if(context_count == 1) {
7452 decode_slice(avctx, &h);
/* Multiple contexts: prime each thread context's error state, then run
 * them all through the avctx->execute() thread pool. */
7454 for(i = 1; i < context_count; i++) {
7455 hx = h->thread_context[i];
7456 hx->s.error_recognition = avctx->error_recognition;
7457 hx->s.error_count = 0;
7460 avctx->execute(avctx, (void *)decode_slice,
7461 h->thread_context, NULL, context_count, sizeof(void*));
7463 /* pull back stuff from slices to master context */
7464 hx = h->thread_context[context_count - 1];
7465 s->mb_x = hx->s.mb_x;
7466 s->mb_y = hx->s.mb_y;
7467 s->dropable = hx->s.dropable;
7468 s->picture_structure = hx->s.picture_structure;
/* Accumulate per-thread error counts into the master context. */
7469 for(i = 1; i < context_count; i++)
7470 h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units (Annex B start codes, or
 * length-prefixed units when h->is_avc) and dispatch each one to the
 * appropriate parser/decoder. Slices are queued across thread contexts
 * and flushed via execute_decode_slices(). */
7475 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7476 MpegEncContext * const s = &h->s;
7477 AVCodecContext * const avctx= s->avctx;
7479 H264Context *hx; ///< thread context
7480 int context_count = 0;
7481 int next_avc= h->is_avc ? 0 : buf_size;
7483 h->max_contexts = avctx->thread_count;
7486 for(i=0; i<50; i++){
7487 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Unless decoding in chunks, a new call starts a fresh access unit. */
7490 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7491 h->current_slice = 0;
7492 if (!s->first_field)
7493 s->current_picture_ptr= NULL;
/* AVC (ISO/MP4) framing: read the big-endian NAL length prefix. */
7505 if(buf_index >= next_avc) {
7506 if(buf_index >= buf_size) break;
7508 for(i = 0; i < h->nal_length_size; i++)
7509 nalsize = (nalsize << 8) | buf[buf_index++];
7510 if(nalsize <= 1 || nalsize > buf_size - buf_index){
7515 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7519 next_avc= buf_index + nalsize;
7521 // start code prefix search
7522 for(; buf_index + 3 < buf_size; buf_index++){
7523 // This should always succeed in the first iteration.
7524 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7528 if(buf_index+3 >= buf_size) break;
7533 hx = h->thread_context[context_count];
/* Unescape the NAL (remove emulation-prevention bytes) into ptr. */
7535 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
7536 if (ptr==NULL || dst_length < 0){
/* NOTE(review): ptr[dst_length - 1] is evaluated before the
 * dst_length > 0 guard, so dst_length == 0 reads one byte before the
 * buffer — the operands should be swapped; confirm against upstream. */
7539 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7541 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7543 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7544 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
/* If the declared AVC NAL size disagrees with the bytes consumed, only
 * escalate to an error when the trailing bytes are non-zero. */
7547 if (h->is_avc && (nalsize != consumed) && nalsize){
7548 int i, debug_level = AV_LOG_DEBUG;
7549 for (i = consumed; i < nalsize; i++)
7550 if (buf[buf_index+i])
7551 debug_level = AV_LOG_ERROR;
7552 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7555 buf_index += consumed;
/* Skip non-reference NALs when hurrying up / skipping non-ref frames. */
7557 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7558 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7563 switch(hx->nal_unit_type){
/* IDR slice: a mix of IDR and non-IDR slices in one AU is invalid. */
7565 if (h->nal_unit_type != NAL_IDR_SLICE) {
7566 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7569 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7571 init_get_bits(&hx->s.gb, ptr, bit_length);
7573 hx->inter_gb_ptr= &hx->s.gb;
7574 hx->s.data_partitioning = 0;
7576 if((err = decode_slice_header(hx, h)))
7579 if (s->avctx->hwaccel && h->current_slice == 1) {
7580 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
/* An IDR slice or a recovery-point SEI marks the output key frame. */
7584 s->current_picture_ptr->key_frame |=
7585 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7586 (h->sei_recovery_frame_cnt >= 0);
7587 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7588 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7589 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7590 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7591 && avctx->skip_frame < AVDISCARD_ALL){
7592 if(avctx->hwaccel) {
7593 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
/* VDPAU wants the raw slice data re-prefixed with an Annex B start code. */
7596 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7597 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7598 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7599 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* Data partitioning: DPA carries the slice header, DPB/DPC the intra
 * and inter residual partitions, each with its own bit reader. */
7605 init_get_bits(&hx->s.gb, ptr, bit_length);
7607 hx->inter_gb_ptr= NULL;
7609 if ((err = decode_slice_header(hx, h)) < 0)
7612 hx->s.data_partitioning = 1;
7616 init_get_bits(&hx->intra_gb, ptr, bit_length);
7617 hx->intra_gb_ptr= &hx->intra_gb;
7620 init_get_bits(&hx->inter_gb, ptr, bit_length);
7621 hx->inter_gb_ptr= &hx->inter_gb;
7623 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7624 && s->context_initialized
7626 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7627 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7628 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7629 && avctx->skip_frame < AVDISCARD_ALL)
7633 init_get_bits(&s->gb, ptr, bit_length);
7634 ff_h264_decode_sei(h);
7637 init_get_bits(&s->gb, ptr, bit_length);
7638 ff_h264_decode_seq_parameter_set(h);
7640 if(s->flags& CODEC_FLAG_LOW_DELAY)
7643 if(avctx->has_b_frames < 2)
7644 avctx->has_b_frames= !s->low_delay;
7647 init_get_bits(&s->gb, ptr, bit_length);
7649 ff_h264_decode_picture_parameter_set(h, bit_length);
7653 case NAL_END_SEQUENCE:
7654 case NAL_END_STREAM:
7655 case NAL_FILLER_DATA:
7657 case NAL_AUXILIARY_SLICE:
7660 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush queued slice contexts once the pool is full. */
7663 if(context_count == h->max_contexts) {
7664 execute_decode_slices(h, context_count);
7669 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7671 /* Slice could not be decoded in parallel mode, copy down
7672 * NAL unit stuff to context 0 and restart. Note that
7673 * rbsp_buffer is not transferred, but since we no longer
7674 * run in parallel mode this should not be an issue. */
7675 h->nal_unit_type = hx->nal_unit_type;
7676 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any slices still queued when the buffer is exhausted. */
7682 execute_decode_slices(h, context_count);
7687 * returns the number of bytes consumed for building the current frame
7689 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7690 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
/* Within 10 bytes of the end, report the whole buffer as consumed so
 * trailing padding does not get fed back to us. */
7691 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback: parse extradata (avcC) on first use,
 * decode all NAL units of the packet, then reorder delayed pictures into
 * display order and emit at most one frame into *pict.
 * Returns the number of input bytes consumed (via get_consumed_bytes). */
7696 static int decode_frame(AVCodecContext *avctx,
7697 void *data, int *data_size,
7700 const uint8_t *buf = avpkt->data;
7701 int buf_size = avpkt->size;
7702 H264Context *h = avctx->priv_data;
7703 MpegEncContext *s = &h->s;
7704 AVFrame *pict = data;
7707 s->flags= avctx->flags;
7708 s->flags2= avctx->flags2;
7710 /* end of stream, output what is still in the buffers */
7711 if (buf_size == 0) {
7715 //FIXME factorize this with the output code below
/* Pick the lowest-POC delayed picture, stopping at the first key frame
 * or MMCO reset, and shift the rest of the queue down. */
7716 out = h->delayed_pic[0];
7718 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7719 if(h->delayed_pic[i]->poc < out->poc){
7720 out = h->delayed_pic[i];
7724 for(i=out_idx; h->delayed_pic[i]; i++)
7725 h->delayed_pic[i] = h->delayed_pic[i+1];
7728 *data_size = sizeof(AVFrame);
7729 *pict= *(AVFrame*)out;
/* First call with ISO/MP4 framing: parse SPS/PPS out of the avcC
 * extradata before touching the packet data. */
7735 if(h->is_avc && !h->got_avcC) {
7736 int i, cnt, nalsize;
7737 unsigned char *p = avctx->extradata;
7738 if(avctx->extradata_size < 7) {
7739 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7743 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7746 /* sps and pps in the avcC always have length coded with 2 bytes,
7747 so put a fake nal_length_size = 2 while parsing them */
7748 h->nal_length_size = 2;
7749 // Decode sps from avcC
7750 cnt = *(p+5) & 0x1f; // Number of sps
7752 for (i = 0; i < cnt; i++) {
7753 nalsize = AV_RB16(p) + 2;
7754 if(decode_nal_units(h, p, nalsize) < 0) {
7755 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7760 // Decode pps from avcC
7761 cnt = *(p++); // Number of pps
7762 for (i = 0; i < cnt; i++) {
7763 nalsize = AV_RB16(p) + 2;
7764 if(decode_nal_units(h, p, nalsize) != nalsize) {
7765 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7770 // Now store right nal length size, that will be use to parse all other nals
7771 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7772 // Do not reparse avcC
/* Annex B extradata (e.g. from containers like MPEG-TS) is decoded once. */
7776 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7777 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7782 buf_index=decode_nal_units(h, buf, buf_size);
7786 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7787 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7788 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A picture is complete: decide interlacing flags and output order. */
7792 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7793 Picture *out = s->current_picture_ptr;
7794 Picture *cur = s->current_picture_ptr;
7795 int i, pics, out_of_order, out_idx;
7799 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7800 /* Wait for second field. */
7804 cur->interlaced_frame = 0;
7805 cur->repeat_pict = 0;
7807 /* Signal interlacing information externally. */
7808 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7810 if(h->sps.pic_struct_present_flag){
7811 switch (h->sei_pic_struct)
7813 case SEI_PIC_STRUCT_FRAME:
7815 case SEI_PIC_STRUCT_TOP_FIELD:
7816 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7817 cur->interlaced_frame = 1;
7819 case SEI_PIC_STRUCT_TOP_BOTTOM:
7820 case SEI_PIC_STRUCT_BOTTOM_TOP:
7821 if (FIELD_OR_MBAFF_PICTURE)
7822 cur->interlaced_frame = 1;
7824 // try to flag soft telecine progressive
7825 cur->interlaced_frame = h->prev_interlaced_frame;
7827 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7828 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7829 // Signal the possibility of telecined film externally (pic_struct 5,6)
7830 // From these hints, let the applications decide if they apply deinterlacing.
7831 cur->repeat_pict = 1;
7833 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7834 // Force progressive here, as doubling interlaced frame is a bad idea.
7835 cur->repeat_pict = 2;
7837 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7838 cur->repeat_pict = 4;
/* ct_type, when present for pic_struct 0..2, overrides interlaced_frame. */
7842 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
7843 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7845 /* Derive interlacing flag from used decoding process. */
7846 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7848 h->prev_interlaced_frame = cur->interlaced_frame;
7850 if (cur->field_poc[0] != cur->field_poc[1]){
7851 /* Derive top_field_first from field pocs. */
7852 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7854 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7855 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7856 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7857 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7858 cur->top_field_first = 1;
7860 cur->top_field_first = 0;
7862 /* Most likely progressive */
7863 cur->top_field_first = 0;
7867 //FIXME do something with unavailable reference frames
7869 /* Sort B-frames into display order */
7871 if(h->sps.bitstream_restriction_flag
7872 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7873 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Strict compliance without bitstream restrictions: assume the worst-
 * case reorder depth. */
7877 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7878 && !h->sps.bitstream_restriction_flag){
7879 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7884 while(h->delayed_pic[pics]) pics++;
7886 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Append the current picture; keep it referenced while it is delayed. */
7888 h->delayed_pic[pics++] = cur;
7889 if(cur->reference == 0)
7890 cur->reference = DELAYED_PIC_REF;
7892 out = h->delayed_pic[0];
7894 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7895 if(h->delayed_pic[i]->poc < out->poc){
7896 out = h->delayed_pic[i];
7899 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
7900 h->outputed_poc= INT_MIN;
7901 out_of_order = out->poc < h->outputed_poc;
/* Heuristic: grow has_b_frames when POCs keep arriving out of order. */
7903 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7905 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7907 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
7908 || cur->pict_type == FF_B_TYPE)))
7911 s->avctx->has_b_frames++;
7914 if(out_of_order || pics > s->avctx->has_b_frames){
7915 out->reference &= ~DELAYED_PIC_REF;
7916 for(i=out_idx; h->delayed_pic[i]; i++)
7917 h->delayed_pic[i] = h->delayed_pic[i+1];
7919 if(!out_of_order && pics > s->avctx->has_b_frames){
7920 *data_size = sizeof(AVFrame);
7922 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
7923 h->outputed_poc = INT_MIN;
7925 h->outputed_poc = out->poc;
7926 *pict= *(AVFrame*)out;
7928 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7933 assert(pict->data[0] || !*data_size);
7934 ff_print_debug_info(s, pict);
7935 //printf("out %d\n", (int)pict->data[0]);
7937 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with the availability of the neighbouring macroblocks
 * of the current MB: [0]=top-left, [1]=top, [2]=top-right, [3]=left.
 * A neighbour is available only if it exists inside the picture and
 * belongs to the same slice (same slice_table entry / slice_num). */
7940 static inline void fill_mb_avail(H264Context *h){
7941 MpegEncContext * const s = &h->s;
7942 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7945 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7946 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7947 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7953 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7954 h->mb_avail[4]= 1; //FIXME move out
7955 h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness — presumably compiled only under a test build
 * (TODO confirm the guarding #ifdef, which is outside this excerpt).
 * Exercises exp-Golomb read/write round-trips, the 4x4 (I)DCT,
 * the quantizer and the NAL escape/unescape layer. */
7963 #define SIZE (COUNT*40)
7969 // int int_temp[10000];
7971 AVCodecContext avctx;
7973 dsputil_init(&dsp, &avctx);
/* Round-trip COUNT unsigned exp-Golomb codes and verify each value. */
7975 init_put_bits(&pb, temp, SIZE);
7976 printf("testing unsigned exp golomb\n");
7977 for(i=0; i<COUNT; i++){
7979 set_ue_golomb(&pb, i);
7980 STOP_TIMER("set_ue_golomb");
7982 flush_put_bits(&pb);
7984 init_get_bits(&gb, temp, 8*SIZE);
7985 for(i=0; i<COUNT; i++){
7988 s= show_bits(&gb, 24);
7991 j= get_ue_golomb(&gb);
7993 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7996 STOP_TIMER("get_ue_golomb");
/* Same round-trip for signed exp-Golomb codes, centred on zero. */
8000 init_put_bits(&pb, temp, SIZE);
8001 printf("testing signed exp golomb\n");
8002 for(i=0; i<COUNT; i++){
8004 set_se_golomb(&pb, i - COUNT/2);
8005 STOP_TIMER("set_se_golomb");
8007 flush_put_bits(&pb);
8009 init_get_bits(&gb, temp, 8*SIZE);
8010 for(i=0; i<COUNT; i++){
8013 s= show_bits(&gb, 24);
8016 j= get_se_golomb(&gb);
8017 if(j != i - COUNT/2){
8018 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8021 STOP_TIMER("get_se_golomb");
/* Forward DCT + scaling + IDCT on random blocks; accumulate the error
 * between the reconstruction and the original source. */
8025 printf("testing 4x4 (I)DCT\n");
8028 uint8_t src[16], ref[16];
8029 uint64_t error= 0, max_error=0;
8031 for(i=0; i<COUNT; i++){
8033 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8034 for(j=0; j<16; j++){
8035 ref[j]= random()%255;
8036 src[j]= random()%255;
8039 h264_diff_dct_c(block, src, ref, 4);
8042 for(j=0; j<16; j++){
8043 // printf("%d ", block[j]);
8044 block[j]= block[j]*4;
8045 if(j&1) block[j]= (block[j]*4 + 2)/5;
8046 if(j&4) block[j]= (block[j]*4 + 2)/5;
8050 s->dsp.h264_idct_add(ref, block, 4);
8051 /* for(j=0; j<16; j++){
8052 printf("%d ", ref[j]);
8056 for(j=0; j<16; j++){
8057 int diff= FFABS(src[j] - ref[j]);
8060 max_error= FFMAX(max_error, diff);
8063 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8064 printf("testing quantizer\n");
8065 for(qp=0; qp<52; qp++){
8067 src1_block[i]= src2_block[i]= random()%255;
/* NAL layer: encode a random bitstream (with forced zero runs to trigger
 * emulation-prevention escaping), decode it back, compare byte-for-byte. */
8070 printf("Testing NAL layer\n");
8072 uint8_t bitstream[COUNT];
8073 uint8_t nal[COUNT*2];
8075 memset(&h, 0, sizeof(H264Context));
8077 for(i=0; i<COUNT; i++){
8085 for(j=0; j<COUNT; j++){
8086 bitstream[j]= (random() % 255) + 1;
8089 for(j=0; j<zeros; j++){
8090 int pos= random() % COUNT;
8091 while(bitstream[pos] == 0){
8100 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8102 printf("encoding failed\n");
8106 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8110 if(out_length != COUNT){
8111 printf("incorrect length %d %d\n", out_length, COUNT);
8115 if(consumed != nal_length){
8116 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8120 if(memcmp(bitstream, out, COUNT)){
8121 printf("mismatch\n");
8127 printf("Testing RBSP\n");
/* Free all per-context H.264 state: the MB tables and every stored
 * SPS/PPS buffer. Safe against NULL entries (av_freep no-ops on NULL). */
8135 av_cold void ff_h264_free_context(H264Context *h)
8139 free_tables(h); //FIXME cleanup init stuff perhaps
8141 for(i = 0; i < MAX_SPS_COUNT; i++)
8142 av_freep(h->sps_buffers + i);
8144 for(i = 0; i < MAX_PPS_COUNT; i++)
8145 av_freep(h->pps_buffers + i);
/* AVCodec close callback: release the H.264 context's own state; the
 * shared MpegEncContext teardown happens in code elided from this view. */
8148 static av_cold int decode_end(AVCodecContext *avctx)
8150 H264Context *h = avctx->priv_data;
8151 MpegEncContext *s = &h->s;
8153 ff_h264_free_context(h);
8157 // memset(h, 0, sizeof(H264Context));
/* Software H.264 decoder registration: direct rendering plus frame delay
 * (reordering); draw_horiz_band deliberately disabled. */
8163 AVCodec h264_decoder = {
8167 sizeof(H264Context),
8172 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8174 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8175 .pix_fmts= ff_hwaccel_pixfmt_list_420,
/* VDPAU-accelerated variant: same context, but advertises the VDPAU
 * hwaccel capability and outputs the PIX_FMT_VDPAU_H264 surface format. */
8178 #if CONFIG_H264_VDPAU_DECODER
8179 AVCodec h264_vdpau_decoder = {
8183 sizeof(H264Context),
8188 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8190 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8191 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
8195 #if CONFIG_SVQ3_DECODER