2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
/* Static VLC descriptors together with the fixed-size storage declared for
 * them. Each *_size constant repeats a dimension of the matching table. */
54 static VLC coeff_token_vlc[4];
/* One flat array whose length is the sum of the four sizes listed below. */
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
/* Forward declarations of file-local (static) functions used before their
 * definitions. */
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two values into one 32-bit word, one per 16-bit half.
 * The return under "#ifdef WORDS_BIGENDIAN" places b in the low half and a
 * in the high half; the other return places a low and b high. In both
 * forms only the low-half operand is masked to 16 bits.
 */
84 static av_always_inline uint32_t pack16to32(int a, int b){
85 #ifdef WORDS_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
88 return (a&0xFFFF) + (b<<16);
/* Lookup tables for indices 0..51: rem6[i] == i % 6 and div6[i] == i / 6. */
92 static const uint8_t rem6[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
96 static const uint8_t div6[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Four rows of eight indices; fill_caches() selects one row as left_block
 * and uses its entries to index the left neighbour's per-block arrays. */
100 static const uint8_t left_block_options[4][8]={
/* cavlc_level_tab holds 7 tables of (1<<LEVEL_TAB_BITS) entries, two
 * int8_t values per entry. */
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Loads neighbour information for the macroblock at h->mb_xy into the
 * caches held in H264Context.
 *
 * Written here: h->top_mb_xy / h->left_mb_xy[], the *_samples_available
 * masks, intra4x4_pred_mode_cache, non_zero_count_cache, top_cbp / left_cbp,
 * mv_cache, ref_cache, mvd_cache, direct_cache and, at the end,
 * neighbor_transform_size (the number of the top and left[0] neighbours for
 * which IS_8x8DCT() is true).
 *
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock non-zero when the caches are being filled for
 *                    deblocking; it is tested in two of the conditions below
 */
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
// The top neighbour is mb_stride << FIELD_PICTURE entries before the current macroblock.
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
// Initial neighbour positions, expressed relative to top_xy and mb_xy.
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
// Positions of the macroblock pair containing the current macroblock and of
// the neighbouring pairs, plus the field (interlaced) flag of each.
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
// bottom is 1 for the macroblock in the odd row of the pair.
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
// Adjust each neighbour position by one row depending on the field flags
// of the current macroblock and of that neighbour.
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
// Left neighbour with a different field flag: address the left pair and
// pick a different row of left_block_options.
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
164 left_block= left_block_options[2 - bottom];
// Publish the resolved top / left neighbour positions.
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
// Neighbour types taken when slice_table[] < 0xFFFF, else 0.
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
181 for(list=0; list<h->list_count; list++){
182 //These values were changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// Neighbour types taken only when the neighbour belongs to the current
// slice (slice_table[] == slice_num), else 0.
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra macroblocks: start from fully available masks and clear bits for
// each neighbour whose type does not pass type_mask. With
// constrained_intra_pred the mask is IS_INTRA(-1), otherwise all bits.
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
// Intra 4x4: load the neighbours' prediction modes into the cache row above
// (index 4..7) and the cache column left (index 3 + 8*row) of the block.
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
254 if(!(top_type & type_mask))
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
270 if(!(left_type[i] & type_mask))
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
// Non-zero coefficient counts of the top neighbour, or a default of
// (cabac && !intra ? 0 : 64) in the alternative assignment chain.
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Same for the two left neighbours; left_block[] picks which of the
// neighbour's entries line up with each cached row.
319 for (i=0; i<2; i++) {
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Coded block pattern of the top and left neighbours, read from cbp_table.
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter / direct macroblocks: load neighbouring motion vectors and
// reference indices per reference list.
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h->mv_cache_clean[list]= 0;
// Top neighbour: copy its bottom row of motion vectors and references.
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
// Otherwise zero vectors; the reference is LIST_NOT_USED when the
// neighbour type is non-zero and PART_NOT_AVAILABLE when it is 0.
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left neighbours: two cache rows per left_xy[i].
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
// Top-left neighbour; topleft_partition (-1 or 0) masks the row offset.
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Top-right neighbour.
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Mark five fixed cache positions as unavailable with zero vectors.
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
446 /* XXX beurk, Load mvd */
// Motion vector differences of the top and left neighbours, read from
// h->mvd_table with the same indexing as the motion vectors above.
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: clear the 4x4 direct flags of the current macroblock, then set
// the neighbour entries to 1 (direct neighbour), to the stored
// direct_table value (8x8 neighbour) or to 0.
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// MAP_F2F is defined twice. The first form acts on non-interlaced
// neighbours with a valid reference: reference index doubled, vertical mv
// and mvd components halved. The second form acts on interlaced neighbours:
// reference index halved, vertical components doubled.
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Saves seven entries of h->intra4x4_pred_mode_cache into
 * h->intra4x4_pred_mode[] for the current macroblock (h->mb_xy).
 * With cache index = column + 8*row, slots 0..3 receive column 7 of rows
 * 1..4 and slots 4..6 receive columns 4..6 of row 4. fill_caches() reads
 * these slots back when this macroblock is a top or left neighbour.
 */
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
// Lookup tables indexed by the cached intra 4x4 prediction mode. The value
// found is either reported in the error messages below or written back to
// the cache as the mode to use.
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Bit 0x8000 of top_samples_available clear: check the blocks at cache
// positions scan8[0] + i against the top[] table.
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Any of the four left availability bits clear: check the blocks at cache
// positions scan8[0] + 8*i whose mask[i] bit is clear against left[].
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
// Replacement tables for the cases where top / left samples are missing
// (presumably indexed by mode; verify against the full function body).
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
// Top samples unavailable (bit 0x8000 clear).
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
// Left samples not fully available (bits 0x8000 and 0x0080 not both set).
621 if((h->left_samples_available&0x8080) != 0x8080){
// Exactly one of the two left bits is set: derive one of the
// ALZHEIMER_DC_* modes from which bit is missing and from whether
// mode is DC_128_PRED8x8.
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
// Cached modes of the blocks left of and above block n (cache index scan8[n]).
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
// A negative minimum (at least one neighbour entry is negative) yields DC_PRED.
646 if(min<0) return DC_PRED;
/**
 * Saves thirteen entries of h->non_zero_count_cache into
 * h->non_zero_count[] for the current macroblock (h->mb_xy).
 * With cache index = column + 8*row: slots 0..3 take column 7 of rows 1..4,
 * slots 4..6 take columns 4..6 of row 4, slots 7..9 take (2,1), (2,2) and
 * (1,2), and slots 10..12 take (2,4), (2,5) and (1,5), written as
 * (column,row). fill_caches() reads several of these slots back for the
 * top and left neighbours.
 */
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
// Cached counts of the blocks left of and above block n.
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
// Below 64 the combined value is halved, rounding up; fill_caches() stores
// 64 as the default count for some missing neighbours.
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Selects the diagonal neighbour used for motion vector prediction.
 * Points *C at a motion vector in h->mv_cache and returns a reference index
 * for it. The regular candidate is the top-right neighbour of the partition
 * at cache index i (i - 8 + part_width); when that entry is
 * PART_NOT_AVAILABLE the top-left neighbour (i - 8 - 1) is used. In some
 * cases involving interlaced neighbours a vector is instead read from the
 * picture's motion_val array by SET_DIAG_MV, stored in the scratch slot
 * mv_cache[list][scan8[0]-2] with its vertical component scaled by MV_OP.
 *
 * @param C          receives a pointer to the selected motion vector
 * @param i          cache index (scan8 position) of the partition
 * @param list       reference list
 * @param part_width partition width in 4-sample units
 */
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Point *C at the zeroed scratch slot; SET_DIAG_MV fills it in below.
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Top-right unavailable, partition in the leftmost cache column ((i&7)==4)
// and a left neighbour present: take the vector from the left macroblock.
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Regular case: top-right neighbour from the cache.
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
737 tprintf(s->avctx, "topright MV not available\n");
// Fallback: top-left neighbour from the cache.
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
// A is the left neighbour's vector and B the top neighbour's, both read
// from the caches at the position of block n.
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
// C is the diagonal neighbour chosen by fetch_diagonal_mv(); match_count
// is how many of the three neighbours use the requested reference.
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
// Two or three matches: component-wise median of A, B and C.
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
// Exactly one match: the branches distinguish which neighbour matched.
776 }else if(match_count==1){
780 }else if(top_ref==ref){
// No match: special case when only the left neighbour is available,
// otherwise the median again.
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Candidate from the top neighbour of block 0 (reference top_ref, vector B).
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// Candidate from the left neighbour of block 8 (reference left_ref, vector A).
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// General prediction with a partition width of 4 blocks.
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Candidate from the left neighbour of block 0 (reference left_ref, vector A).
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Candidate from the diagonal neighbour of block 4, partition width 2.
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
// General prediction with a partition width of 2 blocks.
869 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the motion vector of a P-skip macroblock from list 0.
 * @param mx receives the x component of the predicted motion vector
 * @param my receives the y component of the predicted motion vector
 */
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
// Special case: the top or left neighbour is unavailable, or one of them
// has reference index 0 together with an all-zero motion vector (the
// 32-bit load reads both vector components at once).
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
// General prediction for block 0, width 4, list 0, reference 0.
886 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes a distance scale factor from picture order counts.
 * With poc0 = h->ref_list[0][i].poc, td = clip(poc1 - poc0, -128, 127),
 * tb = clip(poc - poc0, -128, 127) and tx = (16384 + (|td| >> 1)) / td, the
 * result is (tb*tx + 32) >> 6 clipped to [-1024, 1023]. The case td == 0 or
 * a long-term reference (long_ref set) is handled separately, which also
 * keeps the division by td safe.
 *
 * @param poc  POC passed by the caller for the current picture or field
 * @param poc1 POC passed by the caller for the first list-1 reference
 * @param i    index into h->ref_list[0]
 */
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills the distance scale factor tables using get_scale_factor():
 *  - h->dist_scale_factor_field[field][i^field] for field 0 and 1, from the
 *    per-field POCs of the current picture and of ref_list[1][0], with
 *    ref_list[0] entries starting at index 16;
 *  - h->dist_scale_factor[i] for each of the h->ref_count[0] references,
 *    from the POC of the current picture (bottom-field POC when
 *    picture_structure is PICT_BOTTOM_FIELD) and ref_list[1][0].poc.
 */
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
908 for(field=0; field<2; field++){
// These shadow the outer poc / poc1 with per-field values.
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds map[list]: a translation from reference indices of the first
 * list-1 picture (h->ref_list[1][0]) to reference indices of the current
 * picture's list.
 * For each stored ref_poc key of that picture, h->ref_list[list] is searched
 * in [start, end) for an entry whose 4*frame_num + (reference&3) equals the
 * key; the index found is stored in map[list]. Entries without a match keep
 * the 0 written by the initial memset.
 *
 * @param map      output table, one row per list
 * @param list     reference list to process
 * @param field    field parity XORed into the stored indices
 * @param colfield selects which ref_count / ref_poc row of the list-1
 *                 picture is read
 * @param mbafi    non-zero selects the ref_list range starting at index 16
 *                 and field-style index mapping
 */
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
// A key with both low bits set is rewritten to refer to a single field
// (rfield + 1) when operating on fields.
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isn't needed
938 poc= (poc&~3) + rfield + 1;
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
945 map[list][old_ref] = cur_ref;
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
967 if(s->picture_structure == PICT_FRAME){
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a direct-predicted
 * macroblock (or its direct 8x8 partitions) from the colocated macroblock
 * of h->ref_list[1][0].
 *
 * Results are written to h->ref_cache, h->mv_cache and h->sub_mb_type;
 * partition and list flags are ORed into / cleared from *mb_type.
 *
 * @param h       decoder context
 * @param mb_type in/out type of the current macroblock
 *
 * NOTE(review): every line of this copy carries a stray leading number and
 * the numbering has gaps (e.g. 989, 994-995), so short lines such as
 * declarations, else branches and closing braces appear to be missing.
 */
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
996 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Select the colocated macroblock: mb_xy is adjusted when the current and
 * the colocated macroblock differ in frame/field coding or field parity. */
998 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
999 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1000 int cur_poc = s->current_picture_ptr->poc;
1001 int *col_poc = h->ref_list[1]->field_poc;
/* col_parity is 1 when field_poc[1] is at least as close to cur_poc as field_poc[0] */
1002 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1003 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1005 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1006 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1007 mb_xy += s->mb_stride*fieldoff;
1010 }else{ // AFL/AFR/FR/FL -> AFR/FR
1011 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
/* a field macroblock reads the types of both macroblocks of the colocated pair */
1012 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1013 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1014 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1017 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1018 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1019 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1024 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1025 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1027 }else{ // AFR/FR -> AFR/FR
1030 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1031 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1032 /* FIXME save sub mb types from previous frames (or derive from MVs)
1033 * so we know exactly what block size to use */
1034 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1035 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1036 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1040 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* Motion vectors and reference indices of the colocated picture, for both
 * of its lists, at the macroblock selected above. */
1046 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1047 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1048 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1049 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1052 l1ref0 += h->b8_stride;
1053 l1ref1 += h->b8_stride;
1054 l1mv0 += 2*b4_stride;
1055 l1mv1 += 2*b4_stride;
/* Spatial direct prediction: references and vectors come from the
 * neighbours cached in ref_cache/mv_cache; the colocated block only decides
 * whether a vector is forced to zero. */
1059 if(h->direct_spatial_mv_pred){
1064 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1066 /* ref = min(neighbors) */
1067 for(list=0; list<2; list++){
1068 int refa = h->ref_cache[list][scan8[0] - 1];
1069 int refb = h->ref_cache[list][scan8[0] - 8];
1070 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1071 if(refc == PART_NOT_AVAILABLE)
1072 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* the unsigned casts make negative indices compare as large values, so the
 * minimum is the smallest non-negative index when one exists */
1073 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1078 if(ref[0] < 0 && ref[1] < 0){
1079 ref[0] = ref[1] = 0;
1080 mv[0][0] = mv[0][1] =
1081 mv[1][0] = mv[1][1] = 0;
1083 for(list=0; list<2; list++){
1085 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1087 mv[list][0] = mv[list][1] = 0;
1093 *mb_type &= ~MB_TYPE_L1;
1094 sub_mb_type &= ~MB_TYPE_L1;
1095 }else if(ref[0] < 0){
1097 *mb_type &= ~MB_TYPE_L0;
1098 sub_mb_type &= ~MB_TYPE_L0;
/* current and colocated macroblock differ in frame/field coding */
1101 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1102 for(i8=0; i8<4; i8++){
1105 int xy8 = x8+y8*b8_stride;
1106 int xy4 = 3*x8+y8*b4_stride;
1109 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1111 h->sub_mb_type[i8] = sub_mb_type;
1113 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1114 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* the colocated block counts as static when it is not intra, uses reference
 * index 0 and both vector components have magnitude <= 1 */
1115 if(!IS_INTRA(mb_type_col[y8])
1116 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1117 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1119 a= pack16to32(mv[0][0],mv[0][1]);
1121 b= pack16to32(mv[1][0],mv[1][1]);
1123 a= pack16to32(mv[0][0],mv[0][1]);
1124 b= pack16to32(mv[1][0],mv[1][1]);
1126 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1127 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* whole-macroblock case: one reference and one vector per list */
1129 }else if(IS_16X16(*mb_type)){
1132 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1133 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1134 if(!IS_INTRA(mb_type_col[0])
1135 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1136 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1137 && (h->x264_build>33 || !h->x264_build)))){
1139 a= pack16to32(mv[0][0],mv[0][1]);
1141 b= pack16to32(mv[1][0],mv[1][1]);
1143 a= pack16to32(mv[0][0],mv[0][1]);
1144 b= pack16to32(mv[1][0],mv[1][1]);
1146 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1147 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* per-8x8 case: fill each direct partition, then zero the vectors of blocks
 * whose colocated vector is static */
1149 for(i8=0; i8<4; i8++){
1150 const int x8 = i8&1;
1151 const int y8 = i8>>1;
1153 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1155 h->sub_mb_type[i8] = sub_mb_type;
1157 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1158 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1159 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1160 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1163 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1164 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1165 && (h->x264_build>33 || !h->x264_build)))){
1166 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1167 if(IS_SUB_8X8(sub_mb_type)){
/* x8*3 + y8*3*b4_stride addresses the outer-corner 4x4 block of the 8x8 partition */
1168 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1169 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1171 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1173 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1176 for(i4=0; i4<4; i4++){
1177 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1178 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1180 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1182 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* Temporal direct prediction: the list-0 reference is the colocated
 * reference translated through map_col_to_list0, the list-0 vector is the
 * colocated vector scaled by dist_scale_factor, and the list-1 vector is the
 * list-0 vector minus the colocated vector. */
1188 }else{ /* direct temporal mv pred */
1189 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1190 const int *dist_scale_factor = h->dist_scale_factor;
/* field macroblocks of an MBAFF frame use the per-field tables */
1193 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1194 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1195 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1196 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1198 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1201 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1202 /* FIXME assumes direct_8x8_inference == 1 */
1203 int y_shift = 2*!IS_INTERLACED(*mb_type);
1205 for(i8=0; i8<4; i8++){
1206 const int x8 = i8&1;
1207 const int y8 = i8>>1;
1209 const int16_t (*l1mv)[2]= l1mv0;
1211 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1213 h->sub_mb_type[i8] = sub_mb_type;
1215 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* intra colocated block: reference 0 and zero vectors in both lists */
1216 if(IS_INTRA(mb_type_col[y8])){
1217 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1219 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1223 ref0 = l1ref0[x8 + y8*b8_stride];
1225 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1227 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1230 scale = dist_scale_factor[ref0];
1231 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1234 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
/* the vertical component is rescaled by y_shift to convert between frame and
 * field vector units before the temporal scaling */
1235 int my_col = (mv_col[1]<<y_shift)/2;
1236 int mx = (scale * mv_col[0] + 128) >> 8;
1237 int my = (scale * my_col + 128) >> 8;
1238 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1239 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1245 /* one-to-one mv scaling */
1247 if(IS_16X16(*mb_type)){
1250 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1251 if(IS_INTRA(mb_type_col[0])){
/* use the colocated list-0 data when its reference index is valid, otherwise list 1 */
1254 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1255 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1256 const int scale = dist_scale_factor[ref0];
1257 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1259 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1260 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1262 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1263 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1265 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1266 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1267 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1269 for(i8=0; i8<4; i8++){
1270 const int x8 = i8&1;
1271 const int y8 = i8>>1;
1273 const int16_t (*l1mv)[2]= l1mv0;
1275 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1277 h->sub_mb_type[i8] = sub_mb_type;
1278 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1279 if(IS_INTRA(mb_type_col[0])){
1280 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1282 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1286 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1288 ref0 = map_col_to_list0[0][ref0];
1290 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1293 scale = dist_scale_factor[ref0];
1295 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1296 if(IS_SUB_8X8(sub_mb_type)){
1297 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1298 int mx = (scale * mv_col[0] + 128) >> 8;
1299 int my = (scale * mv_col[1] + 128) >> 8;
1300 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1301 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
/* 4x4 sub-blocks: scale each colocated vector individually */
1303 for(i4=0; i4<4; i4++){
1304 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1305 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1306 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1307 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1308 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1309 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1316 static inline void write_back_motion(H264Context *h, int mb_type){
1317 MpegEncContext * const s = &h->s;
1318 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1319 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1322 if(!USES_LIST(mb_type, 0))
1323 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1325 for(list=0; list<h->list_count; list++){
1327 if(!USES_LIST(mb_type, list))
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1332 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1334 if( h->pps.cabac ) {
1335 if(IS_SKIP(mb_type))
1336 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1339 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1340 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1345 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1346 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1347 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1348 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1349 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1353 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1354 if(IS_8X8(mb_type)){
1355 uint8_t *direct_table = &h->direct_table[b8_xy];
1356 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1357 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1358 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Parses the one-byte NAL header and returns the payload with escape bytes
 * removed.
 *
 * The header byte sets h->nal_ref_idc (src[0]>>5) and h->nal_unit_type
 * (src[0]&0x1F). The payload is then scanned for a 00 00 0x sequence
 * (x <= 3). If none is found the input is reported unchanged with
 * *dst_length = length; otherwise the payload is copied into
 * h->rbsp_buffer[], the escapes are removed and the copy is followed by
 * FF_INPUT_BUFFER_PADDING_SIZE zero bytes.
 *
 * @param h          decoder context
 * @param src        NAL unit, starting at its header byte
 * @param dst_length receives the length of the returned payload
 * @param consumed   receives the number of input bytes used, header included
 * @param length     number of bytes available at src
 *
 * NOTE(review): every line of this copy carries a stray leading number and
 * the numbering has gaps, so short lines (declarations, #else/#endif,
 * returns, closing braces) appear to be missing.
 */
1363 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1368 // src[0]&0x80; //forbidden bit
1369 h->nal_ref_idc= src[0]>>5;
1370 h->nal_unit_type= src[0]&0x1F;
1374 for(i=0; i<length; i++)
1375 printf("%2X ", src[i]);
/* With fast unaligned loads, test 8 (or 4) bytes at a time; the mask
 * expression is non-zero only when one of the selected bytes is zero. */
1378 #if HAVE_FAST_UNALIGNED
1379 # if HAVE_FAST_64BIT
1381 for(i=0; i+1<length; i+=9){
1382 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1385 for(i=0; i+1<length; i+=5){
1386 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1389 if(i>0 && !src[i]) i--;
/* portable scan: look at every second byte, stepping back when the previous byte is zero too */
1393 for(i=0; i+1<length; i+=2){
1394 if(src[i]) continue;
1395 if(i>0 && src[i-1]==0) i--;
1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1399 /* startcode, so we must be past the end */
1407 if(i>=length-1){ //no escaped 0
1408 *dst_length= length;
1409 *consumed= length+1; //+1 for the header
1413 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/* NOTE(review): the result overwrites the stored pointer directly and no
 * NULL check is visible in this copy - confirm allocation failure is handled
 * before dst is written. */
1414 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1415 dst= h->rbsp_buffer[bufidx];
1421 //printf("decoding esc\n");
/* the first i bytes hold no escape and are copied unchanged */
1422 memcpy(dst, src, i);
1425 //remove escapes (very rare 1:2^22)
1427 dst[di++]= src[si++];
1428 dst[di++]= src[si++];
1429 }else if(src[si]==0 && src[si+1]==0){
1430 if(src[si+2]==3){ //escape
1435 }else //next start code
1439 dst[di++]= src[si++];
1442 dst[di++]= src[si++];
/* zero padding after the unescaped payload */
1445 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1448 *consumed= si + 1;//+1 for the header
1449 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1453 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1457 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1467 * IDCT transforms the 16 dc values and dequantizes them.
1468 * @param qp quantization parameter
1470 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1473 int temp[16]; //FIXME check if this is a good idea
1474 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1475 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1477 //memset(block, 64, 2*256);
1480 const int offset= y_offset[i];
1481 const int z0= block[offset+stride*0] + block[offset+stride*4];
1482 const int z1= block[offset+stride*0] - block[offset+stride*4];
1483 const int z2= block[offset+stride*1] - block[offset+stride*5];
1484 const int z3= block[offset+stride*1] + block[offset+stride*5];
1493 const int offset= x_offset[i];
1494 const int z0= temp[4*0+i] + temp[4*2+i];
1495 const int z1= temp[4*0+i] - temp[4*2+i];
1496 const int z2= temp[4*1+i] - temp[4*3+i];
1497 const int z3= temp[4*1+i] + temp[4*3+i];
1499 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1500 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1501 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1502 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1508 * DCT transforms the 16 dc values.
1509 * @param qp quantization parameter ??? FIXME
1511 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1512 // const int qmul= dequant_coeff[qp][0];
1514 int temp[16]; //FIXME check if this is a good idea
1515 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1516 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1519 const int offset= y_offset[i];
1520 const int z0= block[offset+stride*0] + block[offset+stride*4];
1521 const int z1= block[offset+stride*0] - block[offset+stride*4];
1522 const int z2= block[offset+stride*1] - block[offset+stride*5];
1523 const int z3= block[offset+stride*1] + block[offset+stride*5];
1532 const int offset= x_offset[i];
1533 const int z0= temp[4*0+i] + temp[4*2+i];
1534 const int z1= temp[4*0+i] - temp[4*2+i];
1535 const int z2= temp[4*1+i] - temp[4*3+i];
1536 const int z3= temp[4*1+i] + temp[4*3+i];
1538 block[stride*0 +offset]= (z0 + z3)>>1;
1539 block[stride*2 +offset]= (z1 + z2)>>1;
1540 block[stride*8 +offset]= (z1 - z2)>>1;
1541 block[stride*10+offset]= (z0 - z3)>>1;
1549 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1550 const int stride= 16*2;
1551 const int xStride= 16;
1554 a= block[stride*0 + xStride*0];
1555 b= block[stride*0 + xStride*1];
1556 c= block[stride*1 + xStride*0];
1557 d= block[stride*1 + xStride*1];
1564 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1565 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1566 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1567 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1571 static void chroma_dc_dct_c(DCTELEM *block){
1572 const int stride= 16*2;
1573 const int xStride= 16;
1576 a= block[stride*0 + xStride*0];
1577 b= block[stride*0 + xStride*1];
1578 c= block[stride*1 + xStride*0];
1579 d= block[stride*1 + xStride*1];
1586 block[stride*0 + xStride*0]= (a+c);
1587 block[stride*0 + xStride*1]= (e+b);
1588 block[stride*1 + xStride*0]= (a-c);
1589 block[stride*1 + xStride*1]= (e-b);
1594 * gets the chroma qp.
1596 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1597 return h->pps.chroma_qp_table[t][qscale];
1600 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1601 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1602 int src_x_offset, int src_y_offset,
1603 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1604 MpegEncContext * const s = &h->s;
1605 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1606 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1607 const int luma_xy= (mx&3) + ((my&3)<<2);
1608 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1609 uint8_t * src_cb, * src_cr;
1610 int extra_width= h->emu_edge_width;
1611 int extra_height= h->emu_edge_height;
1613 const int full_mx= mx>>2;
1614 const int full_my= my>>2;
1615 const int pic_width = 16*s->mb_width;
1616 const int pic_height = 16*s->mb_height >> MB_FIELD;
1618 if(mx&7) extra_width -= 3;
1619 if(my&7) extra_height -= 3;
1621 if( full_mx < 0-extra_width
1622 || full_my < 0-extra_height
1623 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1624 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1625 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1626 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1630 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1632 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1635 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1638 // chroma offset when predicting from a field of opposite parity
1639 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1640 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1642 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1643 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1646 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1647 src_cb= s->edge_emu_buffer;
1649 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1652 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1653 src_cr= s->edge_emu_buffer;
1655 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1658 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1659 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1660 int x_offset, int y_offset,
1661 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1662 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1663 int list0, int list1){
1664 MpegEncContext * const s = &h->s;
1665 qpel_mc_func *qpix_op= qpix_put;
1666 h264_chroma_mc_func chroma_op= chroma_put;
1668 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1669 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1670 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1671 x_offset += 8*s->mb_x;
1672 y_offset += 8*(s->mb_y >> MB_FIELD);
1675 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1676 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1677 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678 qpix_op, chroma_op);
1681 chroma_op= chroma_avg;
1685 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
1692 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1693 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1694 int x_offset, int y_offset,
1695 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1696 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1697 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1698 int list0, int list1){
1699 MpegEncContext * const s = &h->s;
1701 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1702 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1703 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1704 x_offset += 8*s->mb_x;
1705 y_offset += 8*(s->mb_y >> MB_FIELD);
1708 /* don't optimize for luma-only case, since B-frames usually
1709 * use implicit weights => chroma too. */
1710 uint8_t *tmp_cb = s->obmc_scratchpad;
1711 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1712 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1713 int refn0 = h->ref_cache[0][ scan8[n] ];
1714 int refn1 = h->ref_cache[1][ scan8[n] ];
1716 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1717 dest_y, dest_cb, dest_cr,
1718 x_offset, y_offset, qpix_put, chroma_put);
1719 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1720 tmp_y, tmp_cb, tmp_cr,
1721 x_offset, y_offset, qpix_put, chroma_put);
1723 if(h->use_weight == 2){
1724 int weight0 = h->implicit_weight[refn0][refn1];
1725 int weight1 = 64 - weight0;
1726 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1727 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1728 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1731 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1732 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1733 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1734 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1735 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1736 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1737 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1738 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1741 int list = list1 ? 1 : 0;
1742 int refn = h->ref_cache[list][ scan8[n] ];
1743 Picture *ref= &h->ref_list[list][refn];
1744 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1745 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1746 qpix_put, chroma_put);
1748 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1749 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1750 if(h->use_weight_chroma){
1751 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1752 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1753 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1759 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1760 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1761 int x_offset, int y_offset,
1762 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1763 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1764 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1765 int list0, int list1){
1766 if((h->use_weight==2 && list0 && list1
1767 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1768 || h->use_weight==1)
1769 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1770 x_offset, y_offset, qpix_put, chroma_put,
1771 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1773 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1774 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1777 static inline void prefetch_motion(H264Context *h, int list){
1778 /* fetch pixels for estimated mv 4 macroblocks ahead
1779 * optimized for 64byte cache lines */
1780 MpegEncContext * const s = &h->s;
1781 const int refn = h->ref_cache[list][scan8[0]];
1783 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1784 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1785 uint8_t **src= h->ref_list[list][refn].data;
1786 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1787 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1788 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1789 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1793 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1794 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1795 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1796 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1797 MpegEncContext * const s = &h->s;
1798 const int mb_xy= h->mb_xy;
1799 const int mb_type= s->current_picture.mb_type[mb_xy];
1801 assert(IS_INTER(mb_type));
1803 prefetch_motion(h, 0);
1805 if(IS_16X16(mb_type)){
1806 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1807 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1808 &weight_op[0], &weight_avg[0],
1809 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1810 }else if(IS_16X8(mb_type)){
1811 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1812 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1813 &weight_op[1], &weight_avg[1],
1814 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1815 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1816 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1817 &weight_op[1], &weight_avg[1],
1818 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1819 }else if(IS_8X16(mb_type)){
1820 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1821 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1822 &weight_op[2], &weight_avg[2],
1823 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1824 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1825 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1826 &weight_op[2], &weight_avg[2],
1827 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1831 assert(IS_8X8(mb_type));
1834 const int sub_mb_type= h->sub_mb_type[i];
1836 int x_offset= (i&1)<<2;
1837 int y_offset= (i&2)<<1;
1839 if(IS_SUB_8X8(sub_mb_type)){
1840 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1841 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1842 &weight_op[3], &weight_avg[3],
1843 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1844 }else if(IS_SUB_8X4(sub_mb_type)){
1845 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1846 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1847 &weight_op[4], &weight_avg[4],
1848 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1849 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1850 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1851 &weight_op[4], &weight_avg[4],
1852 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853 }else if(IS_SUB_4X8(sub_mb_type)){
1854 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1855 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1856 &weight_op[5], &weight_avg[5],
1857 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1858 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1859 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1860 &weight_op[5], &weight_avg[5],
1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864 assert(IS_SUB_4X4(sub_mb_type));
1866 int sub_x_offset= x_offset + 2*(j&1);
1867 int sub_y_offset= y_offset + (j&2);
1868 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870 &weight_op[6], &weight_avg[6],
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1877 prefetch_motion(h, 1);
1880 static av_cold void init_cavlc_level_tab(void){
1881 int suffix_length, mask;
1884 for(suffix_length=0; suffix_length<7; suffix_length++){
1885 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1886 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1887 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1889 mask= -(level_code&1);
1890 level_code= (((2+level_code)>>1) ^ mask) - mask;
1891 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1892 cavlc_level_tab[suffix_length][i][0]= level_code;
1893 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1894 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1895 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1896 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1898 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1899 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1905 static av_cold void decode_init_vlc(void){
1906 static int done = 0;
1913 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1914 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1915 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1916 &chroma_dc_coeff_token_len [0], 1, 1,
1917 &chroma_dc_coeff_token_bits[0], 1, 1,
1918 INIT_VLC_USE_NEW_STATIC);
1922 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1923 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1924 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1925 &coeff_token_len [i][0], 1, 1,
1926 &coeff_token_bits[i][0], 1, 1,
1927 INIT_VLC_USE_NEW_STATIC);
1928 offset += coeff_token_vlc_tables_size[i];
1931 * This is a one time safety check to make sure that
1932 * the packed static coeff_token_vlc table sizes
1933 * were initialized correctly.
1935 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1938 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1939 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1940 init_vlc(&chroma_dc_total_zeros_vlc[i],
1941 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1942 &chroma_dc_total_zeros_len [i][0], 1, 1,
1943 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1944 INIT_VLC_USE_NEW_STATIC);
1946 for(i=0; i<15; i++){
1947 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1948 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1949 init_vlc(&total_zeros_vlc[i],
1950 TOTAL_ZEROS_VLC_BITS, 16,
1951 &total_zeros_len [i][0], 1, 1,
1952 &total_zeros_bits[i][0], 1, 1,
1953 INIT_VLC_USE_NEW_STATIC);
1957 run_vlc[i].table = run_vlc_tables[i];
1958 run_vlc[i].table_allocated = run_vlc_tables_size;
1959 init_vlc(&run_vlc[i],
1961 &run_len [i][0], 1, 1,
1962 &run_bits[i][0], 1, 1,
1963 INIT_VLC_USE_NEW_STATIC);
1965 run7_vlc.table = run7_vlc_table,
1966 run7_vlc.table_allocated = run7_vlc_table_size;
1967 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1968 &run_len [6][0], 1, 1,
1969 &run_bits[6][0], 1, 1,
1970 INIT_VLC_USE_NEW_STATIC);
1972 init_cavlc_level_tab();
1976 static void free_tables(H264Context *h){
1979 av_freep(&h->intra4x4_pred_mode);
1980 av_freep(&h->chroma_pred_mode_table);
1981 av_freep(&h->cbp_table);
1982 av_freep(&h->mvd_table[0]);
1983 av_freep(&h->mvd_table[1]);
1984 av_freep(&h->direct_table);
1985 av_freep(&h->non_zero_count);
1986 av_freep(&h->slice_table_base);
1987 h->slice_table= NULL;
1989 av_freep(&h->mb2b_xy);
1990 av_freep(&h->mb2b8_xy);
1992 for(i = 0; i < h->s.avctx->thread_count; i++) {
1993 hx = h->thread_context[i];
1995 av_freep(&hx->top_borders[1]);
1996 av_freep(&hx->top_borders[0]);
1997 av_freep(&hx->s.obmc_scratchpad);
2001 static void init_dequant8_coeff_table(H264Context *h){
2003 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2004 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2005 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2007 for(i=0; i<2; i++ ){
2008 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2009 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2013 for(q=0; q<52; q++){
2014 int shift = div6[q];
2017 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2018 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2019 h->pps.scaling_matrix8[i][x]) << shift;
2024 static void init_dequant4_coeff_table(H264Context *h){
2026 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2027 for(i=0; i<6; i++ ){
2028 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2030 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2031 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2038 for(q=0; q<52; q++){
2039 int shift = div6[q] + 2;
2042 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2043 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2044 h->pps.scaling_matrix4[i][x]) << shift;
2049 static void init_dequant_tables(H264Context *h){
2051 init_dequant4_coeff_table(h);
2052 if(h->pps.transform_8x8_mode)
2053 init_dequant8_coeff_table(h);
2054 if(h->sps.transform_bypass){
2057 h->dequant4_coeff[i][0][x] = 1<<6;
2058 if(h->pps.transform_8x8_mode)
2061 h->dequant8_coeff[i][0][x] = 1<<6;
/* Allocate the macroblock-indexed tables of the context. Every table is sized
 * from big_mb_num = mb_stride * (mb_height + 1), so the stream dimensions
 * must be known before this is called.
 * NOTE(review): CHECKED_ALLOCZ presumably jumps to a failure label on
 * allocation error; that label is not visible in this listing -- confirm. */
2068 * needs width/height
2070 static int alloc_tables(H264Context *h){
2071 MpegEncContext * const s = &h->s;
2072 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2075 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2077 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2078 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2079 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2081 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2082 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2083 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2084 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* fill the whole slice table with 0xFF bytes; slice_table itself points
 * 2*mb_stride + 1 entries into the allocation so that negative offsets
 * (neighbours above / to the left) stay inside it */
2086 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2087 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2089 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2090 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* precompute, per macroblock, its index in the b_stride grid (4 entries per
 * macroblock in x and y) and in the b8_stride grid (2 entries per macroblock) */
2091 for(y=0; y<s->mb_height; y++){
2092 for(x=0; x<s->mb_width; x++){
2093 const int mb_xy= x + y*s->mb_stride;
2094 const int b_xy = 4*x + 4*y*h->b_stride;
2095 const int b8_xy= 2*x + 2*y*h->b8_stride;
2097 h->mb2b_xy [mb_xy]= b_xy;
2098 h->mb2b8_xy[mb_xy]= b8_xy;
2102 s->obmc_scratchpad = NULL;
/* build the dequant tables only if they have not been set up yet */
2104 if(!h->dequant4_coeff[0])
2105 init_dequant_tables(h);
2114 * Mimic alloc_tables(), but for every context thread.
2116 static void clone_tables(H264Context *dst, H264Context *src){
2117 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2118 dst->non_zero_count = src->non_zero_count;
2119 dst->slice_table = src->slice_table;
2120 dst->cbp_table = src->cbp_table;
2121 dst->mb2b_xy = src->mb2b_xy;
2122 dst->mb2b8_xy = src->mb2b8_xy;
2123 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2124 dst->mvd_table[0] = src->mvd_table[0];
2125 dst->mvd_table[1] = src->mvd_table[1];
2126 dst->direct_table = src->direct_table;
2128 dst->s.obmc_scratchpad = NULL;
2129 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
/* Allocates the two top-border buffers of this context, (16+8+8) bytes per
 * macroblock column each.
 * NOTE(review): CHECKED_ALLOCZ presumably jumps to the failure path that
 * returns -1; the label itself is not visible in this listing -- confirm. */
2134 * Allocate buffers which are not shared amongst multiple threads.
2136 static int context_init(H264Context *h){
2137 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2138 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2142 return -1; // free_tables will clean up for us
2145 static av_cold void common_init(H264Context *h){
2146 MpegEncContext * const s = &h->s;
2148 s->width = s->avctx->width;
2149 s->height = s->avctx->height;
2150 s->codec_id= s->avctx->codec->id;
2152 ff_h264_pred_init(&h->hpc, s->codec_id);
2154 h->dequant_coeff_pps= -1;
2155 s->unrestricted_mv=1;
2156 s->decode=1; //FIXME
2158 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2160 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2161 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2165 * Reset SEI values at the beginning of the frame.
2167 * @param h H.264 context.
2169 static void reset_sei(H264Context *h) {
2170 h->sei_recovery_frame_cnt = -1;
2171 h->sei_dpb_output_delay = 0;
2172 h->sei_cpb_removal_delay = -1;
2173 h->sei_buffering_period_present = 0;
/* Decoder init callback: applies the MpegEncContext defaults, selects the
 * pixel format and hardware accelerator, and sets the initial H.264 state of
 * the context stored in avctx->priv_data. */
2176 static av_cold int decode_init(AVCodecContext *avctx){
2177 H264Context *h= avctx->priv_data;
2178 MpegEncContext * const s = &h->s;
2180 MPV_decode_defaults(s);
2185 s->out_format = FMT_H264;
2186 s->workaround_bugs= avctx->workaround_bugs;
2189 // s->decode_mb= ff_h263_decode_mb;
2190 s->quarter_sample = 1;
/* codecs flagged with CODEC_CAP_HWACCEL_VDPAU use the fixed VDPAU pixel
 * format; the other assignment asks the get_format() callback */
2193 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2194 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2196 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2197 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
/* extradata whose first byte is 1 is handled specially
 * NOTE(review): presumably this marks avcC-style extradata; the branch body
 * is not visible in this listing -- confirm */
2201 if(avctx->extradata_size > 0 && avctx->extradata &&
2202 *(char *)avctx->extradata == 1){
2209 h->thread_context[0] = h;
2210 h->outputed_poc = INT_MIN;
2211 h->prev_poc_msb= 1<<16;
/* for CODEC_ID_H264: if ticks_per_frame is still 1 the time base denominator
 * is doubled, and ticks_per_frame is then set to 2 */
2213 if(avctx->codec_id == CODEC_ID_H264){
2214 if(avctx->ticks_per_frame == 1){
2215 s->avctx->time_base.den *=2;
2217 avctx->ticks_per_frame = 2;
/* Per-frame setup: starts the frame in the MpegEncContext layer, computes the
 * block_offset table from the line sizes, makes sure every thread context has
 * a scratchpad and resets per-picture reference state. */
2222 static int frame_start(H264Context *h){
2223 MpegEncContext * const s = &h->s;
2226 if(MPV_frame_start(s, s->avctx) < 0)
2228 ff_er_frame_start(s);
2230 * MPV_frame_start uses pict_type to derive key_frame.
2231 * This is incorrect for H.264; IDR markings must be used.
2232 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2233 * See decode_nal_units().
2235 s->current_picture_ptr->key_frame= 0;
2237 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23] use a vertical step of 4 lines; the second set at
 * index 24 and up uses 8 lines, i.e. twice the vertical step */
2239 for(i=0; i<16; i++){
2240 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2241 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2244 h->block_offset[16+i]=
2245 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2246 h->block_offset[24+16+i]=
2247 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2250 /* can't be in alloc_tables because linesize isn't known there.
2251 * FIXME: redo bipred weight to not require extra buffer? */
/* NOTE(review): the av_malloc() result below is stored without being checked */
2252 for(i = 0; i < s->avctx->thread_count; i++)
2253 if(!h->thread_context[i]->s.obmc_scratchpad)
2254 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2256 /* some macroblocks will be accessed before they're available */
2257 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2258 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2260 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2262 // We mark the current picture as non-reference after allocating it, so
2263 // that if we break out due to an error it can be released automatically
2264 // in the next MPV_frame_start().
2265 // SVQ3 as well as most other codecs have only last/next/current and thus
2266 // get released even with set reference, besides SVQ3 and others do not
2267 // mark frames as reference later "naturally".
2268 if(s->codec_id != CODEC_ID_SVQ3)
2269 s->current_picture_ptr->reference= 0;
/* both field POCs start out as INT_MAX */
2271 s->current_picture_ptr->field_poc[0]=
2272 s->current_picture_ptr->field_poc[1]= INT_MAX;
2273 assert(s->current_picture_ptr->long_ref==0);
/* Copies border samples of the macroblock at src_y / src_cb / src_cr into
 * h->top_borders[][mb_x] (8 bytes at a time through uint64_t accesses) and
 * h->left_border. Chroma is skipped when gray-only decoding is active.
 * The MBAFF-specific offsets are only used when simple is 0.
 * NOTE(review): presumably this preserves samples needed for later intra
 * prediction once the picture has been deblocked -- confirm against callers. */
2278 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2279 MpegEncContext * const s = &h->s;
2288 src_cb -= uvlinesize;
2289 src_cr -= uvlinesize;
2291 if(!simple && FRAME_MBAFF){
2293 offset = MB_MBAFF ? 1 : 17;
2294 uvoffset= MB_MBAFF ? 1 : 9;
2296 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2297 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2298 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2305 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2306 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2307 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2308 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2314 top_idx = MB_MBAFF ? 0 : 1;
2316 step= MB_MBAFF ? 2 : 1;
2319 // There are two lines saved, the line above the top macroblock of a pair,
2320 // and the line above the bottom macroblock
2321 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
/* luma: column 15 of each row goes to left_border, then the row at
 * 16*linesize goes to top_borders */
2322 for(i=1; i<17 - skiplast; i++){
2323 h->left_border[offset+i*step]= src_y[15+i* linesize];
2326 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2327 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma: same scheme with column 7 and the row at 8*uvlinesize; the cb and
 * cr parts of left_border start at 34 and 34+18 */
2329 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2331 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2332 for(i=1; i<9 - skiplast; i++){
2333 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2334 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2336 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2337 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Applies the XCHG macro between the saved border samples in h->left_border /
 * h->top_borders and the picture samples directly left of and above the
 * macroblock (the source pointers are moved up one line and left one sample
 * first). Which edges are touched depends on deblock_left / deblock_top.
 * NOTE(review): the XCHG macro body is not visible in this listing;
 * presumably its last argument selects between a swap and a one-way copy --
 * confirm. */
2341 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2342 MpegEncContext * const s = &h->s;
2353 if(!simple && FRAME_MBAFF){
2355 offset = MB_MBAFF ? 1 : 17;
2356 uvoffset= MB_MBAFF ? 1 : 9;
2360 top_idx = MB_MBAFF ? 0 : 1;
2362 step= MB_MBAFF ? 2 : 1;
/* with deblocking_filter == 2 an edge counts only when the neighbour has the
 * same slice_table entry; the other pair of assignments uses the picture
 * position only */
2365 if(h->deblocking_filter == 2) {
2367 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2368 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2370 deblock_left = (s->mb_x > 0);
2371 deblock_top = (s->mb_y > !!MB_FIELD);
2374 src_y -= linesize + 1;
2375 src_cb -= uvlinesize + 1;
2376 src_cr -= uvlinesize + 1;
2378 #define XCHG(a,b,t,xchg)\
2385 for(i = !deblock_top; i<16; i++){
2386 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2388 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2392 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2393 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* the top-right neighbour is only touched when it exists */
2394 if(s->mb_x+1 < s->mb_width){
2395 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2399 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2401 for(i = !deblock_top; i<8; i++){
2402 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2403 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2405 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2406 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2409 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2410 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstructs the current macroblock (s->mb_x, s->mb_y) into the current
 * picture: PCM copy, or intra prediction / motion compensation followed by
 * adding the luma and chroma residual from h->mb, then optional deblocking.
 *
 * simple: when nonzero, every path guarded by !simple is compiled out
 * (field/MBAFF handling, PCM, transform bypass) and the gray-only checks are
 * skipped; is_h264 is then always true. */
2415 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2416 MpegEncContext * const s = &h->s;
2417 const int mb_x= s->mb_x;
2418 const int mb_y= s->mb_y;
2419 const int mb_xy= h->mb_xy;
2420 const int mb_type= s->current_picture.mb_type[mb_xy];
2421 uint8_t *dest_y, *dest_cb, *dest_cr;
2422 int linesize, uvlinesize /*dct_offset*/;
2424 int *block_offset = &h->block_offset[0];
2425 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2426 /* is_h264 should always be true if SVQ3 is disabled. */
2427 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2428 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2429 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination of this macroblock: 16x16 luma, 8x8 for each chroma plane */
2431 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2432 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2433 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2435 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2436 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: line sizes are doubled and the second half of
 * block_offset (index 24 and up) is used; for odd mb_y the destination is
 * moved up by 15 luma / 7 chroma lines */
2438 if (!simple && MB_FIELD) {
2439 linesize = h->mb_linesize = s->linesize * 2;
2440 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2441 block_offset = &h->block_offset[24];
2442 if(mb_y&1){ //FIXME move out of this function?
2443 dest_y -= s->linesize*15;
2444 dest_cb-= s->uvlinesize*7;
2445 dest_cr-= s->uvlinesize*7;
/* rewrite the cached reference indices as (16+ref)^(mb_y&1) for the lists
 * this macroblock uses */
2449 for(list=0; list<h->list_count; list++){
2450 if(!USES_LIST(mb_type, list))
2452 if(IS_16X16(mb_type)){
2453 int8_t *ref = &h->ref_cache[list][scan8[0]];
2454 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2456 for(i=0; i<16; i+=4){
2457 int ref = h->ref_cache[list][scan8[i]];
2459 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2465 linesize = h->mb_linesize = s->linesize;
2466 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2467 // dct_offset = s->linesize * 16;
/* PCM macroblock: the samples held in h->mb are copied straight into the
 * picture (16 luma rows of 16 bytes, 8 rows of 8 bytes per chroma plane) */
2470 if (!simple && IS_INTRA_PCM(mb_type)) {
2471 for (i=0; i<16; i++) {
2472 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2474 for (i=0; i<8; i++) {
2475 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2476 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
/* intra macroblock: the saved borders are exchanged in around the prediction
 * when deblocking is enabled (see the matching call with xchg=0 further down) */
2479 if(IS_INTRA(mb_type)){
2480 if(h->deblocking_filter)
2481 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2483 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2484 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2485 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2488 if(IS_INTRA4x4(mb_type)){
2489 if(simple || !s->encoding){
/* 8x8 transform: four 8x8 blocks, each predicted and then reconstructed */
2490 if(IS_8x8DCT(mb_type)){
2491 if(transform_bypass){
2493 idct_add = s->dsp.add_pixels8;
2495 idct_dc_add = s->dsp.h264_idct8_dc_add;
2496 idct_add = s->dsp.h264_idct8_add;
2498 for(i=0; i<16; i+=4){
2499 uint8_t * const ptr= dest_y + block_offset[i];
2500 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2501 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2502 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2504 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2505 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2506 (h->topright_samples_available<<i)&0x4000, linesize);
2508 if(nnz == 1 && h->mb[i*16])
2509 idct_dc_add(ptr, h->mb + i*16, linesize);
2511 idct_add (ptr, h->mb + i*16, linesize);
/* 4x4 transform: sixteen 4x4 blocks */
2516 if(transform_bypass){
2518 idct_add = s->dsp.add_pixels4;
2520 idct_dc_add = s->dsp.h264_idct_dc_add;
2521 idct_add = s->dsp.h264_idct_add;
2523 for(i=0; i<16; i++){
2524 uint8_t * const ptr= dest_y + block_offset[i];
2525 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2527 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2528 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* these two modes read top-right samples; when they are unavailable the
 * sample at ptr[3 - linesize] is replicated four times instead */
2532 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2533 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2534 assert(mb_y || linesize <= block_offset[i]);
2535 if(!topright_avail){
2536 tr= ptr[3 - linesize]*0x01010101;
2537 topright= (uint8_t*) &tr;
2539 topright= ptr + 4 - linesize;
2543 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2544 nnz = h->non_zero_count_cache[ scan8[i] ];
2547 if(nnz == 1 && h->mb[i*16])
2548 idct_dc_add(ptr, h->mb + i*16, linesize);
2550 idct_add (ptr, h->mb + i*16, linesize);
2552 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: whole-macroblock prediction, then the luma DC transform */
2559 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2561 if(!transform_bypass)
2562 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2564 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2566 if(h->deblocking_filter)
2567 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* motion compensation -- NOTE(review): presumably the non-intra branch; the
 * enclosing else line is not visible in this listing */
2569 hl_motion(h, dest_y, dest_cb, dest_cr,
2570 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2571 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2572 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for everything except intra 4x4, which was already
 * reconstructed block by block above */
2576 if(!IS_INTRA4x4(mb_type)){
2578 if(IS_INTRA16x16(mb_type)){
2579 if(transform_bypass){
2580 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2581 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2583 for(i=0; i<16; i++){
2584 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2585 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2589 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2591 }else if(h->cbp&15){
2592 if(transform_bypass){
2593 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2594 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2595 for(i=0; i<16; i+=di){
2596 if(h->non_zero_count_cache[ scan8[i] ]){
2597 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2601 if(IS_8x8DCT(mb_type)){
2602 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2604 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2609 for(i=0; i<16; i++){
2610 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2611 uint8_t * const ptr= dest_y + block_offset[i];
2612 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: only when chroma is decoded and the cbp chroma bits (0x30)
 * are set; blocks 16..19 go to cb and 20..23 to cr via dest[(i&4)>>2] */
2618 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2619 uint8_t *dest[2] = {dest_cb, dest_cr};
2620 if(transform_bypass){
2621 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2622 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2623 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2625 idct_add = s->dsp.add_pixels4;
2626 for(i=16; i<16+8; i++){
2627 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2628 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2632 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2633 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2635 idct_add = s->dsp.h264_idct_add;
2636 idct_dc_add = s->dsp.h264_idct_dc_add;
2637 for(i=16; i<16+8; i++){
2638 if(h->non_zero_count_cache[ scan8[i] ])
2639 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2640 else if(h->mb[i*16])
2641 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2644 for(i=16; i<16+8; i++){
2645 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2646 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2647 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* the coefficient buffer is cleared for the next macroblock */
2654 if(h->cbp || IS_INTRA(mb_type))
2655 s->dsp.clear_blocks(h->mb);
/* deblocking: save the borders first, refill the caches, recompute the chroma
 * QPs from the stored qscale, then run the full filter for MBAFF (non-simple)
 * or the fast one */
2657 if(h->deblocking_filter) {
2658 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2659 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2660 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2661 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2662 if (!simple && FRAME_MBAFF) {
2663 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2665 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2671 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2673 static void hl_decode_mb_simple(H264Context *h){
2674 hl_decode_mb_internal(h, 1);
2678 * Process a macroblock; this handles edge cases, such as interlacing.
2680 static void av_noinline hl_decode_mb_complex(H264Context *h){
2681 hl_decode_mb_internal(h, 0);
2684 static void hl_decode_mb(H264Context *h){
2685 MpegEncContext * const s = &h->s;
2686 const int mb_xy= h->mb_xy;
2687 const int mb_type= s->current_picture.mb_type[mb_xy];
2688 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2691 hl_decode_mb_complex(h);
2692 else hl_decode_mb_simple(h);
2695 static void pic_as_field(Picture *pic, const int parity){
2697 for (i = 0; i < 4; ++i) {
2698 if (parity == PICT_BOTTOM_FIELD)
2699 pic->data[i] += pic->linesize[i];
2700 pic->reference = parity;
2701 pic->linesize[i] *= 2;
2703 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* match is nonzero when src->reference has any of the parity bits set. For a
 * field parity, dest is converted with pic_as_field() and id_add is added to
 * its pic_id.
 * NOTE(review): the copy from src to dest and the return statement are not
 * visible in this listing; presumably the function returns match -- confirm. */
2706 static int split_field_copy(Picture *dest, Picture *src,
2707 int parity, int id_add){
2708 int match = !!(src->reference & parity);
2712 if(parity != PICT_FRAME){
2713 pic_as_field(dest, parity);
2715 dest->pic_id += id_add;
/* Appends entries from in[0..len-1] to def using two cursors: i[0] advances
 * to the next picture whose reference has the sel bits, i[1] to the next one
 * with the opposite bits (sel^3). Before each copy the source pic_id is set
 * to its array index (is_long) or to its frame_num.
 * NOTE(review): the index bookkeeping and the return value are not visible in
 * this listing; callers add the result to a length, so it is presumably the
 * number of entries written -- confirm. */
2722 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2726 while(i[0]<len || i[1]<len){
2727 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2729 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2732 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2733 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2736 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2737 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection sort by POC: repeatedly picks from src the picture whose poc lies
 * beyond limit and is closest to it, in the direction given by dir, stores it
 * in sorted and moves limit to that poc. The search ends when no candidate is
 * left (best_poc still holds its INT_MIN / INT_MAX start value).
 * NOTE(review): the outer loop and the return statement are not visible in
 * this listing; presumably the number of stored entries is returned -- confirm. */
2744 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2749 best_poc= dir ? INT_MIN : INT_MAX;
2751 for(i=0; i<len; i++){
2752 const int poc= src[i]->poc;
2753 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2755 sorted[out_i]= src[i];
2758 if(best_poc == (dir ? INT_MIN : INT_MAX))
2760 limit= sorted[out_i++]->poc - dir;
2766 * fills the default_ref_list.
2768 static int fill_default_ref_list(H264Context *h){
2769 MpegEncContext * const s = &h->s;
2772 if(h->slice_type_nos==FF_B_TYPE){
2773 Picture *sorted[32];
2778 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2780 cur_poc= s->current_picture_ptr->poc;
2782 for(list= 0; list<2; list++){
2783 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2784 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2786 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2787 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2790 if(len < h->ref_count[list])
2791 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2795 if(lens[0] == lens[1] && lens[1] > 1){
2796 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2798 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2801 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2802 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2804 if(len < h->ref_count[0])
2805 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2808 for (i=0; i<h->ref_count[0]; i++) {
2809 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2811 if(h->slice_type_nos==FF_B_TYPE){
2812 for (i=0; i<h->ref_count[1]; i++) {
2813 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
/* forward declarations; decode_ref_pic_list_reordering() below calls
 * print_short_term() */
2820 static void print_short_term(H264Context *h);
2821 static void print_long_term(H264Context *h);
/* *structure starts as the picture structure of the current picture and is
 * XORed with PICT_FRAME (i.e. switched to the opposite field) in the
 * opposite-field case.
 * NOTE(review): the condition selecting that case and the computation of the
 * return value are not visible in this listing -- confirm against the full
 * source. */
2824 * Extract structure information about the picture described by pic_num in
2825 * the current decoding context (frame or field). Note that pic_num is
2826 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2827 * @param pic_num picture number for which to extract structure information
2828 * @param structure one of PICT_XXX describing structure of picture
2830 * @return frame number (short term) or long term index of picture
2831 * described by pic_num
2833 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2834 MpegEncContext * const s = &h->s;
2836 *structure = s->picture_structure;
2839 /* opposite field */
2840 *structure ^= PICT_FRAME;
/* Parses the reference picture list reordering commands from the bitstream
 * (s->gb) and applies them to h->ref_list, which starts out as a copy of
 * h->default_ref_list for every active list. Afterwards every entry whose
 * data[0] is still NULL is reported and replaced by the current picture. */
2847 static int decode_ref_pic_list_reordering(H264Context *h){
2848 MpegEncContext * const s = &h->s;
2849 int list, index, pic_structure;
2851 print_short_term(h);
2854 for(list=0; list<h->list_count; list++){
2855 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* one flag bit per list says whether reordering commands follow */
2857 if(get_bits1(&s->gb)){
2858 int pred= h->curr_pic_num;
2860 for(index=0; ; index++){
2861 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2862 unsigned int pic_id;
2864 Picture *ref = NULL;
/* NOTE(review): idc 3 presumably terminates the command loop; the statement
 * under this test is not visible in this listing */
2866 if(reordering_of_pic_nums_idc==3)
2869 if(index >= h->ref_count[list]){
2870 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2874 if(reordering_of_pic_nums_idc<3){
/* idc 0 / 1: short-term picture, addressed by a difference that is subtracted
 * from (0) or added to (1) the running prediction pred */
2875 if(reordering_of_pic_nums_idc<2){
2876 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2879 if(abs_diff_pic_num > h->max_pic_num){
2880 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2884 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2885 else pred+= abs_diff_pic_num;
/* wrap-around by masking -- assumes max_pic_num is a power of two */
2886 pred &= h->max_pic_num - 1;
2888 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search the short-term list from the end for a matching frame_num whose
 * reference bits include the requested structure */
2890 for(i= h->short_ref_count-1; i>=0; i--){
2891 ref = h->short_ref[i];
2892 assert(ref->reference);
2893 assert(!ref->long_ref);
2895 ref->frame_num == frame_num &&
2896 (ref->reference & pic_structure)
/* idc 2: long-term picture, addressed by its long-term index */
2904 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2906 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2909 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2912 ref = h->long_ref[long_idx];
2913 assert(!(ref && !ref->reference));
2914 if(ref && (ref->reference & pic_structure)){
2915 ref->pic_id= pic_id;
2916 assert(ref->long_ref);
2924 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2925 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* locate the picture further down the list (or stop at the last slot), shift
 * the entries in between down by one and insert the picture at index */
2927 for(i=index; i+1<h->ref_count[list]; i++){
2928 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2931 for(; i > index; i--){
2932 h->ref_list[list][i]= h->ref_list[list][i-1];
2934 h->ref_list[list][index]= *ref;
2936 pic_as_field(&h->ref_list[list][index], pic_structure);
2940 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* final pass: substitute the current picture for every missing reference */
2946 for(list=0; list<h->list_count; list++){
2947 for(index= 0; index < h->ref_count[list]; index++){
2948 if(!h->ref_list[list][index].data[0]){
2949 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2950 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For every reference frame i of both lists, sets up two field entries at
 * ref_list[list][16+2*i] (top field) and [16+2*i+1] (bottom field, data
 * pointers advanced by one frame line), and duplicates the explicit and
 * implicit prediction weights of frame i for both field entries. */
2958 static void fill_mbaff_ref_list(H264Context *h){
2960 for(list=0; list<2; list++){ //FIXME try list_count
2961 for(i=0; i<h->ref_count[list]; i++){
2962 Picture *frame = &h->ref_list[list][i];
2963 Picture *field = &h->ref_list[list][16+2*i];
2966 field[0].linesize[j] <<= 1;
2967 field[0].reference = PICT_TOP_FIELD;
2968 field[0].poc= field[0].field_poc[0];
2969 field[1] = field[0];
2971 field[1].data[j] += frame->linesize[j];
2972 field[1].reference = PICT_BOTTOM_FIELD;
2973 field[1].poc= field[1].field_poc[1];
2975 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2976 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2978 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2979 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights: first widen each row j to the field columns, then copy
 * the whole row to the two field rows 16+2*j and 16+2*j+1 */
2983 for(j=0; j<h->ref_count[1]; j++){
2984 for(i=0; i<h->ref_count[0]; i++)
2985 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2986 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2987 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parses the explicit prediction weight table from the slice bitstream
// (s->gb) into h->luma_weight / luma_offset / chroma_weight / chroma_offset.
// Entries whose flag bit is 0 get the default weight (1 << log2 denominator)
// and offset 0. The per-list flags and h->use_weight_chroma are raised when a
// parsed entry differs from those defaults.
// NOTE(review): the embedded line numbers have gaps, so declarations, else
// branches, the loops over j and the return statement are not visible here.
2991 static int pred_weight_table(H264Context *h){
2992 MpegEncContext * const s = &h->s;
2994 int luma_def, chroma_def;
2997 h->use_weight_chroma= 0;
// NOTE(review): both denominators come straight from get_ue_golomb and are
// used as shift counts on the next lines with no range check in between; an
// out-of-range value from a corrupt stream would make the shift undefined -
// confirm whether a check exists elsewhere.
2998 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2999 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3000 luma_def = 1<<h->luma_log2_weight_denom;
3001 chroma_def = 1<<h->chroma_log2_weight_denom;
3003 for(list=0; list<2; list++){
3004 h->luma_weight_flag[list] = 0;
3005 h->chroma_weight_flag[list] = 0;
3006 for(i=0; i<h->ref_count[list]; i++){
3007 int luma_weight_flag, chroma_weight_flag;
// Luma: one flag bit, then signed Exp-Golomb weight and offset when set.
3009 luma_weight_flag= get_bits1(&s->gb);
3010 if(luma_weight_flag){
3011 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3012 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3013 if( h->luma_weight[list][i] != luma_def
3014 || h->luma_offset[list][i] != 0) {
3016 h->luma_weight_flag[list]= 1;
// Default luma weight and offset (presumably the else branch of the flag
// test above; the else line itself is not shown).
3019 h->luma_weight[list][i]= luma_def;
3020 h->luma_offset[list][i]= 0;
// Chroma: one flag bit, then a weight and offset pair per index j.
3024 chroma_weight_flag= get_bits1(&s->gb);
3025 if(chroma_weight_flag){
3028 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3029 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3030 if( h->chroma_weight[list][i][j] != chroma_def
3031 || h->chroma_offset[list][i][j] != 0) {
3032 h->use_weight_chroma= 1;
3033 h->chroma_weight_flag[list]= 1;
3039 h->chroma_weight[list][i][j]= chroma_def;
3040 h->chroma_offset[list][i][j]= 0;
// Only B slices carry a table for the second list.
3045 if(h->slice_type_nos != FF_B_TYPE) break;
3047 h->use_weight= h->use_weight || h->use_weight_chroma;
// Fills h->implicit_weight[ref0][ref1] from the POC distances between the
// current picture and each pair of list 0 / list 1 references.
// NOTE(review): the embedded line numbers have gaps, so declarations, the
// use_weight assignments, an early return and some branch headers are not
// visible in this listing.
3051 static void implicit_weight_table(H264Context *h){
3052 MpegEncContext * const s = &h->s;
3054 int cur_poc = s->current_picture_ptr->poc;
3056 for (i = 0; i < 2; i++) {
3057 h->luma_weight_flag[i] = 0;
3058 h->chroma_weight_flag[i] = 0;
// Special case: exactly one reference per list, with POCs symmetric around
// the current POC.
3061 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3062 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3064 h->use_weight_chroma= 0;
// General case: weights are expressed with a log2 denominator of 5.
3069 h->use_weight_chroma= 2;
3070 h->luma_log2_weight_denom= 5;
3071 h->chroma_log2_weight_denom= 5;
3073 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3074 int poc0 = h->ref_list[0][ref0].poc;
3075 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3076 int poc1 = h->ref_list[1][ref1].poc;
3077 int td = av_clip(poc1 - poc0, -128, 127);
// NOTE(review): tx divides by td; the guard against td == 0 is presumably on
// the line missing between 3077 and 3079 - confirm against the full file.
3079 int tb = av_clip(cur_poc - poc0, -128, 127);
3080 int tx = (16384 + (FFABS(td) >> 1)) / td;
3081 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Scale factors outside [-64, 128] fall back to the equal weight 32.
3082 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3083 h->implicit_weight[ref0][ref1] = 32;
3085 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3087 h->implicit_weight[ref0][ref1] = 32;
3093 * Mark a picture as no longer needed for reference. The refmask
3094 * argument allows unreferencing of individual fields or the whole frame.
3095 * If the picture becomes entirely unreferenced, but is being held for
3096 * display purposes, it is marked as such.
3097 * @param refmask mask of fields to unreference; the mask is bitwise
3098 * anded with the reference marking of pic
3099 * @return non-zero if pic becomes entirely unreferenced (except possibly
3100 * for display purposes), zero if one of the fields remains in reference
// ANDs pic->reference with refmask. A nonzero result means part of the
// picture is still referenced.
// NOTE(review): the embedded line numbers have gaps; the return statements
// and the else line are not visible in this listing.
3103 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3105 if (pic->reference &= refmask) {
// Presumably reached only when no reference bit is left: scan the
// NULL-terminated h->delayed_pic array and, if pic is still queued there,
// mark it as held for delayed output only.
3108 for(i = 0; h->delayed_pic[i]; i++)
3109 if(pic == h->delayed_pic[i]){
3110 pic->reference=DELAYED_PIC_REF;
3118 * instantaneous decoder refresh.
// Drops every long term and short term reference and resets the frame_num
// tracking state.
// NOTE(review): the embedded line numbers have gaps; further resets may sit
// on the lines after 3134 that are not shown here.
3120 static void idr(H264Context *h){
// Empty all 16 long term slots.
3123 for(i=0; i<16; i++){
3124 remove_long(h, i, 0);
3126 assert(h->long_ref_count==0);
// Unreference every short term picture and clear the list.
3128 for(i=0; i<h->short_ref_count; i++){
3129 unreference_pic(h, h->short_ref[i], 0);
3130 h->short_ref[i]= NULL;
3132 h->short_ref_count=0;
3133 h->prev_frame_num= 0;
3134 h->prev_frame_num_offset= 0;
3139 /* forget old pics after a seek */
// Flush handler taking the codec context: forgets all pictures held for
// delayed output, resets the output POC tracker, clears the reference marking
// of the current picture and finally calls ff_mpeg_flush().
// NOTE(review): the embedded line numbers have gaps (e.g. 3149), so at least
// one statement of this function is not visible in this listing.
3140 static void flush_dpb(AVCodecContext *avctx){
3141 H264Context *h= avctx->priv_data;
// Release every delayed picture slot.
3143 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3144 if(h->delayed_pic[i])
3145 h->delayed_pic[i]->reference= 0;
3146 h->delayed_pic[i]= NULL;
3148 h->outputed_poc= INT_MIN;
3150 if(h->s.current_picture_ptr)
3151 h->s.current_picture_ptr->reference= 0;
3152 h->s.first_field= 0;
3154 ff_mpeg_flush(avctx);
3158 * Find a Picture in the short term reference list by frame number.
3159 * @param frame_num frame number to search for
3160 * @param idx the index into h->short_ref where returned picture is found
3161 * undefined if no picture found.
3162 * @return pointer to the found picture, or NULL if no pic with the provided
3163 * frame number is found
// Linear search of h->short_ref for a picture whose frame_num equals the
// requested one. With FF_DEBUG_MMCO set, every visited entry is logged.
// NOTE(review): the embedded line numbers have gaps; the lines that store the
// index through idx and return the result are not visible in this listing.
3165 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3166 MpegEncContext * const s = &h->s;
3169 for(i=0; i<h->short_ref_count; i++){
3170 Picture *pic= h->short_ref[i];
3171 if(s->avctx->debug&FF_DEBUG_MMCO)
3172 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3173 if(pic->frame_num == frame_num) {
3182 * Remove a picture from the short term reference list by its index in
3183 * that list. This does no checking on the provided index; it is assumed
3184 * to be valid. Other list entries are shifted down.
3185 * @param i index into h->short_ref of picture to remove.
// Removes entry i from h->short_ref and closes the gap.
3187 static void remove_short_at_index(H264Context *h, int i){
3188 assert(i >= 0 && i < h->short_ref_count);
3189 h->short_ref[i]= NULL;
// The count is decremented first, so (short_ref_count - i) is exactly the
// number of entries that followed slot i; nothing is moved when the list
// becomes empty.
3190 if (--h->short_ref_count)
3191 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3196 * @return the removed picture or NULL if an error occurs
// Looks up the short term picture with the given frame_num and applies
// ref_mask to its reference marking; the picture is taken out of
// h->short_ref only when unreference_pic() reports a nonzero result.
// NOTE(review): the embedded line numbers have gaps; declarations, the check
// on the find_short() result and the return statement are not visible here.
3198 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3199 MpegEncContext * const s = &h->s;
3203 if(s->avctx->debug&FF_DEBUG_MMCO)
3204 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3206 pic = find_short(h, frame_num, &i);
3208 if(unreference_pic(h, pic, ref_mask))
3209 remove_short_at_index(h, i);
3216 * Remove a picture from the long term reference list by its index in
3218 * @return the removed picture or NULL if an error occurs
// Applies ref_mask to the long term picture stored in slot i; when
// unreference_pic() reports a nonzero result the slot is emptied, the
// long_ref flag of the picture is cleared and h->long_ref_count is decremented.
// NOTE(review): the embedded line numbers have gaps; presumably a check that
// the slot is occupied sits on the missing line before 3225 - confirm.
3220 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3223 pic= h->long_ref[i];
3225 if(unreference_pic(h, pic, ref_mask)){
3226 assert(h->long_ref[i]->long_ref == 1);
3227 h->long_ref[i]->long_ref= 0;
3228 h->long_ref[i]= NULL;
3229 h->long_ref_count--;
3237 * print short term list
// Debug helper: when FF_DEBUG_MMCO is set, logs index, frame_num, poc and
// data[0] pointer of every entry in h->short_ref.
3239 static void print_short_term(H264Context *h) {
3241 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3242 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3243 for(i=0; i<h->short_ref_count; i++){
3244 Picture *pic= h->short_ref[i];
3245 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3251 * print long term list
// Debug helper: when FF_DEBUG_MMCO is set, walks the 16 slots of h->long_ref
// and logs index, frame_num, poc and data[0] pointer.
// NOTE(review): the embedded line numbers have a gap at 3259; presumably that
// line skips empty slots before pic is dereferenced - confirm.
3253 static void print_long_term(H264Context *h) {
3255 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3256 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3257 for(i = 0; i < 16; i++){
3258 Picture *pic= h->long_ref[i];
3260 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3267 * Executes the reference picture marking (memory management control operations).
// Applies mmco_count memory management control operations from the mmco
// array to the short term and long term reference lists, then makes sure the
// current picture is recorded as a reference and that the total number of
// references does not exceed h->sps.ref_frame_count.
// NOTE(review): the embedded line numbers have gaps, so declarations, several
// case labels, else lines, breaks and the return statement are not visible in
// this listing. Comments below describe only what the visible lines do.
3269 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3270 MpegEncContext * const s = &h->s;
3272 int current_ref_assigned=0;
3273 Picture *av_uninit(pic);
3275 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3276 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3278 for(i=0; i<mmco_count; i++){
3279 int structure, av_uninit(frame_num);
// NOTE(review): the debug logs here and in the SHORT2UNUSED case read
// h->mmco[i], while the operations themselves use the mmco parameter; the
// two only agree if callers pass h->mmco - confirm.
3280 if(s->avctx->debug&FF_DEBUG_MMCO)
3281 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// Operations that address a short term picture resolve it up front:
// the picture number is split into frame_num and field structure, then the
// picture is looked up in the short term list (its index is left in j).
3283 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3284 || mmco[i].opcode == MMCO_SHORT2LONG){
3285 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3286 pic = find_short(h, frame_num, &j);
// The error is not reported for a SHORT2LONG whose target long term slot
// already holds a picture with the same frame_num.
3288 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3289 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3290 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3295 switch(mmco[i].opcode){
3296 case MMCO_SHORT2UNUSED:
3297 if(s->avctx->debug&FF_DEBUG_MMCO)
3298 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// structure ^ PICT_FRAME is the mask of the fields that stay referenced.
3299 remove_short(h, frame_num, structure ^ PICT_FRAME);
3301 case MMCO_SHORT2LONG:
// Evict whatever else occupies the target long term slot, then move pic from
// the short term list into that slot.
3302 if (h->long_ref[mmco[i].long_arg] != pic)
3303 remove_long(h, mmco[i].long_arg, 0);
3305 remove_short_at_index(h, j);
3306 h->long_ref[ mmco[i].long_arg ]= pic;
3307 if (h->long_ref[ mmco[i].long_arg ]){
3308 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3309 h->long_ref_count++;
3312 case MMCO_LONG2UNUSED:
3313 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3314 pic = h->long_ref[j];
3316 remove_long(h, j, structure ^ PICT_FRAME);
3317 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3318 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3321 // Comment below left from previous code as it is an interesting note.
3322 /* First field in pair is in short term list or
3323 * at a different long term index.
3324 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3325 * Report the problem and keep the pair where it is,
3326 * and mark this field valid.
// Assign the current picture to the long term slot long_arg, evicting any
// other picture stored there, and mark the current field(s) as referenced.
3329 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3330 remove_long(h, mmco[i].long_arg, 0);
3332 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3333 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3334 h->long_ref_count++;
3337 s->current_picture_ptr->reference |= s->picture_structure;
3338 current_ref_assigned=1;
3340 case MMCO_SET_MAX_LONG:
3341 assert(mmco[i].long_arg <= 16);
3342 // just remove the long term refs whose index is greater than the new max
3343 for(j = mmco[i].long_arg; j<16; j++){
3344 remove_long(h, j, 0);
// Drop every short term and long term reference and zero the POC values and
// frame_num of the current picture (the case label is not shown).
3348 while(h->short_ref_count){
3349 remove_short(h, h->short_ref[0]->frame_num, 0);
3351 for(j = 0; j < 16; j++) {
3352 remove_long(h, j, 0);
3354 s->current_picture_ptr->poc=
3355 s->current_picture_ptr->field_poc[0]=
3356 s->current_picture_ptr->field_poc[1]=
3360 s->current_picture_ptr->frame_num= 0;
// No operation marked the current picture as long term: record it as a short
// term reference instead.
3366 if (!current_ref_assigned) {
3367 /* Second field of complementary field pair; the first field of
3368 * which is already referenced. If short referenced, it
3369 * should be first entry in short_ref. If not, it must exist
3370 * in long_ref; trying to put it on the short list here is an
3371 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3373 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3374 /* Just mark the second field valid */
3375 s->current_picture_ptr->reference = PICT_FRAME;
3376 } else if (s->current_picture_ptr->long_ref) {
3377 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3378 "assignment for second field "
3379 "in complementary field pair "
3380 "(first field is long term)\n");
// Remove any stale short term entry with the same frame_num, then insert the
// current picture at the head of the short term list.
3382 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3384 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3387 if(h->short_ref_count)
3388 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3390 h->short_ref[0]= s->current_picture_ptr;
3391 h->short_ref_count++;
3392 s->current_picture_ptr->reference |= s->picture_structure;
3396 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3398 /* We have too many reference frames, probably due to corrupted
3399 * stream. Need to discard one frame. Prevents overrun of the
3400 * short_ref and long_ref buffers.
3402 av_log(h->s.avctx, AV_LOG_ERROR,
3403 "number of reference frames exceeds max (probably "
3404 "corrupt input), discarding one\n");
// With only long term references left, one long term slot is dropped (the
// slot selection lines are not shown); otherwise the last short term entry.
3406 if (h->long_ref_count && !h->short_ref_count) {
3407 for (i = 0; i < 16; ++i)
3412 remove_long(h, i, 0);
3414 pic = h->short_ref[h->short_ref_count - 1];
3415 remove_short(h, pic->frame_num, 0);
3419 print_short_term(h);
// Parses the reference picture marking syntax from gb into h->mmco.
// Three paths are visible: IDR slices, explicit (adaptive) operations read
// from the bitstream, and an implicit sliding window that schedules removal
// of the last short term reference when the reference buffer is full.
// NOTE(review): the embedded line numbers have gaps, so declarations, else
// lines, returns, and the lines that store the operation count are not
// visible in this listing.
3424 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3425 MpegEncContext * const s = &h->s;
3429 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// A set bit yields 0, a cleared bit yields -1.
3430 s->broken_link= get_bits1(gb) -1;
// Presumably guarded by a second flag bit on the missing line 3431: mark the
// IDR picture as long term reference with index 0.
3432 h->mmco[0].opcode= MMCO_LONG;
3433 h->mmco[0].long_arg= 0;
3437 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3438 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3439 MMCOOpcode opcode= get_ue_golomb_31(gb);
3441 h->mmco[i].opcode= opcode;
// Short term operands are coded as a difference from the current picture
// number and wrapped into the range [0, max_pic_num).
3442 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3443 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3444 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3445 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
// Long term operands must be below 16; values up to 31 are accepted only for
// LONG2UNUSED in field pictures.
3449 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3450 unsigned int long_arg= get_ue_golomb_31(gb);
3451 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3452 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3455 h->mmco[i].long_arg= long_arg;
3458 if(opcode > (unsigned)MMCO_LONG){
3459 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3462 if(opcode == MMCO_END)
// Sliding window path: when the buffer is exactly full, schedule removal of
// the last short term entry. This is skipped for the second field of a
// picture whose first field is already marked as reference.
3467 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3469 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3470 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3471 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3472 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// In field mode the frame is addressed as two picture numbers, 2*n and 2*n+1,
// so a second operation is queued for the other field.
3474 if (FIELD_PICTURE) {
3475 h->mmco[0].short_pic_num *= 2;
3476 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3477 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
// Computes the picture order count of both fields of the current picture and
// stores them in the current Picture (field_poc[] and poc), using the method
// selected by h->sps.poc_type.
// NOTE(review): the embedded line numbers have gaps, so declarations (such as
// field_poc and i), else lines, the field_poc[0] assignment of the type 0
// branch, most of the last branch and the return are not visible here.
3487 static int init_poc(H264Context *h){
3488 MpegEncContext * const s = &h->s;
3489 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3491 Picture *cur = s->current_picture_ptr;
// frame_num wrapped around since the previous picture: advance the offset by
// one full frame_num period.
3493 h->frame_num_offset= h->prev_frame_num_offset;
3494 if(h->frame_num < h->prev_frame_num)
3495 h->frame_num_offset += max_frame_num;
3497 if(h->sps.poc_type==0){
3498 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// Detect wrap-around of the POC LSBs in either direction and adjust the MSB
// part by one LSB period accordingly.
3500 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3501 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3502 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3503 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3505 h->poc_msb = h->prev_poc_msb;
3506 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3508 field_poc[1] = h->poc_msb + h->poc_lsb;
3509 if(s->picture_structure == PICT_FRAME)
3510 field_poc[1] += h->delta_poc_bottom;
3511 }else if(h->sps.poc_type==1){
3512 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3515 if(h->sps.poc_cycle_length != 0)
3516 abs_frame_num = h->frame_num_offset + h->frame_num;
3520 if(h->nal_ref_idc==0 && abs_frame_num > 0)
// Sum of the per-frame offsets over one POC cycle.
3523 expected_delta_per_poc_cycle = 0;
3524 for(i=0; i < h->sps.poc_cycle_length; i++)
3525 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// Expected POC = whole cycles elapsed plus the offsets of the frames inside
// the current cycle.
// NOTE(review): the divisions below require poc_cycle_length != 0;
// presumably abs_frame_num is forced to 0 in that case on a line not shown.
3527 if(abs_frame_num > 0){
3528 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3529 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3531 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3532 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3533 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3537 if(h->nal_ref_idc == 0)
3538 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3540 field_poc[0] = expectedpoc + h->delta_poc[0];
3541 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3543 if(s->picture_structure == PICT_FRAME)
3544 field_poc[1] += h->delta_poc[1];
// Remaining POC type: derived from the absolute frame number.
3546 int poc= 2*(h->frame_num_offset + h->frame_num);
// Store only the field POC(s) that belong to the picture structure being
// decoded; the picture POC is the smaller of the two stored field POCs.
3555 if(s->picture_structure != PICT_BOTTOM_FIELD)
3556 s->current_picture_ptr->field_poc[0]= field_poc[0];
3557 if(s->picture_structure != PICT_TOP_FIELD)
3558 s->current_picture_ptr->field_poc[1]= field_poc[1];
3559 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3566 * initialize scan tables
// Fills the scan order tables of the context (4x4 and 8x8, frame and field,
// plus the CAVLC 8x8 variants) and sets the *_q0 pointers.
// The tables are copied unchanged when the C IDCT functions are in use and
// permuted through the T() macro otherwise.
// NOTE(review): the embedded line numbers have gaps, so else lines, closing
// braces and presumably the undef of T between its two definitions are not
// visible in this listing.
3568 static void init_scan_tables(H264Context *h){
3569 MpegEncContext * const s = &h->s;
3571 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3572 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3573 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
// 4x4 case: T() swaps the low two bits of an index with the next two bits.
3575 for(i=0; i<16; i++){
3576 #define T(x) (x>>2) | ((x<<2) & 0xF)
3577 h->zigzag_scan[i] = T(zigzag_scan[i]);
3578 h-> field_scan[i] = T( field_scan[i]);
3582 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3583 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3584 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3585 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3586 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
// 8x8 case: T() swaps the low three bits of an index with the next three.
3588 for(i=0; i<64; i++){
3589 #define T(x) (x>>3) | ((x&7)<<3)
3590 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3591 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3592 h->field_scan8x8[i] = T(field_scan8x8[i]);
3593 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// With transform bypass enabled in the SPS the *_q0 pointers refer to the
// unpermuted static tables; otherwise to the per-context tables built above.
3597 if(h->sps.transform_bypass){ //FIXME same ugly
3598 h->zigzag_scan_q0 = zigzag_scan;
3599 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3600 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3601 h->field_scan_q0 = field_scan;
3602 h->field_scan8x8_q0 = field_scan8x8;
3603 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3605 h->zigzag_scan_q0 = h->zigzag_scan;
3606 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3607 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3608 h->field_scan_q0 = h->field_scan;
3609 h->field_scan8x8_q0 = h->field_scan8x8;
3610 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3615 * Replicates H264 "master" context to thread contexts.
// Copies the per-picture state a slice decoding context needs from src to
// dst: block offsets, current picture and strides, POC and frame_num history,
// the reference lists and the dequantisation coefficient tables.
3617 static void clone_slice(H264Context *dst, H264Context *src)
3619 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3620 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3621 dst->s.current_picture = src->s.current_picture;
3622 dst->s.linesize = src->s.linesize;
3623 dst->s.uvlinesize = src->s.uvlinesize;
3624 dst->s.first_field = src->s.first_field;
// POC and frame_num history used when decoding the next slice header.
3626 dst->prev_poc_msb = src->prev_poc_msb;
3627 dst->prev_poc_lsb = src->prev_poc_lsb;
3628 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3629 dst->prev_frame_num = src->prev_frame_num;
3630 dst->short_ref_count = src->short_ref_count;
// Reference picture lists.
3632 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3633 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3634 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3635 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
// Dequantisation tables (sized from src here, from dst in the copies above).
3637 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3638 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3642 * decodes a slice header.
3643 * This will also call MPV_common_init() and frame_start() as needed.
3645 * @param h h264context
3646 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
3648 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
// Parses one slice header from s->gb and prepares the context for decoding
// that slice: selects PPS and SPS, (re)initialises the decoder on first use,
// handles frame_num gaps and field pairing, reads POC, reference list,
// weighting, marking, CABAC, QP and deblocking syntax, and fills the ref2frm
// table for the slice.
// h is the context decoding this slice, h0 the master context.
// Visible return values: -1 on the shown error paths and 1 when deblocking
// forces sequential decoding.
// NOTE(review): the embedded line numbers have gaps, so many short lines
// (else, return, break, closing braces, simple conditions) are not visible in
// this listing. Comments below describe only what the visible lines do.
3650 static int decode_slice_header(H264Context *h, H264Context *h0){
3651 MpegEncContext * const s = &h->s;
3652 MpegEncContext * const s0 = &h0->s;
3653 unsigned int first_mb_in_slice;
3654 unsigned int pps_id;
3655 int num_ref_idx_active_override_flag;
3656 unsigned int slice_type, tmp, i, j;
3657 int default_ref_list_done = 0;
3658 int last_pic_structure;
// A picture with nal_ref_idc == 0 is never used as reference.
3660 s->dropable= h->nal_ref_idc == 0;
// With CODEC_FLAG2_FAST, non-reference pictures use the 2-tap qpel functions;
// everything else uses the H.264 qpel functions.
3662 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3663 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3664 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3666 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3667 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3670 first_mb_in_slice= get_ue_golomb(&s->gb);
// In chunked input mode a slice starting at macroblock 0 begins a new picture.
3672 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3673 h0->current_slice = 0;
3674 if (!s0->first_field)
3675 s->current_picture_ptr= NULL;
3678 slice_type= get_ue_golomb_31(&s->gb);
// NOTE(review): this message prints h->slice_type, which is only assigned
// further down, rather than the local slice_type that was just parsed - the
// logged value looks stale; confirm.
3680 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3685 h->slice_type_fixed=1;
3687 h->slice_type_fixed=0;
3689 slice_type= golomb_to_pict_type[ slice_type ];
// The default reference list is not rebuilt for I slices, nor for a
// non-first slice of the same type as the previous one.
3690 if (slice_type == FF_I_TYPE
3691 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3692 default_ref_list_done = 1;
3694 h->slice_type= slice_type;
3695 h->slice_type_nos= slice_type & 3;
3697 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3698 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3699 av_log(h->s.avctx, AV_LOG_ERROR,
3700 "B picture before any references, skipping\n");
// Select the PPS named by the slice and the SPS named by that PPS; both are
// copied by value from the master context.
3704 pps_id= get_ue_golomb(&s->gb);
3705 if(pps_id>=MAX_PPS_COUNT){
3706 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3709 if(!h0->pps_buffers[pps_id]) {
3710 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3713 h->pps= *h0->pps_buffers[pps_id];
3715 if(!h0->sps_buffers[h->pps.sps_id]) {
3716 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3719 h->sps = *h0->sps_buffers[h->pps.sps_id];
// Only the master context rebuilds the dequant tables, and only on PPS change.
3721 if(h == h0 && h->dequant_coeff_pps != pps_id){
3722 h->dequant_coeff_pps = pps_id;
3723 init_dequant_tables(h);
// Picture size in macroblocks and pixels, cropping included.
3726 s->mb_width= h->sps.mb_width;
3727 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3729 h->b_stride= s->mb_width*4;
3730 h->b8_stride= s->mb_width*2;
3732 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3733 if(h->sps.frame_mbs_only_flag)
3734 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3736 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
// Dimension change on an initialised context: an error in one case (per the
// trailing comment, during parallel decoding), otherwise the DPB is flushed.
3738 if (s->context_initialized
3739 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3741 return -1; // width / height changed during parallelized decoding
3743 flush_dpb(s->avctx);
3746 if (!s->context_initialized) {
3748 return -1; // we cannot (re-)initialize context during parallel decoding
3749 if (MPV_common_init(s) < 0)
3753 init_scan_tables(h);
// Create one H264Context per additional thread, seeded from the matching
// MpegEncContext thread context; the H264-specific remainder is zeroed.
// NOTE(review): the av_malloc result is passed to memcpy on the very next
// source line without a NULL check - allocation failure would crash here.
3756 for(i = 1; i < s->avctx->thread_count; i++) {
3758 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3759 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3760 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3763 init_scan_tables(c);
3767 for(i = 0; i < s->avctx->thread_count; i++)
3768 if(context_init(h->thread_context[i]) < 0)
// Export stream properties to the codec context.
3771 s->avctx->width = s->width;
3772 s->avctx->height = s->height;
3773 s->avctx->sample_aspect_ratio= h->sps.sar;
3774 if(!s->avctx->sample_aspect_ratio.den)
3775 s->avctx->sample_aspect_ratio.den = 1;
3777 if(h->sps.timing_info_present_flag){
3778 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
// Streams from x264 builds before 44 get their time base denominator doubled.
3779 if(h->x264_build > 0 && h->x264_build < 44)
3780 s->avctx->time_base.den *= 2;
3781 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3782 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3786 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// Picture structure: frame, single field, or frame with MBAFF.
3789 h->mb_aff_frame = 0;
3790 last_pic_structure = s0->picture_structure;
3791 if(h->sps.frame_mbs_only_flag){
3792 s->picture_structure= PICT_FRAME;
3794 if(get_bits1(&s->gb)) { //field_pic_flag
3795 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3797 s->picture_structure= PICT_FRAME;
3798 h->mb_aff_frame = h->sps.mb_aff;
3801 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3803 if(h0->current_slice == 0){
// Frame number gap: start a placeholder frame for every missing frame_num and
// run reference marking with no operations for each of them.
// NOTE(review): this av_log call passes NULL as its context while the rest of
// the function passes the codec context - looks like an oversight.
3804 while(h->frame_num != h->prev_frame_num &&
3805 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3806 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3807 if (frame_start(h) < 0)
3809 h->prev_frame_num++;
3810 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3811 s->current_picture_ptr->frame_num= h->prev_frame_num;
3812 execute_ref_pic_marking(h, NULL, 0);
3815 /* See if we have a decoded first field looking for a pair... */
3816 if (s0->first_field) {
3817 assert(s0->current_picture_ptr);
3818 assert(s0->current_picture_ptr->data[0]);
3819 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3821 /* figure out if we have a complementary field pair */
3822 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3824 * Previous field is unmatched. Don't display it, but let it
3825 * remain for reference if marked as such.
3827 s0->current_picture_ptr = NULL;
3828 s0->first_field = FIELD_PICTURE;
3831 if (h->nal_ref_idc &&
3832 s0->current_picture_ptr->reference &&
3833 s0->current_picture_ptr->frame_num != h->frame_num) {
3835 * This and previous field were reference, but had
3836 * different frame_nums. Consider this field first in
3837 * pair. Throw away previous field except for reference
3840 s0->first_field = 1;
3841 s0->current_picture_ptr = NULL;
3844 /* Second field in complementary pair */
3845 s0->first_field = 0;
3850 /* Frame or first field in a potentially complementary pair */
3851 assert(!s0->current_picture_ptr);
3852 s0->first_field = FIELD_PICTURE;
// A new picture buffer is started for frames and for first fields only.
3855 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3856 s0->first_field = 0;
3863 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
// Validate the slice start address and convert it to macroblock coordinates;
// in field or MBAFF pictures each address step covers two macroblock rows.
3865 assert(s->mb_num == s->mb_width * s->mb_height);
3866 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3867 first_mb_in_slice >= s->mb_num){
3868 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3871 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3872 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3873 if (s->picture_structure == PICT_BOTTOM_FIELD)
3874 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3875 assert(s->mb_y < s->mb_height);
// Picture numbering: fields use 2*frame_num+1 and twice the range.
3877 if(s->picture_structure==PICT_FRAME){
3878 h->curr_pic_num= h->frame_num;
3879 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3881 h->curr_pic_num= 2*h->frame_num + 1;
3882 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3885 if(h->nal_unit_type == NAL_IDR_SLICE){
3886 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC syntax elements, depending on the POC type of the SPS.
3889 if(h->sps.poc_type==0){
3890 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3892 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3893 h->delta_poc_bottom= get_se_golomb(&s->gb);
3897 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3898 h->delta_poc[0]= get_se_golomb(&s->gb);
3900 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3901 h->delta_poc[1]= get_se_golomb(&s->gb);
3906 if(h->pps.redundant_pic_cnt_present){
3907 h->redundant_pic_count= get_ue_golomb(&s->gb);
3910 //set defaults, might be overridden a few lines later
3911 h->ref_count[0]= h->pps.ref_count[0];
3912 h->ref_count[1]= h->pps.ref_count[1];
3914 if(h->slice_type_nos != FF_I_TYPE){
3915 if(h->slice_type_nos == FF_B_TYPE){
3916 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3918 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3920 if(num_ref_idx_active_override_flag){
3921 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3922 if(h->slice_type_nos==FF_B_TYPE)
3923 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Reject counts outside 1..32 and fall back to a single reference per list.
3925 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3926 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3927 h->ref_count[0]= h->ref_count[1]= 1;
3931 if(h->slice_type_nos == FF_B_TYPE)
// Reference list construction and reordering.
3938 if(!default_ref_list_done){
3939 fill_default_ref_list(h);
3942 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3945 if(h->slice_type_nos!=FF_I_TYPE){
3946 s->last_picture_ptr= &h->ref_list[0][0];
3947 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3949 if(h->slice_type_nos==FF_B_TYPE){
3950 s->next_picture_ptr= &h->ref_list[1][0];
3951 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
// Weighted prediction: explicit table, implicit weights, or none.
// NOTE(review): pred_weight_table() is declared to return int, but its result
// is discarded here.
3954 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3955 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3956 pred_weight_table(h);
3957 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3958 implicit_weight_table(h);
3961 for (i = 0; i < 2; i++) {
3962 h->luma_weight_flag[i] = 0;
3963 h->chroma_weight_flag[i] = 0;
// NOTE(review): the return value of decode_ref_pic_marking() is discarded, so
// a parse error in the marking syntax does not abort the slice here.
3968 decode_ref_pic_marking(h0, &s->gb);
3971 fill_mbaff_ref_list(h);
3973 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3974 direct_dist_scale_factor(h);
3975 direct_ref_list_init(h);
3977 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3978 tmp = get_ue_golomb_31(&s->gb);
3980 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3983 h->cabac_init_idc= tmp;
// Slice QP = PPS initial QP plus the signed slice delta.
3986 h->last_qscale_diff = 0;
3987 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3989 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3993 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3994 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3995 //FIXME qscale / qp ... stuff
3996 if(h->slice_type == FF_SP_TYPE){
3997 get_bits1(&s->gb); /* sp_for_switch_flag */
3999 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4000 get_se_golomb(&s->gb); /* slice_qs_delta */
// Deblocking defaults: filter enabled with zero offsets.
4003 h->deblocking_filter = 1;
4004 h->slice_alpha_c0_offset = 0;
4005 h->slice_beta_offset = 0;
4006 if( h->pps.deblocking_filter_parameters_present ) {
4007 tmp= get_ue_golomb_31(&s->gb);
4009 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
// The coded values 0 and 1 are swapped so that nonzero means the filter is on.
4012 h->deblocking_filter= tmp;
4013 if(h->deblocking_filter < 2)
4014 h->deblocking_filter^= 1; // 1<->0
4016 if( h->deblocking_filter ) {
4017 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4018 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// The skip_loop_filter setting of the codec context can disable deblocking.
4022 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4023 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4024 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4025 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4026 h->deblocking_filter= 0;
// Deblocking type 1 with several slice contexts: either downgrade to type 2
// in fast mode, or drop to a single context and return 1.
4028 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4029 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4030 /* Cheat slightly for speed:
4031 Do not bother to deblock across slices. */
4032 h->deblocking_filter = 2;
4034 h0->max_contexts = 1;
4035 if(!h0->single_decode_warning) {
4036 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4037 h0->single_decode_warning = 1;
4040 return 1; // deblocking switched inside frame
// NOTE(review): the get_bits call below has a literal question mark as its
// bit count, which is not valid C; presumably these two lines are disabled by
// a preprocessor conditional that is not visible in this listing - confirm.
4045 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4046 slice_group_change_cycle= get_bits(&s->gb, ?);
4049 h0->last_slice_type = slice_type;
4050 h->slice_num = ++h0->current_slice;
4051 if(h->slice_num >= MAX_SLICES){
4052 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
// Per-slice table mapping a reference index to 4*frame_num plus the low two
// reference bits of that entry.
4056 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4060 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4061 +(h->ref_list[j][i].reference&3);
4064 for(i=16; i<48; i++)
4065 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4066 +(h->ref_list[j][i].reference&3);
4069 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4070 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4072 s->avctx->refs= h->sps.ref_frame_count;
// Optional one-line summary of the parsed slice header.
4074 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4075 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4077 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4079 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4080 pps_id, h->frame_num,
4081 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4082 h->ref_count[0], h->ref_count[1],
4084 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4086 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4087 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
// Reads a level prefix from the bitstream reader.
// The cached bits are fetched into buf, log is derived as 32 - av_log2(buf)
// and exactly log bits are consumed with LAST_SKIP_BITS.
// NOTE(review): assuming av_log2() yields the index of the highest set bit,
// log is the number of leading zero bits plus the terminating one bit.
// The debug trace prints log-1 as the prefix value; the return statement
// itself is not visible in this excerpt.
4097 static inline int get_level_prefix(GetBitContext *gb){
4101 OPEN_READER(re, gb);
4102 UPDATE_CACHE(re, gb);
4103 buf=GET_CACHE(re, gb);
4105 log= 32 - av_log2(buf);
4107 print_bin(buf>>(32-log), log);
4108 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the bits that were only peeked at through the cache.
4111 LAST_SKIP_BITS(re, gb, log);
4112 CLOSE_READER(re, gb);
// Tells whether the sub macroblock types permit the 8x8 transform.
// h->sub_mb_type is read as one 64 bit word and tested against a mask built
// by multiplying the forbidden partition flags with 0x0001000100010001ULL,
// which replicates them into every 16 bit lane.
// NOTE(review): this presumes sub_mb_type holds four 16 bit entries; confirm
// against the declaration.
// The result is nonzero only when none of the masked flags is set. With
// direct_8x8_inference_flag set, MB_TYPE_DIRECT2 is not part of the mask;
// the second return statement additionally rejects MB_TYPE_DIRECT2.
4117 static inline int get_dct8x8_allowed(H264Context *h){
4118 if(h->sps.direct_8x8_inference_flag)
4119 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4121 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4125 * decodes a residual block.
4126 * @param n block index
4127 * @param scantable scantable
4128 * @param max_coeff number of coefficients in the block
4129 * @return <0 if an error occurred
// Decodes the coefficients of one residual block from gb into block[].
// Visible steps: read coeff_token, split it into total_coeff
// (coeff_token>>2) and trailing_ones (coeff_token&3), decode the levels,
// read the number of zeros, then place each level at scantable[coeff_num]
// while walking coeff_num downwards by 1 + run_before.
// Two placement variants exist: one stores the raw level, the other stores
// (level * qmul[j] + 32)>>6. The condition selecting between them is not
// visible in this excerpt.
// Error paths log through av_log; their return values are not visible here.
4131 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4132 MpegEncContext * const s = &h->s;
// Maps a predicted non zero count (0..16) to one of the four coeff_token tables.
4133 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4135 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4137 //FIXME put trailing_ones into the context
// Chroma DC has a dedicated coeff_token table; the other block kinds pick a
// table from the count returned by pred_non_zero_count().
4139 if(n == CHROMA_DC_BLOCK_INDEX){
4140 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4141 total_coeff= coeff_token>>2;
4143 if(n == LUMA_DC_BLOCK_INDEX){
4144 total_coeff= pred_non_zero_count(h, 0);
4145 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4146 total_coeff= coeff_token>>2;
4148 total_coeff= pred_non_zero_count(h, n);
4149 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4150 total_coeff= coeff_token>>2;
// Only this branch records the count in the non zero count cache.
4151 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4155 //FIXME set last_non_zero?
// The unsigned cast of max_coeff makes this an unsigned comparison.
4159 if(total_coeff > (unsigned)max_coeff) {
4160 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4164 trailing_ones= coeff_token&3;
4165 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4166 assert(total_coeff<=16);
// Three bits are peeked but only trailing_ones of them are consumed.
// Each of the first three levels becomes +1 when its bit is 0 and -1 when it is 1.
4168 i = show_bits(gb, 3);
4169 skip_bits(gb, trailing_ones);
4170 level[0] = 1-((i&4)>>1);
4171 level[1] = 1-((i&2) );
4172 level[2] = 1-((i&1)<<1);
4174 if(trailing_ones<total_coeff) {
// suffix_length starts at 1 only for more than 10 coefficients with fewer than 3 trailing ones.
4176 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4177 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4178 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
// Entry [1] of the table is the number of bits to consume for this lookup.
4180 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// Table values >= 100 carry a prefix (level_code - 100) instead of a level;
// a prefix equal to LEVEL_TAB_BITS is extended with get_level_prefix().
4181 if(level_code >= 100){
4182 prefix= level_code - 100;
4183 if(prefix == LEVEL_TAB_BITS)
4184 prefix += get_level_prefix(gb);
4186 //first coefficient has suffix_length equal to 0 or 1
4187 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4189 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4191 level_code= (prefix<<suffix_length); //part
4192 }else if(prefix==14){
4194 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4196 level_code= prefix + get_bits(gb, 4); //part
4198 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4199 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4201 level_code += (1<<(prefix-3))-4096;
4204 if(trailing_ones < 3) level_code += 2;
// mask is 0 for an even level_code and -1 for an odd one;
// (x ^ mask) - mask negates x when mask is -1.
4207 mask= -(level_code&1);
4208 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
// (level_code>>31)|1 is meant as the sign of level_code (-1 or +1) and moves
// it one step away from zero.
// NOTE(review): relies on an arithmetic right shift of a negative int.
4210 if(trailing_ones < 3) level_code += (level_code>>31)|1;
// Unsigned range test: true when level_code lies outside -3..3.
4213 if(level_code + 3U > 6U)
4215 level[trailing_ones]= level_code;
4218 //remaining coefficients have suffix_length > 0
4219 for(i=trailing_ones+1;i<total_coeff;i++) {
4220 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4221 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4222 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4224 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4225 if(level_code >= 100){
4226 prefix= level_code - 100;
4227 if(prefix == LEVEL_TAB_BITS){
4228 prefix += get_level_prefix(gb);
4231 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4233 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4235 level_code += (1<<(prefix-3))-4096;
4237 mask= -(level_code&1);
4238 level_code= (((2+level_code)>>1) ^ mask) - mask;
4240 level[i]= level_code;
// Unsigned range test: true when the magnitude of level_code exceeds
// suffix_limit[suffix_length]. The statement it guards is not visible here.
4242 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// The statement run when the block is full is not visible in this excerpt.
// The two get_vlc2() calls below read zeros_left from the chroma DC table or
// from the generic total_zeros table, both indexed by total_coeff-1.
4247 if(total_coeff == max_coeff)
4250 if(n == CHROMA_DC_BLOCK_INDEX)
4251 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4253 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// level[0] is stored at the highest scan position of the block.
4256 coeff_num = zeros_left + total_coeff - 1;
4257 j = scantable[coeff_num];
// Placement variant that stores levels without scaling.
4259 block[j] = level[0];
4260 for(i=1;i<total_coeff;i++) {
// run_before comes from run_vlc[zeros_left-1] while zeros_left < 7 and from
// run7_vlc otherwise (the first branch of this chain is not visible).
4263 else if(zeros_left < 7){
4264 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4266 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4268 zeros_left -= run_before;
4269 coeff_num -= 1 + run_before;
4270 j= scantable[ coeff_num ];
// Placement variant that scales with qmul: (level * qmul[j] + 32)>>6.
4275 block[j] = (level[0] * qmul[j] + 32)>>6;
4276 for(i=1;i<total_coeff;i++) {
4279 else if(zeros_left < 7){
4280 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4282 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4284 zeros_left -= run_before;
4285 coeff_num -= 1 + run_before;
4286 j= scantable[ coeff_num ];
4288 block[j]= (level[i] * qmul[j] + 32)>>6;
4293 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Infers the field decoding flag from an already decoded neighbour.
// The macroblock type is taken from mb_xy-1 when that entry of slice_table
// belongs to the current slice, otherwise from mb_xy-mb_stride when that one
// does. The fallback value used when neither matches is not visible in this
// excerpt.
// Both h->mb_mbaff and h->mb_field_decoding_flag are set to 1 when the
// selected type is interlaced and to 0 otherwise.
4300 static void predict_field_decoding_flag(H264Context *h){
4301 MpegEncContext * const s = &h->s;
4302 const int mb_xy= h->mb_xy;
4303 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4304 ? s->current_picture.mb_type[mb_xy-1]
4305 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4306 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4308 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4312 * decodes a P_SKIP or B_SKIP macroblock
// Fills in the state of a skipped macroblock.
// The non zero counts of this macroblock are cleared. In B slices the motion
// comes from pred_direct_motion(); the other visible path builds a 16x16
// list 0 macroblock with reference index 0 and the vector returned by
// pred_pskip_motion().
// Finally the motion, mb_type, qscale and slice number are written back and
// prev_mb_skipped is set.
4314 static void decode_mb_skip(H264Context *h){
4315 MpegEncContext * const s = &h->s;
4316 const int mb_xy= h->mb_xy;
4319 memset(h->non_zero_count[mb_xy], 0, 16);
4320 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// The condition guarding this flag is not visible in this excerpt.
4323 mb_type|= MB_TYPE_INTERLACED;
4325 if( h->slice_type_nos == FF_B_TYPE )
4327 // just for fill_caches. pred_direct_motion will set the real mb_type
4328 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4330 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4331 pred_direct_motion(h, &mb_type);
// MB_TYPE_SKIP is set again because pred_direct_motion() receives mb_type by pointer.
4332 mb_type|= MB_TYPE_SKIP;
4337 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4339 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4340 pred_pskip_motion(h, &mx, &my);
// Reference index 0 and the predicted vector are stored for the whole 4x4 cache area.
4341 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4342 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4345 write_back_motion(h, mb_type);
4346 s->current_picture.mb_type[mb_xy]= mb_type;
4347 s->current_picture.qscale_table[mb_xy]= s->qscale;
4348 h->slice_table[ mb_xy ]= h->slice_num;
4349 h->prev_mb_skipped= 1;
4353 * decodes a macroblock
4354 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Parses one macroblock of a CAVLC coded slice.
// Visible stages: skip run handling, mb_type lookup, I_PCM samples, intra
// prediction modes or reference indices plus motion vectors, coded block
// pattern, optional 8x8 transform flag, qp delta and the residual blocks,
// which are read through decode_residual().
// This excerpt omits lines of the function, so only the visible steps are
// annotated. The one visible error return is -1.
4356 static int decode_mb_cavlc(H264Context *h){
4357 MpegEncContext * const s = &h->s;
4359 int partition_count;
4360 unsigned int mb_type, cbp;
4361 int dct8x8_allowed= h->pps.transform_8x8_mode;
4363 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4365 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4366 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non I slices: a new skip run is read when mb_skip_run is -1. While the run
// is nonzero the visible part of the branch deals with the field decoding
// flag of MBAFF frames.
4368 if(h->slice_type_nos != FF_I_TYPE){
4369 if(s->mb_skip_run==-1)
4370 s->mb_skip_run= get_ue_golomb(&s->gb);
4372 if (s->mb_skip_run--) {
4373 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4374 if(s->mb_skip_run==0)
4375 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4377 predict_field_decoding_flag(h);
4384 if( (s->mb_y&1) == 0 )
4385 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4388 h->prev_mb_skipped= 0;
// mb_type is read as an unsigned Exp-Golomb code and mapped through the table
// of the slice type. The conditions leading to the decode_intra_mb jumps are
// not visible in this excerpt.
4390 mb_type= get_ue_golomb(&s->gb);
4391 if(h->slice_type_nos == FF_B_TYPE){
4393 partition_count= b_mb_type_info[mb_type].partition_count;
4394 mb_type= b_mb_type_info[mb_type].type;
4397 goto decode_intra_mb;
4399 }else if(h->slice_type_nos == FF_P_TYPE){
4401 partition_count= p_mb_type_info[mb_type].partition_count;
4402 mb_type= p_mb_type_info[mb_type].type;
4405 goto decode_intra_mb;
4408 assert(h->slice_type_nos == FF_I_TYPE);
4409 if(h->slice_type == FF_SI_TYPE && mb_type)
4413 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// For intra types the table also supplies the cbp and the 16x16 prediction mode.
4417 cbp= i_mb_type_info[mb_type].cbp;
4418 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4419 mb_type= i_mb_type_info[mb_type].type;
4423 mb_type |= MB_TYPE_INTERLACED;
4425 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: after byte alignment the samples are copied as 8 bit values,
// 384 of them when CHROMA is nonzero and 256 otherwise.
4427 if(IS_INTRA_PCM(mb_type)){
4430 // We assume these blocks are very rare so we do not optimize it.
4431 align_get_bits(&s->gb);
4433 // The pixels are stored in the same order as levels in h->mb array.
4434 for(x=0; x < (CHROMA ? 384 : 256); x++){
4435 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4438 // In deblocking, the quantizer is 0
4439 s->current_picture.qscale_table[mb_xy]= 0;
4440 // All coeffs are present
4441 memset(h->non_zero_count[mb_xy], 16, 16);
4443 s->current_picture.mb_type[mb_xy]= mb_type;
// ref_count is doubled here and halved again near the end of the function.
// The guarding condition is not visible in this excerpt.
4448 h->ref_count[0] <<= 1;
4449 h->ref_count[1] <<= 1;
4452 fill_caches(h, mb_type, 0);
// Intra macroblocks: prediction modes.
4455 if(IS_INTRA(mb_type)){
4457 // init_top_left_availability(h);
4458 if(IS_INTRA4x4(mb_type)){
4461 if(dct8x8_allowed && get_bits1(&s->gb)){
4462 mb_type |= MB_TYPE_8x8DCT;
4466 // fill_intra4x4_pred_table(h);
4467 for(i=0; i<16; i+=di){
4468 int mode= pred_intra_mode(h, i);
// A zero flag bit means the mode is coded explicitly in 3 bits; the coded
// value skips over the predicted mode.
4470 if(!get_bits1(&s->gb)){
4471 const int rem_mode= get_bits(&s->gb, 3);
4472 mode = rem_mode + (rem_mode >= mode);
4476 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4478 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4480 write_back_intra_pred_mode(h);
4481 if( check_intra4x4_pred_mode(h) < 0)
4484 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4485 if(h->intra16x16_pred_mode < 0)
4489 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4492 h->chroma_pred_mode= pred_mode;
// Four partitions: sub macroblock types first, then reference indices, then
// one motion vector per sub partition.
4494 }else if(partition_count==4){
4495 int i, j, sub_partition_count[4], list, ref[2][4];
4497 if(h->slice_type_nos == FF_B_TYPE){
4499 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4500 if(h->sub_mb_type[i] >=13){
4501 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4504 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4505 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4507 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4508 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4509 pred_direct_motion(h, &mb_type);
4510 h->ref_cache[0][scan8[4]] =
4511 h->ref_cache[1][scan8[4]] =
4512 h->ref_cache[0][scan8[12]] =
4513 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4516 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4518 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4519 if(h->sub_mb_type[i] >=4){
4520 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4523 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4524 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4528 for(list=0; list<h->list_count; list++){
// IS_REF0 macroblocks are treated as having a single reference.
4529 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4531 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4532 if(IS_DIR(h->sub_mb_type[i], 0, list)){
// With exactly two references one inverted bit is read instead of an Exp-Golomb code.
4536 }else if(ref_count == 2){
4537 tmp= get_bits1(&s->gb)^1;
4539 tmp= get_ue_golomb_31(&s->gb);
4541 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4554 dct8x8_allowed = get_dct8x8_allowed(h);
4556 for(list=0; list<h->list_count; list++){
4558 if(IS_DIRECT(h->sub_mb_type[i])) {
4559 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4562 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4563 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4565 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4566 const int sub_mb_type= h->sub_mb_type[i];
4567 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4568 for(j=0; j<sub_partition_count[i]; j++){
4570 const int index= 4*i + block_width*j;
4571 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// The vector is the prediction plus a signed Exp-Golomb difference per component.
4572 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4573 mx += get_se_golomb(&s->gb);
4574 my += get_se_golomb(&s->gb);
4575 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// The vector is replicated into the cache cells that the sub partition covers.
4577 if(IS_SUB_8X8(sub_mb_type)){
4579 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4581 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4582 }else if(IS_SUB_8X4(sub_mb_type)){
4583 mv_cache[ 1 ][0]= mx;
4584 mv_cache[ 1 ][1]= my;
4585 }else if(IS_SUB_4X8(sub_mb_type)){
4586 mv_cache[ 8 ][0]= mx;
4587 mv_cache[ 8 ][1]= my;
4589 mv_cache[ 0 ][0]= mx;
4590 mv_cache[ 0 ][1]= my;
4593 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4599 }else if(IS_DIRECT(mb_type)){
4600 pred_direct_motion(h, &mb_type);
4601 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// 16x16, 16x8 and 8x16 partitions: reference indices are read for every list
// first, the motion vector differences follow in a second pass.
4603 int list, mx, my, i;
4604 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4605 if(IS_16X16(mb_type)){
4606 for(list=0; list<h->list_count; list++){
4608 if(IS_DIR(mb_type, 0, list)){
4609 if(h->ref_count[list]==1){
4611 }else if(h->ref_count[list]==2){
4612 val= get_bits1(&s->gb)^1;
4614 val= get_ue_golomb_31(&s->gb);
4615 if(val >= h->ref_count[list]){
4616 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4621 val= LIST_NOT_USED&0xFF;
4622 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4624 for(list=0; list<h->list_count; list++){
4626 if(IS_DIR(mb_type, 0, list)){
4627 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4628 mx += get_se_golomb(&s->gb);
4629 my += get_se_golomb(&s->gb);
4630 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4632 val= pack16to32(mx,my);
4635 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4638 else if(IS_16X8(mb_type)){
4639 for(list=0; list<h->list_count; list++){
4642 if(IS_DIR(mb_type, i, list)){
4643 if(h->ref_count[list] == 1){
4645 }else if(h->ref_count[list] == 2){
4646 val= get_bits1(&s->gb)^1;
4648 val= get_ue_golomb_31(&s->gb);
4649 if(val >= h->ref_count[list]){
4650 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4655 val= LIST_NOT_USED&0xFF;
// Partition i covers a 4 wide, 2 high cache area starting 16*i cells further on.
4656 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4659 for(list=0; list<h->list_count; list++){
4662 if(IS_DIR(mb_type, i, list)){
4663 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4664 mx += get_se_golomb(&s->gb);
4665 my += get_se_golomb(&s->gb);
4666 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4668 val= pack16to32(mx,my);
4671 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4675 assert(IS_8X16(mb_type));
4676 for(list=0; list<h->list_count; list++){
4679 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4680 if(h->ref_count[list]==1){
4682 }else if(h->ref_count[list]==2){
4683 val= get_bits1(&s->gb)^1;
4685 val= get_ue_golomb_31(&s->gb);
4686 if(val >= h->ref_count[list]){
4687 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4692 val= LIST_NOT_USED&0xFF;
// Partition i covers a 2 wide, 4 high cache area starting 2*i cells further on.
4693 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4696 for(list=0; list<h->list_count; list++){
4699 if(IS_DIR(mb_type, i, list)){
4700 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4701 mx += get_se_golomb(&s->gb);
4702 my += get_se_golomb(&s->gb);
4703 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4705 val= pack16to32(mx,my);
4708 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4714 if(IS_INTER(mb_type))
4715 write_back_motion(h, mb_type);
// Coded block pattern: intra 16x16 already got it from the type table, all
// other types read an Exp-Golomb code and remap it through a lookup table.
// The condition choosing between the plain and the gray tables is not
// visible in this excerpt.
4717 if(!IS_INTRA16x16(mb_type)){
4718 cbp= get_ue_golomb(&s->gb);
4720 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4725 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4726 else cbp= golomb_to_inter_cbp [cbp];
4728 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4729 else cbp= golomb_to_inter_cbp_gray[cbp];
// Inter macroblocks with coded luma read the 8x8 transform flag here.
4734 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4735 if(get_bits1(&s->gb)){
4736 mb_type |= MB_TYPE_8x8DCT;
4737 h->cbp_table[mb_xy]= cbp;
4740 s->current_picture.mb_type[mb_xy]= mb_type;
4742 if(cbp || IS_INTRA16x16(mb_type)){
4743 int i8x8, i4x4, chroma_idx;
4745 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4746 const uint8_t *scan, *scan8x8, *dc_scan;
4748 // fill_non_zero_count_cache(h);
// Field or zigzag scan tables; the _q0 variants are chosen when qscale is 0.
4750 if(IS_INTERLACED(mb_type)){
4751 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4752 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4753 dc_scan= luma_dc_field_scan;
4755 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4756 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4757 dc_scan= luma_dc_zigzag_scan;
// qp delta: values outside -26..25 are reported as an error. The updated
// qscale is wrapped back into 0..51 by adding or subtracting 52.
4760 dquant= get_se_golomb(&s->gb);
4762 if( dquant > 25 || dquant < -26 ){
4763 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4767 s->qscale += dquant;
4768 if(((unsigned)s->qscale) > 51){
4769 if(s->qscale<0) s->qscale+= 52;
4770 else s->qscale-= 52;
4773 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4774 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra 16x16: one luma DC block with 16 coefficients, then AC blocks with
// 15 coefficients each, scanned from scan + 1.
4775 if(IS_INTRA16x16(mb_type)){
4776 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4777 return -1; //FIXME continue if partitioned and other return -1 too
4780 assert((cbp&15) == 0 || (cbp&15) == 15);
4783 for(i8x8=0; i8x8<4; i8x8++){
4784 for(i4x4=0; i4x4<4; i4x4++){
4785 const int index= i4x4 + 4*i8x8;
4786 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4792 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other types: luma is coded per 8x8 block, gated by the low four cbp bits.
4795 for(i8x8=0; i8x8<4; i8x8++){
4796 if(cbp & (1<<i8x8)){
4797 if(IS_8x8DCT(mb_type)){
4798 DCTELEM *buf = &h->mb[64*i8x8];
// With the 8x8 transform, four calls fill one 64 coefficient buffer, each
// using its own 16 entry slice of scan8x8.
4800 for(i4x4=0; i4x4<4; i4x4++){
4801 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4802 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// The four per block counts are summed into the first cache entry of the 8x8 block.
4805 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4806 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4808 for(i4x4=0; i4x4<4; i4x4++){
4809 const int index= i4x4 + 4*i8x8;
4811 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// 8x8 blocks without coded coefficients get zero counts.
4817 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4818 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC: one 4 coefficient block per plane, passed a NULL qmul.
4824 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4825 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// Chroma AC: four blocks per plane with 15 coefficients each.
4831 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4832 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4833 for(i4x4=0; i4x4<4; i4x4++){
4834 const int index= 16 + 4*chroma_idx + i4x4;
4835 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4841 uint8_t * const nnz= &h->non_zero_count_cache[0];
4842 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4843 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// Nothing coded: all luma and chroma counts in the cache are cleared.
4846 uint8_t * const nnz= &h->non_zero_count_cache[0];
4847 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4848 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4849 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4851 s->current_picture.qscale_table[mb_xy]= s->qscale;
4852 write_back_non_zero_count(h);
// Undoes the ref_count doubling applied earlier in this function.
4855 h->ref_count[0] >>= 1;
4856 h->ref_count[1] >>= 1;
// Decodes the field decoding flag with CABAC.
// Both neighbour positions are computed from the even row of the macroblock
// pair (mb_y with bit 0 cleared): mba_xy is one column to the left and mbb_xy
// two rows above. Each neighbour is tested for belonging to the current slice
// and being interlaced; the statements that adjust ctx are not visible in
// this excerpt. One bin is decoded with state 70 + ctx.
4862 static int decode_cabac_field_decoding_flag(H264Context *h) {
4863 MpegEncContext * const s = &h->s;
4864 const int mb_x = s->mb_x;
4865 const int mb_y = s->mb_y & ~1;
4866 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4867 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4869 unsigned int ctx = 0;
4871 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4874 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4878 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes an intra macroblock type with CABAC, using the states that start
// at ctx_base.
// Visible results: 0 for I4x4, 25 for PCM (signalled by the terminate bin),
// otherwise an I16x16 index built from 1 by adding 12 for coded luma, 4 or 8
// for chroma and two further bins weighted 2 and 1. intra_slice shifts the
// state indices of the later bins. The final return is not visible here.
// One first bin uses a neighbour derived ctx, the other uses state[0]; the
// condition choosing between them is not visible in this excerpt.
4881 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4882 uint8_t *state= &h->cabac_state[ctx_base];
4886 MpegEncContext * const s = &h->s;
4887 const int mba_xy = h->left_mb_xy[0];
4888 const int mbb_xy = h->top_mb_xy;
4890 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4892 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4894 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4895 return 0; /* I4x4 */
4898 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4899 return 0; /* I4x4 */
4902 if( get_cabac_terminate( &h->cabac ) )
4903 return 25; /* PCM */
4905 mb_type = 1; /* I16x16 */
4906 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4907 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4908 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4909 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4910 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// Decodes the macroblock type of a B slice with CABAC.
// The first bin uses state 27 + ctx, where ctx depends on whether the left
// and top neighbours are in the current slice and not direct (the increments
// are not visible in this excerpt). Later bins use the fixed states 27+3,
// 27+4 and 27+5.
// Four bins form a 4 bit value named bits: 13 selects an intra type (decoded
// with base state 32 and offset by 23), 14 maps to type 11 and 15 to type 22.
// The condition guarding the return of bits + 3 is not visible; the last
// path reads one more bin and returns the 5 bit value minus 4.
4914 static int decode_cabac_mb_type_b( H264Context *h ) {
4915 MpegEncContext * const s = &h->s;
4917 const int mba_xy = h->left_mb_xy[0];
4918 const int mbb_xy = h->top_mb_xy;
4921 assert(h->slice_type_nos == FF_B_TYPE);
4923 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4925 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4928 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4929 return 0; /* B_Direct_16x16 */
4931 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4932 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4935 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4936 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4937 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4938 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4940 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4941 else if( bits == 13 ) {
4942 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4943 } else if( bits == 14 )
4944 return 11; /* B_L1_L0_8x16 */
4945 else if( bits == 15 )
4946 return 22; /* B_8x8 */
4948 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4949 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
// Decodes the skip flag of the macroblock at (mb_x, mb_y) with CABAC.
// The left (mba_xy) and top (mbb_xy) neighbour addresses are derived first.
// In MBAFF frames they start from the even row of the pair and are moved by
// one mb_stride depending on the field state of the neighbours; otherwise
// the top neighbour is mb_stride << FIELD_PICTURE entries above.
// Each neighbour is tested for being in the current slice and not skipped;
// the statements that adjust ctx, including the one taken in B slices, are
// not visible in this excerpt. One bin is decoded with state 11 + ctx.
4952 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4953 MpegEncContext * const s = &h->s;
4957 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4958 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4961 && h->slice_table[mba_xy] == h->slice_num
4962 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4963 mba_xy += s->mb_stride;
4965 mbb_xy = mb_xy - s->mb_stride;
4967 && h->slice_table[mbb_xy] == h->slice_num
4968 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4969 mbb_xy -= s->mb_stride;
4971 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4973 int mb_xy = h->mb_xy;
4975 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4978 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4980 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4983 if( h->slice_type_nos == FF_B_TYPE )
4985 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// Decodes an intra 4x4 prediction mode with CABAC.
// A first bin is read with state 68; the statement it guards is not visible
// in this excerpt. Three further bins read with state 69 are added to mode
// with weights 1, 2 and 4, least significant first. The resulting value is
// then compared against pred_mode; the adjustment and the return are not
// visible here.
4988 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4991 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4994 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4995 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4996 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4998 if( mode >= pred_mode )
// Decodes the chroma prediction mode with CABAC.
// The left and top neighbours are tested for being in the current slice and
// having a nonzero chroma_pred_mode_table entry; the statements that adjust
// ctx are not visible in this excerpt. The first bin uses state 64 + ctx, up
// to two further bins use state 64+3. The returned values are not visible
// here.
5004 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5005 const int mba_xy = h->left_mb_xy[0];
5006 const int mbb_xy = h->top_mb_xy;
5010 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5011 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5014 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5017 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5020 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5022 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Decodes the four luma bits of the coded block pattern with CABAC.
// cbp_a and cbp_b are the cbp of the left and top macroblock, or -1 (all
// bits set) when that neighbour is not in the current slice.
// For every bit, ctx is 1 when the left side 8x8 block is uncoded plus 2
// when the upper 8x8 block is uncoded. Those blocks come from the neighbour
// macroblocks or from bits of this macroblock that are already decoded.
// The return statement is not visible in this excerpt.
5028 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5029 int cbp_b, cbp_a, ctx, cbp = 0;
5031 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5032 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
// Bit 0: uses bit 1 of the left cbp and bit 2 of the top cbp.
5034 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5035 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
// Bit 1: uses bit 0 of this macroblock and bit 3 of the top cbp.
5036 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5037 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
// Bit 2: uses bit 3 of the left cbp and bit 0 of this macroblock.
5038 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5039 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
// Bit 3: uses bits 2 and 1 of this macroblock.
5040 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5041 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// Decodes the chroma part of the coded block pattern with CABAC.
// cbp_a and cbp_b are bits 4 and 5 of the left and top cbp.
// The first bin uses a ctx built from neighbours with a nonzero chroma cbp;
// the value returned when that bin is 0 is not visible in this excerpt.
// The second bin uses a ctx built from neighbours whose chroma cbp equals 2
// (the reinitialisation of ctx between the bins is not visible here) and the
// function then returns 1 plus that bin.
5044 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5048 cbp_a = (h->left_cbp>>4)&0x03;
5049 cbp_b = (h-> top_cbp>>4)&0x03;
5052 if( cbp_a > 0 ) ctx++;
5053 if( cbp_b > 0 ) ctx += 2;
5054 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5058 if( cbp_a == 2 ) ctx++;
5059 if( cbp_b == 2 ) ctx += 2;
5060 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// Decodes the quantizer delta with CABAC.
// The first bin uses state 60 + ctx, where ctx is 1 when the previous
// qscale difference was nonzero. Bins are read in a loop whose body is not
// visible in this excerpt, apart from the guard that stops at val > 102.
// The counter val is mapped to (val + 1)>>1 or to its negation; the
// condition selecting the sign is not visible here.
5062 static int decode_cabac_mb_dqp( H264Context *h) {
5063 int ctx= h->last_qscale_diff != 0;
5066 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5069 if(val > 102) //prevent infinite loop
5074 return (val + 1)>>1 ;
5076 return -((val + 1)>>1);
// Decodes a P sub macroblock type with CABAC.
// Up to three bins are read, with states 21, 22 and 23. The values returned
// on each path are not visible in this excerpt.
5078 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5079 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5081 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5083 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// Decodes a B sub macroblock type with CABAC.
// State 36 separates type 0 from the rest and state 37 selects the short
// codes 1 and 2. On the longer codes state 38 followed by state 39 can lead
// to types 11 and 12; the remaining types accumulate two more bins with
// weights 2 and 1. The initial value of type and the final return are not
// visible in this excerpt.
5087 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5089 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5090 return 0; /* B_Direct_8x8 */
5091 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5092 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5094 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5095 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5096 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5099 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5100 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// Decodes the transform size flag with CABAC.
// A single bin is read with state 399 + h->neighbor_transform_size and
// returned unchanged.
5104 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5105 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// Decodes a reference index for block n of the given list with CABAC.
// refa and refb are the ref_cache entries at scan8[n] - 1 and scan8[n] - 8.
// NOTE(review): these are presumably the left and top neighbours in the
// 8 wide cache layout; confirm against scan8.
// In B slices a neighbour only counts when its index is above 0 and its
// direct_cache entry is zero; the statements that adjust ctx and the non B
// path are not visible in this excerpt.
// Bins are read with state 54 + ctx while they are 1; the loop body is not
// visible apart from the guard at ref >= 32.
5108 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5109 int refa = h->ref_cache[list][scan8[n] - 1];
5110 int refb = h->ref_cache[list][scan8[n] - 8];
5114 if( h->slice_type_nos == FF_B_TYPE) {
5115 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5117 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5126 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5129 if(ref >= 32 /*h->ref_list[list]*/){
// Decodes one motion vector difference component with CABAC.
// l selects the component and its base state: 40 for l == 0, 47 otherwise.
// amvd is the sum of the absolute mvd_cache values at scan8[n] - 1 and
// scan8[n] - 8; the initial ctx is 0, 1 or 2 depending on whether amvd
// exceeds 2 and 32.
// Visible structure: a first context coded bin, a context coded loop that
// runs while mvd is below 9, then bypass coded bins with an overflow error
// message, and a bypass coded sign applied through get_cabac_bypass_sign().
// The loop bodies are not visible in this excerpt.
5136 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5137 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5138 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5139 int ctxbase = (l == 0) ? 40 : 47;
5141 int ctx = (amvd>2) + (amvd>32);
5143 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5148 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5156 while( get_cabac_bypass( &h->cabac ) ) {
5160 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5165 if( get_cabac_bypass( &h->cabac ) )
5169 return get_cabac_bypass_sign( &h->cabac, -mvd );
// Computes the context index used for the coded block flag of block idx in
// category cat.
// nza and nzb describe the left and top neighbour. Three sources are
// visible: bit 0x100 of left_cbp and top_cbp, bit 6+idx of the same fields,
// and the non_zero_count_cache entries at scan8[idx] - 1 and scan8[idx] - 8
// for categories 1, 2 and 4. The conditions selecting between them and the
// way ctx is formed from nza and nzb are not visible in this excerpt.
// The result is ctx + 4 * cat.
5172 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5178 nza = h->left_cbp&0x100;
5179 nzb = h-> top_cbp&0x100;
5181 nza = (h->left_cbp>>(6+idx))&0x01;
5182 nzb = (h-> top_cbp>>(6+idx))&0x01;
5185 assert(cat == 1 || cat == 2 || cat == 4);
5186 nza = h->non_zero_count_cache[scan8[idx] - 1];
5187 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5196 return ctx + 4 * cat;
// Table with 63 entries, indexed by the coefficient position named last in
// the 8x8 residual decoder. It is passed as the last_off argument of
// DECODE_SIGNIFICANCE, where it is added to last_coeff_ctx_base to pick the
// state for the last coefficient flag.
5199 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5200 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5201 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5202 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5203 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes the CABAC-coded residual coefficients of one block into block[].
 *
 * Visible steps: read the coded block flag, decode which coefficient
 * positions are significant, then decode each level and sign and store the
 * value at block[scantable[position]].
 *
 * @param h         decoder context; supplies the CABAC decoder and its context states
 * @param block     output coefficient array
 * @param cat       block category (0..5), see the "cat:" table inside the function
 * @param n         block index; its meaning per category is listed in the same table
 * @param scantable maps a coefficient's scan index to its position in block[]
 * @param qmul      per-position multipliers applied to the decoded level on the
 *                  scaling paths (callers in this file pass NULL for categories 0 and 3)
 * @param max_coeff number of coefficients in the block
 * @param is_dc     nonzero for DC blocks
 *
 * NOTE(review): this listing omits a number of short lines of this function
 * (declarations, braces, some conditions); the comments below describe only
 * what the visible lines establish.
 */
5206 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* Offsets into h->cabac_state of the significant_coeff_flag contexts, indexed [MB_FIELD][cat]. */
5207 static const int significant_coeff_flag_offset[2][6] = {
5208 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5209 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
/* Offsets into h->cabac_state of the last_coeff_flag contexts, indexed [MB_FIELD][cat]. */
5211 static const int last_coeff_flag_offset[2][6] = {
5212 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5213 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
/* Offsets into h->cabac_state of the level contexts, indexed by cat. */
5215 static const int coeff_abs_level_m1_offset[6] = {
5216 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* Per-position context offsets for the significance flags of the 8x8 path,
 * indexed [MB_FIELD][scan position]. */
5218 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5219 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5220 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5221 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5222 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5223 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5224 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5225 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5226 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5228 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5229 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5230 * map node ctx => cabac ctx for level=1 */
5231 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5232 /* map node ctx => cabac ctx for level>1 */
5233 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5234 static const uint8_t coeff_abs_level_transition[2][8] = {
5235 /* update node ctx after decoding a level=1 */
5236 { 1, 2, 3, 3, 4, 5, 6, 7 },
5237 /* update node ctx after decoding a level>1 */
5238 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Number of significant positions recorded in index[] so far. */
5244 int coeff_count = 0;
/* Context base pointers, set below once the block is known to be coded. */
5247 uint8_t *significant_coeff_ctx_base;
5248 uint8_t *last_coeff_ctx_base;
5249 uint8_t *abs_level_m1_ctx_base;
/* With CABAC_ON_STACK the decoder's range/low/bytestream are copied into the
 * local cc here and copied back into h->cabac at the write-back points below.
 * CC names the decoder state handed to the get_cabac*() calls. */
5252 #define CABAC_ON_STACK
5254 #ifdef CABAC_ON_STACK
5257 cc.range = h->cabac.range;
5258 cc.low = h->cabac.low;
5259 cc.bytestream= h->cabac.bytestream;
5261 #define CC &h->cabac
5265 /* cat: 0-> DC 16x16 n = 0
5266 * 1-> AC 16x16 n = luma4x4idx
5267 * 2-> Luma4x4 n = luma4x4idx
5268 * 3-> DC Chroma n = iCbCr
5269 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5270 * 5-> Luma8x8 n = 4 * luma8x8idx
5273 /* read coded block flag */
/* The flag is read unless this is a non-DC block of category 5. */
5274 if( is_dc || cat != 5 ) {
5275 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* Flag is 0: record a zero coefficient count for this block and write the
 * decoder state back. */
5277 h->non_zero_count_cache[scan8[n]] = 0;
5279 #ifdef CABAC_ON_STACK
5280 h->cabac.range = cc.range ;
5281 h->cabac.low = cc.low ;
5282 h->cabac.bytestream= cc.bytestream;
/* Context base pointers for this category (and field/frame mode for the two
 * flag types). */
5288 significant_coeff_ctx_base = h->cabac_state
5289 + significant_coeff_flag_offset[MB_FIELD][cat];
5290 last_coeff_ctx_base = h->cabac_state
5291 + last_coeff_flag_offset[MB_FIELD][cat];
5292 abs_level_m1_ctx_base = h->cabac_state
5293 + coeff_abs_level_m1_offset[cat];
/* Significance map. DECODE_SIGNIFICANCE scans positions 0..coefs-1; each
 * position whose significance flag decodes as set is appended to index[],
 * after which its last-coefficient flag is read. The non-DC category 5 case
 * uses the 8x8 per-position offset tables. */
5295 if( !is_dc && cat == 5 ) {
5296 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5297 for(last= 0; last < coefs; last++) { \
5298 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5299 if( get_cabac( CC, sig_ctx )) { \
5300 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5301 index[coeff_count++] = last; \
5302 if( get_cabac( CC, last_ctx ) ) { \
5308 if( last == max_coeff -1 ) {\
5309 index[coeff_count++] = last;\
5311 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5312 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5313 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5315 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5317 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5319 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5322 assert(coeff_count > 0);
/* Set a bit in cbp_table for this block; the conditions choosing between the
 * two forms are not visible in this listing. */
5326 h->cbp_table[h->mb_xy] |= 0x100;
5328 h->cbp_table[h->mb_xy] |= 0x40 << n;
/* Store coeff_count in the non-zero-count cache: through
 * fill_rectangle(..., 2, 2, ...) in one form, as a single entry in the other
 * (asserted to be cat 1, 2 or 4). */
5331 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5333 assert( cat == 1 || cat == 2 || cat == 4 );
5334 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Levels are decoded from the last recorded position back to the first
 * (index[--coeff_count]); node_ctx selects the context and is updated through
 * coeff_abs_level_transition after every level. */
5339 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5341 int j= scantable[index[--coeff_count]];
/* First bin 0: the magnitude is 1, so only the sign is read (bypass). The
 * value is stored either as +/-1 or as +/-qmul[j] rounded (+32) and shifted
 * right by 6. */
5343 if( get_cabac( CC, ctx ) == 0 ) {
5344 node_ctx = coeff_abs_level_transition[0][node_ctx];
5346 block[j] = get_cabac_bypass_sign( CC, -1);
5348 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
/* Otherwise the magnitude is greater than 1: switch to the level>1 context. */
5352 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5353 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* Context-coded bins are read while coeff_abs stays below 15. */
5355 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* From 15 on, the rest of the magnitude is read with bypass bins. */
5359 if( coeff_abs >= 15 ) {
5361 while( get_cabac_bypass( CC ) ) {
5367 coeff_abs += coeff_abs + get_cabac_bypass( CC );
/* Store the signed level, either as is or multiplied by qmul[j], rounded
 * (+32) and shifted right by 6. */
5373 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5375 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5378 } while( coeff_count );
/* Write the local decoder state back to the context. */
5379 #ifdef CABAC_ON_STACK
5380 h->cabac.range = cc.range ;
5381 h->cabac.low = cc.low ;
5382 h->cabac.bytestream= cc.bytestream;
/**
 * Decodes a residual block with is_dc fixed to 1; all other arguments are
 * forwarded unchanged to decode_cabac_residual_internal().
 */
5388 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5389 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/**
 * Decodes a residual block with is_dc fixed to 0; all other arguments are
 * forwarded unchanged to decode_cabac_residual_internal().
 */
5392 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5393 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/**
 * Decodes one CABAC residual block; categories 0 and 3 are handled as DC
 * blocks, every other category as non-DC.
 *
 * NOTE(review): two dispatch forms are visible below, a direct call with
 * is_dc computed from cat and a call through the _dc/_nondc wrappers.
 * Presumably preprocessor conditionals that are not visible in this listing
 * select one of them; confirm.
 */
5397 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5399 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5401 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5402 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0], the macroblock indices used as
 * top and left neighbours of the current macroblock (h->mb_xy).
 */
5406 static inline void compute_mb_neighbors(H264Context *h)
5408 MpegEncContext * const s = &h->s;
5409 const int mb_xy = h->mb_xy;
/* Defaults: the macroblock one row up and the one immediately to the left. */
5410 h->top_mb_xy = mb_xy - s->mb_stride;
5411 h->left_mb_xy[0] = mb_xy - 1;
/* Macroblock-pair handling: pair_xy is the index of the even-row macroblock
 * of the current pair, top_pair_xy the one a row above it.
 * NOTE(review): the condition guarding this part is not visible in this
 * listing. */
5413 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5414 const int top_pair_xy = pair_xy - s->mb_stride;
5415 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5416 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5417 const int curr_mb_field_flag = MB_FIELD;
5418 const int bottom = (s->mb_y & 1);
/* Field macroblock in the odd row of its pair, or with a field-coded pair
 * above: the top neighbour is one more row up. */
5420 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5421 h->top_mb_xy -= s->mb_stride;
/* Note the precedence: this compares (!left_mb_field_flag) with
 * curr_mb_field_flag, i.e. it holds when exactly one of the two is field
 * coded (assuming MB_FIELD evaluates to 0 or 1). The left neighbour then
 * becomes the macroblock left of pair_xy. */
5423 if (!left_mb_field_flag == curr_mb_field_flag) {
5424 h->left_mb_xy[0] = pair_xy - 1;
/* Field picture: the top neighbour is one more row up. */
5426 } else if (FIELD_PICTURE) {
5427 h->top_mb_xy -= s->mb_stride;
5433 * Decodes a macroblock.
5434 * @returns 0 if OK, -1 if an error is noticed
/*
 * Decodes one macroblock from the CABAC bitstream. The visible stages are:
 * skip flag and field decoding flag, macroblock type, prediction data (intra
 * prediction modes, or reference indices and motion vectors), coded block
 * pattern, quantizer delta and the residual blocks. The error paths visible
 * here return -1.
 *
 * NOTE(review): this listing omits a number of short lines of this function
 * (declarations, braces, else/return lines, some conditions); the comments
 * below describe only what the visible lines establish.
 */
5436 static int decode_mb_cabac(H264Context *h) {
5437 MpegEncContext * const s = &h->s;
5439 int mb_type, partition_count, cbp = 0;
5440 int dct8x8_allowed= h->pps.transform_8x8_mode;
/* Linear index of the current macroblock. */
5442 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5444 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* Skip flags are only read outside I slices. */
5445 if( h->slice_type_nos != FF_I_TYPE ) {
5447 /* a skipped mb needs the aff flag from the following mb */
5448 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5449 predict_field_decoding_flag(h);
/* Odd-row macroblock of an MBAFF pair whose predecessor was skipped: reuse
 * the skip flag that was already read for it. */
5450 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5451 skip = h->next_mb_skipped;
5453 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5454 /* read skip flags */
/* Even-row macroblock in an MBAFF frame: also read the skip flag of the
 * macroblock below; if that one is not skipped, read the field decoding flag
 * now. */
5456 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5457 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5458 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5459 if(!h->next_mb_skipped)
5460 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Clear the per-macroblock cbp, chroma prediction mode and qscale delta
 * (presumably on the skipped-macroblock path; the enclosing condition is not
 * visible here). */
5465 h->cbp_table[mb_xy] = 0;
5466 h->chroma_pred_mode_table[mb_xy] = 0;
5467 h->last_qscale_diff = 0;
/* Field decoding flag for an even-row macroblock (outer condition not
 * visible in this listing). */
5474 if( (s->mb_y&1) == 0 )
5476 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5479 h->prev_mb_skipped = 0;
/* Locate the top/left neighbours before decoding the macroblock type. */
5481 compute_mb_neighbors(h);
/* Macroblock type, B slices. */
5483 if( h->slice_type_nos == FF_B_TYPE ) {
5484 mb_type = decode_cabac_mb_type_b( h );
5486 partition_count= b_mb_type_info[mb_type].partition_count;
5487 mb_type= b_mb_type_info[mb_type].type;
5490 goto decode_intra_mb;
/* Macroblock type, P slices: the bin at context 14 separates inter types
 * from intra types. */
5492 } else if( h->slice_type_nos == FF_P_TYPE ) {
5493 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5495 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5496 /* P_L0_D16x16, P_8x8 */
5497 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5499 /* P_L0_D8x16, P_L0_D16x8 */
5500 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5502 partition_count= p_mb_type_info[mb_type].partition_count;
5503 mb_type= p_mb_type_info[mb_type].type;
5505 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5506 goto decode_intra_mb;
/* Remaining case: I slices (asserted below). */
5509 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5510 if(h->slice_type == FF_SI_TYPE && mb_type)
5512 assert(h->slice_type_nos == FF_I_TYPE);
/* Intra macroblock: no partitions; cbp, 16x16 prediction mode and the final
 * type flags come from i_mb_type_info. */
5514 partition_count = 0;
5515 cbp= i_mb_type_info[mb_type].cbp;
5516 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5517 mb_type= i_mb_type_info[mb_type].type;
5520 mb_type |= MB_TYPE_INTERLACED;
/* Tag this macroblock with the current slice number. */
5522 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: sample bytes are copied straight from the bytestream into h->mb. */
5524 if(IS_INTRA_PCM(mb_type)) {
5527 // We assume these blocks are very rare so we do not optimize it.
5528 // FIXME The two following lines get the bitstream position in the cabac
5529 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5530 ptr= h->cabac.bytestream;
5531 if(h->cabac.low&0x1) ptr--;
5533 if(h->cabac.low&0x1FF) ptr--;
5536 // The pixels are stored in the same order as levels in h->mb array.
/* NOTE(review): no check against h->cabac.bytestream_end is visible before
 * these copies; confirm the input buffer cannot be overread. */
5537 memcpy(h->mb, ptr, 256); ptr+=256;
5539 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Restart the CABAC decoder right after the copied bytes. */
5542 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5544 // All blocks are present
5545 h->cbp_table[mb_xy] = 0x1ef;
5546 h->chroma_pred_mode_table[mb_xy] = 0;
5547 // In deblocking, the quantizer is 0
5548 s->current_picture.qscale_table[mb_xy]= 0;
5549 // All coeffs are present
5550 memset(h->non_zero_count[mb_xy], 16, 16);
5551 s->current_picture.mb_type[mb_xy]= mb_type;
5552 h->last_qscale_diff = 0;
/* ref_count is doubled here and halved again at the end of the function
 * (the guarding conditions are not visible in this listing). */
5557 h->ref_count[0] <<= 1;
5558 h->ref_count[1] <<= 1;
5561 fill_caches(h, mb_type, 0);
/* ---- Intra macroblocks: prediction modes ---- */
5563 if( IS_INTRA( mb_type ) ) {
5565 if( IS_INTRA4x4( mb_type ) ) {
/* With the 8x8 transform, one mode is decoded per group of four blocks
 * (i = 0, 4, 8, 12) and written through fill_rectangle(..., 2, 2, ...). */
5566 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5567 mb_type |= MB_TYPE_8x8DCT;
5568 for( i = 0; i < 16; i+=4 ) {
5569 int pred = pred_intra_mode( h, i );
5570 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5571 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
/* Otherwise one mode per 4x4 block. */
5574 for( i = 0; i < 16; i++ ) {
5575 int pred = pred_intra_mode( h, i );
5576 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5578 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5581 write_back_intra_pred_mode(h);
5582 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
/* Intra 16x16: validate the prediction mode; a negative result is an error. */
5584 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5585 if( h->intra16x16_pred_mode < 0 ) return -1;
/* Chroma prediction mode: stored in the table as decoded, then validated
 * before being kept in h->chroma_pred_mode. */
5588 h->chroma_pred_mode_table[mb_xy] =
5589 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5591 pred_mode= check_intra_pred_mode( h, pred_mode );
5592 if( pred_mode < 0 ) return -1;
5593 h->chroma_pred_mode= pred_mode;
/* ---- Four 8x8 partitions: sub-macroblock types, references, vectors ---- */
5595 } else if( partition_count == 4 ) {
5596 int i, j, sub_partition_count[4], list, ref[2][4];
/* Sub-macroblock types for B slices. */
5598 if( h->slice_type_nos == FF_B_TYPE ) {
5599 for( i = 0; i < 4; i++ ) {
5600 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5601 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5602 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* If any sub-block is direct, run direct prediction and mark the direct
 * sub-blocks in direct_cache. */
5604 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5605 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5606 pred_direct_motion(h, &mb_type);
5607 h->ref_cache[0][scan8[4]] =
5608 h->ref_cache[1][scan8[4]] =
5609 h->ref_cache[0][scan8[12]] =
5610 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5611 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5612 for( i = 0; i < 4; i++ )
5613 if( IS_DIRECT(h->sub_mb_type[i]) )
5614 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
/* Sub-macroblock types for the other (P) case. */
5618 for( i = 0; i < 4; i++ ) {
5619 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5620 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5621 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference index of every non-direct 8x8 partition, per list; an index not
 * below ref_count is reported as an error. */
5625 for( list = 0; list < h->list_count; list++ ) {
5626 for( i = 0; i < 4; i++ ) {
5627 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5628 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5629 if( h->ref_count[list] > 1 ){
5630 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5631 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5632 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5640 h->ref_cache[list][ scan8[4*i]+1 ]=
5641 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5646 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors: for every sub-partition, predictor plus decoded
 * difference. */
5648 for(list=0; list<h->list_count; list++){
5650 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5651 if(IS_DIRECT(h->sub_mb_type[i])){
5652 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5656 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5657 const int sub_mb_type= h->sub_mb_type[i];
5658 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5659 for(j=0; j<sub_partition_count[i]; j++){
5662 const int index= 4*i + block_width*j;
5663 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5664 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5665 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5667 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5668 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5669 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate mv and mvd over the cache entries the sub-partition shape
 * covers. */
5671 if(IS_SUB_8X8(sub_mb_type)){
5673 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5675 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5678 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5680 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5681 }else if(IS_SUB_8X4(sub_mb_type)){
5682 mv_cache[ 1 ][0]= mx;
5683 mv_cache[ 1 ][1]= my;
5685 mvd_cache[ 1 ][0]= mx - mpx;
5686 mvd_cache[ 1 ][1]= my - mpy;
5687 }else if(IS_SUB_4X8(sub_mb_type)){
5688 mv_cache[ 8 ][0]= mx;
5689 mv_cache[ 8 ][1]= my;
5691 mvd_cache[ 8 ][0]= mx - mpx;
5692 mvd_cache[ 8 ][1]= my - mpy;
5694 mv_cache[ 0 ][0]= mx;
5695 mv_cache[ 0 ][1]= my;
5697 mvd_cache[ 0 ][0]= mx - mpx;
5698 mvd_cache[ 0 ][1]= my - mpy;
/* Zero the four mv and mvd cache entries of this 8x8 block through 32-bit
 * stores. */
5701 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5702 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5703 p[0] = p[1] = p[8] = p[9] = 0;
5704 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* ---- Direct macroblock: predicted motion, zero mvd ---- */
5708 } else if( IS_DIRECT(mb_type) ) {
5709 pred_direct_motion(h, &mb_type);
5710 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5711 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5712 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16, 16x8 and 8x16 partitions ----
 * Each shape first reads the reference indices for all lists, then the motion
 * vectors. */
5714 int list, mx, my, i, mpx, mpy;
5715 if(IS_16X16(mb_type)){
5716 for(list=0; list<h->list_count; list++){
5717 if(IS_DIR(mb_type, 0, list)){
5719 if(h->ref_count[list] > 1){
5720 ref= decode_cabac_mb_ref(h, list, 0);
5721 if(ref >= (unsigned)h->ref_count[list]){
5722 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5727 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5729 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5731 for(list=0; list<h->list_count; list++){
5732 if(IS_DIR(mb_type, 0, list)){
5733 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5735 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5736 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5737 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5739 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5740 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5742 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
/* 16x8: partition i is addressed as block 8*i and cache offset 16*i. */
5745 else if(IS_16X8(mb_type)){
5746 for(list=0; list<h->list_count; list++){
5748 if(IS_DIR(mb_type, i, list)){
5750 if(h->ref_count[list] > 1){
5751 ref= decode_cabac_mb_ref( h, list, 8*i );
5752 if(ref >= (unsigned)h->ref_count[list]){
5753 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5758 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5760 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5763 for(list=0; list<h->list_count; list++){
5765 if(IS_DIR(mb_type, i, list)){
5766 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5767 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5768 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5769 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5771 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5772 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5774 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5775 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
/* 8x16: partition i is addressed as block 4*i and cache offset 2*i. */
5780 assert(IS_8X16(mb_type));
5781 for(list=0; list<h->list_count; list++){
5783 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5785 if(h->ref_count[list] > 1){
5786 ref= decode_cabac_mb_ref( h, list, 4*i );
5787 if(ref >= (unsigned)h->ref_count[list]){
5788 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5793 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5795 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5798 for(list=0; list<h->list_count; list++){
5800 if(IS_DIR(mb_type, i, list)){
5801 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5802 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5803 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5805 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5806 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5807 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5809 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5810 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* Inter macroblocks: chroma prediction mode entry is zeroed and the motion
 * data is written back. */
5817 if( IS_INTER( mb_type ) ) {
5818 h->chroma_pred_mode_table[mb_xy] = 0;
5819 write_back_motion( h, mb_type );
/* Coded block pattern: intra 16x16 took it from i_mb_type_info above, every
 * other type reads the luma part (low bits) and the chroma part (shifted
 * left by 4) from the bitstream. */
5822 if( !IS_INTRA16x16( mb_type ) ) {
5823 cbp = decode_cabac_mb_cbp_luma( h );
5825 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5828 h->cbp_table[mb_xy] = h->cbp = cbp;
/* Transform size flag for non-intra macroblocks with any of the low four cbp
 * bits set. */
5830 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5831 if( decode_cabac_mb_transform_size( h ) )
5832 mb_type |= MB_TYPE_8x8DCT;
5834 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- Residual data ---- */
5836 if( cbp || IS_INTRA16x16( mb_type ) ) {
5837 const uint8_t *scan, *scan8x8, *dc_scan;
5838 const uint32_t *qmul;
/* Scan tables: field scans for interlaced macroblocks, zigzag otherwise; the
 * _q0 variants are used when s->qscale is 0. */
5841 if(IS_INTERLACED(mb_type)){
5842 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5843 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5844 dc_scan= luma_dc_field_scan;
5846 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5847 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5848 dc_scan= luma_dc_zigzag_scan;
/* Quantizer delta; INT_MIN signals a decoding failure. */
5851 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5852 if( dqp == INT_MIN ){
5853 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Bring s->qscale back into 0..51 by adding or subtracting 52. */
5857 if(((unsigned)s->qscale) > 51){
5858 if(s->qscale<0) s->qscale+= 52;
5859 else s->qscale-= 52;
5861 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5862 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Intra 16x16: one category 0 block with 16 coefficients, then sixteen
 * category 1 blocks with 15 coefficients each. */
5864 if( IS_INTRA16x16( mb_type ) ) {
5866 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5867 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5870 qmul = h->dequant4_coeff[0][s->qscale];
5871 for( i = 0; i < 16; i++ ) {
5872 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5873 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
/* Alternative to the loop above (its selecting condition is not visible in
 * this listing): zero the nnz cache area starting at scan8[0]. */
5876 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Other types, per 8x8 block whose cbp bit is set: one category 5 block with
 * 64 coefficients when the 8x8 transform is used, otherwise four category 2
 * blocks with 16 coefficients each. */
5880 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5881 if( cbp & (1<<i8x8) ) {
5882 if( IS_8x8DCT(mb_type) ) {
5883 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5884 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5886 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5887 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5888 const int index = 4*i8x8 + i4x4;
5889 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5891 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5892 //STOP_TIMER("decode_residual")
/* Zero the four nnz cache entries of this 8x8 block. */
5896 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5897 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC: one category 3 block with 4 coefficients per plane. */
5904 for( c = 0; c < 2; c++ ) {
5905 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5906 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* Chroma AC: four category 4 blocks with 15 coefficients per plane. */
5912 for( c = 0; c < 2; c++ ) {
5913 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5914 for( i = 0; i < 4; i++ ) {
5915 const int index = 16 + 4 * c + i;
5916 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5917 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
/* Zero the nnz cache entries at scan8[16] and scan8[20] (2x2 each). */
5921 uint8_t * const nnz= &h->non_zero_count_cache[0];
5922 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5923 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residual path: zero all nnz cache entries used above and the qscale
 * delta. */
5926 uint8_t * const nnz= &h->non_zero_count_cache[0];
5927 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5928 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5929 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5930 h->last_qscale_diff = 0;
/* Store the macroblock's quantizer and write the nnz cache back. */
5933 s->current_picture.qscale_table[mb_xy]= s->qscale;
5934 write_back_non_zero_count(h);
/* Undo the ref_count doubling done earlier in this function. */
5937 h->ref_count[0] >>= 1;
5938 h->ref_count[1] >>= 1;
/**
 * Filters one luma edge through the dsp h264_h_loop_filter_luma or
 * h264_h_loop_filter_luma_intra function.
 *
 * @param h      decoder context (slice alpha/beta offsets, dsp functions)
 * @param pix    pixel pointer handed to the dsp function
 * @param stride line stride handed to the dsp function
 * @param bS     four bS values, each used to index tc0_table
 * @param qp     quantizer used to look up alpha, beta and tc
 */
5945 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* The tables are addressed with a +52 bias on the base pointer. */
5946 const int index_a = qp + h->slice_alpha_c0_offset;
5947 const int alpha = (alpha_table+52)[index_a];
5948 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* One tc value per bS entry, for the tc-based filter. */
5952 tc[0] = (tc0_table+52)[index_a][bS[0]];
5953 tc[1] = (tc0_table+52)[index_a][bS[1]];
5954 tc[2] = (tc0_table+52)[index_a][bS[2]];
5955 tc[3] = (tc0_table+52)[index_a][bS[3]];
5956 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
/* Alternative: the intra filter, which takes no tc.
 * NOTE(review): the condition choosing between the two calls is not visible
 * in this listing. */
5958 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/**
 * Filters one chroma edge through the dsp h264_h_loop_filter_chroma or
 * h264_h_loop_filter_chroma_intra function.
 *
 * @param h      decoder context (slice alpha/beta offsets, dsp functions)
 * @param pix    pixel pointer handed to the dsp function
 * @param stride line stride handed to the dsp function
 * @param bS     four bS values, each used to index tc0_table
 * @param qp     quantizer used to look up alpha, beta and tc
 */
5961 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* The tables are addressed with a +52 bias on the base pointer. */
5962 const int index_a = qp + h->slice_alpha_c0_offset;
5963 const int alpha = (alpha_table+52)[index_a];
5964 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* One tc value per bS entry; unlike the luma variant, 1 is added to each
 * table value. */
5968 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5969 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5970 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5971 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5972 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Alternative: the intra filter, which takes no tc.
 * NOTE(review): the condition choosing between the two calls is not visible
 * in this listing. */
5974 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Filters a luma edge one row at a time over 16 rows, selecting the bS entry
 * (out of 8) and the qp entry (out of 2) separately for every row.
 *
 * @param h      decoder context (slice alpha/beta offsets)
 * @param pix    pointer to the first pixel right of the edge in the first
 *               row; pix[-1] is the first pixel left of it
 * @param stride distance in bytes between consecutive rows
 * @param bS     eight bS values
 * @param qp     two quantizers
 *
 * NOTE(review): this listing omits some short lines of this function; the
 * comments below describe only what the visible lines establish.
 */
5978 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5980 for( i = 0; i < 16; i++, pix += stride) {
/* Choose the bS entry that applies to this row. */
5986 int bS_index = (i >> 1);
5989 bS_index |= (i & 1);
/* Rows with bS 0 take this branch; its body is not visible in this listing
 * (presumably the row is skipped). */
5992 if( bS[bS_index] == 0 ) {
/* qp entry: by half (rows 0-7 / 8-15) when MB_FIELD is set, by row parity
 * otherwise. */
5996 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5997 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5998 alpha = (alpha_table+52)[index_a];
5999 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS below 4: filter with corrections clipped to +/-tc0 (p1, q1) and +/-tc
 * (p0, q0). */
6001 if( bS[bS_index] < 4 ) {
6002 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6003 const int p0 = pix[-1];
6004 const int p1 = pix[-2];
6005 const int p2 = pix[-3];
6006 const int q0 = pix[0];
6007 const int q1 = pix[1];
6008 const int q2 = pix[2];
/* Only filter where the step across the edge and the steps next to it are
 * below the alpha/beta thresholds. */
6010 if( FFABS( p0 - q0 ) < alpha &&
6011 FFABS( p1 - p0 ) < beta &&
6012 FFABS( q1 - q0 ) < beta ) {
/* p1 and q1 are adjusted only when p2 resp. q2 is close to p0 resp. q0. */
6016 if( FFABS( p2 - p0 ) < beta ) {
6017 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6020 if( FFABS( q2 - q0 ) < beta ) {
6021 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* The same clipped delta is added to p0 and subtracted from q0. */
6025 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6026 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6027 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6028 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Remaining case (bS not below 4): averaging filter without tc clipping. */
6031 const int p0 = pix[-1];
6032 const int p1 = pix[-2];
6033 const int p2 = pix[-3];
6035 const int q0 = pix[0];
6036 const int q1 = pix[1];
6037 const int q2 = pix[2];
6039 if( FFABS( p0 - q0 ) < alpha &&
6040 FFABS( p1 - p0 ) < beta &&
6041 FFABS( q1 - q0 ) < beta ) {
/* When the step across the edge is also below (alpha >> 2) + 2, up to three
 * pixels on each side may be rewritten, depending on the p2/q2 tests. */
6043 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6044 if( FFABS( p2 - p0 ) < beta)
6046 const int p3 = pix[-4];
6048 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6049 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6050 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6053 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6055 if( FFABS( q2 - q0 ) < beta)
6057 const int q3 = pix[3];
6059 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6060 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6061 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6064 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Otherwise only p0 and q0 are rewritten. */
6068 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6069 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6071 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Filters a chroma edge one row at a time over 8 rows, selecting the bS and
 * qp entries separately for every row. Only the pixels at pix[-1] and pix[0]
 * are rewritten.
 *
 * @param h      decoder context (slice alpha/beta offsets)
 * @param pix    pointer to the first pixel right of the edge in the first
 *               row; pix[-1] is the first pixel left of it
 * @param stride distance in bytes between consecutive rows
 * @param bS     eight bS values
 * @param qp     two quantizers
 *
 * NOTE(review): this listing omits some short lines of this function,
 * including the computation of bS_index.
 */
6076 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6078 for( i = 0; i < 8; i++, pix += stride) {
/* Rows with bS 0 take this branch; its body is not visible in this listing
 * (presumably the row is skipped). */
6086 if( bS[bS_index] == 0 ) {
/* qp entry: by half (rows 0-3 / 4-7) when MB_FIELD is set, by row parity
 * otherwise. */
6090 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6091 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6092 alpha = (alpha_table+52)[index_a];
6093 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS below 4: delta clipped to +/-tc, where tc is the table value plus 1. */
6095 if( bS[bS_index] < 4 ) {
6096 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6097 const int p0 = pix[-1];
6098 const int p1 = pix[-2];
6099 const int q0 = pix[0];
6100 const int q1 = pix[1];
6102 if( FFABS( p0 - q0 ) < alpha &&
6103 FFABS( p1 - p0 ) < beta &&
6104 FFABS( q1 - q0 ) < beta ) {
6105 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6107 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6108 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6109 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Remaining case (bS not below 4): p0 and q0 are replaced by weighted
 * averages, without tc clipping. */
6112 const int p0 = pix[-1];
6113 const int p1 = pix[-2];
6114 const int q0 = pix[0];
6115 const int q1 = pix[1];
6117 if( FFABS( p0 - q0 ) < alpha &&
6118 FFABS( p1 - p0 ) < beta &&
6119 FFABS( q1 - q0 ) < beta ) {
6121 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6122 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6123 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Filters one luma edge through the dsp h264_v_loop_filter_luma or
 * h264_v_loop_filter_luma_intra function.
 *
 * @param h      decoder context (slice alpha/beta offsets, dsp functions)
 * @param pix    pixel pointer handed to the dsp function
 * @param stride line stride handed to the dsp function
 * @param bS     four bS values, each used to index tc0_table
 * @param qp     quantizer used to look up alpha, beta and tc
 */
6129 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* The tables are addressed with a +52 bias on the base pointer. */
6130 const int index_a = qp + h->slice_alpha_c0_offset;
6131 const int alpha = (alpha_table+52)[index_a];
6132 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* One tc value per bS entry, for the tc-based filter. */
6136 tc[0] = (tc0_table+52)[index_a][bS[0]];
6137 tc[1] = (tc0_table+52)[index_a][bS[1]];
6138 tc[2] = (tc0_table+52)[index_a][bS[2]];
6139 tc[3] = (tc0_table+52)[index_a][bS[3]];
6140 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
/* Alternative: the intra filter, which takes no tc.
 * NOTE(review): the condition choosing between the two calls is not visible
 * in this listing. */
6142 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/**
 * Filters one chroma edge through the dsp h264_v_loop_filter_chroma or
 * h264_v_loop_filter_chroma_intra function.
 *
 * @param h      decoder context (slice alpha/beta offsets, dsp functions)
 * @param pix    pixel pointer handed to the dsp function
 * @param stride line stride handed to the dsp function
 * @param bS     four bS values, each used to index tc0_table
 * @param qp     quantizer used to look up alpha, beta and tc
 */
6146 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* The tables are addressed with a +52 bias on the base pointer. */
6147 const int index_a = qp + h->slice_alpha_c0_offset;
6148 const int alpha = (alpha_table+52)[index_a];
6149 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* One tc value per bS entry; unlike the luma variant, 1 is added to each
 * table value. */
6153 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6154 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6155 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6156 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6157 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Alternative: the intra filter, which takes no tc.
 * NOTE(review): the condition choosing between the two calls is not visible
 * in this listing. */
6159 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Loop-filters one macroblock on a simplified path and hands the cases it
 * does not cover to filter_mb().
 *
 * @param h          decoder context
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param img_y      luma pixels of the macroblock
 * @param img_cb     Cb pixels of the macroblock
 * @param img_cr     Cr pixels of the macroblock
 * @param linesize   luma line stride
 * @param uvlinesize chroma line stride
 *
 * NOTE(review): this listing omits a number of short lines of this function,
 * including its tail; the comments below describe only what the visible lines
 * establish.
 */
6163 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6164 MpegEncContext * const s = &h->s;
/* First macroblock row of the picture: 1 for a bottom field picture, else 0. */
6165 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6167 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Use filter_mb() for macroblocks in the first column or first row, when no
 * dsp strength function is available, when a chroma qp difference is
 * signalled, when CODEC_FLAG2_FAST is not set, or when deblocking_filter is 2
 * and the top or left neighbour belongs to another slice. */
6171 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6172 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6173 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6174 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6175 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6178 assert(!FRAME_MBAFF);
/* Quantizers: qp is the current macroblock's, qp0 the left neighbour's, qp1
 * the top neighbour's; qpc* are the matching chroma values. qp0/qp1 and
 * qpc0/qpc1 are then replaced by their rounded average with the current
 * macroblock's value. */
6180 mb_type = s->current_picture.mb_type[mb_xy];
6181 qp = s->current_picture.qscale_table[mb_xy];
6182 qp0 = s->current_picture.qscale_table[mb_xy-1];
6183 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6184 qpc = get_chroma_qp( h, 0, qp );
6185 qpc0 = get_chroma_qp( h, 0, qp0 );
6186 qpc1 = get_chroma_qp( h, 0, qp1 );
6187 qp0 = (qp + qp0 + 1) >> 1;
6188 qp1 = (qp + qp1 + 1) >> 1;
6189 qpc0 = (qpc + qpc0 + 1) >> 1;
6190 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Test whether every quantizer involved is at or below the threshold; the
 * statement executed in that case is not visible in this listing (presumably
 * an early return). */
6191 qp_thresh = 15 - h->slice_alpha_c0_offset;
6192 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6193 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra macroblock: fixed strengths. bS4 is used on edge 0 of the v calls,
 * bSH (bS3 in field pictures, bS4 otherwise) on edge 0 of the h calls, bS3 on
 * all other edges. */
6196 if( IS_INTRA(mb_type) ) {
6197 int16_t bS4[4] = {4,4,4,4};
6198 int16_t bS3[4] = {3,3,3,3};
6199 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* With the 8x8 transform only luma edges 0 and 2 are filtered. */
6200 if( IS_8x8DCT(mb_type) ) {
6201 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6202 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6203 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6204 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
/* Otherwise all four luma edges in both directions. */
6206 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6207 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6208 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6209 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6210 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6211 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6212 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6213 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: edges 0 and 2 of both planes, in both directions. */
6215 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6216 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6217 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6218 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6219 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6220 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6221 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6222 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Non-intra macroblock: strengths are held in bS[dir][edge][4]; bSv
 * addresses the four int16 values of one edge as a single uint64_t so they
 * can be set or tested together.
 * NOTE(review): bSv accesses int16_t storage through a uint64_t pointer. */
6225 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6226 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with the low three cbp bits all set: strength 2 on edges 0
 * and 2 in both directions. */
6228 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6230 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* Otherwise the strengths come from the dsp h264_loop_filter_strength
 * function, driven by the nnz, reference and motion vector caches. */
6232 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6233 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6234 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6235 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6237 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6238 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6239 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6240 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* An intra left neighbour forces strength 4 on edge 0 of direction 0; an
 * intra top neighbour forces 4 (3 in field pictures) on edge 0 of
 * direction 1. */
6242 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6243 bSv[0][0] = 0x0004000400040004ULL;
6244 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6245 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): when any strength on that edge is non-zero, call the
 * luma edge filter and the chroma edge filters for Cb and Cr. Edge 0 uses
 * the neighbour-averaged quantizer of that direction, other edges the
 * macroblock's own. */
6247 #define FILTER(hv,dir,edge)\
6248 if(bSv[dir][edge]) {\
6249 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6251 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6252 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6258 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Deblocks the edges of one macroblock along a single direction.
 *
 * The neighbouring macroblock is mb_xy-1 when dir is 0 and h->top_mb_xy
 * otherwise. For each edge in [start, edges) a 4-entry boundary strength
 * array bS is derived from the intra status, the non-zero coefficient
 * cache, the reference indices and the motion vectors of both sides.
 * The luma edge is then filtered and, on even edges, both chroma planes.
 *
 * @param mb_xy     index of the current macroblock in the per-picture tables
 * @param mb_type   type flags of the current macroblock
 * @param mvy_limit threshold compared against vertical motion vector differences
 * @param first_vertical_edge_done tested before the edge loop
 *                  (NOTE(review): presumably makes the loop skip edge 0 - confirm)
 * @param dir       0 selects the left neighbour, any other value the top one
 */
6278 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6279 MpegEncContext * const s = &h->s;
// mbm_xy / mbm_type: the macroblock on the other side of edge 0
6281 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6282 const int mbm_type = s->current_picture.mb_type[mbm_xy];
// reference mapping tables of the current slice and of the neighbour's slice
6283 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6284 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
// edge 0 is left out of the loop below when the neighbour's slice_table entry is 0xFFFF
6285 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
// a single edge for macroblocks that are both 16x16 and skipped, four otherwise
6287 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6288 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6289 // how often to recheck mv-based bS when iterating between edges
6290 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6291 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6292 // how often to recheck mv-based bS when iterating along each edge
6293 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6295 if (first_vertical_edge_done) {
// deblocking_filter==2: tests whether the neighbour belongs to a different slice
6299 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6302 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6303 && !IS_INTERLACED(mb_type)
6304 && IS_INTERLACED(mbm_type)
6306 // This is a special case in the norm where the filtering must
6307 // be done twice (one each of the field) even if we are in a
6308 // frame macroblock.
6310 static const int nnz_idx[4] = {4,5,6,3};
// doubled strides, so each of the two passes below touches every second line
6311 unsigned int tmp_linesize = 2 * linesize;
6312 unsigned int tmp_uvlinesize = 2 * uvlinesize;
// starts two macroblock rows above the current one and advances by one row per pass
6313 int mbn_xy = mb_xy - 2 * s->mb_stride;
6318 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
// strength 3 on all four segments if either side is intra
6319 if( IS_INTRA(mb_type) ||
6320 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6321 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6323 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6324 for( i = 0; i < 4; i++ ) {
6325 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6326 mbn_nnz[nnz_idx[i]] != 0 )
6332 // Do not use s->qscale as luma quantizer because it has not the same
6333 // value in IPCM macroblocks.
// rounded average of the two macroblocks' luma qp
6334 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6335 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6336 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// pass j starts on line j of each plane
6337 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6338 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6339 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6340 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6341 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// main loop over the edges of this macroblock
6348 for( edge = start; edge < edges; edge++ ) {
6349 /* mbn_xy: neighbor macroblock */
// edge 0 borders the neighbouring macroblock, inner edges border the current one
6350 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6351 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6352 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
// odd edges of 8x8-transform macroblocks get separate treatment
// NOTE(review): presumably they are skipped - confirm
6356 if( (edge&1) && IS_8x8DCT(mb_type) )
// intra on either side: one common strength value for the whole edge
6359 if( IS_INTRA(mb_type) ||
6360 IS_INTRA(mbn_type) ) {
6363 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6364 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6373 bS[0] = bS[1] = bS[2] = bS[3] = value;
// edges selected by mask_edge start from strength 0
6378 if( edge & mask_edge ) {
6379 bS[0] = bS[1] = bS[2] = bS[3] = 0;
// MBAFF with differing interlaced flags on the two sides: strength 1
6382 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6383 bS[0] = bS[1] = bS[2] = bS[3] = 1;
// one reference/motion vector comparison decides the strength of the whole edge
6386 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
// b_idx / bn_idx: cache positions of the block on this side and on the other side of the edge
6387 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6388 int bn_idx= b_idx - (dir ? 8:1);
// list 0 is compared, plus list 1 for B slices, until a difference is found
6391 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6392 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6393 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6394 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
// B slices: a second comparison in which the neighbour side is indexed with ln
6397 if(h->slice_type_nos == FF_B_TYPE && v){
6399 for( l = 0; !v && l < 2; l++ ) {
6401 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6402 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6403 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6407 bS[0] = bS[1] = bS[2] = bS[3] = v;
// per-segment evaluation: each of the four 4-sample segments is examined on its own
6413 for( i = 0; i < 4; i++ ) {
6414 int x = dir == 0 ? edge : i;
6415 int y = dir == 0 ? i : edge;
6416 int b_idx= 8 + 4 + x + 8*y;
6417 int bn_idx= b_idx - (dir ? 8:1);
// non-zero coefficients on either side of the segment
6419 if( h->non_zero_count_cache[b_idx] |
6420 h->non_zero_count_cache[bn_idx] ) {
6426 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6427 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6428 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6429 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6435 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6437 for( l = 0; l < 2; l++ ) {
6439 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6440 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6441 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
// all four strengths are zero
// NOTE(review): presumably this edge is then skipped - confirm
6450 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6455 // Do not use s->qscale as luma quantizer because it has not the same
6456 // value in IPCM macroblocks.
// rounded average of the luma qp on both sides of the edge
6457 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6458 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6459 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6460 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// edgev variant: the edge sits at column 4*edge (luma) / 2*edge (chroma)
6462 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
// chroma is filtered on even edges only
6463 if( (edge&1) == 0 ) {
6464 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6465 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6466 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6467 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// edgeh variant: the edge sits at row 4*edge (luma) / 2*edge (chroma)
6470 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6471 if( (edge&1) == 0 ) {
6472 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6473 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6474 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6475 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Deblocking entry point for one macroblock.
 *
 * Steps visible in this function:
 *  - a qp threshold is computed from the slice alpha offset and the PPS
 *    chroma qp index offsets and compared with the averaged qp of the
 *    current, left and top macroblocks;
 *  - for CAVLC streams with transform_8x8_mode, entries of
 *    h->non_zero_count_cache are overwritten with bits of the coded block
 *    patterns;
 *  - when the current and left macroblocks differ in interlaced type, the
 *    first vertical edge is filtered here with 8 bS values and 2 qp values;
 *  - filter_mb_dir() is called for dir 0 and dir 1.
 *
 * @param mb_x, mb_y           macroblock coordinates, combined into mb_xy via s->mb_stride
 * @param img_y, img_cb, img_cr pointers into the three planes handed to the edge filters
 * @param linesize, uvlinesize  strides handed to the edge filters
 */
6481 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6482 MpegEncContext * const s = &h->s;
6483 const int mb_xy= mb_x + mb_y*s->mb_stride;
6484 const int mb_type = s->current_picture.mb_type[mb_xy];
// limit for vertical mv differences: 2 for interlaced macroblocks, 4 otherwise
6485 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6486 int first_vertical_edge_done = 0;
6489 //for sufficiently low qp, filtering wouldn't do anything
6490 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6492 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6493 int qp = s->current_picture.qscale_table[mb_xy];
// the left / top comparison is bypassed in the first column / first row
6495 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6496 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6501 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6502 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6503 int top_type, left_type[2];
6504 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6505 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6506 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// top neighbour uses 8x8 transform: cache positions 4..7 take bits 4 and 8 of its cbp
6508 if(IS_8x8DCT(top_type)){
6509 h->non_zero_count_cache[4+8*0]=
6510 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6511 h->non_zero_count_cache[6+8*0]=
6512 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
// left neighbours use 8x8 transform: cache column 3 takes bits 2 / 8 of their cbp
6514 if(IS_8x8DCT(left_type[0])){
6515 h->non_zero_count_cache[3+8*1]=
6516 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6518 if(IS_8x8DCT(left_type[1])){
6519 h->non_zero_count_cache[3+8*3]=
6520 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// current macroblock uses 8x8 transform: each group of four cache entries takes one cbp bit
6523 if(IS_8x8DCT(mb_type)){
6524 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6525 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6527 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6528 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6530 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6531 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6533 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6534 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6539 // left mb is in picture
6540 && h->slice_table[mb_xy-1] != 0xFFFF
6541 // and current and left pair do not have the same interlaced type
6542 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6543 // and left mb is in the same slice if deblocking_filter == 2
6544 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6545 /* First vertical edge is different in MBAFF frames
6546 * There are 8 different bS to compute and 2 different Qp
// pair_xy: index of the macroblock in the even row of the current pair;
// left_mb_xy: the two macroblocks of the pair to its left
6548 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6549 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6554 int mb_qp, mbn0_qp, mbn1_qp;
// handed to filter_mb_dir() for dir 0 further down
6556 first_vertical_edge_done = 1;
// intra current macroblock: strength 4 on all eight segments
6558 if( IS_INTRA(mb_type) )
6559 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6561 for( i = 0; i < 8; i++ ) {
// which left macroblock faces segment i depends on MB_FIELD
6562 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6564 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
// non-zero coefficients on this side, or on the neighbour side taken from its cbp
// (CAVLC with 8x8 transform) or from its non_zero_count otherwise
6566 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6567 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6568 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6570 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// one rounded average per left macroblock for luma (qp), Cb (bqp) and Cr (rqp)
6577 mb_qp = s->current_picture.qscale_table[mb_xy];
6578 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6579 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6580 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6581 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6582 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6583 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6584 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6585 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6586 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6587 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6588 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6589 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6592 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6593 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6594 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6595 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6596 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// NOTE(review): a looped and an unrolled call sequence both appear below;
// presumably alternative preprocessor branches - confirm.
// In both forms first_vertical_edge_done is forwarded for dir 0 only; dir 1 receives 0.
6600 for( dir = 0; dir < 2; dir++ )
6601 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6603 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6604 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decodes the macroblocks of one slice.
 *
 * Uses decode_mb_cabac() when h->pps.cabac is set and decode_mb_cavlc()
 * otherwise; every successfully decoded macroblock is passed on to
 * hl_decode_mb(). Slice ends and decoding errors are reported through
 * ff_er_add_slice().
 *
 * @param avctx codec context (not referenced in the visible lines)
 * @param arg   pointer to an H264Context pointer; dereferenced once to obtain h
 */
6608 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6609 H264Context *h = *(void**)arg;
6610 MpegEncContext * const s = &h->s;
// partitioned frames keep only the AC_END/AC_ERROR status bits, others use mask 0x7F
6611 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
// is_complex is set for MBAFF frames, non-frame pictures, non-H264 codec ids and gray decoding
6615 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6616 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6618 if( h->pps.cabac ) {
// the CABAC decoder is started on the byte-aligned remainder of the bitstream
6622 align_get_bits( &s->gb );
6625 ff_init_cabac_states( &h->cabac);
6626 ff_init_cabac_decoder( &h->cabac,
6627 s->gb.buffer + get_bits_count(&s->gb)/8,
6628 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6629 /* calculate pre-state */
// one state per context (460 of them), from the I or P/B init tables scaled by s->qscale
6630 for( i= 0; i < 460; i++ ) {
6632 if( h->slice_type_nos == FF_I_TYPE )
6633 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6635 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
// the state holds twice the distance of pre from the 63/64 midpoint, low bit 0 or 1
6638 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6640 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6645 int ret = decode_mb_cabac(h);
6647 //STOP_TIMER("decode_mb_cabac")
6649 if(ret>=0) hl_decode_mb(h);
// MBAFF: a second macroblock is decoded in the same iteration
6651 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6654 ret = decode_mb_cabac(h);
6656 if(ret>=0) hl_decode_mb(h);
6659 eos = get_cabac_terminate( &h->cabac );
// decode failure, or the CABAC reader advanced more than 2 bytes past bytestream_end
6661 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6662 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6663 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// end of a macroblock row
6667 if( ++s->mb_x >= s->mb_width ) {
6669 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6671 if(FIELD_OR_MBAFF_PICTURE) {
// slice finished: terminate bit seen or last row passed
6676 if( eos || s->mb_y >= s->mb_height ) {
6677 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6678 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC path
6685 int ret = decode_mb_cavlc(h);
6687 if(ret>=0) hl_decode_mb(h);
6689 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6691 ret = decode_mb_cavlc(h);
6693 if(ret>=0) hl_decode_mb(h);
6698 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6699 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6704 if(++s->mb_x >= s->mb_width){
6706 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6708 if(FIELD_OR_MBAFF_PICTURE) {
6711 if(s->mb_y >= s->mb_height){
6712 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// bit position exactly at the end of the buffer: registered with mb_x-1 as the end position
6714 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6715 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6719 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// bitstream used up and no skip run pending
6726 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6727 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6728 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6729 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6733 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// NOTE(review): the loop below calls decode_mb(), passes s->gb by value to
// get_bits_count() and contains the tokens "s->?gb" and "s->gb?." which are not valid C;
// presumably this section is compiled out - confirm before touching it.
6742 for(;s->mb_y < s->mb_height; s->mb_y++){
6743 for(;s->mb_x < s->mb_width; s->mb_x++){
6744 int ret= decode_mb(h);
6749 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6755 if(++s->mb_x >= s->mb_width){
6757 if(++s->mb_y >= s->mb_height){
6758 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6759 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6763 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6770 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6771 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6772 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6776 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6783 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6786 return -1; //not reached
/**
 * Parses a picture timing SEI message from s->gb.
 *
 * Stores cpb_removal_delay and dpb_output_delay in h when either HRD
 * parameter set is flagged present in h->sps. If pic_struct_present_flag is
 * set, the 4-bit pic_struct is stored in h->sei_pic_struct and the clock
 * timestamp fields that follow are read past without being stored.
 */
6789 static int decode_picture_timing(H264Context *h){
6790 MpegEncContext * const s = &h->s;
// field widths come from the SPS
6791 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6792 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6793 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6795 if(h->sps.pic_struct_present_flag){
6796 unsigned int i, num_clock_ts;
6797 h->sei_pic_struct = get_bits(&s->gb, 4);
// range test performed before the value is used as a table index below
// NOTE(review): presumably out-of-range values are rejected here - confirm
6799 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
// number of clock timestamp entries is looked up by pic_struct
6802 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6804 for (i = 0 ; i < num_clock_ts ; i++){
6805 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6806 unsigned int full_timestamp_flag;
6807 skip_bits(&s->gb, 2); /* ct_type */
6808 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6809 skip_bits(&s->gb, 5); /* counting_type */
6810 full_timestamp_flag = get_bits(&s->gb, 1);
6811 skip_bits(&s->gb, 1); /* discontinuity_flag */
6812 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6813 skip_bits(&s->gb, 8); /* n_frames */
// full timestamp: seconds, minutes and hours all present
6814 if(full_timestamp_flag){
6815 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6816 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6817 skip_bits(&s->gb, 5); /* hours_value 0..23 */
// otherwise each finer unit is gated by its own flag, nested inside the previous one
6819 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6820 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6821 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6822 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6823 if(get_bits(&s->gb, 1)) /* hours_flag */
6824 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6828 if(h->sps.time_offset_length > 0)
6829 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parses an unregistered user data SEI payload.
 *
 * Copies payload bytes into a local buffer and scans the text that follows
 * the first 16 bytes for an x264 version banner; a non-negative build
 * number found there is stored in h->x264_build.
 *
 * @param size payload size in bytes
 */
6836 static int decode_unregistered_user_data(H264Context *h, int size){
6837 MpegEncContext * const s = &h->s;
6838 uint8_t user_data[16+256];
// at most sizeof(user_data)-1 bytes are stored, which leaves the last byte of the buffer free
6844 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6845 user_data[i]= get_bits(&s->gb, 8);
// the scan starts after the first 16 bytes of the payload
6849 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6850 if(e==1 && build>=0)
6851 h->x264_build= build;
6853 if(s->avctx->debug & FF_DEBUG_BUGS)
6854 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// NOTE(review): presumably discards payload bytes that did not fit into the buffer - confirm
6857 skip_bits(&s->gb, 8);
/**
 * Parses a recovery point SEI message: stores the Exp-Golomb coded
 * recovery_frame_cnt in h->sei_recovery_frame_cnt and skips the 4 flag bits
 * that follow it.
 */
6862 static int decode_recovery_point(H264Context *h){
6863 MpegEncContext * const s = &h->s;
6865 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6866 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/**
 * Parses a buffering period SEI message.
 *
 * Reads the referenced SPS id, validates it against h->sps_buffers, then
 * reads one initial_cpb_removal_delay per CPB for the NAL and/or VCL HRD
 * (each followed by an offset field that is skipped). Sets
 * h->sei_buffering_period_present when done.
 */
6871 static int decode_buffering_period(H264Context *h){
6872 MpegEncContext * const s = &h->s;
6873 unsigned int sps_id;
6877 sps_id = get_ue_golomb_31(&s->gb);
// the id must be in range and refer to an SPS that has been stored
6878 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6879 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6882 sps = h->sps_buffers[sps_id];
6884 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6885 if (sps->nal_hrd_parameters_present_flag) {
6886 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6887 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6888 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
// NOTE(review): this loop writes the same h->initial_cpb_removal_delay[] entries as the
// NAL loop above, so with both flags set the VCL values are the ones kept - confirm intended
6891 if (sps->vcl_hrd_parameters_present_flag) {
6892 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6893 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6894 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6898 h->sei_buffering_period_present = 1;
/**
 * Parses the SEI messages contained in s->gb.
 *
 * While more than 16 bits remain, a payload type and a payload size are
 * read, each accumulated from consecutive bytes for as long as the byte
 * just read equals 255. Picture timing, unregistered user data, recovery
 * point and buffering period payloads are handed to their parsers; the
 * reader is byte-aligned after each message.
 */
6902 int ff_h264_decode_sei(H264Context *h){
6903 MpegEncContext * const s = &h->s;
6905 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// payload type: bytes are summed, a byte of 255 means another byte follows
6910 type+= show_bits(&s->gb, 8);
6911 }while(get_bits(&s->gb, 8) == 255);
// payload size, coded the same way
6915 size+= show_bits(&s->gb, 8);
6916 }while(get_bits(&s->gb, 8) == 255);
6919 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6920 if(decode_picture_timing(h) < 0)
6923 case SEI_TYPE_USER_DATA_UNREGISTERED:
6924 if(decode_unregistered_user_data(h, size) < 0)
6927 case SEI_TYPE_RECOVERY_POINT:
6928 if(decode_recovery_point(h) < 0)
6931 case SEI_BUFFERING_PERIOD:
6932 if(decode_buffering_period(h) < 0)
// NOTE(review): presumably the fallback for payload types not listed above;
// skips the whole payload of size bytes - confirm
6936 skip_bits(&s->gb, 8*size);
6939 //FIXME check bits here
6940 align_get_bits(&s->gb);
/**
 * Parses one set of HRD parameters from s->gb.
 *
 * Per-CPB bit rate and size values are read and discarded; only the four
 * length fields and the CPB count are stored in sps.
 *
 * @param sps SPS that receives the parsed lengths and cpb_cnt
 */
6946 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6947 MpegEncContext * const s = &h->s;
// the coded value is the count minus one
6949 cpb_count = get_ue_golomb_31(&s->gb) + 1;
// counts above 32 are reported as invalid
6951 if(cpb_count > 32U){
6952 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6956 get_bits(&s->gb, 4); /* bit_rate_scale */
6957 get_bits(&s->gb, 4); /* cpb_size_scale */
6958 for(i=0; i<cpb_count; i++){
6959 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6960 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6961 get_bits1(&s->gb); /* cbr_flag */
// the first three lengths are stored as the 5-bit field plus one, time_offset_length as read
6963 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6964 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6965 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6966 sps->time_offset_length = get_bits(&s->gb, 5);
6967 sps->cpb_cnt = cpb_count;
/**
 * Parses the VUI parameters of a sequence parameter set from s->gb.
 *
 * Stored in sps: sample aspect ratio, timing info, HRD presence flags (the
 * HRD parameters themselves via decode_hrd_parameters()),
 * pic_struct_present_flag, bitstream_restriction_flag and
 * num_reorder_frames. All other fields are read and discarded.
 *
 * @param sps SPS that receives the parsed values
 */
6971 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6972 MpegEncContext * const s = &h->s;
6973 int aspect_ratio_info_present_flag;
6974 unsigned int aspect_ratio_idc;
6976 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6978 if( aspect_ratio_info_present_flag ) {
6979 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries an explicit 16-bit numerator and denominator,
// other values index the pixel_aspect table
6980 if( aspect_ratio_idc == EXTENDED_SAR ) {
6981 sps->sar.num= get_bits(&s->gb, 16);
6982 sps->sar.den= get_bits(&s->gb, 16);
6983 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6984 sps->sar= pixel_aspect[aspect_ratio_idc];
6986 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6993 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6995 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6996 get_bits1(&s->gb); /* overscan_appropriate_flag */
6999 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7000 get_bits(&s->gb, 3); /* video_format */
7001 get_bits1(&s->gb); /* video_full_range_flag */
7002 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7003 get_bits(&s->gb, 8); /* colour_primaries */
7004 get_bits(&s->gb, 8); /* transfer_characteristics */
7005 get_bits(&s->gb, 8); /* matrix_coefficients */
7009 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7010 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7011 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
// timing info: two 32-bit values plus the fixed frame rate flag
7014 sps->timing_info_present_flag = get_bits1(&s->gb);
7015 if(sps->timing_info_present_flag){
7016 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7017 sps->time_scale = get_bits_long(&s->gb, 32);
7018 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// NAL and VCL HRD parameters are parsed into the same sps fields
7021 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7022 if(sps->nal_hrd_parameters_present_flag)
7023 if(decode_hrd_parameters(h, sps) < 0)
7025 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7026 if(sps->vcl_hrd_parameters_present_flag)
7027 if(decode_hrd_parameters(h, sps) < 0)
7029 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7030 get_bits1(&s->gb); /* low_delay_hrd_flag */
7031 sps->pic_struct_present_flag = get_bits1(&s->gb);
7033 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7034 if(sps->bitstream_restriction_flag){
7035 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7036 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7037 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7038 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7039 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7040 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7041 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// more than 16 reorder frames is reported as illegal
7043 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7044 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parses one scaling list from s->gb into factors.
 *
 * @param factors       destination, size entries
 * @param size          number of entries; 16 selects zigzag_scan, any other
 *                      value ff_zigzag_direct, as the scan order
 * @param jvt_list      list copied when the first decoded value is 0
 * @param fallback_list list copied when the list is flagged as not written
 */
7052 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7053 const uint8_t *jvt_list, const uint8_t *fallback_list){
7054 MpegEncContext * const s = &h->s;
7055 int i, last = 8, next = 8;
7056 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7057 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7058 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7060 for(i=0;i<size;i++){
// entries are delta coded against the previous value, modulo 256
7062 next = (last + get_se_golomb(&s->gb)) & 0xff;
7063 if(!i && !next){ /* matrix not written, we use the preset one */
7064 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// a value of 0 repeats the previous entry
7067 last = factors[scan[i]] = next ? next : last;
/**
 * Parses the scaling matrices of an SPS or a PPS from s->gb.
 *
 * @param sps             SPS whose matrices act as fallback when parsing a PPS
 *                        and whose scaling_matrix_present flag is updated
 * @param pps             PPS being parsed; only read when is_sps is 0
 * @param is_sps          non-zero when the matrices belong to an SPS
 * @param scaling_matrix4 destination for the six 4x4 lists
 * @param scaling_matrix8 destination for the two 8x8 lists
 */
7071 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7072 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7073 MpegEncContext * const s = &h->s;
// a PPS falls back to the SPS matrices if the SPS carried any, otherwise the defaults are used
7074 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7075 const uint8_t *fallback[4] = {
7076 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7077 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7078 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7079 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// matrices-present flag
7081 if(get_bits1(&s->gb)){
7082 sps->scaling_matrix_present |= is_sps;
// the second and third list of each group fall back to the list decoded just before them
7083 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7084 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7085 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7086 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7087 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7088 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists are parsed for an SPS, and for a PPS only with transform_8x8_mode set
7089 if(is_sps || pps->transform_8x8_mode){
7090 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7091 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parses a sequence parameter set from s->gb.
 *
 * A new SPS is allocated with av_mallocz(), filled from the bitstream and
 * finally stored in h->sps_buffers[sps_id] after the previous entry in that
 * slot has been freed. Range checks are applied to the SPS id, the POC
 * cycle length, the POC type, the reference frame count and the picture
 * dimensions.
 */
7096 int ff_h264_decode_seq_parameter_set(H264Context *h){
7097 MpegEncContext * const s = &h->s;
7098 int profile_idc, level_idc;
7099 unsigned int sps_id;
7103 profile_idc= get_bits(&s->gb, 8);
7104 get_bits1(&s->gb); //constraint_set0_flag
7105 get_bits1(&s->gb); //constraint_set1_flag
7106 get_bits1(&s->gb); //constraint_set2_flag
7107 get_bits1(&s->gb); //constraint_set3_flag
7108 get_bits(&s->gb, 4); // reserved
7109 level_idc= get_bits(&s->gb, 8);
7110 sps_id= get_ue_golomb_31(&s->gb);
7112 if(sps_id >= MAX_SPS_COUNT) {
7113 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7116 sps= av_mallocz(sizeof(SPS));
7120 sps->profile_idc= profile_idc;
7121 sps->level_idc= level_idc;
// every scaling factor starts out as 16
7123 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7124 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7125 sps->scaling_matrix_present = 0;
7127 if(sps->profile_idc >= 100){ //high profile
// NOTE(review): no range check on chroma_format_idc is visible here, yet it later
// indexes a 4-entry string table in the debug log - confirm a check exists
7128 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7129 if(sps->chroma_format_idc == 3)
7130 sps->residual_color_transform_flag = get_bits1(&s->gb);
// bit depths are coded as an offset from 8
7131 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7132 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7133 sps->transform_bypass = get_bits1(&s->gb);
7134 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// value used when the field above is not read from the bitstream
7136 sps->chroma_format_idc= 1;
7139 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7140 sps->poc_type= get_ue_golomb_31(&s->gb);
7142 if(sps->poc_type == 0){ //FIXME #define
7143 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7144 } else if(sps->poc_type == 1){//FIXME #define
7145 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7146 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7147 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7148 sps->poc_cycle_length = get_ue_golomb(&s->gb);
// the cycle length must fit into the offset_for_ref_frame array
7150 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7151 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7155 for(i=0; i<sps->poc_cycle_length; i++)
7156 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7157 }else if(sps->poc_type != 2){
7158 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7162 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7163 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7164 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7167 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
// picture size in macroblocks, coded minus one
7168 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7169 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// guards the 16*mb_width / 16*mb_height products before the dimension check
7170 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7171 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7172 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7176 sps->frame_mbs_only_flag= get_bits1(&s->gb);
// mb_aff is only coded when frame_mbs_only_flag is 0
7177 if(!sps->frame_mbs_only_flag)
7178 sps->mb_aff= get_bits1(&s->gb);
7182 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7184 #ifndef ALLOW_INTERLACE
7186 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7188 sps->crop= get_bits1(&s->gb);
7190 sps->crop_left = get_ue_golomb(&s->gb);
7191 sps->crop_right = get_ue_golomb(&s->gb);
7192 sps->crop_top = get_ue_golomb(&s->gb);
7193 sps->crop_bottom= get_ue_golomb(&s->gb);
// unusual crop values are only reported; no further action is visible here
7194 if(sps->crop_left || sps->crop_top){
7195 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7197 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7198 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7204 sps->crop_bottom= 0;
7207 sps->vui_parameters_present_flag= get_bits1(&s->gb);
// NOTE(review): the return value of decode_vui_parameters() is not checked - confirm intended
7208 if( sps->vui_parameters_present_flag )
7209 decode_vui_parameters(h, sps);
7211 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7212 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7213 sps_id, sps->profile_idc, sps->level_idc,
7215 sps->ref_frame_count,
7216 sps->mb_width, sps->mb_height,
7217 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7218 sps->direct_8x8_inference_flag ? "8B8" : "",
7219 sps->crop_left, sps->crop_right,
7220 sps->crop_top, sps->crop_bottom,
7221 sps->vui_parameters_present_flag ? "VUI" : "",
7222 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// the new SPS replaces whatever was stored under this id
7226 av_free(h->sps_buffers[sps_id]);
7227 h->sps_buffers[sps_id]= sps;
/**
 * Fills pps->chroma_qp_table[t] for the 52 qp values 0..51.
 *
 * @param pps   PPS whose table is written
 * @param t     selects which of the PPS chroma qp tables to fill
 * @param index offset added to each qp before it is clipped to 0..51 and
 *              looked up in the chroma_qp table
 */
7236 build_qp_table(PPS *pps, int t, int index)
7239 for(i = 0; i < 52; i++)
7240 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set (PPS) from the bit reader h->s.gb and store
 * it in h->pps_buffers[pps_id].
 *
 * @param h          decoder context; h->s.gb must be positioned on the PPS
 * @param bit_length number of payload bits; the optional trailing fields
 *                   (transform_8x8_mode, scaling matrices, second chroma QP
 *                   offset) are read only while the reader is short of it
 *
 * Any PPS previously stored under the same id is freed before the new one
 * is installed.
 */
7243 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7244 MpegEncContext * const s = &h->s;
7245 unsigned int pps_id= get_ue_golomb(&s->gb);
// pps_id indexes h->pps_buffers[] at the end, so it is range checked first
7248 if(pps_id >= MAX_PPS_COUNT) {
7249 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
// zero-initialised allocation: every field not assigned below starts as 0
7253 pps= av_mallocz(sizeof(PPS));
7256 pps->sps_id= get_ue_golomb_31(&s->gb);
// the referenced SPS must be in range and must already have been stored
7257 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7258 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7262 pps->cabac= get_bits1(&s->gb);
7263 pps->pic_order_present= get_bits1(&s->gb);
7264 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// more than one slice group (FMO): only the map type is read and an error is logged
7265 if(pps->slice_group_count > 1 ){
7266 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7267 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7268 switch(pps->mb_slice_group_map_type){
// NOTE(review): the rows below look like disabled reference text copied from
// the syntax table of the standard, not executable code; confirm in the full file
7271 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7272 | run_length[ i ] |1 |ue(v) |
7277 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7279 | top_left_mb[ i ] |1 |ue(v) |
7280 | bottom_right_mb[ i ] |1 |ue(v) |
7288 | slice_group_change_direction_flag |1 |u(1) |
7289 | slice_group_change_rate_minus1 |1 |ue(v) |
7294 | slice_group_id_cnt_minus1 |1 |ue(v) |
7295 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7297 | slice_group_id[ i ] |1 |u(v) |
7302 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7303 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// both default reference counts must lie in 1..32
// NOTE(review): the count-1 form presumably relies on an unsigned field so
// that a wrapped-around 0 is rejected as well; confirm the type of ref_count
7304 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7305 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7309 pps->weighted_pred= get_bits1(&s->gb);
7310 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// init_qp and init_qs are coded as signed offsets relative to 26
7311 pps->init_qp= get_se_golomb(&s->gb) + 26;
7312 pps->init_qs= get_se_golomb(&s->gb) + 26;
7313 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7314 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7315 pps->constrained_intra_pred= get_bits1(&s->gb);
7316 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
// defaults used when the optional trailing fields are absent: no 8x8
// transform, scaling matrices copied from the referenced SPS
7318 pps->transform_8x8_mode= 0;
7319 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7320 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7321 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// optional trailing fields are present only if payload bits remain
7323 if(get_bits_count(&s->gb) < bit_length){
7324 pps->transform_8x8_mode= get_bits1(&s->gb);
7325 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7326 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// without the trailing fields the second offset equals the first one
7328 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7331 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7332 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
// Record the differing offsets on the PPS being built. h->pps is a separate
// object from the freshly allocated pps, so setting the flag there would
// never reach the entry stored in h->pps_buffers[] below.
7333 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7334 pps->chroma_qp_diff= 1;
7336 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7337 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7338 pps_id, pps->sps_id,
7339 pps->cabac ? "CABAC" : "CAVLC",
7340 pps->slice_group_count,
7341 pps->ref_count[0], pps->ref_count[1],
7342 pps->weighted_pred ? "weighted" : "",
7343 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7344 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7345 pps->constrained_intra_pred ? "CONSTR" : "",
7346 pps->redundant_pic_cnt_present ? "REDU" : "",
7347 pps->transform_8x8_mode ? "8x8DCT" : ""
// replace whatever PPS was stored under this id before
7351 av_free(h->pps_buffers[pps_id]);
7352 h->pps_buffers[pps_id]= pps;
7360 * Call decode_slice() for each context.
7362 * @param h h264 master context
7363 * @param context_count number of contexts to execute
7365 static void execute_decode_slices(H264Context *h, int context_count){
7366 MpegEncContext * const s = &h->s;
7367 AVCodecContext * const avctx= s->avctx;
// NOTE(review): the statements guarded by the next two checks (hwaccel in use,
// VDPAU capable codec) are not visible in this listing
7371 if (s->avctx->hwaccel)
7373 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
// a single context is decoded directly, without going through avctx->execute()
7375 if(context_count == 1) {
7376 decode_slice(avctx, &h);
// contexts 1..context_count-1 inherit the error_recognition setting and start
// with a cleared error counter
7378 for(i = 1; i < context_count; i++) {
7379 hx = h->thread_context[i];
7380 hx->s.error_recognition = avctx->error_recognition;
7381 hx->s.error_count = 0;
// run decode_slice() once per entry of h->thread_context[]
7384 avctx->execute(avctx, (void *)decode_slice,
7385 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7387 /* pull back stuff from slices to master context */
// position and picture state are taken from the last context that ran
7388 hx = h->thread_context[context_count - 1];
7389 s->mb_x = hx->s.mb_x;
7390 s->mb_y = hx->s.mb_y;
7391 s->dropable = hx->s.dropable;
7392 s->picture_structure = hx->s.picture_structure;
// add the error counts of contexts 1..context_count-1 to the master counter
7393 for(i = 1; i < context_count; i++)
7394 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Walk the input buffer NAL unit by NAL unit and dispatch each unit by type.
 *
 * Units are located either through a size prefix of h->nal_length_size bytes
 * (AVC mode, h->is_avc) or by scanning for the 00 00 01 start code. Slice
 * units are prepared on h->thread_context[] entries and run through
 * execute_decode_slices() once h->max_contexts of them are queued and again
 * for the remainder at the end; SEI, SPS and PPS units are parsed on the
 * master context right away.
 *
 * @param h        master decoder context
 * @param buf      input bytes
 * @param buf_size number of input bytes
 *
 * NOTE(review): the return statements are not visible in this listing;
 * decode_frame() stores the result as buf_index and treats a negative value
 * as failure when parsing extradata.
 */
7399 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7400 MpegEncContext * const s = &h->s;
7401 AVCodecContext * const avctx= s->avctx;
7403 H264Context *hx; ///< thread context
7404 int context_count = 0;
7406 h->max_contexts = avctx->thread_count;
// NOTE(review): dumps the first 50 input bytes; presumably a disabled debug
// aid, since it logs with a NULL context and does not check buf_size
7409 for(i=0; i<50; i++){
7410 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// unless input arrives in chunks, every call starts a new picture; the current
// picture pointer is kept only while a first field awaits its second field
7413 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7414 h->current_slice = 0;
7415 if (!s->first_field)
7416 s->current_picture_ptr= NULL;
7429 if(buf_index >= buf_size) break;
// size-prefixed unit: assemble the big-endian size from nal_length_size bytes
7431 for(i = 0; i < h->nal_length_size; i++)
7432 nalsize = (nalsize << 8) | buf[buf_index++];
// sizes of at most 1 byte, or sizes running past the buffer end, are invalid
7433 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7438 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7443 // start code prefix search
7444 for(; buf_index + 3 < buf_size; buf_index++){
7445 // This should always succeed in the first iteration.
7446 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7450 if(buf_index+3 >= buf_size) break;
// each queued slice gets its own context; context 0 is used when nothing is queued
7455 hx = h->thread_context[context_count];
// ff_h264_decode_nal() yields the payload pointer, the payload length
// (dst_length) and the number of input bytes it used (consumed)
7457 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7458 if (ptr==NULL || dst_length < 0){
// strip trailing zero bytes; the length is tested before the byte so that
// ptr[-1] is never read when dst_length is 0
7461 while(dst_length > 0 && ptr[dst_length - 1] == 0)
// payload size in bits, minus what ff_h264_decode_rbsp_trailing() reports for
// the last payload byte
7463 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7465 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7466 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
// AVC mode: report units that were not consumed completely; this is logged as
// an error only if one of the leftover bytes is non-zero
7469 if (h->is_avc && (nalsize != consumed)){
7470 int i, debug_level = AV_LOG_DEBUG;
7471 for (i = consumed; i < nalsize; i++)
7472 if (buf[buf_index+i])
7473 debug_level = AV_LOG_ERROR;
7474 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7478 buf_index += consumed;
// condition under which units with nal_ref_idc == 0 are discarded
7480 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7481 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
// NOTE(review): most case labels of this switch are not visible in this
// listing; the comments below describe what each visible group of lines does
7486 switch(hx->nal_unit_type){
// an IDR slice on a thread context must match the type seen by the master context
7488 if (h->nal_unit_type != NAL_IDR_SLICE) {
7489 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
7492 idr(h); //FIXME ensure we don't lose some frames if there is reordering
// unpartitioned slice: one bit reader serves header and inter data
7494 init_get_bits(&hx->s.gb, ptr, bit_length);
7496 hx->inter_gb_ptr= &hx->s.gb;
7497 hx->s.data_partitioning = 0;
7499 if((err = decode_slice_header(hx, h)))
// the hardware accelerator is told about a new frame on its first slice
7502 if (s->avctx->hwaccel && h->current_slice == 1) {
7503 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
// a picture counts as key frame if any slice is IDR or a recovery point SEI was seen
7507 s->current_picture_ptr->key_frame |=
7508 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7509 (h->sei_recovery_frame_cnt >= 0);
// the slice is decoded only if it is a primary picture and no skip setting excludes it
7510 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7511 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7512 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7513 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7514 && avctx->skip_frame < AVDISCARD_ALL){
// hwaccel and VDPAU both receive the raw bytes of the unit just consumed
7515 if(avctx->hwaccel) {
7516 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
7519 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7520 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7521 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7522 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
// data partitioned slice: this unit carries the slice header, intra and inter
// data arrive in the two units handled next
7528 init_get_bits(&hx->s.gb, ptr, bit_length);
7530 hx->inter_gb_ptr= NULL;
7531 hx->s.data_partitioning = 1;
7533 err = decode_slice_header(hx, h);
// partition with intra data gets its own bit reader
7536 init_get_bits(&hx->intra_gb, ptr, bit_length);
7537 hx->intra_gb_ptr= &hx->intra_gb;
// partition with inter data gets its own bit reader
7540 init_get_bits(&hx->inter_gb, ptr, bit_length);
7541 hx->inter_gb_ptr= &hx->inter_gb;
// a partitioned slice is decoded only once the intra partition is available,
// the context is initialised and no skip setting excludes it
7543 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7544 && s->context_initialized
7546 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7547 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7548 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7549 && avctx->skip_frame < AVDISCARD_ALL)
// SEI: parsed on the master context
7553 init_get_bits(&s->gb, ptr, bit_length);
7554 ff_h264_decode_sei(h);
// SPS: parsed on the master context, then the delay settings are refreshed
7557 init_get_bits(&s->gb, ptr, bit_length);
7558 ff_h264_decode_seq_parameter_set(h);
7560 if(s->flags& CODEC_FLAG_LOW_DELAY)
7563 if(avctx->has_b_frames < 2)
7564 avctx->has_b_frames= !s->low_delay;
// PPS: parsed on the master context
7567 init_get_bits(&s->gb, ptr, bit_length);
7569 ff_h264_decode_picture_parameter_set(h, bit_length);
7573 case NAL_END_SEQUENCE:
7574 case NAL_END_STREAM:
7575 case NAL_FILLER_DATA:
7577 case NAL_AUXILIARY_SLICE:
7580 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// all contexts are filled: decode the queued slices now
7583 if(context_count == h->max_contexts) {
7584 execute_decode_slices(h, context_count);
7589 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7591 /* Slice could not be decoded in parallel mode, copy down
7592 * NAL unit stuff to context 0 and restart. Note that
7593 * rbsp_buffer is not transferred, but since we no longer
7594 * run in parallel mode this should not be an issue. */
7595 h->nal_unit_type = hx->nal_unit_type;
7596 h->nal_ref_idc = hx->nal_ref_idc;
// decode whatever slices are still queued
7602 execute_decode_slices(h, context_count);
7607 * returns the number of bytes consumed for building the current frame
// Adjusts the byte position pos reached in a buffer of buf_size bytes.
// The parameter s is not used by the visible lines.
// NOTE(review): the return statement is not visible in this listing;
// presumably the adjusted pos is returned.
7609 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
// a position of 0 is bumped to 1
7610 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
// if fewer than 10 bytes would remain after pos, the whole buffer counts as consumed
7611 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decoder entry point: consume one input buffer and, when a picture becomes
 * ready for output, copy it into *data.
 *
 * @param avctx     codec context; avctx->priv_data is the H264Context
 * @param data      output, written as an AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned
 * @param buf       input bytes
 * @param buf_size  number of input bytes; 0 requests output of the pictures
 *                  still held in h->delayed_pic[]
 *
 * The final visible statement returns get_consumed_bytes() for the position
 * reached by decode_nal_units(); earlier return paths are only partly
 * visible in this listing.
 */
7616 static int decode_frame(AVCodecContext *avctx,
7617 void *data, int *data_size,
7618 const uint8_t *buf, int buf_size)
7620 H264Context *h = avctx->priv_data;
7621 MpegEncContext *s = &h->s;
7622 AVFrame *pict = data;
// the flag words are refreshed from the codec context on every call
7625 s->flags= avctx->flags;
7626 s->flags2= avctx->flags2;
7628 /* end of stream, output what is still in the buffers */
7629 if (buf_size == 0) {
7633 //FIXME factorize this with the output code below
// pick the queued picture with the lowest poc; the scan stops at the end of
// the list or at the first entry whose poc is 0 or that is a key frame
7634 out = h->delayed_pic[0];
7636 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7637 if(h->delayed_pic[i]->poc < out->poc){
7638 out = h->delayed_pic[i];
// remove the chosen entry by moving the rest of the list down one slot
7642 for(i=out_idx; h->delayed_pic[i]; i++)
7643 h->delayed_pic[i] = h->delayed_pic[i+1];
7646 *data_size = sizeof(AVFrame);
7647 *pict= *(AVFrame*)out;
// AVC mode: parse the avcC extradata once, before the first buffer
7653 if(h->is_avc && !h->got_avcC) {
7654 int i, cnt, nalsize;
7655 unsigned char *p = avctx->extradata;
// NOTE(review): only a minimum size of 7 bytes is checked here; the 16 bit
// lengths read from the extradata below are not visibly validated against
// extradata_size, so confirm that truncated extradata cannot cause reads
// past its end
7656 if(avctx->extradata_size < 7) {
7657 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7661 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7664 /* sps and pps in the avcC always have length coded with 2 bytes,
7665 so put a fake nal_length_size = 2 while parsing them */
7666 h->nal_length_size = 2;
7667 // Decode sps from avcC
7668 cnt = *(p+5) & 0x1f; // Number of sps
// each entry is a 16 bit big-endian length followed by the unit itself;
// nalsize covers the two length bytes as well
7670 for (i = 0; i < cnt; i++) {
7671 nalsize = AV_RB16(p) + 2;
7672 if(decode_nal_units(h, p, nalsize) < 0) {
7673 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7678 // Decode pps from avcC
7679 cnt = *(p++); // Number of pps
7680 for (i = 0; i < cnt; i++) {
7681 nalsize = AV_RB16(p) + 2;
// NOTE(review): the SPS loop above accepts any non-negative result while this
// one requires the result to equal nalsize; confirm the difference is intended
7682 if(decode_nal_units(h, p, nalsize) != nalsize) {
7683 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7688 // Now store the right nal length size, which will be used to parse all other nals
// taken from the low two bits of extradata byte 4, plus one
7689 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7690 // Do not reparse avcC
// streams that are not in AVC mode: any extradata is fed through the regular
// NAL unit parser
7694 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7695 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7700 buf_index=decode_nal_units(h, buf, buf_size);
// no picture was started by this buffer; this is silent when frames are being
// skipped on purpose and logged as an error otherwise
7704 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7705 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7706 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// picture completion runs on every call unless input is chunked, in which
// case it waits until all macroblock rows have been decoded
7710 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7711 Picture *out = s->current_picture_ptr;
7712 Picture *cur = s->current_picture_ptr;
7713 int i, pics, cross_idr, out_of_order, out_idx;
7717 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7718 s->current_picture_ptr->pict_type= s->pict_type;
7720 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7721 ff_vdpau_h264_set_reference_frames(s);
// apply the reference marking operations collected in h->mmco, then save the
// poc and frame_num state as the prev_ values
7724 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7725 h->prev_poc_msb= h->poc_msb;
7726 h->prev_poc_lsb= h->poc_lsb;
7728 h->prev_frame_num_offset= h->frame_num_offset;
7729 h->prev_frame_num= h->frame_num;
// tell the hardware accelerator, or VDPAU, that the picture is complete
7731 if (avctx->hwaccel) {
7732 if (avctx->hwaccel->end_frame(avctx) < 0)
7733 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
7736 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7737 ff_vdpau_h264_picture_complete(s);
7740 * FIXME: Error handling code does not seem to support interlaced
7741 * when slices span multiple rows
7742 * The ff_er_add_slice calls don't work right for bottom
7743 * fields; they cause massive erroneous error concealing
7744 * Error marking covers both fields (top and bottom).
7745 * This causes a mismatched s->error_count
7746 * and a bad error table. Further, the error count goes to
7747 * INT_MAX when called for bottom field, because mb_y is
7748 * past end by one (callers fault) and resync_mb_y != 0
7749 * causes problems for the first MB line, too.
// a field_poc of INT_MAX marks a field that has not been decoded yet
7756 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7757 /* Wait for second field. */
7761 cur->repeat_pict = 0;
7763 /* Signal interlacing information externally. */
7764 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7765 if(h->sps.pic_struct_present_flag){
7766 switch (h->sei_pic_struct)
7768 case SEI_PIC_STRUCT_FRAME:
7769 cur->interlaced_frame = 0;
7771 case SEI_PIC_STRUCT_TOP_FIELD:
7772 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7773 case SEI_PIC_STRUCT_TOP_BOTTOM:
7774 case SEI_PIC_STRUCT_BOTTOM_TOP:
7775 cur->interlaced_frame = 1;
7777 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7778 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7779 // Signal the possibility of telecined film externally (pic_struct 5,6)
7780 // From these hints, let the applications decide if they apply deinterlacing.
7781 cur->repeat_pict = 1;
7782 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7784 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7785 // Force progressive here, as doubling interlaced frame is a bad idea.
7786 cur->interlaced_frame = 0;
7787 cur->repeat_pict = 2;
7789 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7790 cur->interlaced_frame = 0;
7791 cur->repeat_pict = 4;
7795 /* Derive interlacing flag from used decoding process. */
7796 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7799 if (cur->field_poc[0] != cur->field_poc[1]){
7800 /* Derive top_field_first from field pocs. */
7801 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7803 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7804 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7805 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7806 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7807 cur->top_field_first = 1;
7809 cur->top_field_first = 0;
7811 /* Most likely progressive */
7812 cur->top_field_first = 0;
7816 //FIXME do something with unavailable reference frames
7818 /* Sort B-frames into display order */
// the output delay is raised to the reorder depth announced by the SPS
7820 if(h->sps.bitstream_restriction_flag
7821 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7822 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// strict compliance without stream restrictions: use the maximum delay
7826 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7827 && !h->sps.bitstream_restriction_flag){
7828 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
// count the pictures already queued, then append the current one
7833 while(h->delayed_pic[pics]) pics++;
7835 assert(pics <= MAX_DELAYED_PIC_COUNT);
7837 h->delayed_pic[pics++] = cur;
// a picture that is not a reference is tagged as held for delayed output
7838 if(cur->reference == 0)
7839 cur->reference = DELAYED_PIC_REF;
// same lowest poc selection as in the end of stream path above
7841 out = h->delayed_pic[0];
7843 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7844 if(h->delayed_pic[i]->poc < out->poc){
7845 out = h->delayed_pic[i];
// cross_idr: the first queued picture has poc 0 or is a key frame, or the scan
// above stopped before reaching the end of the list
7848 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
// out_of_order: the candidate would be output before a picture already output
7850 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// NOTE(review): several lines of the condition chain below are not visible in
// this listing; the visible part ends by incrementing has_b_frames
7852 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7854 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7856 ((!cross_idr && out->poc > h->outputed_poc + 2)
7857 || cur->pict_type == FF_B_TYPE)))
7860 s->avctx->has_b_frames++;
// the candidate leaves the queue when it is out of order or the queue is
// longer than the current delay
7863 if(out_of_order || pics > s->avctx->has_b_frames){
7864 out->reference &= ~DELAYED_PIC_REF;
7865 for(i=out_idx; h->delayed_pic[i]; i++)
7866 h->delayed_pic[i] = h->delayed_pic[i+1];
// it is handed to the caller only if it is in order
7868 if(!out_of_order && pics > s->avctx->has_b_frames){
7869 *data_size = sizeof(AVFrame);
7871 h->outputed_poc = out->poc;
7872 *pict= *(AVFrame*)out;
7874 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7879 assert(pict->data[0] || !*data_size);
7880 ff_print_debug_info(s, pict);
7881 //printf("out %d\n", (int)pict->data[0]);
7884 /* Return the Picture timestamp as the frame number */
7885 /* we subtract 1 because it is added on utils.c */
7886 avctx->frame_number = s->picture_number - 1;
7888 return get_consumed_bytes(s, buf_index, buf_size);
// Fills h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y). A neighbour is
// marked available when its h->slice_table[] entry equals h->slice_num.
// NOTE(review): the branch structure around the first three entries is not
// visible in this listing.
7891 static inline void fill_mb_avail(H264Context *h){
7892 MpegEncContext * const s = &h->s;
7893 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// entries 0..2: the row above, at columns mb_x-1, mb_x and mb_x+1; the outer
// two also require that the column exists
7896 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7897 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7898 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// entry 3: the macroblock to the left in the same row
7904 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
// entries 4 and 5 are constants
7905 h->mb_avail[4]= 1; //FIXME move out
7906 h->mb_avail[5]= 0; //FIXME move out
// Self-test code. The enclosing function header and several declarations are
// not visible in this listing. SIZE is the byte size passed to the bit writer
// for the scratch buffer temp and, times 8, the bit size passed to the reader.
7914 #define SIZE (COUNT*40)
7920 // int int_temp[10000];
7922 AVCodecContext avctx;
7924 dsputil_init(&dsp, &avctx);
// Round trip 1: write i = 0..COUNT-1 with set_ue_golomb(), read the values
// back with get_ue_golomb() and print any mismatch.
7926 init_put_bits(&pb, temp, SIZE);
7927 printf("testing unsigned exp golomb\n");
7928 for(i=0; i<COUNT; i++){
7930 set_ue_golomb(&pb, i);
7931 STOP_TIMER("set_ue_golomb");
7933 flush_put_bits(&pb);
7935 init_get_bits(&gb, temp, 8*SIZE);
7936 for(i=0; i<COUNT; i++){
// the next 24 bits are peeked only so they can be shown in the mismatch message
7939 s= show_bits(&gb, 24);
7942 j= get_ue_golomb(&gb);
7944 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7947 STOP_TIMER("get_ue_golomb");
// Round trip 2: the same with set_se_golomb() and get_se_golomb() on the
// values i - COUNT/2.
7951 init_put_bits(&pb, temp, SIZE);
7952 printf("testing signed exp golomb\n");
7953 for(i=0; i<COUNT; i++){
7955 set_se_golomb(&pb, i - COUNT/2);
7956 STOP_TIMER("set_se_golomb");
7958 flush_put_bits(&pb);
7960 init_get_bits(&gb, temp, 8*SIZE);
7961 for(i=0; i<COUNT; i++){
7964 s= show_bits(&gb, 24);
7967 j= get_se_golomb(&gb);
7968 if(j != i - COUNT/2){
7969 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7972 STOP_TIMER("get_se_golomb");
// Transform test: random 16 entry src and ref blocks go through
// h264_diff_dct_c(), the coefficients are rescaled, h264_idct_add() is applied
// to ref, and the absolute difference to src is tracked.
7976 printf("testing 4x4 (I)DCT\n");
7979 uint8_t src[16], ref[16];
7980 uint64_t error= 0, max_error=0;
7982 for(i=0; i<COUNT; i++){
7984 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7985 for(j=0; j<16; j++){
7986 ref[j]= random()%255;
7987 src[j]= random()%255;
7990 h264_diff_dct_c(block, src, ref, 4);
// every coefficient is multiplied by 4; entries whose index has bit 0 set, and
// again those whose index has bit 2 set, are then scaled by 4/5 with rounding
7993 for(j=0; j<16; j++){
7994 // printf("%d ", block[j]);
7995 block[j]= block[j]*4;
7996 if(j&1) block[j]= (block[j]*4 + 2)/5;
7997 if(j&4) block[j]= (block[j]*4 + 2)/5;
8001 s->dsp.h264_idct_add(ref, block, 4);
8002 /* for(j=0; j<16; j++){
8003 printf("%d ", ref[j]);
8007 for(j=0; j<16; j++){
8008 int diff= FFABS(src[j] - ref[j]);
8011 max_error= FFMAX(max_error, diff);
// the mean error is printed per entry: total divided by COUNT blocks of 16
8014 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// Quantizer test: loops over qp 0..51 on random input; most of its body is not
// visible in this listing.
8015 printf("testing quantizer\n");
8016 for(qp=0; qp<52; qp++){
8018 src1_block[i]= src2_block[i]= random()%255;
// NAL layer test: a random payload is passed through encode_nal() and then
// ff_h264_decode_nal(); output length, consumed length and content are compared.
8021 printf("Testing NAL layer\n");
8023 uint8_t bitstream[COUNT];
8024 uint8_t nal[COUNT*2];
8026 memset(&h, 0, sizeof(H264Context));
8028 for(i=0; i<COUNT; i++){
// start from bytes in 1..255, so the payload holds no zero byte yet
8036 for(j=0; j<COUNT; j++){
8037 bitstream[j]= (random() % 255) + 1;
// then zero bytes are placed at random positions
8040 for(j=0; j<zeros; j++){
8041 int pos= random() % COUNT;
8042 while(bitstream[pos] == 0){
8051 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8053 printf("encoding failed\n");
8057 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8061 if(out_length != COUNT){
8062 printf("incorrect length %d %d\n", out_length, COUNT);
8066 if(consumed != nal_length){
8067 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8071 if(memcmp(bitstream, out, COUNT)){
8072 printf("mismatch\n");
// only the banner of the RBSP test is visible in this listing
8078 printf("Testing RBSP\n");
/* Releases what the decoder context owns: both rbsp buffers, the tables
 * handled by free_tables(), and every stored SPS and PPS.
 * NOTE(review): further cleanup calls and the return statement are not
 * visible in this listing. */
8086 static av_cold int decode_end(AVCodecContext *avctx)
8088 H264Context *h = avctx->priv_data;
8089 MpegEncContext *s = &h->s;
8092 av_freep(&h->rbsp_buffer[0]);
8093 av_freep(&h->rbsp_buffer[1]);
8094 free_tables(h); //FIXME cleanup init stuff perhaps
/* av_freep() receives the address of each slot */
8096 for(i = 0; i < MAX_SPS_COUNT; i++)
8097 av_freep(h->sps_buffers + i);
8099 for(i = 0; i < MAX_PPS_COUNT; i++)
8100 av_freep(h->pps_buffers + i);
8104 // memset(h, 0, sizeof(H264Context));
/* Codec table entry for the H.264 decoder. Only part of the initializer is
 * visible in this listing. */
8110 AVCodec h264_decoder = {
/* presumably the size of the private context allocated per codec instance;
 * the field name is not visible here */
8114 sizeof(H264Context),
/* capability flags; the draw_horiz_band capability is commented out */
8119 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8121 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8122 .pix_fmts= ff_hwaccel_pixfmt_list_420,
/* Codec table entry for the VDPAU variant of the decoder, compiled only when
 * CONFIG_H264_VDPAU_DECODER is set. Only part of the initializer is visible
 * in this listing. */
8125 #if CONFIG_H264_VDPAU_DECODER
8126 AVCodec h264_vdpau_decoder = {
/* presumably the private context size, as in h264_decoder; the field name is
 * not visible here */
8130 sizeof(H264Context),
/* capability flags; CODEC_CAP_HWACCEL_VDPAU is the flag tested by the VDPAU
 * code paths elsewhere in this file */
8135 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8137 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8141 #if CONFIG_SVQ3_DECODER