2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
36 #include "rectangle.h"
37 #include "vdpau_internal.h"
41 #include "x86/h264_i386.h"
48 * Value of Picture.reference when Picture is not a reference picture, but
49 * is held for delayed output.
51 #define DELAYED_PIC_REF 4
/* CAVLC coeff_token VLCs: four tables selected by the neighbouring
 * non-zero-coefficient context; per-table sizes listed below. */
53 static VLC coeff_token_vlc[4];
54 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
55 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* coeff_token VLC for chroma DC blocks. */
57 static VLC chroma_dc_coeff_token_vlc;
58 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
59 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros VLCs, one table per possible total_coeff value (1..15). */
61 static VLC total_zeros_vlc[15];
62 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
63 static const int total_zeros_vlc_tables_size = 512;
/* total_zeros VLCs for chroma DC (2x2) blocks. */
65 static VLC chroma_dc_total_zeros_vlc[3];
66 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
67 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before VLCs for small zeros_left values; run7 (below) presumably
 * covers the remaining cases -- its VLC declaration is not visible here. */
69 static VLC run_vlc[6];
70 static VLC_TYPE run_vlc_tables[6][8][2];
71 static const int run_vlc_tables_size = 8;
74 static VLC_TYPE run7_vlc_table[96][2];
75 static const int run7_vlc_table_size = 96;
/* Forward declarations; definitions appear later in this file. */
77 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
78 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
79 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
80 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Pack two 16-bit values into a single 32-bit word; the placement of a/b
 * depends on WORDS_BIGENDIAN so the pair can be stored/loaded as one word.
 * NOTE(review): the #else/#endif and closing brace of this function are not
 * visible in this extract.
 */
83 static av_always_inline uint32_t pack16to32(int a, int b){
84 #ifdef WORDS_BIGENDIAN
85 return (b&0xFFFF) + (a<<16);
87 return (a&0xFFFF) + (b<<16);
/* qp%6 lookup for 0 <= qp < 52 (the values repeat 0..5). */
91 static const uint8_t rem6[52]={
92 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* qp/6 lookup for 0 <= qp < 52. */
95 static const uint8_t div6[52]={
96 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Left-neighbour block index remappings; which of the four rows is used
 * depends on the MBAFF neighbour configuration (see fill_caches). The
 * table contents themselves are not visible in this extract. */
99 static const uint8_t left_block_options[4][8]={
/* Precomputed CAVLC level-code decode table, indexed by suffix_length and
 * the next LEVEL_TAB_BITS bits of the bitstream. */
106 #define LEVEL_TAB_BITS 8
107 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fill the per-macroblock neighbour caches (intra prediction modes,
 * non-zero counts, cbp, mv/ref/mvd caches, direct flags) from the
 * surrounding already-decoded macroblocks, including the PAFF/MBAFF
 * neighbour derivation rules.
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the loop filter
 *                    (uses relaxed slice-boundary availability rules)
 * NOTE(review): many lines of the original body are elided in this
 * extract; comments below describe only the visible code.
 */
109 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
110 MpegEncContext * const s = &h->s;
111 const int mb_xy= h->mb_xy;
112 int topleft_xy, top_xy, topright_xy, left_xy[2];
113 int topleft_type, top_type, topright_type, left_type[2];
114 const uint8_t * left_block;
115 int topleft_partition= -1;
/* Index of the MB above; the stride is doubled for field pictures. */
118 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
120 //FIXME deblocking could skip the intra and nnz parts.
121 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
124 /* Wow, what a mess, why didn't they simplify the interlacing & intra
125 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour indices and left-block mapping. */
127 topleft_xy = top_xy - 1;
128 topright_xy= top_xy + 1;
129 left_xy[1] = left_xy[0] = mb_xy-1;
130 left_block = left_block_options[0];
/* MBAFF: re-derive neighbour indices from the field/frame flags of the
 * surrounding MB pairs and whether we are the top or bottom MB of ours. */
132 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
133 const int top_pair_xy = pair_xy - s->mb_stride;
134 const int topleft_pair_xy = top_pair_xy - 1;
135 const int topright_pair_xy = top_pair_xy + 1;
136 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
137 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
138 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
139 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
140 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
141 const int bottom = (s->mb_y & 1);
142 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
144 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
145 top_xy -= s->mb_stride;
147 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
155 topright_xy -= s->mb_stride;
157 if (left_mb_field_flag != curr_mb_field_flag) {
158 left_xy[1] = left_xy[0] = pair_xy - 1;
159 if (curr_mb_field_flag) {
160 left_xy[1] += s->mb_stride;
161 left_block = left_block_options[3];
163 left_block= left_block_options[2 - bottom];
/* Publish the derived neighbour indices for later use. */
168 h->top_mb_xy = top_xy;
169 h->left_mb_xy[0] = left_xy[0];
170 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbour counts as available when its slice_table
 * entry is below the 0xFFFF sentinel (i.e. it has been decoded). */
174 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
175 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
176 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF deblocking: undo the ref-value rescaling done for MC. */
178 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 for(list=0; list<h->list_count; list++){
181 //These values where changed for ease of performing MC, we need to change them back
182 //FIXME maybe we can make MC and loop filter use the same values or prevent
183 //the MC code from changing ref_cache and rather use a temporary array.
184 if(USES_LIST(mb_type,list)){
185 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decoding path: a neighbour counts only if it is in the same slice. */
195 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
196 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
197 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
198 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
199 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra MB: compute per-4x4-block sample-availability bitmasks. With
 * constrained_intra_pred only intra neighbours count (type_mask). */
201 if(IS_INTRA(mb_type)){
202 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
203 h->topleft_samples_available=
204 h->top_samples_available=
205 h->left_samples_available= 0xFFFF;
206 h->topright_samples_available= 0xEEEA;
208 if(!(top_type & type_mask)){
209 h->topleft_samples_available= 0xB3FF;
210 h->top_samples_available= 0x33FF;
211 h->topright_samples_available= 0x26EA;
/* MBAFF: left availability differs when the left pair's field flag
 * does not match ours. */
213 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
214 if(IS_INTERLACED(mb_type)){
215 if(!(left_type[0] & type_mask)){
216 h->topleft_samples_available&= 0xDFFF;
217 h->left_samples_available&= 0x5FFF;
219 if(!(left_type[1] & type_mask)){
220 h->topleft_samples_available&= 0xFF5F;
221 h->left_samples_available&= 0xFF5F;
224 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
225 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
226 assert(left_xy[0] == left_xy[1]);
227 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
228 h->topleft_samples_available&= 0xDF5F;
229 h->left_samples_available&= 0x5F5F;
233 if(!(left_type[0] & type_mask)){
234 h->topleft_samples_available&= 0xDF5F;
235 h->left_samples_available&= 0x5F5F;
239 if(!(topleft_type & type_mask))
240 h->topleft_samples_available&= 0x7FFF;
242 if(!(topright_type & type_mask))
243 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: seed the prediction-mode cache from the top and left
 * neighbours (or a default `pred` value when unavailable). */
245 if(IS_INTRA4x4(mb_type)){
246 if(IS_INTRA4x4(top_type)){
247 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
248 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
249 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
250 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
253 if(!(top_type & type_mask))
258 h->intra4x4_pred_mode_cache[4+8*0]=
259 h->intra4x4_pred_mode_cache[5+8*0]=
260 h->intra4x4_pred_mode_cache[6+8*0]=
261 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 if(IS_INTRA4x4(left_type[i])){
265 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
266 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
269 if(!(left_type[i] & type_mask))
274 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
275 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-coefficient-count cache: copy neighbour nnz values, or a
 * default (0 for CABAC inter, 64 otherwise) when unavailable. */
291 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
294 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
295 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
296 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
298 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
299 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
301 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
302 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 h->non_zero_count_cache[4+8*0]=
306 h->non_zero_count_cache[5+8*0]=
307 h->non_zero_count_cache[6+8*0]=
308 h->non_zero_count_cache[7+8*0]=
310 h->non_zero_count_cache[1+8*0]=
311 h->non_zero_count_cache[2+8*0]=
313 h->non_zero_count_cache[1+8*3]=
314 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
318 for (i=0; i<2; i++) {
320 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
321 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
322 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 h->non_zero_count_cache[3+8*1 + 2*8*i]=
326 h->non_zero_count_cache[3+8*2 + 2*8*i]=
327 h->non_zero_count_cache[0+8*1 + 8*i]=
328 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern caches (CABAC context derivation). */
335 h->top_cbp = h->cbp_table[top_xy];
336 } else if(IS_INTRA(mb_type)) {
343 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
344 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct MB: fill the motion-vector and reference caches from the
 * top, left, top-left and top-right neighbours for each reference list. */
358 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 for(list=0; list<h->list_count; list++){
361 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
362 /*if(!h->mv_cache_clean[list]){
363 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
364 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
365 h->mv_cache_clean[list]= 1;
369 h->mv_cache_clean[list]= 0;
371 if(USES_LIST(top_type, list)){
372 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
373 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
378 h->ref_cache[list][scan8[0] + 0 - 1*8]=
379 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
380 h->ref_cache[list][scan8[0] + 2 - 1*8]=
381 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
387 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 int cache_idx = scan8[0] - 1 + i*2*8;
392 if(USES_LIST(left_type[i], list)){
393 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
394 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
395 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
396 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
397 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
398 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 *(uint32_t*)h->mv_cache [list][cache_idx ]=
401 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
402 h->ref_cache[list][cache_idx ]=
403 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
407 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
410 if(USES_LIST(topleft_type, list)){
411 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
412 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
413 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
414 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
417 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
420 if(USES_LIST(topright_type, list)){
421 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
422 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
423 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
424 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
427 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
430 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Mark the cache cells with no real neighbour as unavailable. */
433 h->ref_cache[list][scan8[5 ]+1] =
434 h->ref_cache[list][scan8[7 ]+1] =
435 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
436 h->ref_cache[list][scan8[4 ]] =
437 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
438 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
441 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
442 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC: also load the neighbours' motion-vector differences. */
445 /* XXX beurk, Load mvd */
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
458 if(USES_LIST(left_type[0], list)){
459 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
466 if(USES_LIST(left_type[1], list)){
467 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
474 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
477 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
478 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: fill the direct-mode flag cache from the neighbours. */
480 if(h->slice_type_nos == FF_B_TYPE){
481 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
483 if(IS_DIRECT(top_type)){
484 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
485 }else if(IS_8X8(top_type)){
486 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
487 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
488 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
493 if(IS_DIRECT(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
495 else if(IS_8X8(left_type[0]))
496 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
500 if(IS_DIRECT(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
502 else if(IS_8X8(left_type[1]))
503 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame/field MV adjustment: MAP_F2F is applied to each cached
 * neighbour cell to rescale refs and vertical MV components when the
 * neighbour's field/frame mode differs from the current MB's. */
511 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
512 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
517 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
520 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame -> field: double the ref index, halve the vertical MV. */
522 #define MAP_F2F(idx, mb_type)\
523 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
524 h->ref_cache[list][idx] <<= 1;\
525 h->mv_cache[list][idx][1] /= 2;\
526 h->mvd_cache[list][idx][1] /= 2;\
/* field -> frame: halve the ref index, double the vertical MV. */
531 #define MAP_F2F(idx, mb_type)\
532 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
533 h->ref_cache[list][idx] >>= 1;\
534 h->mv_cache[list][idx][1] <<= 1;\
535 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for transform-size context. */
545 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copy the current MB's 4x4 intra prediction modes from the cache (the
 * right column and bottom row of the 8x5 cache layout) back into the
 * per-macroblock intra4x4_pred_mode table.
 */
548 static inline void write_back_intra_pred_mode(H264Context *h){
549 const int mb_xy= h->mb_xy;
551 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
552 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
553 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
554 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
555 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
556 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
557 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
561 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* On error (mode requires an unavailable neighbour and has no remap,
 * i.e. the lookup yields a negative status) the visible code logs via
 * av_log; the error-return lines are elided in this extract. */
563 static inline int check_intra4x4_pred_mode(H264Context *h){
564 MpegEncContext * const s = &h->s;
/* Remap tables: for each 4x4 mode, the substitute mode to use when the
 * top (resp. left) samples are unavailable; -1 means mode unchanged. */
565 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
566 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
569 if(!(h->top_samples_available&0x8000)){
571 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
576 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Check the four left-edge 4x4 blocks individually (MBAFF may make
 * only some of them available). */
581 if((h->left_samples_available&0x8888)!=0x8888){
582 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 if(!(h->left_samples_available&mask[i])){
585 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
590 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 } //FIXME cleanup like next
600 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
602 static inline int check_intra_pred_mode(H264Context *h, int mode){
603 MpegEncContext * const s = &h->s;
/* Remap tables for the 16x16/chroma pred modes when the top (resp. left)
 * neighbour samples are unavailable; -1 entries signal an error mode. */
604 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
605 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
612 if(!(h->top_samples_available&0x8000)){
615 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
620 if((h->left_samples_available&0x8080) != 0x8080){
/* Partial left availability: only possible with MBAFF and
 * constrained_intra_pred; use the special half-available DC modes. */
622 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
623 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
635 * gets the predicted intra4x4 prediction mode.
/* Prediction is the minimum of the left and top cached modes; a negative
 * cached mode means "unavailable", in which case DC_PRED is returned. */
637 static inline int pred_intra_mode(H264Context *h, int n){
638 const int index8= scan8[n];
639 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
640 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 const int min= FFMIN(left, top);
643 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
645 if(min<0) return DC_PRED;
/**
 * Copy the current MB's non-zero coefficient counts from the cache back
 * into the per-macroblock non_zero_count table (luma entries 0..6 plus
 * the chroma entries 7..12).
 */
649 static inline void write_back_non_zero_count(H264Context *h){
650 const int mb_xy= h->mb_xy;
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
670 * gets the predicted number of non-zero coefficients.
671 * @param n block index
/* Prediction combines the cached left and top counts; when both are
 * available (the i<64 case) the rounded average is used. The lines
 * computing i from left/top are elided in this extract. */
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetch the "diagonal" (top-right, falling back to top-left) neighbour MV
 * for MV prediction, writing through *C, and return its reference index.
 * Contains the special MBAFF handling that rescales the neighbour's MV
 * when its field/frame mode differs from the current MB's.
 * NOTE(review): several lines of this function are elided in this extract.
 */
686 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
687 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
688 MpegEncContext *s = &h->s;
690 /* there is no consistent mapping of mvs to neighboring locations that will
691 * make mbaff happy, so we can't move all this logic to fill_caches */
693 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch cache cell (scan8[0]-2) used to hold the rescaled MV. */
695 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
696 *C = h->mv_cache[list][scan8[0]-2];
699 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
700 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
701 if(IS_INTERLACED(mb_types[topright_xy])){
/* Read the MV at block (x4,y4) of the colocated picture, apply MV_OP to
 * its vertical component and REF_OP to its reference index, and return
 * the (rescaled) reference; bails out when the list is unused. */
702 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
703 const int x4 = X4, y4 = Y4;\
704 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
705 if(!USES_LIST(mb_type,list))\
706 return LIST_NOT_USED;\
707 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
708 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
709 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
710 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* frame MB reading a field neighbour: vertical MV *2, ref >>1. */
712 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
715 if(topright_ref == PART_NOT_AVAILABLE
716 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
717 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
720 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
/* field MB reading a frame neighbour: vertical MV /2, ref <<1. */
726 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use top-right if available, else top-left. */
732 if(topright_ref != PART_NOT_AVAILABLE){
733 *C= h->mv_cache[list][ i - 8 + part_width ];
736 tprintf(s->avctx, "topright MV not available\n");
738 *C= h->mv_cache[list][ i - 8 - 1 ];
739 return h->ref_cache[list][ i - 8 - 1 ];
744 * gets the predicted MV.
745 * @param n the block index
746 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
747 * @param mx the x component of the predicted motion vector
748 * @param my the y component of the predicted motion vector
/* Median MV prediction from the left (A), top (B) and diagonal (C)
 * neighbours; falls back to the single matching neighbour's MV when
 * exactly one neighbour shares this partition's reference index.
 * NOTE(review): the single-match branches are partially elided here. */
750 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
751 const int index8= scan8[n];
752 const int top_ref= h->ref_cache[list][ index8 - 8 ];
753 const int left_ref= h->ref_cache[list][ index8 - 1 ];
754 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
755 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 int diagonal_ref, match_count;
759 assert(part_width==1 || part_width==2 || part_width==4);
769 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
770 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
771 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
772 if(match_count > 1){ //most common
773 *mx= mid_pred(A[0], B[0], C[0]);
774 *my= mid_pred(A[1], B[1], C[1]);
775 }else if(match_count==1){
779 }else if(top_ref==ref){
/* No/ambiguous match: use A when only the left neighbour exists,
 * otherwise the median of all three. */
787 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
791 *mx= mid_pred(A[0], B[0], C[0]);
792 *my= mid_pred(A[1], B[1], C[1]);
796 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
800 * gets the directionally predicted 16x8 MV.
801 * @param n the block index
802 * @param mx the x component of the predicted motion vector
803 * @param my the y component of the predicted motion vector
/* 16x8 partitions prefer a single directional neighbour (top neighbour
 * for the upper partition, left neighbour for the lower one) when that
 * neighbour uses the same reference; otherwise fall back to the generic
 * median predictor. Parts of the branches are elided in this extract. */
805 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Fallback: generic median prediction. */
831 pred_motion(h, n, 4, list, ref, mx, my);
835 * gets the directionally predicted 8x16 MV.
836 * @param n the block index
837 * @param mx the x component of the predicted motion vector
838 * @param my the y component of the predicted motion vector
/* 8x16 partitions prefer a single directional neighbour (left neighbour
 * for the left partition, diagonal neighbour for the right one) when it
 * uses the same reference; otherwise fall back to the generic median
 * predictor. Parts of the branches are elided in this extract. */
840 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860 if(diagonal_ref == ref){
/* Fallback: generic median prediction. */
868 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predict the MV for a P-skip macroblock: zero MV when either the top or
 * left neighbour is unavailable, or when either neighbour has ref 0 with
 * a zero MV; otherwise the ordinary median prediction with ref 0.
 */
871 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
878 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
879 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
885 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Compute the temporal-direct distance scale factor for list-0 reference i
 * (tb/td POC-distance ratio in .8 fixed point, clipped to [-1024,1023]).
 * NOTE(review): the branch taken when td==0 or the reference is long-term
 * is elided in this extract (presumably returns a neutral factor).
 */
890 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
891 int poc0 = h->ref_list[0][i].poc;
892 int td = av_clip(poc1 - poc0, -128, 127);
893 if(td == 0 || h->ref_list[0][i].long_ref){
896 int tb = av_clip(poc - poc0, -128, 127);
897 int tx = (16384 + (FFABS(td) >> 1)) / td;
898 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fill the temporal-direct distance scale factor tables: the per-field
 * tables (for MBAFF, using field POCs and the +16 field-reference slots)
 * and the frame-level dist_scale_factor table.
 */
902 static inline void direct_dist_scale_factor(H264Context * const h){
903 MpegEncContext * const s = &h->s;
904 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
905 const int poc1 = h->ref_list[1][0].poc;
907 for(field=0; field<2; field++){
908 const int poc = h->s.current_picture_ptr->field_poc[field];
909 const int poc1 = h->ref_list[1][0].field_poc[field];
910 for(i=0; i < 2*h->ref_count[0]; i++)
911 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Build the colocated-reference -> list mapping used by temporal direct
 * mode: for each reference of the colocated picture (ref_list[1][0]),
 * find the entry of our own reference list with the matching POC.
 * @param field    current field parity (used with mbafi)
 * @param colfield field parity of the colocated reference lists
 * @param mbafi    nonzero when building the MBAFF field map (entries 16+)
 * NOTE(review): parts of the loop bodies are elided in this extract.
 */
919 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
920 MpegEncContext * const s = &h->s;
921 Picture * const ref1 = &h->ref_list[1][0];
922 int j, old_ref, rfield;
923 int start= mbafi ? 16 : 0;
924 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
925 int interl= mbafi || s->picture_structure != PICT_FRAME;
927 /* bogus; fills in for missing frames */
928 memset(map[list], 0, sizeof(map[list]));
930 for(rfield=0; rfield<2; rfield++){
931 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
932 int poc = ref1->ref_poc[colfield][list][old_ref];
936 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
937 poc= (poc&~3) + rfield + 1;
/* Match against our own list by the 4*frame_num+reference encoding. */
939 for(j=start; j<end; j++){
940 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
941 int cur_ref= mbafi ? (j-16)^field : j;
942 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and field-aware POC
 * keys) into the Picture struct, then builds the col-to-list0 maps used by
 * temporal direct mode. Skips map building for non-B slices or spatial
 * direct prediction.
 * NOTE(review): variable declarations and some braces are elided from this view.
 */
952 static inline void direct_ref_list_init(H264Context * const h){
953 MpegEncContext * const s = &h->s;
954 Picture * const ref1 = &h->ref_list[1][0];
955 Picture * const cur = s->current_picture_ptr;
// sidx selects the ref_count/ref_poc slot for the current field parity
957 int sidx= (s->picture_structure&1)^1;
958 int ref1sidx= (ref1->reference&1)^1;
960 for(list=0; list<2; list++){
961 cur->ref_count[sidx][list] = h->ref_count[list];
// store 4*frame_num + (reference&3) as a field-aware POC key (same key fill_colmap matches on)
962 for(j=0; j<h->ref_count[list]; j++)
963 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
// frame coding: both parity slots hold the same lists
966 if(s->picture_structure == PICT_FRAME){
967 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
968 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
971 cur->mbaff= FRAME_MBAFF;
// temporal direct maps are only needed for B slices with temporal prediction
973 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
976 for(list=0; list<2; list++){
977 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
978 for(field=0; field<2; field++)
979 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Predicts motion vectors and reference indices for a B-direct macroblock
 * (or direct 8x8 sub-blocks when is_b8x8), filling h->mv_cache/h->ref_cache
 * and updating *mb_type / h->sub_mb_type. Handles both spatial
 * (h->direct_spatial_mv_pred) and temporal direct modes, including the
 * frame/field (MBAFF/PAFF) colocated-block remappings.
 * NOTE(review): many lines (declarations, some branches and closing braces)
 * are elided from this view; comments below describe only the visible code.
 */
983 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
984 MpegEncContext * const s = &h->s;
985 int b8_stride = h->b8_stride;
986 int b4_stride = h->b_stride;
987 int mb_xy = h->mb_xy;
989 const int16_t (*l1mv0)[2], (*l1mv1)[2];
990 const int8_t *l1ref0, *l1ref1;
991 const int is_b8x8 = IS_8X8(*mb_type);
992 unsigned int sub_mb_type;
995 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// --- step 1: locate the colocated MB in ref_list[1][0], adjusting mb_xy
// for frame<->field coding mismatches between current and colocated picture ---
997 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
998 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
999 int cur_poc = s->current_picture_ptr->poc;
1000 int *col_poc = h->ref_list[1]->field_poc;
// pick the colocated field whose POC is closer to the current picture
1001 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1002 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1004 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1005 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1006 mb_xy += s->mb_stride*fieldoff;
1009 }else{ // AFL/AFR/FR/FL -> AFR/FR
1010 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
// one field MB maps onto a vertical pair of frame MBs
1011 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1012 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1013 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1016 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1017 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1018 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 }else{ // AFR/FR -> AFR/FR
1029 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
// --- step 2: derive *mb_type / sub_mb_type partitioning from the colocated MB ---
1030 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1031 /* FIXME save sub mb types from previous frames (or derive from MVs)
1032 * so we know exactly what block size to use */
1033 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1034 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1035 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1036 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1037 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
// --- step 3: fetch the colocated picture's MVs and ref indices ---
1045 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1046 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1047 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1048 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// advance to the second (bottom) MB of the colocated pair
1051 l1ref0 += h->b8_stride;
1052 l1ref1 += h->b8_stride;
1053 l1mv0 += 2*b4_stride;
1054 l1mv1 += 2*b4_stride;
// ============ spatial direct mode ============
1058 if(h->direct_spatial_mv_pred){
1063 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1065 /* ref = min(neighbors) */
1066 for(list=0; list<2; list++){
1067 int refa = h->ref_cache[list][scan8[0] - 1];
1068 int refb = h->ref_cache[list][scan8[0] - 8];
1069 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1070 if(refc == PART_NOT_AVAILABLE)
1071 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned compare makes negative (unavailable) refs sort last
1072 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
// no valid neighbor ref in either list -> (0,0) MVs with ref 0
1077 if(ref[0] < 0 && ref[1] < 0){
1078 ref[0] = ref[1] = 0;
1079 mv[0][0] = mv[0][1] =
1080 mv[1][0] = mv[1][1] = 0;
1082 for(list=0; list<2; list++){
1084 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1086 mv[list][0] = mv[list][1] = 0;
// drop the unused prediction list from mb/sub types
1092 *mb_type &= ~MB_TYPE_L1;
1093 sub_mb_type &= ~MB_TYPE_L1;
1094 }else if(ref[0] < 0){
1096 *mb_type &= ~MB_TYPE_L0;
1097 sub_mb_type &= ~MB_TYPE_L0;
// current MB and colocated MB have different frame/field coding:
// walk the four 8x8 blocks with remapped colocated coordinates
1100 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1101 for(i8=0; i8<4; i8++){
1104 int xy8 = x8+y8*b8_stride;
1105 int xy4 = 3*x8+y8*b4_stride;
1108 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1110 h->sub_mb_type[i8] = sub_mb_type;
1112 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// colocated block is a "stationary" ref-0 block (|mv| <= 1) -> force zero MV
1114 if(!IS_INTRA(mb_type_col[y8])
1115 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1116 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1118 a= pack16to32(mv[0][0],mv[0][1]);
1120 b= pack16to32(mv[1][0],mv[1][1]);
1122 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1126 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// whole-MB 16x16 spatial direct
1128 }else if(IS_16X16(*mb_type)){
1131 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1132 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1133 if(!IS_INTRA(mb_type_col[0])
1134 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1135 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <=33 had a bug in this case; see the x264_build workaround
1136 && (h->x264_build>33 || !h->x264_build)))){
1138 a= pack16to32(mv[0][0],mv[0][1]);
1140 b= pack16to32(mv[1][0],mv[1][1]);
1142 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1146 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// per-8x8 spatial direct
1148 for(i8=0; i8<4; i8++){
1149 const int x8 = i8&1;
1150 const int y8 = i8>>1;
1152 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1154 h->sub_mb_type[i8] = sub_mb_type;
1156 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1158 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1159 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// zero out MVs of (sub-)blocks whose colocated block is stationary on ref 0
1162 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1163 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1164 && (h->x264_build>33 || !h->x264_build)))){
1165 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1166 if(IS_SUB_8X8(sub_mb_type)){
1167 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1168 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1170 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1172 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 for(i4=0; i4<4; i4++){
1176 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1177 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1179 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1181 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ============ temporal direct mode ============
1187 }else{ /* direct temporal mv pred */
1188 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1189 const int *dist_scale_factor = h->dist_scale_factor;
// MBAFF field MBs use the per-field maps and scale factors
1192 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1193 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1194 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1195 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1197 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
// frame/field mismatch: scale colocated MVs with a vertical shift
1200 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1201 /* FIXME assumes direct_8x8_inference == 1 */
1202 int y_shift = 2*!IS_INTERLACED(*mb_type);
1204 for(i8=0; i8<4; i8++){
1205 const int x8 = i8&1;
1206 const int y8 = i8>>1;
1208 const int16_t (*l1mv)[2]= l1mv0;
1210 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1212 h->sub_mb_type[i8] = sub_mb_type;
// list1 ref is always 0 in temporal direct
1214 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 if(IS_INTRA(mb_type_col[y8])){
1216 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1217 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1218 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
// map the colocated ref index into the current list0
1222 ref0 = l1ref0[x8 + y8*b8_stride];
1224 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1226 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1229 scale = dist_scale_factor[ref0];
1230 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1233 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1234 int my_col = (mv_col[1]<<y_shift)/2;
// mvL0 = (DistScaleFactor * mvCol + 128) >> 8; mvL1 = mvL0 - mvCol
1235 int mx = (scale * mv_col[0] + 128) >> 8;
1236 int my = (scale * my_col + 128) >> 8;
1237 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1238 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1244 /* one-to-one mv scaling */
1246 if(IS_16X16(*mb_type)){
1249 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1250 if(IS_INTRA(mb_type_col[0])){
1253 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1254 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1255 const int scale = dist_scale_factor[ref0];
1256 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1258 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1259 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1261 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1262 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1264 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1265 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1266 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// per-8x8 temporal direct, same scaling applied per (sub-)block
1268 for(i8=0; i8<4; i8++){
1269 const int x8 = i8&1;
1270 const int y8 = i8>>1;
1272 const int16_t (*l1mv)[2]= l1mv0;
1274 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1276 h->sub_mb_type[i8] = sub_mb_type;
1277 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col[0])){
1279 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1280 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1281 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1287 ref0 = map_col_to_list0[0][ref0];
1289 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1292 scale = dist_scale_factor[ref0];
1294 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1295 if(IS_SUB_8X8(sub_mb_type)){
1296 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1297 int mx = (scale * mv_col[0] + 128) >> 8;
1298 int my = (scale * mv_col[1] + 128) >> 8;
1299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1302 for(i4=0; i4<4; i4++){
1303 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1304 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1305 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1306 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1307 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1308 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion data from the caches (h->mv_cache, h->ref_cache,
 * h->mvd_cache, h->sub_mb_type) back into the frame-wide arrays of
 * s->current_picture (motion_val, ref_index) and h->mvd_table/h->direct_table.
 * NOTE(review): loop headers/declarations for y and list are partially elided
 * from this view.
 */
1315 static inline void write_back_motion(H264Context *h, int mb_type){
1316 MpegEncContext * const s = &h->s;
// b_xy/b8_xy: top-left 4x4 / 8x8 block coordinates of this MB in the frame arrays
1317 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1318 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// mark list0 as unused up front so the deblocker sees consistent ref indices
1321 if(!USES_LIST(mb_type, 0))
1322 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1324 for(list=0; list<h->list_count; list++){
1326 if(!USES_LIST(mb_type, list))
// copy the 4x4 MV grid, two MVs (8 bytes) at a time
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC also needs the MV differences for context modelling
1333 if( h->pps.cabac ) {
1334 if(IS_SKIP(mb_type))
1335 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1338 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// one ref index per 8x8 block
1344 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1345 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1346 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1347 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1348 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// record per-8x8 direct flags for CABAC context of later B MBs
1352 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1353 if(IS_8X8(mb_type)){
1354 uint8_t *direct_table = &h->direct_table[b8_xy];
1355 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1356 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1357 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1363 * Decodes a network abstraction layer unit.
1364 * @param consumed is the number of bytes used as input
1365 * @param length is the length of the array
1366 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1367 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Parses the NAL header byte and un-escapes the RBSP: removes the
 * 00 00 03 emulation-prevention bytes (H.264 spec 7.4.1) into
 * h->rbsp_buffer. Returns src+1 directly when no escapes are present.
 * NOTE(review): several declarations, the escape-scan loop structure and some
 * braces are elided from this view.
 */
1369 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
// NAL header: 1 forbidden bit, 2 bits nal_ref_idc, 5 bits nal_unit_type
1374 // src[0]&0x80; //forbidden bit
1375 h->nal_ref_idc= src[0]>>5;
1376 h->nal_unit_type= src[0]&0x1F;
1380 for(i=0; i<length; i++)
1381 printf("%2X ", src[i]);
// fast scan for a zero byte, 8 (or 4) bytes at a time using the
// classic "has-zero-byte" bit trick; steps of 9/5 because a hit backtracks
1384 #if HAVE_FAST_UNALIGNED
1385 # if HAVE_FAST_64BIT
1387 for(i=0; i+1<length; i+=9){
1388 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1391 for(i=0; i+1<length; i+=5){
1392 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1395 if(i>0 && !src[i]) i--;
// portable byte-wise scan for the 00 00 0x pattern
1399 for(i=0; i+1<length; i+=2){
1400 if(src[i]) continue;
1401 if(i>0 && src[i-1]==0) i--;
1403 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1405 /* startcode, so we must be past the end */
// no escape sequence found: return the payload in place, no copy
1413 if(i>=length-1){ //no escaped 0
1414 *dst_length= length;
1415 *consumed= length+1; //+1 for the header
1419 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1420 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1421 dst= h->rbsp_buffer[bufidx];
1427 //printf("decoding esc\n");
// bulk-copy everything before the first escape, then filter the rest
1428 memcpy(dst, src, i);
1431 //remove escapes (very rare 1:2^22)
1433 dst[di++]= src[si++];
1434 dst[di++]= src[si++];
1435 }else if(src[si]==0 && src[si+1]==0){
// 00 00 03 -> drop the 03 emulation-prevention byte
1436 if(src[si+2]==3){ //escape
1441 }else //next start code
1445 dst[di++]= src[si++];
1448 dst[di++]= src[si++];
// zero padding so the bit reader can safely over-read
1451 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1454 *consumed= si + 1;//+1 for the header
1455 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1460 * identifies the exact end of the bitstream
1461 * @return the length of the trailing, or 0 if damaged
// Locates the rbsp_stop_one_bit in the last byte to find the exact bitstream
// end. NOTE(review): the body (computation of v and the return) is elided
// from this view; per the doc comment above, returns 0 if damaged.
1463 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1467 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1477 * IDCT transforms the 16 dc values and dequantizes them.
1478 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequantization of the 16 luma DC
 * coefficients (Intra16x16), done as separable row then column passes.
 * The DC values sit at x_offset/y_offset positions inside the 16x16
 * coefficient layout (stride 16).
 * NOTE(review): loop headers and the intermediate temp[] stores between the
 * two passes are elided from this view.
 */
1480 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1483 int temp[16]; //FIXME check if this is a good idea
1484 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1485 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1487 //memset(block, 64, 2*256);
// pass 1: butterflies along one dimension into temp[]
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
// pass 2: butterflies along the other dimension, then dequant with
// rounding: (x*qmul + 128) >> 8
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1509 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1510 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1511 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1512 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1518 * DCT transforms the 16 dc values.
1519 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side
 * counterpart of h264_luma_dc_dequant_idct_c), with a final >>1 scaling.
 * NOTE(review): loop headers and the temp[] stores between the two passes
 * are elided from this view.
 */
1521 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1522 // const int qmul= dequant_coeff[qp][0];
1524 int temp[16]; //FIXME check if this is a good idea
1525 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1526 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// pass 1: butterflies into temp[]
1529 const int offset= y_offset[i];
1530 const int z0= block[offset+stride*0] + block[offset+stride*4];
1531 const int z1= block[offset+stride*0] - block[offset+stride*4];
1532 const int z2= block[offset+stride*1] - block[offset+stride*5];
1533 const int z3= block[offset+stride*1] + block[offset+stride*5];
// pass 2: butterflies, halved (>>1) per the H.264 forward DC transform
1542 const int offset= x_offset[i];
1543 const int z0= temp[4*0+i] + temp[4*2+i];
1544 const int z1= temp[4*0+i] - temp[4*2+i];
1545 const int z2= temp[4*1+i] - temp[4*3+i];
1546 const int z3= temp[4*1+i] + temp[4*3+i];
1548 block[stride*0 +offset]= (z0 + z3)>>1;
1549 block[stride*2 +offset]= (z1 + z2)>>1;
1550 block[stride*8 +offset]= (z1 - z2)>>1;
1551 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the 4 chroma DC
 * coefficients, in place within the interleaved chroma coefficient layout.
 * NOTE(review): the declarations of a..e and the intermediate butterfly
 * (computing e and updating a) are elided from this view.
 */
1559 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1560 const int stride= 16*2;
1561 const int xStride= 16;
1564 a= block[stride*0 + xStride*0];
1565 b= block[stride*0 + xStride*1];
1566 c= block[stride*1 + xStride*0];
1567 d= block[stride*1 + xStride*1];
// 2x2 butterfly result, dequantized with >>7 (no rounding term)
1574 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1575 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1576 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1577 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder side
 * counterpart of chroma_dc_dequant_idct_c), no scaling applied.
 * NOTE(review): the declarations of a..e and the intermediate butterfly are
 * elided from this view.
 */
1581 static void chroma_dc_dct_c(DCTELEM *block){
1582 const int stride= 16*2;
1583 const int xStride= 16;
1586 a= block[stride*0 + xStride*0];
1587 b= block[stride*0 + xStride*1];
1588 c= block[stride*1 + xStride*0];
1589 d= block[stride*1 + xStride*1];
// 2x2 butterfly written back in place
1596 block[stride*0 + xStride*0]= (a+c);
1597 block[stride*0 + xStride*1]= (e+b);
1598 block[stride*1 + xStride*0]= (a-c);
1599 block[stride*1 + xStride*1]= (e-b);
1604 * gets the chroma qp.
// Maps a luma qscale to the chroma QP for chroma plane t (0=Cb, 1=Cr)
// via the PPS-derived lookup table.
1606 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1607 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one direction (one list) of one partition:
 * quarter-pel luma interpolation via qpix_op and eighth-pel chroma via
 * chroma_op, with edge emulation when the MV points (partly) outside the
 * picture.
 * NOTE(review): the declarations of emu and the conditionals guarding the
 * emulation/second-op calls are elided from this view.
 */
1610 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1611 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1612 int src_x_offset, int src_y_offset,
1613 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1614 MpegEncContext * const s = &h->s;
// full-pel + fractional MV in quarter-pel units; luma_xy selects the qpel filter
1615 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1616 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1617 const int luma_xy= (mx&3) + ((my&3)<<2);
1618 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1619 uint8_t * src_cb, * src_cr;
1620 int extra_width= h->emu_edge_width;
1621 int extra_height= h->emu_edge_height;
1623 const int full_mx= mx>>2;
1624 const int full_my= my>>2;
1625 const int pic_width = 16*s->mb_width;
1626 const int pic_height = 16*s->mb_height >> MB_FIELD;
// subpel interpolation reads 3 extra pixels on each side
1628 if(mx&7) extra_width -= 3;
1629 if(my&7) extra_height -= 3;
// MV reaches outside the padded picture: interpolate from an emulated edge
1631 if( full_mx < 0-extra_width
1632 || full_my < 0-extra_height
1633 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1634 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1635 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1636 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1640 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
// non-square partitions need a second op at offset delta
1642 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1645 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1648 // chroma offset when predicting from a field of opposite parity
1649 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1650 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1652 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1656 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1657 src_cb= s->edge_emu_buffer;
1659 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1662 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1663 src_cr= s->edge_emu_buffer;
1665 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: list0 prediction with
 * the "put" functions, then (for bi-prediction) list1 averaged on top with
 * the "avg" functions.
 * NOTE(review): the conditionals guarding the list0/list1 calls are elided
 * from this view.
 */
1668 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1669 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1670 int x_offset, int y_offset,
1671 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1672 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1673 int list0, int list1){
1674 MpegEncContext * const s = &h->s;
// first prediction writes ("put"); a second one averages in ("avg")
1675 qpel_mc_func *qpix_op= qpix_put;
1676 h264_chroma_mc_func chroma_op= chroma_put;
// advance dest pointers to this partition; offsets are in chroma-sample units
1678 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1679 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1680 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1681 x_offset += 8*s->mb_x;
1682 y_offset += 8*(s->mb_y >> MB_FIELD);
1685 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
// switch to averaging for the list1 pass (bi-prediction)
1691 chroma_op= chroma_avg;
1695 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1696 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1697 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1698 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bi-prediction
 * (list0 && list1) renders list1 into the obmc scratchpad and blends with
 * the biweight functions (implicit weights when use_weight==2, explicit
 * otherwise); uni-prediction applies the per-list explicit weight/offset
 * in place.
 * NOTE(review): the branch selecting bi- vs uni-prediction and some braces
 * are elided from this view.
 */
1702 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1703 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1704 int x_offset, int y_offset,
1705 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1706 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1707 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1708 int list0, int list1){
1709 MpegEncContext * const s = &h->s;
// advance dest pointers to this partition; offsets are in chroma-sample units
1711 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1712 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1713 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1714 x_offset += 8*s->mb_x;
1715 y_offset += 8*(s->mb_y >> MB_FIELD);
1718 /* don't optimize for luma-only case, since B-frames usually
1719 * use implicit weights => chroma too. */
// scratch buffers: cb at 0, cr at +8, luma after one uv row
1720 uint8_t *tmp_cb = s->obmc_scratchpad;
1721 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1722 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1723 int refn0 = h->ref_cache[0][ scan8[n] ];
1724 int refn1 = h->ref_cache[1][ scan8[n] ];
// list0 directly into dest, list1 into the scratchpad, then blend
1726 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1727 dest_y, dest_cb, dest_cr,
1728 x_offset, y_offset, qpix_put, chroma_put);
1729 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1730 tmp_y, tmp_cb, tmp_cr,
1731 x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: weights sum to 64, log2 denom 5, offset 0
1733 if(h->use_weight == 2){
1734 int weight0 = h->implicit_weight[refn0][refn1];
1735 int weight1 = 64 - weight0;
1736 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1737 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit weighting: per-list weights and summed offsets
1740 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1741 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1742 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1743 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1745 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1746 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1747 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1748 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// uni-prediction: predict then weight in place
1751 int list = list1 ? 1 : 0;
1752 int refn = h->ref_cache[list][ scan8[n] ];
1753 Picture *ref= &h->ref_list[list][refn];
1754 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1755 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1756 qpix_put, chroma_put);
1758 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1759 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1760 if(h->use_weight_chroma){
1761 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1762 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1763 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1764 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition's motion compensation to the weighted or the
 * standard path. The weighted path is taken for explicit weighting
 * (use_weight==1) or for implicit bi-prediction whose weight differs from
 * the trivial 32/32 average; everything else uses plain put/avg MC.
 */
1769 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1770 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1771 int x_offset, int y_offset,
1772 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1773 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1774 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1775 int list0, int list1){
// implicit weight of 32 means a plain average, so the std path suffices
1776 if((h->use_weight==2 && list0 && list1
1777 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1778 || h->use_weight==1)
// indices 0/3 select the luma/chroma variants for this partition size
1779 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1780 x_offset, y_offset, qpix_put, chroma_put,
1781 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1783 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1784 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture for the MB roughly
 * 4 macroblocks ahead, using this MB's scan8[0] MV as the estimate.
 * NOTE(review): the guard around refn (validity check) is elided from this
 * view.
 */
1787 static inline void prefetch_motion(H264Context *h, int list){
1788 /* fetch pixels for estimated mv 4 macroblocks ahead
1789 * optimized for 64byte cache lines */
1790 MpegEncContext * const s = &h->s;
1791 const int refn = h->ref_cache[list][scan8[0]];
// full-pel position, shifted 4 MBs (+8 centers within the next MB)
1793 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1794 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1795 uint8_t **src= h->ref_list[list][refn].data;
1796 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1797 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// chroma planes are contiguous: src[2]-src[1] is the cb->cr stride
1798 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1799 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock, dispatching to
 * mc_part() per partition according to the MB type (16x16, 16x8, 8x16, or
 * 8x8 with per-8x8 sub-partitions down to 4x4). The qpix/chroma/weight
 * function arrays are indexed by partition size.
 * NOTE(review): the 8x8 loop header and some braces are elided from this
 * view.
 */
1803 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1805 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1806 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1807 MpegEncContext * const s = &h->s;
1808 const int mb_xy= h->mb_xy;
1809 const int mb_type= s->current_picture.mb_type[mb_xy];
1811 assert(IS_INTER(mb_type));
// prefetch list0 of the MB ahead before doing this MB's work
1813 prefetch_motion(h, 0);
1815 if(IS_16X16(mb_type)){
1816 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1817 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1818 &weight_op[0], &weight_avg[0],
1819 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
// two 16x8 halves: n=0 top, n=8 bottom (y_offset 4 chroma samples)
1820 }else if(IS_16X8(mb_type)){
1821 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1822 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1823 &weight_op[1], &weight_avg[1],
1824 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1825 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1826 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1827 &weight_op[1], &weight_avg[1],
1828 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// two 8x16 halves: n=0 left, n=4 right (delta is a row stride here)
1829 }else if(IS_8X16(mb_type)){
1830 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1831 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832 &weight_op[2], &weight_avg[2],
1833 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1834 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1835 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1836 &weight_op[2], &weight_avg[2],
1837 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1841 assert(IS_8X8(mb_type));
// per-8x8 sub-partitions; directions come from sub_mb_type
1844 const int sub_mb_type= h->sub_mb_type[i];
1846 int x_offset= (i&1)<<2;
1847 int y_offset= (i&2)<<1;
1849 if(IS_SUB_8X8(sub_mb_type)){
1850 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1851 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1852 &weight_op[3], &weight_avg[3],
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 }else if(IS_SUB_8X4(sub_mb_type)){
1855 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1856 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1857 &weight_op[4], &weight_avg[4],
1858 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1859 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1860 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1861 &weight_op[4], &weight_avg[4],
1862 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863 }else if(IS_SUB_4X8(sub_mb_type)){
1864 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1865 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1866 &weight_op[5], &weight_avg[5],
1867 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1868 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870 &weight_op[5], &weight_avg[5],
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1874 assert(IS_SUB_4X4(sub_mb_type));
// four 4x4 blocks within this 8x8
1876 int sub_x_offset= x_offset + 2*(j&1);
1877 int sub_y_offset= y_offset + (j&2);
1878 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1879 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1880 &weight_op[6], &weight_avg[6],
1881 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
// prefetch list1 after the work so both lists of the next MB are warming
1887 prefetch_motion(h, 1);
/**
 * Builds cavlc_level_tab: for each suffix_length (0..6) and each
 * LEVEL_TAB_BITS-bit prefix of the bitstream, precomputes the decoded
 * level ([0]) and the number of bits consumed ([1]). Codes longer than
 * LEVEL_TAB_BITS get escape markers (value >= 100) so the slow path runs.
 * NOTE(review): the declaration of i and some braces are elided from this
 * view.
 */
1890 static av_cold void init_cavlc_level_tab(void){
1891 int suffix_length, mask;
1894 for(suffix_length=0; suffix_length<7; suffix_length++){
1895 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// prefix = number of leading zeros before the marker bit
1896 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1897 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
// zigzag: even codes -> positive levels, odd -> negative
1899 mask= -(level_code&1);
1900 level_code= (((2+level_code)>>1) ^ mask) - mask;
// whole code fits in the table index: store final level + length
1901 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1902 cavlc_level_tab[suffix_length][i][0]= level_code;
1903 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// only the prefix fits: store prefix+100 as an escape for the reader
1904 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1905 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1906 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
// not even the prefix fits: consume LEVEL_TAB_BITS and retry
1908 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1909 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables (coeff_token,
 * total_zeros, run_before, and their chroma-DC variants), all backed by the
 * statically allocated arrays declared at the top of this file, then the
 * level LUT via init_cavlc_level_tab(). */
1915 static av_cold void decode_init_vlc(void){
/* guards against repeated initialization — presumably tested in a missing
 * if(!done) wrapper; NOTE(review): confirm against the full file */
1916 static int done = 0;
1923 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1924 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1925 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1926 &chroma_dc_coeff_token_len [0], 1, 1,
1927 &chroma_dc_coeff_token_bits[0], 1, 1,
1928 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token tables are packed back-to-back in
 * coeff_token_vlc_tables; offset tracks the running position */
1932 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1933 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1934 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1935 &coeff_token_len [i][0], 1, 1,
1936 &coeff_token_bits[i][0], 1, 1,
1937 INIT_VLC_USE_NEW_STATIC);
1938 offset += coeff_token_vlc_tables_size[i];
1941 * This is a one time safety check to make sure that
1942 * the packed static coeff_token_vlc table sizes
1943 * were initialized correctly.
1945 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1948 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1949 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1950 init_vlc(&chroma_dc_total_zeros_vlc[i],
1951 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1952 &chroma_dc_total_zeros_len [i][0], 1, 1,
1953 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1954 INIT_VLC_USE_NEW_STATIC);
/* one total_zeros table per possible total_coeff value 1..15 */
1956 for(i=0; i<15; i++){
1957 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1958 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1959 init_vlc(&total_zeros_vlc[i],
1960 TOTAL_ZEROS_VLC_BITS, 16,
1961 &total_zeros_len [i][0], 1, 1,
1962 &total_zeros_bits[i][0], 1, 1,
1963 INIT_VLC_USE_NEW_STATIC);
1967 run_vlc[i].table = run_vlc_tables[i];
1968 run_vlc[i].table_allocated = run_vlc_tables_size;
1969 init_vlc(&run_vlc[i],
1971 &run_len [i][0], 1, 1,
1972 &run_bits[i][0], 1, 1,
1973 INIT_VLC_USE_NEW_STATIC);
/* NOTE(review): comma operator on the next line instead of ';' — harmless
 * but unconventional */
1975 run7_vlc.table = run7_vlc_table,
1976 run7_vlc.table_allocated = run7_vlc_table_size;
1977 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1978 &run_len [6][0], 1, 1,
1979 &run_bits[6][0], 1, 1,
1980 INIT_VLC_USE_NEW_STATIC);
1982 init_cavlc_level_tab();
/* Free all per-context tables allocated by alloc_tables()/context_init(),
 * plus the per-thread buffers of every slice-thread context. av_freep()
 * NULLs each pointer, so a subsequent free_tables() call is safe. */
1986 static void free_tables(H264Context *h){
1989 av_freep(&h->intra4x4_pred_mode);
1990 av_freep(&h->chroma_pred_mode_table);
1991 av_freep(&h->cbp_table);
1992 av_freep(&h->mvd_table[0]);
1993 av_freep(&h->mvd_table[1]);
1994 av_freep(&h->direct_table);
1995 av_freep(&h->non_zero_count);
1996 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (see alloc_tables), so it must
 * be cleared, not freed */
1997 h->slice_table= NULL;
1999 av_freep(&h->mb2b_xy);
2000 av_freep(&h->mb2b8_xy);
/* per-thread buffers: each thread context owns its own top_borders and
 * obmc scratchpad */
2002 for(i = 0; i < h->s.avctx->thread_count; i++) {
2003 hx = h->thread_context[i];
2005 av_freep(&hx->top_borders[1]);
2006 av_freep(&hx->top_borders[0]);
2007 av_freep(&hx->s.obmc_scratchpad);
/* Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If both 8x8 scaling matrices are identical, the
 * second table aliases the first to save work and memory. Coefficients are
 * stored transposed when the platform IDCT expects transposed input. */
2011 static void init_dequant8_coeff_table(H264Context *h){
2013 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2014 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2015 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2017 for(i=0; i<2; i++ ){
/* intra and inter matrices identical -> share one table */
2018 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2019 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2023 for(q=0; q<52; q++){
/* dequant scale doubles every 6 QP steps: shift = q/6 */
2024 int shift = div6[q];
2027 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2028 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2029 h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the 4x4 dequantization tables for all 52 QP values from the
 * six PPS 4x4 scaling matrices; a matrix identical to an earlier one shares
 * that earlier table. Stored transposed for non-C IDCTs, like the 8x8 case. */
2034 static void init_dequant4_coeff_table(H264Context *h){
2036 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2037 for(i=0; i<6; i++ ){
2038 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse the table of any earlier matrix with identical contents */
2040 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2041 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2048 for(q=0; q<52; q++){
/* +2 keeps the 4x4 values in the same fixed-point scale as the IDCT expects */
2049 int shift = div6[q] + 2;
2052 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2053 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2054 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS. For lossless
 * (transform_bypass) streams, QP 0 entries are forced to the identity scale
 * 1<<6 so dequantization becomes a no-op. */
2059 static void init_dequant_tables(H264Context *h){
2061 init_dequant4_coeff_table(h);
2062 if(h->pps.transform_8x8_mode)
2063 init_dequant8_coeff_table(h);
2064 if(h->sps.transform_bypass){
2067 h->dequant4_coeff[i][0][x] = 1<<6;
2068 if(h->pps.transform_8x8_mode)
2071 h->dequant8_coeff[i][0][x] = 1<<6;
2078 * needs width/height
/* Allocate all per-stream tables sized from mb_width/mb_height (must be
 * known before calling). Also builds the mb_xy -> b_xy / b8_xy index maps
 * and lazily initializes the dequant tables.
 * Returns 0 on success; CHECKED_ALLOCZ presumably jumps to a cleanup path
 * on allocation failure — TODO confirm the macro's error handling. */
2080 static int alloc_tables(H264Context *h){
2081 MpegEncContext * const s = &h->s;
/* +1 row of macroblocks as guard space for edge accesses */
2082 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2085 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2087 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2088 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2089 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2091 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2092 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2093 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2094 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset into the base so that
 * out-of-frame neighbor lookups hit valid (guard) memory */
2096 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2097 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2099 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2100 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map each macroblock index to its 4x4-block and 8x8-block raster index */
2101 for(y=0; y<s->mb_height; y++){
2102 for(x=0; x<s->mb_width; x++){
2103 const int mb_xy= x + y*s->mb_stride;
2104 const int b_xy = 4*x + 4*y*h->b_stride;
2105 const int b8_xy= 2*x + 2*y*h->b8_stride;
2107 h->mb2b_xy [mb_xy]= b_xy;
2108 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is allocated later in frame_start(), once linesize is known */
2112 s->obmc_scratchpad = NULL;
2114 if(!h->dequant4_coeff[0])
2115 init_dequant_tables(h);
2124 * Mimic alloc_tables(), but for every context thread.
/* Share the read-mostly tables of the main context with a slice-thread
 * context (shallow pointer copies — dst does not own them and must not free
 * them; free_tables() only frees via the main context). */
2126 static void clone_tables(H264Context *dst, H264Context *src){
2127 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2128 dst->non_zero_count = src->non_zero_count;
2129 dst->slice_table = src->slice_table;
2130 dst->cbp_table = src->cbp_table;
2131 dst->mb2b_xy = src->mb2b_xy;
2132 dst->mb2b8_xy = src->mb2b8_xy;
2133 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2134 dst->mvd_table[0] = src->mvd_table[0];
2135 dst->mvd_table[1] = src->mvd_table[1];
2136 dst->direct_table = src->direct_table;
/* per-thread scratchpad is allocated lazily in frame_start() */
2138 dst->s.obmc_scratchpad = NULL;
2139 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2144 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the buffers each slice thread needs privately: two rows of
 * deblock/intra top borders (16 luma + 8+8 chroma bytes per MB).
 * Returns 0 on success, -1 on allocation failure (caller runs free_tables). */
2146 static int context_init(H264Context *h){
2147 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2152 return -1; // free_tables will clean up for us
/* Initialization shared by all entry points: copies dimensions from the
 * AVCodecContext, sets up intra prediction and DSP function pointers, and
 * seeds flat (all-16) default scaling matrices. */
2155 static av_cold void common_init(H264Context *h){
2156 MpegEncContext * const s = &h->s;
2158 s->width = s->avctx->width;
2159 s->height = s->avctx->height;
2160 s->codec_id= s->avctx->codec->id;
2162 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS dequant tables built yet" sentinel */
2164 h->dequant_coeff_pps= -1;
2165 s->unrestricted_mv=1;
2166 s->decode=1; //FIXME
2168 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* flat default scaling lists (value 16 == unity scale) until SPS/PPS
 * supply real ones */
2170 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2171 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec.init callback: set up the H.264 decoder context, pick the output
 * pixel format (SVQ3 / VDPAU / plain YUV420P) and detect avcC-style
 * (length-prefixed) extradata. */
2174 static av_cold int decode_init(AVCodecContext *avctx){
2175 H264Context *h= avctx->priv_data;
2176 MpegEncContext * const s = &h->s;
2178 MPV_decode_defaults(s);
2183 s->out_format = FMT_H264;
2184 s->workaround_bugs= avctx->workaround_bugs;
2187 // s->decode_mb= ff_h263_decode_mb;
2188 s->quarter_sample = 1;
2191 if(avctx->codec_id == CODEC_ID_SVQ3)
2192 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2193 else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2194 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2196 avctx->pix_fmt= PIX_FMT_YUV420P;
/* first extradata byte == 1 marks avcC (MP4-style) configuration records */
2200 if(avctx->extradata_size > 0 && avctx->extradata &&
2201 *(char *)avctx->extradata == 1){
2208 h->thread_context[0] = h;
/* INT_MIN: nothing output yet; prev_poc_msb seeded out of normal range */
2209 h->outputed_poc = INT_MIN;
2210 h->prev_poc_msb= 1<<16;
2211 h->sei_recovery_frame_cnt = -1;
/* Per-frame setup: start the MPV frame and error resilience, compute the
 * per-block destination offsets (frame and field variants), allocate the
 * per-thread bipred scratchpads now that linesize is known, and reset
 * key_frame/reference/poc state on the new picture. */
2215 static int frame_start(H264Context *h){
2216 MpegEncContext * const s = &h->s;
2219 if(MPV_frame_start(s, s->avctx) < 0)
2221 ff_er_frame_start(s);
2223 * MPV_frame_start uses pict_type to derive key_frame.
2224 * This is incorrect for H.264; IDR markings must be used.
2225 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2226 * See decode_nal_units().
2228 s->current_picture_ptr->key_frame= 0;
2230 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-MB offsets; [24..47]: field-MB offsets
 * (double the line stride) — luma first, then the two chroma planes */
2232 for(i=0; i<16; i++){
2233 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2234 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2237 h->block_offset[16+i]=
2238 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2239 h->block_offset[24+16+i]=
2240 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243 /* can't be in alloc_tables because linesize isn't known there.
2244 * FIXME: redo bipred weight to not require extra buffer? */
2245 for(i = 0; i < s->avctx->thread_count; i++)
2246 if(!h->thread_context[i]->s.obmc_scratchpad)
2247 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2249 /* some macroblocks will be accessed before they're available */
2250 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2251 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2253 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2255 // We mark the current picture as non-reference after allocating it, so
2256 // that if we break out due to an error it can be released automatically
2257 // in the next MPV_frame_start().
2258 // SVQ3 as well as most other codecs have only last/next/current and thus
2259 // get released even with set reference, besides SVQ3 and others do not
2260 // mark frames as reference later "naturally".
2261 if(s->codec_id != CODEC_ID_SVQ3)
2262 s->current_picture_ptr->reference= 0;
/* POCs are filled in per-field once the slice header is parsed */
2264 s->current_picture_ptr->field_poc[0]=
2265 s->current_picture_ptr->field_poc[1]= INT_MAX;
2266 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row(s) of the just-decoded macroblock into top_borders[]
 * and its right edge into left_border[], so the deblocking filter and the
 * next MB row can read pre-filter pixels. MBAFF (pairs of field/frame MBs)
 * needs two saved lines and strided left-border storage. */
2271 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2272 MpegEncContext * const s = &h->s;
2281 src_cb -= uvlinesize;
2282 src_cr -= uvlinesize;
2284 if(!simple && FRAME_MBAFF){
/* MB_MBAFF: bottom MB of a pair stores at offset 1, otherwise 17/9 */
2286 offset = MB_MBAFF ? 1 : 17;
2287 uvoffset= MB_MBAFF ? 1 : 9;
/* copy the last luma line (16 bytes as two uint64_t) into top_borders */
2289 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2290 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2291 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2292 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2293 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2298 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2299 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2300 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2301 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2307 top_idx = MB_MBAFF ? 0 : 1;
2309 step= MB_MBAFF ? 2 : 1;
2312 // There are two lines saved, the line above the top macroblock of a pair,
2313 // and the line above the bottom macroblock
2314 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2315 for(i=1; i<17 - skiplast; i++){
2316 h->left_border[offset+i*step]= src_y[15+i* linesize];
2319 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2320 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2322 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2323 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2324 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2325 for(i=1; i<9 - skiplast; i++){
2326 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2327 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2329 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2330 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved top/left border pixels with
 * the current macroblock's edges, so intra prediction sees unfiltered
 * neighbors while the frame buffer keeps the deblocked values. The swap is
 * skipped at slice boundaries when deblocking mode 2 disables cross-slice
 * filtering. */
2334 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2335 MpegEncContext * const s = &h->s;
2346 if(!simple && FRAME_MBAFF){
2348 offset = MB_MBAFF ? 1 : 17;
2349 uvoffset= MB_MBAFF ? 1 : 9;
2353 top_idx = MB_MBAFF ? 0 : 1;
2355 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter only within the current slice */
2358 if(h->deblocking_filter == 2) {
2360 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2361 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2363 deblock_left = (s->mb_x > 0);
2364 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back to include the top-left corner pixel in the exchange */
2367 src_y -= linesize + 1;
2368 src_cb -= uvlinesize + 1;
2369 src_cr -= uvlinesize + 1;
2371 #define XCHG(a,b,t,xchg)\
2378 for(i = !deblock_top; i<16; i++){
2379 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2381 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2385 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2386 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* also swap the top-right neighbor's first 8 pixels when available */
2387 if(s->mb_x+1 < s->mb_width){
2388 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2392 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2394 for(i = !deblock_top; i<8; i++){
2395 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2396 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2398 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2399 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2402 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2403 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: handles PCM copy,
 * intra prediction (4x4/8x8/16x16), inter motion compensation via
 * hl_motion(), residual IDCT for luma and chroma (including lossless
 * transform-bypass paths and the SVQ3 variant), and finally border backup
 * plus in-loop deblocking. 'simple' is a compile-time flag that strips the
 * uncommon paths (MBAFF, gray, PCM, lossless) for the fast variant. */
2408 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2409 MpegEncContext * const s = &h->s;
2410 const int mb_x= s->mb_x;
2411 const int mb_y= s->mb_y;
2412 const int mb_xy= h->mb_xy;
2413 const int mb_type= s->current_picture.mb_type[mb_xy];
2414 uint8_t *dest_y, *dest_cb, *dest_cr;
2415 int linesize, uvlinesize /*dct_offset*/;
2417 int *block_offset = &h->block_offset[0];
2418 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2419 /* is_h264 should always be true if SVQ3 is disabled. */
2420 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2421 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2422 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers for this MB in the three planes */
2424 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2425 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2426 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2428 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2429 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblocks use doubled strides and the field block_offset set */
2431 if (!simple && MB_FIELD) {
2432 linesize = h->mb_linesize = s->linesize * 2;
2433 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2434 block_offset = &h->block_offset[24];
2435 if(mb_y&1){ //FIXME move out of this function?
2436 dest_y -= s->linesize*15;
2437 dest_cb-= s->uvlinesize*7;
2438 dest_cr-= s->uvlinesize*7;
/* retag ref_cache entries with field parity for MBAFF deblocking */
2442 for(list=0; list<h->list_count; list++){
2443 if(!USES_LIST(mb_type, list))
2445 if(IS_16X16(mb_type)){
2446 int8_t *ref = &h->ref_cache[list][scan8[0]];
2447 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2449 for(i=0; i<16; i+=4){
2450 int ref = h->ref_cache[list][scan8[i]];
2452 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2458 linesize = h->mb_linesize = s->linesize;
2459 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2460 // dct_offset = s->linesize * 16;
/* IPCM: raw samples were parsed into h->mb; copy them straight out */
2463 if (!simple && IS_INTRA_PCM(mb_type)) {
2464 for (i=0; i<16; i++) {
2465 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2467 for (i=0; i<8; i++) {
2468 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2469 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2472 if(IS_INTRA(mb_type)){
/* expose unfiltered neighbor pixels to intra prediction */
2473 if(h->deblocking_filter)
2474 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2476 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2477 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2478 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2481 if(IS_INTRA4x4(mb_type)){
2482 if(simple || !s->encoding){
2483 if(IS_8x8DCT(mb_type)){
2484 if(transform_bypass){
2486 idct_add = s->dsp.add_pixels8;
2488 idct_dc_add = s->dsp.h264_idct8_dc_add;
2489 idct_add = s->dsp.h264_idct8_add;
2491 for(i=0; i<16; i+=4){
2492 uint8_t * const ptr= dest_y + block_offset[i];
2493 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 (High 4:4:4) lossless: fused predict+add path */
2494 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2495 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2497 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2498 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2499 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set -> cheap DC-only add */
2501 if(nnz == 1 && h->mb[i*16])
2502 idct_dc_add(ptr, h->mb + i*16, linesize);
2504 idct_add (ptr, h->mb + i*16, linesize);
2509 if(transform_bypass){
2511 idct_add = s->dsp.add_pixels4;
2513 idct_dc_add = s->dsp.h264_idct_dc_add;
2514 idct_add = s->dsp.h264_idct_add;
2516 for(i=0; i<16; i++){
2517 uint8_t * const ptr= dest_y + block_offset[i];
2518 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2520 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2521 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* these two modes need the top-right pixels; fake them by
 * replicating the last top pixel when unavailable */
2525 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2526 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2527 assert(mb_y || linesize <= block_offset[i]);
2528 if(!topright_avail){
2529 tr= ptr[3 - linesize]*0x01010101;
2530 topright= (uint8_t*) &tr;
2532 topright= ptr + 4 - linesize;
2536 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2537 nnz = h->non_zero_count_cache[ scan8[i] ];
2540 if(nnz == 1 && h->mb[i*16])
2541 idct_dc_add(ptr, h->mb + i*16, linesize);
2543 idct_add (ptr, h->mb + i*16, linesize);
2545 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra16x16: predict the whole luma block, then transform the DC plane */
2552 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2554 if(!transform_bypass)
2555 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2557 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2559 if(h->deblocking_filter)
2560 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter MB: motion compensation fills the prediction */
2562 hl_motion(h, dest_y, dest_cb, dest_cr,
2563 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2564 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2565 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add the luma residual (intra4x4 already added it above) */
2569 if(!IS_INTRA4x4(mb_type)){
2571 if(IS_INTRA16x16(mb_type)){
2572 if(transform_bypass){
2573 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2574 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2576 for(i=0; i<16; i++){
2577 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2578 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2582 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2584 }else if(h->cbp&15){
2585 if(transform_bypass){
2586 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2587 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2588 for(i=0; i<16; i+=di){
2589 if(h->non_zero_count_cache[ scan8[i] ]){
2590 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2594 if(IS_8x8DCT(mb_type)){
2595 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2597 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2602 for(i=0; i<16; i++){
2603 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2604 uint8_t * const ptr= dest_y + block_offset[i];
2605 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual, gated on cbp bits 4-5 */
2611 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2612 uint8_t *dest[2] = {dest_cb, dest_cr};
2613 if(transform_bypass){
2614 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2615 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2616 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2618 idct_add = s->dsp.add_pixels4;
2619 for(i=16; i<16+8; i++){
2620 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2621 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* separate chroma QP (and dequant table) per chroma plane */
2625 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2626 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2628 idct_add = s->dsp.h264_idct_add;
2629 idct_dc_add = s->dsp.h264_idct_dc_add;
2630 for(i=16; i<16+8; i++){
2631 if(h->non_zero_count_cache[ scan8[i] ])
2632 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2633 else if(h->mb[i*16])
2634 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2637 for(i=16; i<16+8; i++){
2638 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2639 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2640 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2647 if(h->cbp || IS_INTRA(mb_type))
2648 s->dsp.clear_blocks(h->mb);
/* save borders and run the in-loop deblocking filter on this MB */
2650 if(h->deblocking_filter) {
2651 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2652 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2653 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2654 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2655 if (!simple && FRAME_MBAFF) {
2656 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2658 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2664 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal with simple=1 compiles out the uncommon
 * cases (MBAFF, PCM, gray, lossless). */
2666 static void hl_decode_mb_simple(H264Context *h){
2667 hl_decode_mb_internal(h, 1);
2671 * Process a macroblock; this handles edge cases, such as interlacing.
/* Full path: simple=0 keeps all edge cases; av_noinline keeps this large
 * instantiation out of callers' hot code. */
2673 static void av_noinline hl_decode_mb_complex(H264Context *h){
2674 hl_decode_mb_internal(h, 0);
/* Dispatch a macroblock to the simple or complex reconstruction path;
 * CONFIG_SMALL builds always take the complex path to save code size. */
2677 static void hl_decode_mb(H264Context *h){
2678 MpegEncContext * const s = &h->s;
2679 const int mb_xy= h->mb_xy;
2680 const int mb_type= s->current_picture.mb_type[mb_xy];
/* qscale==0 may mean lossless transform-bypass -> needs the complex path */
2681 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2684 hl_decode_mb_complex(h);
2685 else hl_decode_mb_simple(h);
/* Convert a frame Picture into a single-field view in place: double the
 * line strides, offset the data pointers for the bottom field, and set
 * reference/poc to the chosen parity. */
2688 static void pic_as_field(Picture *pic, const int parity){
2690 for (i = 0; i < 4; ++i) {
2691 if (parity == PICT_BOTTOM_FIELD)
2692 pic->data[i] += pic->linesize[i];
2693 pic->reference = parity;
2694 pic->linesize[i] *= 2;
2696 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy src into dest if src is a reference of the requested parity,
 * converting it to a field picture when parity is a single field and
 * adjusting pic_id by id_add. Returns nonzero if a copy was made. */
2699 static int split_field_copy(Picture *dest, Picture *src,
2700 int parity, int id_add){
2701 int match = !!(src->reference & parity);
2705 if(parity != PICT_FRAME){
2706 pic_as_field(dest, parity);
2708 dest->pic_id += id_add;
/* Build a default reference list segment from 'in': alternate between
 * pictures matching field parity 'sel' and the opposite parity (sel^3),
 * as the spec requires for field reference lists. pic_id is set to the
 * long-term index or frame_num. Returns the number of entries written. */
2715 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0]/i[1] scan positions for same-parity and opposite-parity pictures */
2719 while(i[0]<len || i[1]<len){
2720 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2722 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2725 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2726 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2729 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2730 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection sort of src[0..len) by POC into sorted[], taking only POCs on
 * one side of 'limit' (below for dir=0, above for dir=1), in order of
 * increasing distance from limit. Returns the number of entries emitted. */
2737 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2742 best_poc= dir ? INT_MIN : INT_MAX;
2744 for(i=0; i<len; i++){
2745 const int poc= src[i]->poc;
/* dir flips both comparisons: pick the closest POC past 'limit' */
2746 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2748 sorted[out_i]= src[i];
2751 if(best_poc == (dir ? INT_MIN : INT_MAX))
2753 limit= sorted[out_i++]->poc - dir;
2759 * fills the default_ref_list.
/* Build the default reference picture lists per H.264 8.2.4.2: for B slices,
 * short-term refs sorted by POC distance (past then future for list0,
 * reversed for list1) followed by long-term refs; for P slices, short-term
 * by frame_num order then long-term. Ensures list1 differs from list0 when
 * both have >1 entry. */
2761 static int fill_default_ref_list(H264Context *h){
2762 MpegEncContext * const s = &h->s;
2765 if(h->slice_type_nos==FF_B_TYPE){
2766 Picture *sorted[32];
2771 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2773 cur_poc= s->current_picture_ptr->poc;
2775 for(list= 0; list<2; list++){
/* list0: past refs first (dir=1^0), then future; list1: the reverse */
2776 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2777 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2779 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2780 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2783 if(len < h->ref_count[list])
2784 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if list1 == list0 and both have >1 entry, swap list1's first two */
2788 if(lens[0] == lens[1] && lens[1] > 1){
/* NOTE(review): the element compare runs before the i<lens[0] bound check;
 * reads one entry past the compared range when the lists are equal */
2789 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2791 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2794 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2795 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2797 if(len < h->ref_count[0])
2798 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2801 for (i=0; i<h->ref_count[0]; i++) {
2802 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2804 if(h->slice_type_nos==FF_B_TYPE){
2805 for (i=0; i<h->ref_count[1]; i++) {
2806 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
/* debug dump helpers, defined later in this file */
2813 static void print_short_term(H264Context *h);
2814 static void print_long_term(H264Context *h);
2817 * Extract structure information about the picture described by pic_num in
2818 * the current decoding context (frame or field). Note that pic_num is
2819 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2820 * @param pic_num picture number for which to extract structure information
2821 * @param structure one of PICT_XXX describing structure of picture
2823 * @return frame number (short term) or long term index of picture
2824 * described by pic_num
/* See the doxygen comment above: splits a (possibly field-coded) pic_num
 * into its picture number (return value) and field structure (*structure).
 * In field coding the low bit selects same (1) vs opposite (0) parity. */
2826 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2827 MpegEncContext * const s = &h->s;
2829 *structure = s->picture_structure;
2832 /* opposite field */
2833 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() slice-header syntax (H.264 7.3.3.1) and
 * apply it: starting from the default lists, move the picture identified by
 * each reordering command to the current index, shifting the rest down.
 * Returns 0 on success, -1 on syntax/reference errors. */
2840 static int decode_ref_pic_list_reordering(H264Context *h){
2841 MpegEncContext * const s = &h->s;
2842 int list, index, pic_structure;
2844 print_short_term(h);
2847 for(list=0; list<h->list_count; list++){
2848 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_lX */
2850 if(get_bits1(&s->gb)){
2851 int pred= h->curr_pic_num;
2853 for(index=0; ; index++){
2854 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2855 unsigned int pic_id;
2857 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
2859 if(reordering_of_pic_nums_idc==3)
2862 if(index >= h->ref_count[list]){
2863 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2867 if(reordering_of_pic_nums_idc<3){
2868 if(reordering_of_pic_nums_idc<2){
/* idc 0/1: short-term, delta applied to a running predictor */
2869 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2872 if(abs_diff_pic_num > h->max_pic_num){
2873 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2877 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2878 else pred+= abs_diff_pic_num;
/* wrap modulo max_pic_num (a power of two) */
2879 pred &= h->max_pic_num - 1;
2881 frame_num = pic_num_extract(h, pred, &pic_structure);
2883 for(i= h->short_ref_count-1; i>=0; i--){
2884 ref = h->short_ref[i];
2885 assert(ref->reference);
2886 assert(!ref->long_ref);
2888 ref->frame_num == frame_num &&
2889 (ref->reference & pic_structure)
/* idc 2: long-term reference selected by index */
2897 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2899 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2902 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2905 ref = h->long_ref[long_idx];
2906 assert(!(ref && !ref->reference));
2907 if(ref && (ref->reference & pic_structure)){
2908 ref->pic_id= pic_id;
2909 assert(ref->long_ref);
2917 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2918 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* find any existing copy of ref, shift entries down, insert at index */
2920 for(i=index; i+1<h->ref_count[list]; i++){
2921 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2924 for(; i > index; i--){
2925 h->ref_list[list][i]= h->ref_list[list][i-1];
2927 h->ref_list[list][index]= *ref;
2929 pic_as_field(&h->ref_list[list][index], pic_structure);
2933 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* any list entries still empty get a placeholder (see FIXME) */
2939 for(list=0; list<h->list_count; list++){
2940 for(index= 0; index < h->ref_count[list]; index++){
2941 if(!h->ref_list[list][index].data[0]){
2942 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2943 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF frames, derive per-field reference entries: for each frame ref
 * at index i, store its top field at 16+2*i and bottom field at 16+2*i+1
 * (doubled strides, field POCs), and replicate the weighted-prediction
 * tables for the new field indices. */
2951 static void fill_mbaff_ref_list(H264Context *h){
2953 for(list=0; list<2; list++){ //FIXME try list_count
2954 for(i=0; i<h->ref_count[list]; i++){
2955 Picture *frame = &h->ref_list[list][i];
2956 Picture *field = &h->ref_list[list][16+2*i];
2959 field[0].linesize[j] <<= 1;
2960 field[0].reference = PICT_TOP_FIELD;
2961 field[0].poc= field[0].field_poc[0];
/* bottom field = top field shifted down one (original) line */
2962 field[1] = field[0];
2964 field[1].data[j] += frame->linesize[j];
2965 field[1].reference = PICT_BOTTOM_FIELD;
2966 field[1].poc= field[1].field_poc[1];
/* both field entries inherit the frame's explicit weights/offsets */
2968 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2969 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2971 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2972 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are indexed [ref1][ref0]; replicate along both axes */
2976 for(j=0; j<h->ref_count[1]; j++){
2977 for(i=0; i<h->ref_count[0]; i++)
2978 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2979 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2980 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() slice-header syntax (H.264 7.3.3.2): explicit
 * luma/chroma weights and offsets per reference of each list, falling back
 * to the default (1<<log2_denom, offset 0) when a per-ref flag is absent.
 * Sets use_weight/use_weight_chroma when any non-default weight appears. */
2984 static int pred_weight_table(H264Context *h){
2985 MpegEncContext * const s = &h->s;
2987 int luma_def, chroma_def;
2990 h->use_weight_chroma= 0;
2991 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2992 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* default weight == unity at the chosen denominator */
2993 luma_def = 1<<h->luma_log2_weight_denom;
2994 chroma_def = 1<<h->chroma_log2_weight_denom;
2996 for(list=0; list<2; list++){
2997 h->luma_weight_flag[list] = 0;
2998 h->chroma_weight_flag[list] = 0;
2999 for(i=0; i<h->ref_count[list]; i++){
3000 int luma_weight_flag, chroma_weight_flag;
3002 luma_weight_flag= get_bits1(&s->gb);
3003 if(luma_weight_flag){
3004 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3005 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only a weight differing from the default activates weighting */
3006 if( h->luma_weight[list][i] != luma_def
3007 || h->luma_offset[list][i] != 0) {
3009 h->luma_weight_flag[list]= 1;
3012 h->luma_weight[list][i]= luma_def;
3013 h->luma_offset[list][i]= 0;
3017 chroma_weight_flag= get_bits1(&s->gb);
3018 if(chroma_weight_flag){
3021 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3022 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3023 if( h->chroma_weight[list][i][j] != chroma_def
3024 || h->chroma_offset[list][i][j] != 0) {
3025 h->use_weight_chroma= 1;
3026 h->chroma_weight_flag[list]= 1;
3032 h->chroma_weight[list][i][j]= chroma_def;
3033 h->chroma_offset[list][i][j]= 0;
/* list1 weights only exist for B slices */
3038 if(h->slice_type_nos != FF_B_TYPE) break;
3040 h->use_weight= h->use_weight || h->use_weight_chroma;
3044 static void implicit_weight_table(H264Context *h){
3045 MpegEncContext * const s = &h->s;
3047 int cur_poc = s->current_picture_ptr->poc;
3049 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3050 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3052 h->use_weight_chroma= 0;
3057 h->use_weight_chroma= 2;
3058 h->luma_log2_weight_denom= 5;
3059 h->chroma_log2_weight_denom= 5;
3060 for (i = 0; i < 2; i++) {
3061 h->luma_weight_flag[i] = 0;
3062 h->chroma_weight_flag[i] = 0;
3065 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3066 int poc0 = h->ref_list[0][ref0].poc;
3067 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3068 int poc1 = h->ref_list[1][ref1].poc;
3069 int td = av_clip(poc1 - poc0, -128, 127);
3071 int tb = av_clip(cur_poc - poc0, -128, 127);
3072 int tx = (16384 + (FFABS(td) >> 1)) / td;
3073 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3074 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3075 h->implicit_weight[ref0][ref1] = 32;
3077 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3079 h->implicit_weight[ref0][ref1] = 32;
3085 * Mark a picture as no longer needed for reference. The refmask
3086 * argument allows unreferencing of individual fields or the whole frame.
3087 * If the picture becomes entirely unreferenced, but is being held for
3088 * display purposes, it is marked as such.
3089 * @param refmask mask of fields to unreference; the mask is bitwise
3090 * anded with the reference marking of pic
3091 * @return non-zero if pic becomes entirely unreferenced (except possibly
3092 * for display purposes) zero if one of the fields remains in
3095 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3097 if (pic->reference &= refmask) {
3100 for(i = 0; h->delayed_pic[i]; i++)
3101 if(pic == h->delayed_pic[i]){
3102 pic->reference=DELAYED_PIC_REF;
3110 * instantaneous decoder refresh.
3112 static void idr(H264Context *h){
3115 for(i=0; i<16; i++){
3116 remove_long(h, i, 0);
3118 assert(h->long_ref_count==0);
3120 for(i=0; i<h->short_ref_count; i++){
3121 unreference_pic(h, h->short_ref[i], 0);
3122 h->short_ref[i]= NULL;
3124 h->short_ref_count=0;
3125 h->prev_frame_num= 0;
3126 h->prev_frame_num_offset= 0;
3131 /* forget old pics after a seek */
3132 static void flush_dpb(AVCodecContext *avctx){
3133 H264Context *h= avctx->priv_data;
3135 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3136 if(h->delayed_pic[i])
3137 h->delayed_pic[i]->reference= 0;
3138 h->delayed_pic[i]= NULL;
3140 h->outputed_poc= INT_MIN;
3142 if(h->s.current_picture_ptr)
3143 h->s.current_picture_ptr->reference= 0;
3144 h->s.first_field= 0;
3145 h->sei_recovery_frame_cnt = -1;
3146 ff_mpeg_flush(avctx);
3150 * Find a Picture in the short term reference list by frame number.
3151 * @param frame_num frame number to search for
3152 * @param idx the index into h->short_ref where returned picture is found
3153 * undefined if no picture found.
3154 * @return pointer to the found picture, or NULL if no pic with the provided
3155 * frame number is found
3157 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3158 MpegEncContext * const s = &h->s;
3161 for(i=0; i<h->short_ref_count; i++){
3162 Picture *pic= h->short_ref[i];
3163 if(s->avctx->debug&FF_DEBUG_MMCO)
3164 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3165 if(pic->frame_num == frame_num) {
3174 * Remove a picture from the short term reference list by its index in
3175 * that list. This does no checking on the provided index; it is assumed
3176 * to be valid. Other list entries are shifted down.
3177 * @param i index into h->short_ref of picture to remove.
3179 static void remove_short_at_index(H264Context *h, int i){
3180 assert(i >= 0 && i < h->short_ref_count);
3181 h->short_ref[i]= NULL;
3182 if (--h->short_ref_count)
3183 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3188 * @return the removed picture or NULL if an error occurs
3190 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3191 MpegEncContext * const s = &h->s;
3195 if(s->avctx->debug&FF_DEBUG_MMCO)
3196 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3198 pic = find_short(h, frame_num, &i);
3200 if(unreference_pic(h, pic, ref_mask))
3201 remove_short_at_index(h, i);
3208 * Remove a picture from the long term reference list by its index in
3210 * @return the removed picture or NULL if an error occurs
3212 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3215 pic= h->long_ref[i];
3217 if(unreference_pic(h, pic, ref_mask)){
3218 assert(h->long_ref[i]->long_ref == 1);
3219 h->long_ref[i]->long_ref= 0;
3220 h->long_ref[i]= NULL;
3221 h->long_ref_count--;
3229 * print short term list
3231 static void print_short_term(H264Context *h) {
3233 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3234 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3235 for(i=0; i<h->short_ref_count; i++){
3236 Picture *pic= h->short_ref[i];
3237 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3243 * print long term list
3245 static void print_long_term(H264Context *h) {
3247 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3248 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3249 for(i = 0; i < 16; i++){
3250 Picture *pic= h->long_ref[i];
3252 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3259 * Executes the reference picture marking (memory management control operations).
3261 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3262 MpegEncContext * const s = &h->s;
3264 int current_ref_assigned=0;
3267 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3268 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3270 for(i=0; i<mmco_count; i++){
3271 int structure, frame_num;
3272 if(s->avctx->debug&FF_DEBUG_MMCO)
3273 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3275 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3276 || mmco[i].opcode == MMCO_SHORT2LONG){
3277 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3278 pic = find_short(h, frame_num, &j);
3280 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3281 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3282 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3287 switch(mmco[i].opcode){
3288 case MMCO_SHORT2UNUSED:
3289 if(s->avctx->debug&FF_DEBUG_MMCO)
3290 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3291 remove_short(h, frame_num, structure ^ PICT_FRAME);
3293 case MMCO_SHORT2LONG:
3294 if (h->long_ref[mmco[i].long_arg] != pic)
3295 remove_long(h, mmco[i].long_arg, 0);
3297 remove_short_at_index(h, j);
3298 h->long_ref[ mmco[i].long_arg ]= pic;
3299 if (h->long_ref[ mmco[i].long_arg ]){
3300 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3301 h->long_ref_count++;
3304 case MMCO_LONG2UNUSED:
3305 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3306 pic = h->long_ref[j];
3308 remove_long(h, j, structure ^ PICT_FRAME);
3309 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3310 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3313 // Comment below left from previous code as it is an interresting note.
3314 /* First field in pair is in short term list or
3315 * at a different long term index.
3316 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3317 * Report the problem and keep the pair where it is,
3318 * and mark this field valid.
3321 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3322 remove_long(h, mmco[i].long_arg, 0);
3324 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3325 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3326 h->long_ref_count++;
3329 s->current_picture_ptr->reference |= s->picture_structure;
3330 current_ref_assigned=1;
3332 case MMCO_SET_MAX_LONG:
3333 assert(mmco[i].long_arg <= 16);
3334 // just remove the long term which index is greater than new max
3335 for(j = mmco[i].long_arg; j<16; j++){
3336 remove_long(h, j, 0);
3340 while(h->short_ref_count){
3341 remove_short(h, h->short_ref[0]->frame_num, 0);
3343 for(j = 0; j < 16; j++) {
3344 remove_long(h, j, 0);
3346 s->current_picture_ptr->poc=
3347 s->current_picture_ptr->field_poc[0]=
3348 s->current_picture_ptr->field_poc[1]=
3352 s->current_picture_ptr->frame_num= 0;
3358 if (!current_ref_assigned) {
3359 /* Second field of complementary field pair; the first field of
3360 * which is already referenced. If short referenced, it
3361 * should be first entry in short_ref. If not, it must exist
3362 * in long_ref; trying to put it on the short list here is an
3363 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3365 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3366 /* Just mark the second field valid */
3367 s->current_picture_ptr->reference = PICT_FRAME;
3368 } else if (s->current_picture_ptr->long_ref) {
3369 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3370 "assignment for second field "
3371 "in complementary field pair "
3372 "(first field is long term)\n");
3374 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3376 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3379 if(h->short_ref_count)
3380 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3382 h->short_ref[0]= s->current_picture_ptr;
3383 h->short_ref_count++;
3384 s->current_picture_ptr->reference |= s->picture_structure;
3388 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3390 /* We have too many reference frames, probably due to corrupted
3391 * stream. Need to discard one frame. Prevents overrun of the
3392 * short_ref and long_ref buffers.
3394 av_log(h->s.avctx, AV_LOG_ERROR,
3395 "number of reference frames exceeds max (probably "
3396 "corrupt input), discarding one\n");
3398 if (h->long_ref_count && !h->short_ref_count) {
3399 for (i = 0; i < 16; ++i)
3404 remove_long(h, i, 0);
3406 pic = h->short_ref[h->short_ref_count - 1];
3407 remove_short(h, pic->frame_num, 0);
3411 print_short_term(h);
3416 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3417 MpegEncContext * const s = &h->s;
3421 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3422 s->broken_link= get_bits1(gb) -1;
3424 h->mmco[0].opcode= MMCO_LONG;
3425 h->mmco[0].long_arg= 0;
3429 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3430 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3431 MMCOOpcode opcode= get_ue_golomb_31(gb);
3433 h->mmco[i].opcode= opcode;
3434 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3435 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3436 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3437 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3441 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3442 unsigned int long_arg= get_ue_golomb_31(gb);
3443 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3444 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3447 h->mmco[i].long_arg= long_arg;
3450 if(opcode > (unsigned)MMCO_LONG){
3451 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3454 if(opcode == MMCO_END)
3459 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3461 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3462 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3463 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3464 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3466 if (FIELD_PICTURE) {
3467 h->mmco[0].short_pic_num *= 2;
3468 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3469 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3479 static int init_poc(H264Context *h){
3480 MpegEncContext * const s = &h->s;
3481 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3483 Picture *cur = s->current_picture_ptr;
3485 h->frame_num_offset= h->prev_frame_num_offset;
3486 if(h->frame_num < h->prev_frame_num)
3487 h->frame_num_offset += max_frame_num;
3489 if(h->sps.poc_type==0){
3490 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3492 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3493 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3494 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3495 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3497 h->poc_msb = h->prev_poc_msb;
3498 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3500 field_poc[1] = h->poc_msb + h->poc_lsb;
3501 if(s->picture_structure == PICT_FRAME)
3502 field_poc[1] += h->delta_poc_bottom;
3503 }else if(h->sps.poc_type==1){
3504 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3507 if(h->sps.poc_cycle_length != 0)
3508 abs_frame_num = h->frame_num_offset + h->frame_num;
3512 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3515 expected_delta_per_poc_cycle = 0;
3516 for(i=0; i < h->sps.poc_cycle_length; i++)
3517 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3519 if(abs_frame_num > 0){
3520 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3521 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3523 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3524 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3525 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3529 if(h->nal_ref_idc == 0)
3530 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3532 field_poc[0] = expectedpoc + h->delta_poc[0];
3533 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3535 if(s->picture_structure == PICT_FRAME)
3536 field_poc[1] += h->delta_poc[1];
3538 int poc= 2*(h->frame_num_offset + h->frame_num);
3547 if(s->picture_structure != PICT_BOTTOM_FIELD)
3548 s->current_picture_ptr->field_poc[0]= field_poc[0];
3549 if(s->picture_structure != PICT_TOP_FIELD)
3550 s->current_picture_ptr->field_poc[1]= field_poc[1];
3551 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3558 * initialize scan tables
3560 static void init_scan_tables(H264Context *h){
3561 MpegEncContext * const s = &h->s;
3563 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3564 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3565 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3567 for(i=0; i<16; i++){
3568 #define T(x) (x>>2) | ((x<<2) & 0xF)
3569 h->zigzag_scan[i] = T(zigzag_scan[i]);
3570 h-> field_scan[i] = T( field_scan[i]);
3574 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3575 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3576 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3577 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3578 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3580 for(i=0; i<64; i++){
3581 #define T(x) (x>>3) | ((x&7)<<3)
3582 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3583 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3584 h->field_scan8x8[i] = T(field_scan8x8[i]);
3585 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3589 if(h->sps.transform_bypass){ //FIXME same ugly
3590 h->zigzag_scan_q0 = zigzag_scan;
3591 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3592 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3593 h->field_scan_q0 = field_scan;
3594 h->field_scan8x8_q0 = field_scan8x8;
3595 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3597 h->zigzag_scan_q0 = h->zigzag_scan;
3598 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3599 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3600 h->field_scan_q0 = h->field_scan;
3601 h->field_scan8x8_q0 = h->field_scan8x8;
3602 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3607 * Replicates H264 "master" context to thread contexts.
3609 static void clone_slice(H264Context *dst, H264Context *src)
3611 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3612 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3613 dst->s.current_picture = src->s.current_picture;
3614 dst->s.linesize = src->s.linesize;
3615 dst->s.uvlinesize = src->s.uvlinesize;
3616 dst->s.first_field = src->s.first_field;
3618 dst->prev_poc_msb = src->prev_poc_msb;
3619 dst->prev_poc_lsb = src->prev_poc_lsb;
3620 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3621 dst->prev_frame_num = src->prev_frame_num;
3622 dst->short_ref_count = src->short_ref_count;
3624 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3625 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3626 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3627 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3629 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3630 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3634 * decodes a slice header.
3635 * This will also call MPV_common_init() and frame_start() as needed.
3637 * @param h h264context
3638 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3640 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3642 static int decode_slice_header(H264Context *h, H264Context *h0){
3643 MpegEncContext * const s = &h->s;
3644 MpegEncContext * const s0 = &h0->s;
3645 unsigned int first_mb_in_slice;
3646 unsigned int pps_id;
3647 int num_ref_idx_active_override_flag;
3648 unsigned int slice_type, tmp, i, j;
3649 int default_ref_list_done = 0;
3650 int last_pic_structure;
3652 s->dropable= h->nal_ref_idc == 0;
3654 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3655 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3656 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3658 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3659 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3662 first_mb_in_slice= get_ue_golomb(&s->gb);
3664 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3665 h0->current_slice = 0;
3666 if (!s0->first_field)
3667 s->current_picture_ptr= NULL;
3670 slice_type= get_ue_golomb_31(&s->gb);
3672 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3677 h->slice_type_fixed=1;
3679 h->slice_type_fixed=0;
3681 slice_type= golomb_to_pict_type[ slice_type ];
3682 if (slice_type == FF_I_TYPE
3683 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3684 default_ref_list_done = 1;
3686 h->slice_type= slice_type;
3687 h->slice_type_nos= slice_type & 3;
3689 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3690 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3691 av_log(h->s.avctx, AV_LOG_ERROR,
3692 "B picture before any references, skipping\n");
3696 pps_id= get_ue_golomb(&s->gb);
3697 if(pps_id>=MAX_PPS_COUNT){
3698 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3701 if(!h0->pps_buffers[pps_id]) {
3702 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3705 h->pps= *h0->pps_buffers[pps_id];
3707 if(!h0->sps_buffers[h->pps.sps_id]) {
3708 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3711 h->sps = *h0->sps_buffers[h->pps.sps_id];
3713 if(h == h0 && h->dequant_coeff_pps != pps_id){
3714 h->dequant_coeff_pps = pps_id;
3715 init_dequant_tables(h);
3718 s->mb_width= h->sps.mb_width;
3719 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3721 h->b_stride= s->mb_width*4;
3722 h->b8_stride= s->mb_width*2;
3724 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3725 if(h->sps.frame_mbs_only_flag)
3726 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3728 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3730 if (s->context_initialized
3731 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3733 return -1; // width / height changed during parallelized decoding
3735 flush_dpb(s->avctx);
3738 if (!s->context_initialized) {
3740 return -1; // we cant (re-)initialize context during parallel decoding
3741 if (MPV_common_init(s) < 0)
3745 init_scan_tables(h);
3748 for(i = 1; i < s->avctx->thread_count; i++) {
3750 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3751 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3752 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3755 init_scan_tables(c);
3759 for(i = 0; i < s->avctx->thread_count; i++)
3760 if(context_init(h->thread_context[i]) < 0)
3763 s->avctx->width = s->width;
3764 s->avctx->height = s->height;
3765 s->avctx->sample_aspect_ratio= h->sps.sar;
3766 if(!s->avctx->sample_aspect_ratio.den)
3767 s->avctx->sample_aspect_ratio.den = 1;
3769 if(h->sps.timing_info_present_flag){
3770 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3771 if(h->x264_build > 0 && h->x264_build < 44)
3772 s->avctx->time_base.den *= 2;
3773 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3774 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3778 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3781 h->mb_aff_frame = 0;
3782 last_pic_structure = s0->picture_structure;
3783 if(h->sps.frame_mbs_only_flag){
3784 s->picture_structure= PICT_FRAME;
3786 if(get_bits1(&s->gb)) { //field_pic_flag
3787 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3789 s->picture_structure= PICT_FRAME;
3790 h->mb_aff_frame = h->sps.mb_aff;
3793 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3795 if(h0->current_slice == 0){
3796 while(h->frame_num != h->prev_frame_num &&
3797 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3798 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3800 h->prev_frame_num++;
3801 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3802 s->current_picture_ptr->frame_num= h->prev_frame_num;
3803 execute_ref_pic_marking(h, NULL, 0);
3806 /* See if we have a decoded first field looking for a pair... */
3807 if (s0->first_field) {
3808 assert(s0->current_picture_ptr);
3809 assert(s0->current_picture_ptr->data[0]);
3810 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3812 /* figure out if we have a complementary field pair */
3813 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3815 * Previous field is unmatched. Don't display it, but let it
3816 * remain for reference if marked as such.
3818 s0->current_picture_ptr = NULL;
3819 s0->first_field = FIELD_PICTURE;
3822 if (h->nal_ref_idc &&
3823 s0->current_picture_ptr->reference &&
3824 s0->current_picture_ptr->frame_num != h->frame_num) {
3826 * This and previous field were reference, but had
3827 * different frame_nums. Consider this field first in
3828 * pair. Throw away previous field except for reference
3831 s0->first_field = 1;
3832 s0->current_picture_ptr = NULL;
3835 /* Second field in complementary pair */
3836 s0->first_field = 0;
3841 /* Frame or first field in a potentially complementary pair */
3842 assert(!s0->current_picture_ptr);
3843 s0->first_field = FIELD_PICTURE;
3846 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3847 s0->first_field = 0;
3854 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3856 assert(s->mb_num == s->mb_width * s->mb_height);
3857 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3858 first_mb_in_slice >= s->mb_num){
3859 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3862 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3863 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3864 if (s->picture_structure == PICT_BOTTOM_FIELD)
3865 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3866 assert(s->mb_y < s->mb_height);
3868 if(s->picture_structure==PICT_FRAME){
3869 h->curr_pic_num= h->frame_num;
3870 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3872 h->curr_pic_num= 2*h->frame_num + 1;
3873 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3876 if(h->nal_unit_type == NAL_IDR_SLICE){
3877 get_ue_golomb(&s->gb); /* idr_pic_id */
3880 if(h->sps.poc_type==0){
3881 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3883 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3884 h->delta_poc_bottom= get_se_golomb(&s->gb);
3888 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3889 h->delta_poc[0]= get_se_golomb(&s->gb);
3891 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3892 h->delta_poc[1]= get_se_golomb(&s->gb);
3897 if(h->pps.redundant_pic_cnt_present){
3898 h->redundant_pic_count= get_ue_golomb(&s->gb);
3901 //set defaults, might be overridden a few lines later
3902 h->ref_count[0]= h->pps.ref_count[0];
3903 h->ref_count[1]= h->pps.ref_count[1];
3905 if(h->slice_type_nos != FF_I_TYPE){
3906 if(h->slice_type_nos == FF_B_TYPE){
3907 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3909 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3911 if(num_ref_idx_active_override_flag){
3912 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3913 if(h->slice_type_nos==FF_B_TYPE)
3914 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3916 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3917 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3918 h->ref_count[0]= h->ref_count[1]= 1;
3922 if(h->slice_type_nos == FF_B_TYPE)
3929 if(!default_ref_list_done){
3930 fill_default_ref_list(h);
3933 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3936 if(h->slice_type_nos!=FF_I_TYPE){
3937 s->last_picture_ptr= &h->ref_list[0][0];
3938 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3940 if(h->slice_type_nos==FF_B_TYPE){
3941 s->next_picture_ptr= &h->ref_list[1][0];
3942 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3945 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3946 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3947 pred_weight_table(h);
3948 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3949 implicit_weight_table(h);
3952 for (i = 0; i < 2; i++) {
3953 h->luma_weight_flag[i] = 0;
3954 h->chroma_weight_flag[i] = 0;
3959 decode_ref_pic_marking(h0, &s->gb);
3962 fill_mbaff_ref_list(h);
3964 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3965 direct_dist_scale_factor(h);
3966 direct_ref_list_init(h);
3968 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3969 tmp = get_ue_golomb_31(&s->gb);
3971 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3974 h->cabac_init_idc= tmp;
3977 h->last_qscale_diff = 0;
3978 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3980 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3984 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3985 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3986 //FIXME qscale / qp ... stuff
3987 if(h->slice_type == FF_SP_TYPE){
3988 get_bits1(&s->gb); /* sp_for_switch_flag */
3990 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3991 get_se_golomb(&s->gb); /* slice_qs_delta */
3994 h->deblocking_filter = 1;
3995 h->slice_alpha_c0_offset = 0;
3996 h->slice_beta_offset = 0;
3997 if( h->pps.deblocking_filter_parameters_present ) {
3998 tmp= get_ue_golomb_31(&s->gb);
4000 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4003 h->deblocking_filter= tmp;
4004 if(h->deblocking_filter < 2)
4005 h->deblocking_filter^= 1; // 1<->0
4007 if( h->deblocking_filter ) {
4008 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4009 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4013 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4014 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4015 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4016 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4017 h->deblocking_filter= 0;
4019 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4020 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4021 /* Cheat slightly for speed:
4022 Do not bother to deblock across slices. */
4023 h->deblocking_filter = 2;
4025 h0->max_contexts = 1;
4026 if(!h0->single_decode_warning) {
4027 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4028 h0->single_decode_warning = 1;
4031 return 1; // deblocking switched inside frame
4036 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4037 slice_group_change_cycle= get_bits(&s->gb, ?);
4040 h0->last_slice_type = slice_type;
4041 h->slice_num = ++h0->current_slice;
4042 if(h->slice_num >= MAX_SLICES){
4043 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4047 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4051 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4052 +(h->ref_list[j][i].reference&3);
4055 for(i=16; i<48; i++)
4056 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4057 +(h->ref_list[j][i].reference&3);
4060 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4061 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4063 s->avctx->refs= h->sps.ref_frame_count;
4065 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4066 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4068 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4070 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4071 pps_id, h->frame_num,
4072 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4073 h->ref_count[0], h->ref_count[1],
4075 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4077 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4078 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4088 static inline int get_level_prefix(GetBitContext *gb){
4092 OPEN_READER(re, gb);
4093 UPDATE_CACHE(re, gb);
4094 buf=GET_CACHE(re, gb);
4096 log= 32 - av_log2(buf);
4098 print_bin(buf>>(32-log), log);
4099 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4102 LAST_SKIP_BITS(re, gb, log);
4103 CLOSE_READER(re, gb);
4108 static inline int get_dct8x8_allowed(H264Context *h){
4109 if(h->sps.direct_8x8_inference_flag)
4110 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4112 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4116 * decodes a residual block.
4117 * @param n block index
4118 * @param scantable scantable
4119 * @param max_coeff number of coefficients in the block
4120 * @return <0 if an error occurred
/* CAVLC residual decoding (H.264 clause 9.2): parses coeff_token,
 * trailing-one signs, level codes, total_zeros and run_before, then
 * scatters the levels into the block via the scantable.
 * NOTE(review): lines in this chunk are elided (residual line-number
 * prefixes, gaps); code tokens are kept byte-identical, comments only. */
4122 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4123 MpegEncContext * const s = &h->s;
4124 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4126 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4128 //FIXME put trailing_onex into the context
/* coeff_token = (total_coeff<<2) | trailing_ones; the VLC table used for
 * luma depends on the predicted non-zero count of the neighbours */
4130 if(n == CHROMA_DC_BLOCK_INDEX){
4131 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4132 total_coeff= coeff_token>>2;
4134 if(n == LUMA_DC_BLOCK_INDEX){
4135 total_coeff= pred_non_zero_count(h, 0);
4136 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4137 total_coeff= coeff_token>>2;
4139 total_coeff= pred_non_zero_count(h, n);
4140 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4141 total_coeff= coeff_token>>2;
4142 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4146 //FIXME set last_non_zero?
/* sanity check against corrupted streams before filling the level array */
4150 if(total_coeff > (unsigned)max_coeff) {
4151 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* trailing ones: up to three +/-1 coefficients coded by sign bits only */
4155 trailing_ones= coeff_token&3;
4156 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4157 assert(total_coeff<=16);
4159 i = show_bits(gb, 3);
4160 skip_bits(gb, trailing_ones);
4161 level[0] = 1-((i&4)>>1);
4162 level[1] = 1-((i&2) );
4163 level[2] = 1-((i&1)<<1);
/* remaining levels: table-accelerated prefix/suffix decode; level_code
 * values >= 100 from cavlc_level_tab signal an escape to the slow path */
4165 if(trailing_ones<total_coeff) {
4167 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4168 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4169 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4171 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4172 if(level_code >= 100){
4173 prefix= level_code - 100;
4174 if(prefix == LEVEL_TAB_BITS)
4175 prefix += get_level_prefix(gb);
4177 //first coefficient has suffix_length equal to 0 or 1
4178 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4180 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4182 level_code= (prefix<<suffix_length); //part
4183 }else if(prefix==14){
4185 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4187 level_code= prefix + get_bits(gb, 4); //part
4189 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4190 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4192 level_code += (1<<(prefix-3))-4096;
4195 if(trailing_ones < 3) level_code += 2;
/* map the unsigned level_code to a signed level (zig-zag sign mapping) */
4198 mask= -(level_code&1);
4199 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4201 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4204 if(level_code + 3U > 6U)
4206 level[trailing_ones]= level_code;
4209 //remaining coefficients have suffix_length > 0
4210 for(i=trailing_ones+1;i<total_coeff;i++) {
4211 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4212 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4213 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4215 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4216 if(level_code >= 100){
4217 prefix= level_code - 100;
4218 if(prefix == LEVEL_TAB_BITS){
4219 prefix += get_level_prefix(gb);
4222 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4224 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4226 level_code += (1<<(prefix-3))-4096;
4228 mask= -(level_code&1);
4229 level_code= (((2+level_code)>>1) ^ mask) - mask;
4231 level[i]= level_code;
/* adapt suffix_length: grow it once the magnitude exceeds the threshold */
4233 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
/* total_zeros: how many zero coefficients precede the last non-zero one */
4238 if(total_coeff == max_coeff)
4241 if(n == CHROMA_DC_BLOCK_INDEX)
4242 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4244 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* scatter levels into the block, highest frequency first; the qmul==NULL
 * path stores raw levels (DC blocks are dequantized later), the other
 * path dequantizes in place */
4247 coeff_num = zeros_left + total_coeff - 1;
4248 j = scantable[coeff_num];
4250 block[j] = level[0];
4251 for(i=1;i<total_coeff;i++) {
4254 else if(zeros_left < 7){
4255 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4257 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4259 zeros_left -= run_before;
4260 coeff_num -= 1 + run_before;
4261 j= scantable[ coeff_num ];
4266 block[j] = (level[0] * qmul[j] + 32)>>6;
4267 for(i=1;i<total_coeff;i++) {
4270 else if(zeros_left < 7){
4271 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4273 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4275 zeros_left -= run_before;
4276 coeff_num -= 1 + run_before;
4277 j= scantable[ coeff_num ];
4279 block[j]= (level[i] * qmul[j] + 32)>>6;
/* a negative zeros_left means run_before values overran the stream */
4284 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4291 static void predict_field_decoding_flag(H264Context *h){
4292 MpegEncContext * const s = &h->s;
4293 const int mb_xy= h->mb_xy;
4294 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4295 ? s->current_picture.mb_type[mb_xy-1]
4296 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4297 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4299 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4303 * decodes a P_SKIP or B_SKIP macroblock
4305 static void decode_mb_skip(H264Context *h){
4306 MpegEncContext * const s = &h->s;
4307 const int mb_xy= h->mb_xy;
4310 memset(h->non_zero_count[mb_xy], 0, 16);
4311 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4314 mb_type|= MB_TYPE_INTERLACED;
4316 if( h->slice_type_nos == FF_B_TYPE )
4318 // just for fill_caches. pred_direct_motion will set the real mb_type
4319 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4321 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4322 pred_direct_motion(h, &mb_type);
4323 mb_type|= MB_TYPE_SKIP;
4328 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4330 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4331 pred_pskip_motion(h, &mx, &my);
4332 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4333 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4336 write_back_motion(h, mb_type);
4337 s->current_picture.mb_type[mb_xy]= mb_type;
4338 s->current_picture.qscale_table[mb_xy]= s->qscale;
4339 h->slice_table[ mb_xy ]= h->slice_num;
4340 h->prev_mb_skipped= 1;
4344 * decodes a macroblock
4345 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* CAVLC macroblock-layer decoding: skip runs, mb_type, intra prediction
 * modes / inter motion vectors, CBP, delta-QP and all residual blocks.
 * NOTE(review): lines in this chunk are elided (residual line-number
 * prefixes, gaps); code tokens are kept byte-identical, comments only. */
4347 static int decode_mb_cavlc(H264Context *h){
4348 MpegEncContext * const s = &h->s;
4350 int partition_count;
4351 unsigned int mb_type, cbp;
4352 int dct8x8_allowed= h->pps.transform_8x8_mode;
4354 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4356 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4357 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* non-I slices carry mb_skip_run: a run of skipped macroblocks */
4359 if(h->slice_type_nos != FF_I_TYPE){
4360 if(s->mb_skip_run==-1)
4361 s->mb_skip_run= get_ue_golomb(&s->gb);
4363 if (s->mb_skip_run--) {
/* in MBAFF the field flag of a skipped top MB must be read or predicted */
4364 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4365 if(s->mb_skip_run==0)
4366 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4368 predict_field_decoding_flag(h);
4375 if( (s->mb_y&1) == 0 )
4376 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4379 h->prev_mb_skipped= 0;
/* mb_type: mapped through per-slice-type tables; out-of-range values fall
 * through to the intra tables (B/P slices share the intra mb_type space) */
4381 mb_type= get_ue_golomb(&s->gb);
4382 if(h->slice_type_nos == FF_B_TYPE){
4384 partition_count= b_mb_type_info[mb_type].partition_count;
4385 mb_type= b_mb_type_info[mb_type].type;
4388 goto decode_intra_mb;
4390 }else if(h->slice_type_nos == FF_P_TYPE){
4392 partition_count= p_mb_type_info[mb_type].partition_count;
4393 mb_type= p_mb_type_info[mb_type].type;
4396 goto decode_intra_mb;
4399 assert(h->slice_type_nos == FF_I_TYPE);
4400 if(h->slice_type == FF_SI_TYPE && mb_type)
4404 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4408 cbp= i_mb_type_info[mb_type].cbp;
4409 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4410 mb_type= i_mb_type_info[mb_type].type;
4414 mb_type |= MB_TYPE_INTERLACED;
4416 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: raw byte-aligned samples, copied straight into h->mb */
4418 if(IS_INTRA_PCM(mb_type)){
4421 // We assume these blocks are very rare so we do not optimize it.
4422 align_get_bits(&s->gb);
4424 // The pixels are stored in the same order as levels in h->mb array.
4425 for(x=0; x < (CHROMA ? 384 : 256); x++){
4426 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4429 // In deblocking, the quantizer is 0
4430 s->current_picture.qscale_table[mb_xy]= 0;
4431 // All coeffs are present
4432 memset(h->non_zero_count[mb_xy], 16, 16);
4434 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF fields see twice the references; doubled here, halved at the end */
4439 h->ref_count[0] <<= 1;
4440 h->ref_count[1] <<= 1;
4443 fill_caches(h, mb_type, 0);
/* intra prediction mode parsing (4x4 per-block or 16x16 per-MB) */
4446 if(IS_INTRA(mb_type)){
4448 // init_top_left_availability(h);
4449 if(IS_INTRA4x4(mb_type)){
4452 if(dct8x8_allowed && get_bits1(&s->gb)){
4453 mb_type |= MB_TYPE_8x8DCT;
4457 // fill_intra4x4_pred_table(h);
4458 for(i=0; i<16; i+=di){
4459 int mode= pred_intra_mode(h, i);
4461 if(!get_bits1(&s->gb)){
4462 const int rem_mode= get_bits(&s->gb, 3);
4463 mode = rem_mode + (rem_mode >= mode);
4467 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4469 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4471 write_back_intra_pred_mode(h);
4472 if( check_intra4x4_pred_mode(h) < 0)
4475 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4476 if(h->intra16x16_pred_mode < 0)
4480 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4483 h->chroma_pred_mode= pred_mode;
/* 8x8 partitions: sub_mb_type, references and MVs per sub-partition */
4485 }else if(partition_count==4){
4486 int i, j, sub_partition_count[4], list, ref[2][4];
4488 if(h->slice_type_nos == FF_B_TYPE){
4490 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4491 if(h->sub_mb_type[i] >=13){
4492 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4495 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4496 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4498 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4499 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4500 pred_direct_motion(h, &mb_type);
4501 h->ref_cache[0][scan8[4]] =
4502 h->ref_cache[1][scan8[4]] =
4503 h->ref_cache[0][scan8[12]] =
4504 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4507 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4509 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4510 if(h->sub_mb_type[i] >=4){
4511 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4514 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4515 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each 8x8 partition and each list */
4519 for(list=0; list<h->list_count; list++){
4520 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4522 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4523 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4527 }else if(ref_count == 2){
4528 tmp= get_bits1(&s->gb)^1;
4530 tmp= get_ue_golomb_31(&s->gb);
4532 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4545 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences per sub-partition */
4547 for(list=0; list<h->list_count; list++){
4549 if(IS_DIRECT(h->sub_mb_type[i])) {
4550 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4553 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4554 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4556 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4557 const int sub_mb_type= h->sub_mb_type[i];
4558 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4559 for(j=0; j<sub_partition_count[i]; j++){
4561 const int index= 4*i + block_width*j;
4562 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4563 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4564 mx += get_se_golomb(&s->gb);
4565 my += get_se_golomb(&s->gb);
4566 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4568 if(IS_SUB_8X8(sub_mb_type)){
4570 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4572 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4573 }else if(IS_SUB_8X4(sub_mb_type)){
4574 mv_cache[ 1 ][0]= mx;
4575 mv_cache[ 1 ][1]= my;
4576 }else if(IS_SUB_4X8(sub_mb_type)){
4577 mv_cache[ 8 ][0]= mx;
4578 mv_cache[ 8 ][1]= my;
4580 mv_cache[ 0 ][0]= mx;
4581 mv_cache[ 0 ][1]= my;
4584 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4590 }else if(IS_DIRECT(mb_type)){
4591 pred_direct_motion(h, &mb_type);
4592 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16 / 16x8 / 8x16 partitions: refs then MVDs, per list */
4594 int list, mx, my, i;
4595 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4596 if(IS_16X16(mb_type)){
4597 for(list=0; list<h->list_count; list++){
4599 if(IS_DIR(mb_type, 0, list)){
4600 if(h->ref_count[list]==1){
4602 }else if(h->ref_count[list]==2){
4603 val= get_bits1(&s->gb)^1;
4605 val= get_ue_golomb_31(&s->gb);
4606 if(val >= h->ref_count[list]){
4607 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4612 val= LIST_NOT_USED&0xFF;
4613 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4615 for(list=0; list<h->list_count; list++){
4617 if(IS_DIR(mb_type, 0, list)){
4618 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4619 mx += get_se_golomb(&s->gb);
4620 my += get_se_golomb(&s->gb);
4621 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4623 val= pack16to32(mx,my);
4626 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4629 else if(IS_16X8(mb_type)){
4630 for(list=0; list<h->list_count; list++){
4633 if(IS_DIR(mb_type, i, list)){
4634 if(h->ref_count[list] == 1){
4636 }else if(h->ref_count[list] == 2){
4637 val= get_bits1(&s->gb)^1;
4639 val= get_ue_golomb_31(&s->gb);
4640 if(val >= h->ref_count[list]){
4641 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4646 val= LIST_NOT_USED&0xFF;
4647 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4650 for(list=0; list<h->list_count; list++){
4653 if(IS_DIR(mb_type, i, list)){
4654 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4655 mx += get_se_golomb(&s->gb);
4656 my += get_se_golomb(&s->gb);
4657 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4659 val= pack16to32(mx,my);
4662 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4666 assert(IS_8X16(mb_type));
4667 for(list=0; list<h->list_count; list++){
4670 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4671 if(h->ref_count[list]==1){
4673 }else if(h->ref_count[list]==2){
4674 val= get_bits1(&s->gb)^1;
4676 val= get_ue_golomb_31(&s->gb);
4677 if(val >= h->ref_count[list]){
4678 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4683 val= LIST_NOT_USED&0xFF;
4684 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4687 for(list=0; list<h->list_count; list++){
4690 if(IS_DIR(mb_type, i, list)){
4691 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4692 mx += get_se_golomb(&s->gb);
4693 my += get_se_golomb(&s->gb);
4694 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4696 val= pack16to32(mx,my);
4699 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4705 if(IS_INTER(mb_type))
4706 write_back_motion(h, mb_type);
/* coded block pattern: Exp-Golomb code mapped through intra/inter tables */
4708 if(!IS_INTRA16x16(mb_type)){
4709 cbp= get_ue_golomb(&s->gb);
4711 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4716 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4717 else cbp= golomb_to_inter_cbp [cbp];
4719 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4720 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with coded luma */
4725 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4726 if(get_bits1(&s->gb)){
4727 mb_type |= MB_TYPE_8x8DCT;
4728 h->cbp_table[mb_xy]= cbp;
4731 s->current_picture.mb_type[mb_xy]= mb_type;
/* residuals: mb_qp_delta then luma (DC/AC or 8x8) and chroma blocks */
4733 if(cbp || IS_INTRA16x16(mb_type)){
4734 int i8x8, i4x4, chroma_idx;
4736 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4737 const uint8_t *scan, *scan8x8, *dc_scan;
4739 // fill_non_zero_count_cache(h);
4741 if(IS_INTERLACED(mb_type)){
4742 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4743 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4744 dc_scan= luma_dc_field_scan;
4746 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4747 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4748 dc_scan= luma_dc_zigzag_scan;
4751 dquant= get_se_golomb(&s->gb);
4753 if( dquant > 25 || dquant < -26 ){
4754 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4758 s->qscale += dquant;
4759 if(((unsigned)s->qscale) > 51){
4760 if(s->qscale<0) s->qscale+= 52;
4761 else s->qscale-= 52;
4764 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4765 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4766 if(IS_INTRA16x16(mb_type)){
4767 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4768 return -1; //FIXME continue if partitioned and other return -1 too
4771 assert((cbp&15) == 0 || (cbp&15) == 15);
4774 for(i8x8=0; i8x8<4; i8x8++){
4775 for(i4x4=0; i4x4<4; i4x4++){
4776 const int index= i4x4 + 4*i8x8;
4777 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4783 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4786 for(i8x8=0; i8x8<4; i8x8++){
4787 if(cbp & (1<<i8x8)){
4788 if(IS_8x8DCT(mb_type)){
4789 DCTELEM *buf = &h->mb[64*i8x8];
4791 for(i4x4=0; i4x4<4; i4x4++){
4792 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4793 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4796 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4797 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4799 for(i4x4=0; i4x4<4; i4x4++){
4800 const int index= i4x4 + 4*i8x8;
4802 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4808 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4809 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4815 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4816 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4822 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4823 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4824 for(i4x4=0; i4x4<4; i4x4++){
4825 const int index= 16 + 4*chroma_idx + i4x4;
4826 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4832 uint8_t * const nnz= &h->non_zero_count_cache[0];
4833 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4834 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4837 uint8_t * const nnz= &h->non_zero_count_cache[0];
4838 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4839 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4840 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4842 s->current_picture.qscale_table[mb_xy]= s->qscale;
4843 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling performed above */
4846 h->ref_count[0] >>= 1;
4847 h->ref_count[1] >>= 1;
4853 static int decode_cabac_field_decoding_flag(H264Context *h) {
4854 MpegEncContext * const s = &h->s;
4855 const int mb_x = s->mb_x;
4856 const int mb_y = s->mb_y & ~1;
4857 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4858 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4860 unsigned int ctx = 0;
4862 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4865 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4869 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4872 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4873 uint8_t *state= &h->cabac_state[ctx_base];
4877 MpegEncContext * const s = &h->s;
4878 const int mba_xy = h->left_mb_xy[0];
4879 const int mbb_xy = h->top_mb_xy;
4881 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4883 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4885 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4886 return 0; /* I4x4 */
4889 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4890 return 0; /* I4x4 */
4893 if( get_cabac_terminate( &h->cabac ) )
4894 return 25; /* PCM */
4896 mb_type = 1; /* I16x16 */
4897 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4898 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4899 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4900 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4901 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4905 static int decode_cabac_mb_type_b( H264Context *h ) {
4906 MpegEncContext * const s = &h->s;
4908 const int mba_xy = h->left_mb_xy[0];
4909 const int mbb_xy = h->top_mb_xy;
4912 assert(h->slice_type_nos == FF_B_TYPE);
4914 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4916 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4919 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4920 return 0; /* B_Direct_16x16 */
4922 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4923 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4926 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4927 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4928 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4929 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4931 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4932 else if( bits == 13 ) {
4933 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4934 } else if( bits == 14 )
4935 return 11; /* B_L1_L0_8x16 */
4936 else if( bits == 15 )
4937 return 22; /* B_8x8 */
4939 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4940 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4943 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4944 MpegEncContext * const s = &h->s;
4948 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4949 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4952 && h->slice_table[mba_xy] == h->slice_num
4953 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4954 mba_xy += s->mb_stride;
4956 mbb_xy = mb_xy - s->mb_stride;
4958 && h->slice_table[mbb_xy] == h->slice_num
4959 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4960 mbb_xy -= s->mb_stride;
4962 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4964 int mb_xy = h->mb_xy;
4966 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4969 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4971 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4974 if( h->slice_type_nos == FF_B_TYPE )
4976 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4979 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4982 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4985 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4986 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4987 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4989 if( mode >= pred_mode )
4995 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4996 const int mba_xy = h->left_mb_xy[0];
4997 const int mbb_xy = h->top_mb_xy;
5001 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5002 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5005 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5008 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5011 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5013 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5019 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5020 int cbp_b, cbp_a, ctx, cbp = 0;
5022 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5023 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5025 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5026 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5027 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5028 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5029 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5030 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5031 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5032 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5035 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5039 cbp_a = (h->left_cbp>>4)&0x03;
5040 cbp_b = (h-> top_cbp>>4)&0x03;
5043 if( cbp_a > 0 ) ctx++;
5044 if( cbp_b > 0 ) ctx += 2;
5045 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5049 if( cbp_a == 2 ) ctx++;
5050 if( cbp_b == 2 ) ctx += 2;
5051 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5053 static int decode_cabac_mb_dqp( H264Context *h) {
5054 int ctx= h->last_qscale_diff != 0;
5057 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5060 if(val > 102) //prevent infinite loop
5065 return (val + 1)>>1 ;
5067 return -((val + 1)>>1);
5069 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5070 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5072 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5074 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5078 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5080 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5081 return 0; /* B_Direct_8x8 */
5082 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5083 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5085 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5086 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5087 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5090 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5091 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5095 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5096 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5099 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5100 int refa = h->ref_cache[list][scan8[n] - 1];
5101 int refb = h->ref_cache[list][scan8[n] - 8];
5105 if( h->slice_type_nos == FF_B_TYPE) {
5106 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5108 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5117 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5120 if(ref >= 32 /*h->ref_list[list]*/){
5127 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5128 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5129 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5130 int ctxbase = (l == 0) ? 40 : 47;
5132 int ctx = (amvd>2) + (amvd>32);
5134 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5139 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5147 while( get_cabac_bypass( &h->cabac ) ) {
5151 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5156 if( get_cabac_bypass( &h->cabac ) )
5160 return get_cabac_bypass_sign( &h->cabac, -mvd );
5163 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5169 nza = h->left_cbp&0x100;
5170 nzb = h-> top_cbp&0x100;
5172 nza = (h->left_cbp>>(6+idx))&0x01;
5173 nzb = (h-> top_cbp>>(6+idx))&0x01;
5176 assert(cat == 1 || cat == 2 || cat == 4);
5177 nza = h->non_zero_count_cache[scan8[idx] - 1];
5178 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5187 return ctx + 4 * cat;
5190 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5191 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5192 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5193 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5194 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5197 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* Decode one residual coefficient block with CABAC and store the
 * (optionally dequantized) levels into block[].  Always-inlined so that
 * the is_dc argument (and effectively cat) become compile-time constants
 * in the _dc/_nondc wrapper functions below.
 * NOTE(review): this chunk has elided lines (gaps in the original line
 * numbering), so some closing braces / #else branches are not visible;
 * the comments below only describe what the visible code shows. */
/* Context-index offset tables into h->cabac_state, one row per
 * frame/field (MB_FIELD) coding, one column per block category. */
5198 static const int significant_coeff_flag_offset[2][6] = {
5199 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5200 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5202 static const int last_coeff_flag_offset[2][6] = {
5203 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5204 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5206 static const int coeff_abs_level_m1_offset[6] = {
5207 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* Per-scan-position context selection for 8x8 blocks (63 AC positions),
 * again split by frame ([0]) vs. field ([1]) coding. */
5209 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5210 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5211 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5212 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5213 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5214 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5215 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5216 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5217 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5219 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5220 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5221 * map node ctx => cabac ctx for level=1 */
5222 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5223 /* map node ctx => cabac ctx for level>1 */
5224 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5225 static const uint8_t coeff_abs_level_transition[2][8] = {
5226 /* update node ctx after decoding a level=1 */
5227 { 1, 2, 3, 3, 4, 5, 6, 7 },
5228 /* update node ctx after decoding a level>1 */
5229 { 4, 4, 4, 4, 5, 6, 7, 7 }
5235 int coeff_count = 0;
5238 uint8_t *significant_coeff_ctx_base;
5239 uint8_t *last_coeff_ctx_base;
5240 uint8_t *abs_level_m1_ctx_base;
/* Copy the CABAC state into a stack-local context (cc) so the hot
 * decode loop works on locals; CC selects between the copy and the
 * in-struct state depending on CABAC_ON_STACK. */
5243 #define CABAC_ON_STACK
5245 #ifdef CABAC_ON_STACK
5248 cc.range = h->cabac.range;
5249 cc.low = h->cabac.low;
5250 cc.bytestream= h->cabac.bytestream;
5252 #define CC &h->cabac
5256 /* cat: 0-> DC 16x16 n = 0
5257 * 1-> AC 16x16 n = luma4x4idx
5258 * 2-> Luma4x4 n = luma4x4idx
5259 * 3-> DC Chroma n = iCbCr
5260 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5261 * 5-> Luma8x8 n = 4 * luma8x8idx
5264 /* read coded block flag */
/* 8x8 luma (cat 5) has no coded_block_flag of its own; for all other
 * categories a zero flag means the whole block is empty. */
5265 if( is_dc || cat != 5 ) {
5266 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5268 h->non_zero_count_cache[scan8[n]] = 0;
/* Write the (locally advanced) CABAC state back before the early exit. */
5270 #ifdef CABAC_ON_STACK
5271 h->cabac.range = cc.range ;
5272 h->cabac.low = cc.low ;
5273 h->cabac.bytestream= cc.bytestream;
/* Select the context sets for this block category and field/frame mode. */
5279 significant_coeff_ctx_base = h->cabac_state
5280 + significant_coeff_flag_offset[MB_FIELD][cat];
5281 last_coeff_ctx_base = h->cabac_state
5282 + last_coeff_flag_offset[MB_FIELD][cat];
5283 abs_level_m1_ctx_base = h->cabac_state
5284 + coeff_abs_level_m1_offset[cat];
5286 if( !is_dc && cat == 5 ) {
/* Significance map: for each scan position, decode significant_coeff_flag
 * and (if set) last_significant_coeff_flag, collecting positions in
 * index[].  Expanded twice below with different context selectors. */
5287 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5288 for(last= 0; last < coefs; last++) { \
5289 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5290 if( get_cabac( CC, sig_ctx )) { \
5291 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5292 index[coeff_count++] = last; \
5293 if( get_cabac( CC, last_ctx ) ) { \
5299 if( last == max_coeff -1 ) {\
5300 index[coeff_count++] = last;\
5302 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths for the significance scan; the generic C macro
 * expansion is used otherwise (the surrounding #if/#else lines are
 * partially elided in this view). */
5303 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5304 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5306 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5308 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5310 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5313 assert(coeff_count > 0);
/* Record non-zero status in the cbp table / nnz caches; which branch
 * runs depends on cat (DC vs. 8x8 vs. 4x4 AC) — selectors elided here. */
5317 h->cbp_table[h->mb_xy] |= 0x100;
5319 h->cbp_table[h->mb_xy] |= 0x40 << n;
5322 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5324 assert( cat == 1 || cat == 2 || cat == 4 );
5325 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level decoding, from the last significant coefficient backwards:
 * node_ctx tracks the "how many trailing ones / greater-than-one seen"
 * state machine defined by the tables above. */
5330 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5332 int j= scantable[index[--coeff_count]];
5334 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: store +/-1 (DC) or the dequantized value (AC). */
5335 node_ctx = coeff_abs_level_transition[0][node_ctx];
5337 block[j] = get_cabac_bypass_sign( CC, -1);
5339 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5343 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5344 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* |level| > 1: unary prefix up to 14, then an Exp-Golomb-style
 * bypass-coded suffix for levels >= 15. */
5346 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5350 if( coeff_abs >= 15 ) {
5352 while( get_cabac_bypass( CC ) ) {
5358 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5364 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5366 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5369 } while( coeff_count );
/* Flush the stack-local CABAC state back into the context. */
5370 #ifdef CABAC_ON_STACK
5371 h->cabac.range = cc.range ;
5372 h->cabac.low = cc.low ;
5373 h->cabac.bytestream= cc.bytestream;
/* Non-inlined wrapper for DC blocks: forces is_dc=1 so the inlined
 * worker is specialized at compile time for the DC path. */
5379 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5380     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-inlined wrapper for AC/luma/chroma coefficient blocks: forces
 * is_dc=0 so the inlined worker is specialized for the non-DC path. */
5383 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5384     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatch helper: cat 0 (luma DC) and cat 3 (chroma DC) take the DC
 * path, everything else the non-DC path.  The two call styles below are
 * alternative compile paths — the preprocessor conditional selecting
 * between them is elided in this view (NOTE(review): confirm against
 * the full file). */
5388 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5390     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5392     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5393     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Compute h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * Defaults assume progressive addressing (top = one stride up, left =
 * previous mb); the visible branch then corrects for MBAFF pairs and
 * field pictures (some enclosing if/else lines are elided here). */
5397 static inline void compute_mb_neighbors(H264Context *h)
5399     MpegEncContext * const s = &h->s;
5400     const int mb_xy = h->mb_xy;
5401     h->top_mb_xy = mb_xy - s->mb_stride;
5402     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbors are computed on macroblock *pairs*; pair_xy is the
 * top mb of the current pair. */
5404         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5405         const int top_pair_xy      = pair_xy     - s->mb_stride;
5406         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5407         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5408         const int curr_mb_field_flag = MB_FIELD;
5409         const int bottom = (s->mb_y & 1);
/* In field mode, the top neighbor of a bottom mb (or of a field mb over
 * a field pair) is one more stride up than the progressive default. */
5411         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5412             h->top_mb_xy -= s->mb_stride;
5414         if (!left_mb_field_flag == curr_mb_field_flag) {
5415             h->left_mb_xy[0] = pair_xy - 1;
5417     } else if (FIELD_PICTURE) {
5418         h->top_mb_xy -= s->mb_stride;
5424  * decodes a macroblock
5425  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5427 static int decode_mb_cabac(H264Context *h) {
/* Main per-macroblock CABAC parser: decodes skip flags, mb_type,
 * prediction info (intra modes or refs+MVs), CBP, dQP and residuals,
 * and writes everything back into the picture-level tables.
 * NOTE(review): many lines of this function are elided in this chunk
 * (closing braces, else branches, error returns); comments describe the
 * visible code only. */
5428     MpegEncContext * const s = &h->s;
5430     int mb_type, partition_count, cbp = 0;
5431     int dct8x8_allowed= h->pps.transform_8x8_mode;
5433     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5435     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- Skip-flag handling (P/B slices only; I slices have no skip). --- */
5436     if( h->slice_type_nos != FF_I_TYPE ) {
5438         /* a skipped mb needs the aff flag from the following mb */
5439         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5440             predict_field_decoding_flag(h);
5441         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5442             skip = h->next_mb_skipped;
5444             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5445         /* read skip flags */
5447             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
/* MBAFF top mb skipped: peek at the bottom mb's skip flag so the field
 * decoding flag can be read in the right place. */
5448                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5449                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5450                 if(!h->next_mb_skipped)
5451                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5456             h->cbp_table[mb_xy] = 0;
5457             h->chroma_pred_mode_table[mb_xy] = 0;
5458             h->last_qscale_diff = 0;
5465         if( (s->mb_y&1) == 0 )
5467                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5470     h->prev_mb_skipped = 0;
5472     compute_mb_neighbors(h);
/* --- mb_type decoding, per slice type. --- */
5474     if( h->slice_type_nos == FF_B_TYPE ) {
5475         mb_type = decode_cabac_mb_type_b( h );
5477             partition_count= b_mb_type_info[mb_type].partition_count;
5478             mb_type=         b_mb_type_info[mb_type].type;
5481             goto decode_intra_mb;
5483     } else if( h->slice_type_nos == FF_P_TYPE ) {
5484         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5486             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5487                 /* P_L0_D16x16, P_8x8 */
5488                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5490                 /* P_L0_D8x16, P_L0_D16x8 */
5491                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5493             partition_count= p_mb_type_info[mb_type].partition_count;
5494             mb_type=         p_mb_type_info[mb_type].type;
5496             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5497             goto decode_intra_mb;
5500         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5501         if(h->slice_type == FF_SI_TYPE && mb_type)
5503             assert(h->slice_type_nos == FF_I_TYPE);
/* decode_intra_mb target: look up cbp / pred mode / canonical type. */
5505         partition_count = 0;
5506         cbp= i_mb_type_info[mb_type].cbp;
5507         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5508         mb_type= i_mb_type_info[mb_type].type;
5511         mb_type |= MB_TYPE_INTERLACED;
5513     h->slice_table[ mb_xy ]= h->slice_num;
/* --- PCM macroblock: raw samples follow, bypassing CABAC. --- */
5515     if(IS_INTRA_PCM(mb_type)) {
5518         // We assume these blocks are very rare so we do not optimize it.
5519         // FIXME The two following lines get the bitstream position in the cabac
5520         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5521         ptr= h->cabac.bytestream;
5522         if(h->cabac.low&0x1) ptr--;
5524             if(h->cabac.low&0x1FF) ptr--;
5527         // The pixels are stored in the same order as levels in h->mb array.
5528         memcpy(h->mb, ptr, 256); ptr+=256;
5530             memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Re-initialize the CABAC decoder after the raw PCM bytes. */
5533         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5535         // All blocks are present
5536         h->cbp_table[mb_xy] = 0x1ef;
5537         h->chroma_pred_mode_table[mb_xy] = 0;
5538         // In deblocking, the quantizer is 0
5539         s->current_picture.qscale_table[mb_xy]= 0;
5540         // All coeffs are present
5541         memset(h->non_zero_count[mb_xy], 16, 16);
5542         s->current_picture.mb_type[mb_xy]= mb_type;
5543         h->last_qscale_diff = 0;
/* MBAFF: refs are stored per-field, so double ref_count while parsing
 * (undone at the end of the function). */
5548         h->ref_count[0] <<= 1;
5549         h->ref_count[1] <<= 1;
5552     fill_caches(h, mb_type, 0);
/* --- Prediction info: intra modes, or per-partition refs + MVs. --- */
5554     if( IS_INTRA( mb_type ) ) {
5556         if( IS_INTRA4x4( mb_type ) ) {
5557             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5558                 mb_type |= MB_TYPE_8x8DCT;
5559                 for( i = 0; i < 16; i+=4 ) {
5560                     int pred = pred_intra_mode( h, i );
5561                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5562                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5565                 for( i = 0; i < 16; i++ ) {
5566                     int pred = pred_intra_mode( h, i );
5567                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5569                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5572             write_back_intra_pred_mode(h);
5573             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5575             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5576             if( h->intra16x16_pred_mode < 0 ) return -1;
5579         h->chroma_pred_mode_table[mb_xy] =
5580             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5582         pred_mode= check_intra_pred_mode( h, pred_mode );
5583         if( pred_mode < 0 ) return -1;
5584         h->chroma_pred_mode= pred_mode;
5586     } else if( partition_count == 4 ) {
/* 8x8 sub-macroblock partitions: decode sub_mb_type[], then refs,
 * then MVDs for every sub-partition. */
5587         int i, j, sub_partition_count[4], list, ref[2][4];
5589         if( h->slice_type_nos == FF_B_TYPE ) {
5590             for( i = 0; i < 4; i++ ) {
5591                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5592                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5593                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5595             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5596                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5597                 pred_direct_motion(h, &mb_type);
5598                 h->ref_cache[0][scan8[4]] =
5599                 h->ref_cache[1][scan8[4]] =
5600                 h->ref_cache[0][scan8[12]] =
5601                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5602                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5603                     for( i = 0; i < 4; i++ )
5604                         if( IS_DIRECT(h->sub_mb_type[i]) )
5605                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5609             for( i = 0; i < 4; i++ ) {
5610                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5611                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5612                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per 8x8 block (validated against ref_count). */
5616         for( list = 0; list < h->list_count; list++ ) {
5617                 for( i = 0; i < 4; i++ ) {
5618                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5619                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5620                         if( h->ref_count[list] > 1 ){
5621                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5622                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5623                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5631                     h->ref_cache[list][ scan8[4*i]+1 ]=
5632                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5637             dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vector differences per sub-partition; results fan out into
 * mv_cache/mvd_cache according to the sub-partition shape. */
5639         for(list=0; list<h->list_count; list++){
5641                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5642                 if(IS_DIRECT(h->sub_mb_type[i])){
5643                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5647                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5648                     const int sub_mb_type= h->sub_mb_type[i];
5649                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5650                     for(j=0; j<sub_partition_count[i]; j++){
5653                         const int index= 4*i + block_width*j;
5654                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5655                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5656                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5658                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5659                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5660                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5662                         if(IS_SUB_8X8(sub_mb_type)){
5664                                 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5666                                 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5669                                 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5671                                 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5672                         }else if(IS_SUB_8X4(sub_mb_type)){
5673                             mv_cache[ 1 ][0]= mx;
5674                             mv_cache[ 1 ][1]= my;
5676                             mvd_cache[ 1 ][0]=  mx - mpx;
5677                             mvd_cache[ 1 ][1]= my - mpy;
5678                         }else if(IS_SUB_4X8(sub_mb_type)){
5679                             mv_cache[ 8 ][0]= mx;
5680                             mv_cache[ 8 ][1]= my;
5682                             mvd_cache[ 8 ][0]= mx - mpx;
5683                             mvd_cache[ 8 ][1]= my - mpy;
5685                         mv_cache[ 0 ][0]= mx;
5686                         mv_cache[ 0 ][1]= my;
5688                         mvd_cache[ 0 ][0]= mx - mpx;
5689                         mvd_cache[ 0 ][1]= my - mpy;
/* Unused-list sub-block: zero the mv/mvd cache via 32-bit stores. */
5692                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5693                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5694                     p[0] = p[1] = p[8] = p[9] = 0;
5695                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
5699     } else if( IS_DIRECT(mb_type) ) {
5700         pred_direct_motion(h, &mb_type);
5701         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5702         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5703         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* Large-partition inter modes: 16x16, 16x8, 8x16. */
5705         int list, mx, my, i, mpx, mpy;
5706         if(IS_16X16(mb_type)){
5707             for(list=0; list<h->list_count; list++){
5708                 if(IS_DIR(mb_type, 0, list)){
5710                     if(h->ref_count[list] > 1){
5711                         ref= decode_cabac_mb_ref(h, list, 0);
5712                         if(ref >= (unsigned)h->ref_count[list]){
5713                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5718                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5720                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5722             for(list=0; list<h->list_count; list++){
5723                 if(IS_DIR(mb_type, 0, list)){
5724                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5726                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5727                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5728                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5730                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5731                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5733                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5736         else if(IS_16X8(mb_type)){
5737             for(list=0; list<h->list_count; list++){
5739                         if(IS_DIR(mb_type, i, list)){
5741                             if(h->ref_count[list] > 1){
5742                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5743                                 if(ref >= (unsigned)h->ref_count[list]){
5744                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5749                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5751                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5754             for(list=0; list<h->list_count; list++){
5756                     if(IS_DIR(mb_type, i, list)){
5757                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5758                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5759                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5760                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5762                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5763                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5765                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5766                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5771             assert(IS_8X16(mb_type));
5772             for(list=0; list<h->list_count; list++){
5774                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5776                             if(h->ref_count[list] > 1){
5777                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5778                                 if(ref >= (unsigned)h->ref_count[list]){
5779                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5784                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5786                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5789             for(list=0; list<h->list_count; list++){
5791                     if(IS_DIR(mb_type, i, list)){
5792                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5793                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5794                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5796                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5797                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5798                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5800                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5801                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* --- Write back motion, then decode CBP / transform size / residuals. */
5808    if( IS_INTER( mb_type ) ) {
5809         h->chroma_pred_mode_table[mb_xy] = 0;
5810         write_back_motion( h, mb_type );
5813     if( !IS_INTRA16x16( mb_type ) ) {
5814         cbp  = decode_cabac_mb_cbp_luma( h );
5816             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5819     h->cbp_table[mb_xy] = h->cbp = cbp;
5821     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5822         if( decode_cabac_mb_transform_size( h ) )
5823             mb_type |= MB_TYPE_8x8DCT;
5825     s->current_picture.mb_type[mb_xy]= mb_type;
5827     if( cbp || IS_INTRA16x16( mb_type ) ) {
5828         const uint8_t *scan, *scan8x8, *dc_scan;
5829         const uint32_t *qmul;
/* Pick scan tables by field/frame coding; the *_q0 variants are used at
 * qscale 0 (lossless-related special case). */
5832         if(IS_INTERLACED(mb_type)){
5833             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5834             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5835             dc_scan= luma_dc_field_scan;
5837             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5838             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5839             dc_scan= luma_dc_zigzag_scan;
5842         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5843         if( dqp == INT_MIN ){
5844             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into [0,51] after applying the delta. */
5848         if(((unsigned)s->qscale) > 51){
5849             if(s->qscale<0) s->qscale+= 52;
5850             else            s->qscale-= 52;
5852         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5853         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5855         if( IS_INTRA16x16( mb_type ) ) {
5857             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5858             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5861                 qmul = h->dequant4_coeff[0][s->qscale];
5862                 for( i = 0; i < 16; i++ ) {
5863                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5864                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5867                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5871             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5872                 if( cbp & (1<<i8x8) ) {
5873                     if( IS_8x8DCT(mb_type) ) {
5874                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5875                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5877                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5878                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5879                             const int index = 4*i8x8 + i4x4;
5880                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5882                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5883                             //STOP_TIMER("decode_residual")
5887                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5888                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (cat 3), then chroma AC (cat 4) for Cb and Cr. */
5895             for( c = 0; c < 2; c++ ) {
5896                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5897                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5903             for( c = 0; c < 2; c++ ) {
5904                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5905                 for( i = 0; i < 4; i++ ) {
5906                     const int index = 16 + 4 * c + i;
5907                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5908                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5912             uint8_t * const nnz= &h->non_zero_count_cache[0];
5913             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5914             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp == 0 and not intra16x16: clear all nnz entries. */
5917         uint8_t * const nnz= &h->non_zero_count_cache[0];
5918         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5919         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5920         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5921         h->last_qscale_diff = 0;
5924     s->current_picture.qscale_table[mb_xy]= s->qscale;
5925     write_back_non_zero_count(h);
/* Undo the MBAFF ref_count doubling done above. */
5928         h->ref_count[0] >>= 1;
5929         h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge: builds the tc[] clip table from
 * tc0_table and dispatches to the dsp loop-filter hook.  The visible
 * code shows both the normal (tc-based) and intra (strong) calls; the
 * branch selecting between them (presumably on bS[0] < 4) is elided
 * here — NOTE(review): confirm against the full file. */
5936 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5937     const int index_a = qp + h->slice_alpha_c0_offset;
5938     const int alpha = (alpha_table+52)[index_a];
5939     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5943         tc[0] = (tc0_table+52)[index_a][bS[0]];
5944         tc[1] = (tc0_table+52)[index_a][bS[1]];
5945         tc[2] = (tc0_table+52)[index_a][bS[2]];
5946         tc[3] = (tc0_table+52)[index_a][bS[3]];
5947         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5949         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblock one vertical chroma edge.  Same structure as filter_mb_edgev,
 * but the chroma tc values are tc0+1 per the chroma filter definition,
 * and the chroma dsp hooks are used.  The normal/intra branch selector
 * is elided in this view. */
5952 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5953     const int index_a = qp + h->slice_alpha_c0_offset;
5954     const int alpha = (alpha_table+52)[index_a];
5955     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5959         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5960         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5961         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5962         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5963         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5965         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock a vertical luma edge in the MBAFF case, one row of 16 pixels
 * at a time in plain C (no dsp hook: bS and qp can differ per row).
 * For bS < 4 the normal clipped filter is applied; for bS == 4 the
 * strong intra filter.  Several closing braces and a few statements
 * (e.g. the tc accumulation) are elided from this view. */
5969 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5971     for( i = 0; i < 16; i++, pix += stride) {
/* bS has 8 entries (two fields x 4); map the row to its bS entry. */
5977         int bS_index = (i >> 1);
5980             bS_index |= (i & 1);
5983         if( bS[bS_index] == 0 ) {
/* qp[] holds one value per field/mb half; pick by row parity or half. */
5987         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5988         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
5989         alpha = (alpha_table+52)[index_a];
5990         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5992         if( bS[bS_index] < 4 ) {
5993             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5994             const int p0 = pix[-1];
5995             const int p1 = pix[-2];
5996             const int p2 = pix[-3];
5997             const int q0 = pix[0];
5998             const int q1 = pix[1];
5999             const int q2 = pix[2];
6001             if( FFABS( p0 - q0 ) < alpha &&
6002                 FFABS( p1 - p0 ) < beta &&
6003                 FFABS( q1 - q0 ) < beta ) {
/* Optional p1/q1 adjustment when the second-neighbor gradient is small. */
6007                 if( FFABS( p2 - p0 ) < beta ) {
6008                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6011                 if( FFABS( q2 - q0 ) < beta ) {
6012                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6016                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6017                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6018                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6019                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra) filtering with up to 3 pixels per side. */
6022             const int p0 = pix[-1];
6023             const int p1 = pix[-2];
6024             const int p2 = pix[-3];
6026             const int q0 = pix[0];
6027             const int q1 = pix[1];
6028             const int q2 = pix[2];
6030             if( FFABS( p0 - q0 ) < alpha &&
6031                 FFABS( p1 - p0 ) < beta &&
6032                 FFABS( q1 - q0 ) < beta ) {
6034                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6035                     if( FFABS( p2 - p0 ) < beta)
6037                         const int p3 = pix[-4];
6039                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6040                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6041                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6044                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6046                     if( FFABS( q2 - q0 ) < beta)
6048                         const int q3 = pix[3];
6050                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6051                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6052                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6055                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak variant when the strong-filter threshold is not met. */
6059                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6060                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6062                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: 8 rows, only p0/q0 are
 * modified (chroma filter touches one pixel per side).  bS_index
 * computation and some braces are elided from this view. */
6067 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6069     for( i = 0; i < 8; i++, pix += stride) {
6077         if( bS[bS_index] == 0 ) {
6081         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6082         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6083         alpha = (alpha_table+52)[index_a];
6084         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6086         if( bS[bS_index] < 4 ) {
/* Chroma clip value is tc0+1, as in filter_mb_edgecv. */
6087             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6088             const int p0 = pix[-1];
6089             const int p1 = pix[-2];
6090             const int q0 = pix[0];
6091             const int q1 = pix[1];
6093             if( FFABS( p0 - q0 ) < alpha &&
6094                 FFABS( p1 - p0 ) < beta &&
6095                 FFABS( q1 - q0 ) < beta ) {
6096                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6098                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6099                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6100                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (still only p0/q0). */
6103             const int p0 = pix[-1];
6104             const int p1 = pix[-2];
6105             const int q0 = pix[0];
6106             const int q1 = pix[1];
6108             if( FFABS( p0 - q0 ) < alpha &&
6109                 FFABS( p1 - p0 ) < beta &&
6110                 FFABS( q1 - q0 ) < beta ) {
6112                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6113                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6114                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge via the vertical (v_) dsp hooks;
 * mirror image of filter_mb_edgev.  The normal/intra branch selector
 * is elided in this view. */
6120 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6121     const int index_a = qp + h->slice_alpha_c0_offset;
6122     const int alpha = (alpha_table+52)[index_a];
6123     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6127         tc[0] = (tc0_table+52)[index_a][bS[0]];
6128         tc[1] = (tc0_table+52)[index_a][bS[1]];
6129         tc[2] = (tc0_table+52)[index_a][bS[2]];
6130         tc[3] = (tc0_table+52)[index_a][bS[3]];
6131         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6133         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblock one horizontal chroma edge (tc0+1 clip values, chroma dsp
 * hooks); mirror image of filter_mb_edgecv.  The normal/intra branch
 * selector is elided in this view. */
6137 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6138     const int index_a = qp + h->slice_alpha_c0_offset;
6139     const int alpha = (alpha_table+52)[index_a];
6140     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6144         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6145         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6146         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6147         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6148         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6150         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock.  Falls back to the full
 * filter_mb() for border macroblocks and unsupported configurations,
 * shortcuts the whole mb when all qp values are below the filter
 * threshold, uses constant bS for intra mbs, and otherwise computes bS
 * per edge via the dsp strength helper.  The tail of the function
 * (FILTER macro invocations and the 8x8DCT branch body) is elided from
 * this view. */
6154 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6155     MpegEncContext * const s = &h->s;
6156     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6158     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Fallback cases: picture border, missing dsp helper, per-mb chroma qp
 * differences, or slice-boundary filtering restrictions. */
6162     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6163        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6164        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6165                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6166         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6169     assert(!FRAME_MBAFF);
/* Average the qp of each edge's two neighboring mbs (spec behavior). */
6171     mb_type = s->current_picture.mb_type[mb_xy];
6172     qp = s->current_picture.qscale_table[mb_xy];
6173     qp0 = s->current_picture.qscale_table[mb_xy-1];
6174     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6175     qpc = get_chroma_qp( h, 0, qp );
6176     qpc0 = get_chroma_qp( h, 0, qp0 );
6177     qpc1 = get_chroma_qp( h, 0, qp1 );
6178     qp0 = (qp + qp0 + 1) >> 1;
6179     qp1 = (qp + qp1 + 1) >> 1;
6180     qpc0 = (qpc + qpc0 + 1) >> 1;
6181     qpc1 = (qpc + qpc1 + 1) >> 1;
6182     qp_thresh = 15 - h->slice_alpha_c0_offset;
/* If every qp is under the threshold, alpha/beta are all 0 — nothing
 * would be filtered, so skip the macroblock entirely. */
6183     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6184        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6187     if( IS_INTRA(mb_type) ) {
/* Intra mb: fixed bS (4 at mb edges, 3 inside; 3 at the top edge of a
 * field picture). */
6188         int16_t bS4[4] = {4,4,4,4};
6189         int16_t bS3[4] = {3,3,3,3};
6190         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6191         if( IS_8x8DCT(mb_type) ) {
6192             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6193             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6194             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6195             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6197             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6198             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6199             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6200             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6201             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6202             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6203             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6204             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6206         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6207         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6208         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6209         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6210         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6211         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6212         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6213         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter mb: compute bS per edge, viewing the int16 bS array as uint64
 * rows for cheap all-edge tests and constant fills. */
6216         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6217         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6219         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6221             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6223             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6224                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6225             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6226                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6228             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6229             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6230             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6231                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Edges shared with an intra neighbor always get the intra strength. */
6233         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6234             bSv[0][0] = 0x0004000400040004ULL;
6235         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6236             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* Apply one edge (dir 0 = vertical, 1 = horizontal) when any bS != 0;
 * edge 0 uses the averaged cross-mb qp values. */
6238 #define FILTER(hv,dir,edge)\
6239         if(bSv[dir][edge]) {\
6240             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6242                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6243                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6249         } else if( IS_8x8DCT(mb_type) ) {
/* Apply the in-loop deblocking filter along one direction of a macroblock:
 * dir==0 filters vertical edges (against the left neighbour), dir==1 filters
 * horizontal edges (against the top neighbour). For each 4-sample edge
 * segment a boundary strength bS (0..4) is derived from intra coding,
 * non-zero coefficient flags, and reference/motion-vector differences,
 * then the luma and (every other edge) chroma edge filters are invoked.
 * NOTE(review): this listing has numbering gaps, i.e. some original lines
 * are elided; comments below describe only the visible code. */
6269 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6270 MpegEncContext * const s = &h->s;
/* mbm_xy: the neighbouring macroblock across the outer edge in this direction */
6272 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6273 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers so refs from different
 * slices can be compared; one table for this MB, one for the neighbour */
6274 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6275 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* 0xFFFF slice table entry means the neighbour is unavailable: skip edge 0 */
6276 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6278 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6279 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6280 // how often to recheck mv-based bS when iterating between edges
6281 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6282 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6283 // how often to recheck mv-based bS when iterating along each edge
6284 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the caller may have already filtered the first vertical edge (MBAFF case) */
6286 if (first_vertical_edge_done) {
6290 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6293 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6294 && !IS_INTERLACED(mb_type)
6295 && IS_INTERLACED(mbm_type)
6297 // This is a special case in the norm where the filtering must
6298 // be done twice (one each of the field) even if we are in a
6299 // frame macroblock.
6301 static const int nnz_idx[4] = {4,5,6,3};
6302 unsigned int tmp_linesize = 2 * linesize;
6303 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6304 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter once against each field of the interlaced MB pair above */
6309 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6310 if( IS_INTRA(mb_type) ||
6311 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6312 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6314 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6315 for( i = 0; i < 4; i++ ) {
6316 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6317 mbn_nnz[nnz_idx[i]] != 0 )
6323 // Do not use s->qscale as luma quantizer because it has not the same
6324 // value in IPCM macroblocks.
6325 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6326 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6327 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6328 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6329 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6330 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6331 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6332 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main loop: edge 0 is the MB boundary, edges 1..3 are internal */
6339 for( edge = start; edge < edges; edge++ ) {
6340 /* mbn_xy: neighbor macroblock */
6341 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6342 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6343 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* with an 8x8 transform the odd 4-pel edges are not filtered */
6347 if( (edge&1) && IS_8x8DCT(mb_type) )
6350 if( IS_INTRA(mb_type) ||
6351 IS_INTRA(mbn_type) ) {
6354 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6355 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6364 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inter MBs: mv-based bS may be skipped on edges inside one partition */
6369 if( edge & mask_edge ) {
6370 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6373 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6374 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6377 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6378 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6379 int bn_idx= b_idx - (dir ? 8:1);
/* compare refs/mvs of the two blocks on either side of the edge;
 * a >= 4 (quarter-pel) luma mv delta or >= mvy_limit vertical delta
 * or differing reference frames gives bS 1 */
6382 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6383 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6384 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6385 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the opposite list (ln = !l) */
6388 if(h->slice_type_nos == FF_B_TYPE && v){
6390 for( l = 0; !v && l < 2; l++ ) {
6392 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6393 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6394 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6398 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4-segment bS computation when partitions differ along the edge */
6404 for( i = 0; i < 4; i++ ) {
6405 int x = dir == 0 ? edge : i;
6406 int y = dir == 0 ? i : edge;
6407 int b_idx= 8 + 4 + x + 8*y;
6408 int bn_idx= b_idx - (dir ? 8:1);
6410 if( h->non_zero_count_cache[b_idx] |
6411 h->non_zero_count_cache[bn_idx] ) {
6417 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6418 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6419 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6420 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6426 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6428 for( l = 0; l < 2; l++ ) {
6430 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6431 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6432 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all-zero bS: nothing to filter on this edge */
6441 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6446 // Do not use s->qscale as luma quantizer because it has not the same
6447 // value in IPCM macroblocks.
6448 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6449 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6450 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6451 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* vertical edges; chroma is subsampled so only even edges touch it */
6453 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6454 if( (edge&1) == 0 ) {
6455 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6456 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6457 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6458 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* horizontal edges */
6461 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6462 if( (edge&1) == 0 ) {
6463 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6464 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6465 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6466 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Full (non-fast-path) deblocking of one macroblock: early-exits when the
 * quantizer is below the filtering threshold, patches non_zero_count_cache
 * for the CAVLC 8x8-DCT case, handles the MBAFF first-vertical-edge special
 * case with 8 boundary strengths, then filters both directions via
 * filter_mb_dir().
 * NOTE(review): listing has numbering gaps (elided lines); comments cover
 * the visible code only. */
6472 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6473 MpegEncContext * const s = &h->s;
6474 const int mb_xy= mb_x + mb_y*s->mb_stride;
6475 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical mv threshold (field vectors) */
6476 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6477 int first_vertical_edge_done = 0;
6480 //for sufficiently low qp, filtering wouldn't do anything
6481 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6483 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6484 int qp = s->current_picture.qscale_table[mb_xy];
6486 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6487 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6492 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6493 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6494 int top_type, left_type[2];
6495 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6496 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6497 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild neighbour nnz flags from the cbp bits of 8x8-DCT neighbours */
6499 if(IS_8x8DCT(top_type)){
6500 h->non_zero_count_cache[4+8*0]=
6501 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6502 h->non_zero_count_cache[6+8*0]=
6503 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6505 if(IS_8x8DCT(left_type[0])){
6506 h->non_zero_count_cache[3+8*1]=
6507 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6509 if(IS_8x8DCT(left_type[1])){
6510 h->non_zero_count_cache[3+8*3]=
6511 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* and for the current MB itself, one cbp bit per 8x8 block */
6514 if(IS_8x8DCT(mb_type)){
6515 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6516 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6518 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6519 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6521 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6522 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6524 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6525 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6530 // left mb is in picture
6531 && h->slice_table[mb_xy-1] != 0xFFFF
6532 // and current and left pair do not have the same interlaced type
6533 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6534 // and left mb is in the same slice if deblocking_filter == 2
6535 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6536 /* First vertical edge is different in MBAFF frames
6537 * There are 8 different bS to compute and 2 different Qp
6539 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6540 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6545 int mb_qp, mbn0_qp, mbn1_qp;
6547 first_vertical_edge_done = 1;
6549 if( IS_INTRA(mb_type) )
6550 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6552 for( i = 0; i < 8; i++ ) {
6553 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6555 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6557 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6558 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6559 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6561 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average luma/chroma qp with each of the two left-pair MBs */
6568 mb_qp = s->current_picture.qscale_table[mb_xy];
6569 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6570 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6571 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6572 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6573 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6574 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6575 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6576 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6577 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6578 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6579 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6580 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6583 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6584 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6585 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6586 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6587 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* normal path: filter vertical (dir 0) then horizontal (dir 1) edges */
6591 for( dir = 0; dir < 2; dir++ )
6592 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6594 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6595 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode one slice: CABAC or CAVLC macroblock loop, per-MB reconstruction
 * via hl_decode_mb(), error-resilience bookkeeping through ff_er_add_slice(),
 * and end-of-slice detection.
 * Returns 0 on normal slice end, -1 on decode error (visible paths).
 * FIX(review): the line numbered 6761 below was garbled in this listing
 * ("s->?gb" / "s->gb?.size_in_bits"); restored to match the identical
 * idiom on the surrounding lines 6749/6762.
 * NOTE(review): listing has numbering gaps (elided lines); comments cover
 * the visible code only. */
6599 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6600 H264Context *h = *(void**)arg;
6601 MpegEncContext * const s = &h->s;
6602 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6606 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6607 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6609 if( h->pps.cabac ) {
/* CABAC path: byte-align, init the arithmetic decoder on the remaining
 * slice bytes, and derive the initial context states from qscale */
6613 align_get_bits( &s->gb );
6616 ff_init_cabac_states( &h->cabac);
6617 ff_init_cabac_decoder( &h->cabac,
6618 s->gb.buffer + get_bits_count(&s->gb)/8,
6619 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6620 /* calculate pre-state */
6621 for( i= 0; i < 460; i++ ) {
6623 if( h->slice_type_nos == FF_I_TYPE )
6624 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6626 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6629 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6631 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6636 int ret = decode_mb_cabac(h);
6638 //STOP_TIMER("decode_mb_cabac")
6640 if(ret>=0) hl_decode_mb(h);
6642 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6645 ret = decode_mb_cabac(h);
6647 if(ret>=0) hl_decode_mb(h);
6650 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond 2 bytes indicates a corrupted slice */
6652 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6653 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6654 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6658 if( ++s->mb_x >= s->mb_width ) {
6660 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6662 if(FIELD_OR_MBAFF_PICTURE) {
6667 if( eos || s->mb_y >= s->mb_height ) {
6668 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6669 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path: plain bitstream macroblock loop */
6676 int ret = decode_mb_cavlc(h);
6678 if(ret>=0) hl_decode_mb(h);
6680 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6682 ret = decode_mb_cavlc(h);
6684 if(ret>=0) hl_decode_mb(h);
6689 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6690 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6695 if(++s->mb_x >= s->mb_width){
6697 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6699 if(FIELD_OR_MBAFF_PICTURE) {
6702 if(s->mb_y >= s->mb_height){
6703 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exactly consumed bitstream => clean slice end, else error */
6705 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6706 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6710 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6717 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6718 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6719 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6720 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6724 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6733 for(;s->mb_y < s->mb_height; s->mb_y++){
6734 for(;s->mb_x < s->mb_width; s->mb_x++){
6735 int ret= decode_mb(h);
6740 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6741 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6746 if(++s->mb_x >= s->mb_width){
6748 if(++s->mb_y >= s->mb_height){
6749 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6754 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6761 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6762 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6763 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6767 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6774 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6777 return -1; //not reached
/* Parse a picture-timing SEI message: CPB removal / DPB output delays
 * (when HRD parameters were signalled in the SPS), pic_struct, and the
 * optional per-timestamp clock fields (H.264 Annex D).
 * NOTE(review): listing has numbering gaps (elided lines). */
6780 static int decode_picture_timing(H264Context *h){
6781 MpegEncContext * const s = &h->s;
6782 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6783 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6784 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6786 if(h->sps.pic_struct_present_flag){
6787 unsigned int i, num_clock_ts;
6788 h->sei_pic_struct = get_bits(&s->gb, 4);
/* values above FRAME_TRIPLING are reserved by the spec */
6790 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6793 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6795 for (i = 0 ; i < num_clock_ts ; i++){
6796 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6797 unsigned int full_timestamp_flag;
6798 skip_bits(&s->gb, 2); /* ct_type */
6799 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6800 skip_bits(&s->gb, 5); /* counting_type */
6801 full_timestamp_flag = get_bits(&s->gb, 1);
6802 skip_bits(&s->gb, 1); /* discontinuity_flag */
6803 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6804 skip_bits(&s->gb, 8); /* n_frames */
6805 if(full_timestamp_flag){
6806 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6807 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6808 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* without full_timestamp_flag, each field is individually gated */
6810 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6811 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6812 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6813 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6814 if(get_bits(&s->gb, 1)) /* hours_flag */
6815 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6819 if(h->sps.time_offset_length > 0)
6820 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an unregistered user-data SEI payload of 'size' bytes. The first
 * 16 bytes are the UUID; the payload text is scanned for an x264 version
 * string to record h->x264_build (used for encoder bug workarounds).
 * NOTE(review): listing has numbering gaps (elided lines). */
6827 static int decode_unregistered_user_data(H264Context *h, int size){
6828 MpegEncContext * const s = &h->s;
6829 uint8_t user_data[16+256];
/* copy at most sizeof(user_data)-1 bytes so it can be NUL-terminated */
6835 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6836 user_data[i]= get_bits(&s->gb, 8);
6840 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6841 if(e==1 && build>=0)
6842 h->x264_build= build;
6844 if(s->avctx->debug & FF_DEBUG_BUGS)
6845 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any remaining payload bytes not captured above */
6848 skip_bits(&s->gb, 8);
/* Parse a recovery-point SEI: store the recovery frame count and skip the
 * remaining flag bits. */
6853 static int decode_recovery_point(H264Context *h){
6854 MpegEncContext * const s = &h->s;
6856 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6857 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/* Top-level SEI NAL parser: repeatedly read the 255-escaped payload type
 * and size, dispatch to the specific decoder, and skip unknown payloads.
 * NOTE(review): listing has numbering gaps (elided lines). */
6862 static int decode_sei(H264Context *h){
6863 MpegEncContext * const s = &h->s;
6865 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payload type/size use ff_byte escaping: 255 means "add 255 and continue" */
6870 type+= show_bits(&s->gb, 8);
6871 }while(get_bits(&s->gb, 8) == 255);
6875 size+= show_bits(&s->gb, 8);
6876 }while(get_bits(&s->gb, 8) == 255);
6879 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6880 if(decode_picture_timing(h) < 0)
6883 case SEI_TYPE_USER_DATA_UNREGISTERED:
6884 if(decode_unregistered_user_data(h, size) < 0)
6887 case SEI_TYPE_RECOVERY_POINT:
6888 if(decode_recovery_point(h) < 0)
/* unknown payload: skip it wholesale */
6892 skip_bits(&s->gb, 8*size);
6895 //FIXME check bits here
6896 align_get_bits(&s->gb);
/* Parse HRD parameters (H.264 Annex E): cpb_cnt, per-CPB bitrate/size,
 * and the delay field lengths stored into the SPS for later SEI parsing.
 * NOTE(review): listing has numbering gaps (elided lines). */
6902 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6903 MpegEncContext * const s = &h->s;
6905 cpb_count = get_ue_golomb_31(&s->gb) + 1;
/* spec limit: cpb_cnt_minus1 must be in [0,31] */
6907 if(cpb_count > 32U){
6908 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6912 get_bits(&s->gb, 4); /* bit_rate_scale */
6913 get_bits(&s->gb, 4); /* cpb_size_scale */
6914 for(i=0; i<cpb_count; i++){
6915 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6916 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6917 get_bits1(&s->gb); /* cbr_flag */
6919 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* these lengths are needed by decode_picture_timing() */
6920 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6921 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6922 sps->time_offset_length = get_bits(&s->gb, 5);
/* Parse VUI parameters from an SPS: sample aspect ratio, video signal
 * description, chroma sample location, timing info, HRD parameters, and
 * bitstream restrictions (num_reorder_frames).
 * NOTE(review): listing has numbering gaps (elided lines). */
6926 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6927 MpegEncContext * const s = &h->s;
6928 int aspect_ratio_info_present_flag;
6929 unsigned int aspect_ratio_idc;
6931 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6933 if( aspect_ratio_info_present_flag ) {
6934 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den pair; otherwise look up table */
6935 if( aspect_ratio_idc == EXTENDED_SAR ) {
6936 sps->sar.num= get_bits(&s->gb, 16);
6937 sps->sar.den= get_bits(&s->gb, 16);
6938 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6939 sps->sar= pixel_aspect[aspect_ratio_idc];
6941 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6948 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6950 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6951 get_bits1(&s->gb); /* overscan_appropriate_flag */
6954 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6955 get_bits(&s->gb, 3); /* video_format */
6956 get_bits1(&s->gb); /* video_full_range_flag */
6957 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6958 get_bits(&s->gb, 8); /* colour_primaries */
6959 get_bits(&s->gb, 8); /* transfer_characteristics */
6960 get_bits(&s->gb, 8); /* matrix_coefficients */
6964 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6965 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6966 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6969 sps->timing_info_present_flag = get_bits1(&s->gb);
6970 if(sps->timing_info_present_flag){
6971 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6972 sps->time_scale = get_bits_long(&s->gb, 32);
6973 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* NAL and VCL HRD parameter sets are parsed with the same helper */
6976 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6977 if(sps->nal_hrd_parameters_present_flag)
6978 if(decode_hrd_parameters(h, sps) < 0)
6980 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6981 if(sps->vcl_hrd_parameters_present_flag)
6982 if(decode_hrd_parameters(h, sps) < 0)
6984 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6985 get_bits1(&s->gb); /* low_delay_hrd_flag */
6986 sps->pic_struct_present_flag = get_bits1(&s->gb);
6988 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6989 if(sps->bitstream_restriction_flag){
6990 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6991 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6992 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6993 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6994 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6995 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6996 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6998 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6999 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Decode one quantization scaling list of 'size' (16 or 64) entries.
 * If the list is absent the fallback is copied; an initial delta yielding
 * zero selects the JVT default list; otherwise deltas are accumulated in
 * zig-zag order with last-value repetition once next becomes 0.
 * NOTE(review): listing has numbering gaps (elided lines). */
7007 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7008 const uint8_t *jvt_list, const uint8_t *fallback_list){
7009 MpegEncContext * const s = &h->s;
7010 int i, last = 8, next = 8;
/* 4x4 lists use the frame zigzag scan, 8x8 the generic one */
7011 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7012 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7013 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7015 for(i=0;i<size;i++){
7017 next = (last + get_se_golomb(&s->gb)) & 0xff;
7018 if(!i && !next){ /* matrix not written, we use the preset one */
7019 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7022 last = factors[scan[i]] = next ? next : last;
/* Decode the full set of scaling matrices for an SPS (is_sps) or PPS.
 * PPS lists fall back to the SPS lists when those were present, otherwise
 * to the JVT defaults; intra/inter chroma lists fall back to the previous
 * list of the same class, per the spec's prediction rules.
 * NOTE(review): listing has numbering gaps (elided lines). */
7026 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7027 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7028 MpegEncContext * const s = &h->s;
7029 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7030 const uint8_t *fallback[4] = {
7031 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7032 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7033 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7034 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7036 if(get_bits1(&s->gb)){
7037 sps->scaling_matrix_present |= is_sps;
7038 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7039 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7040 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7041 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7042 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7043 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only with the 8x8 transform (always in SPS) */
7044 if(is_sps || pps->transform_8x8_mode){
7045 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7046 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL: profile/level, sps_id, high-profile
 * extensions (chroma format, bit depth, scaling matrices), POC parameters,
 * reference-frame count, dimensions, AFF flags, cropping, and VUI. The
 * parsed SPS is stored in h->sps_buffers[sps_id].
 * NOTE(review): listing has numbering gaps (elided lines). */
7051 static inline int decode_seq_parameter_set(H264Context *h){
7052 MpegEncContext * const s = &h->s;
7053 int profile_idc, level_idc;
7054 unsigned int sps_id;
7058 profile_idc= get_bits(&s->gb, 8);
7059 get_bits1(&s->gb); //constraint_set0_flag
7060 get_bits1(&s->gb); //constraint_set1_flag
7061 get_bits1(&s->gb); //constraint_set2_flag
7062 get_bits1(&s->gb); //constraint_set3_flag
7063 get_bits(&s->gb, 4); // reserved
7064 level_idc= get_bits(&s->gb, 8);
7065 sps_id= get_ue_golomb_31(&s->gb);
7067 if(sps_id >= MAX_SPS_COUNT) {
7068 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7071 sps= av_mallocz(sizeof(SPS));
7075 sps->profile_idc= profile_idc;
7076 sps->level_idc= level_idc;
/* default: flat 16 scaling lists until the bitstream overrides them */
7078 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7079 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7080 sps->scaling_matrix_present = 0;
7082 if(sps->profile_idc >= 100){ //high profile
7083 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7084 if(sps->chroma_format_idc == 3)
7085 sps->residual_color_transform_flag = get_bits1(&s->gb);
7086 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7087 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7088 sps->transform_bypass = get_bits1(&s->gb);
7089 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7091 sps->chroma_format_idc= 1;
7094 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7095 sps->poc_type= get_ue_golomb_31(&s->gb);
7097 if(sps->poc_type == 0){ //FIXME #define
7098 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7099 } else if(sps->poc_type == 1){//FIXME #define
7100 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7101 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7102 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7103 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7105 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7106 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7110 for(i=0; i<sps->poc_cycle_length; i++)
7111 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7112 }else if(sps->poc_type != 2){
7113 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7117 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7118 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7119 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7122 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7123 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7124 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb computations */
7125 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7126 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7127 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7131 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7132 if(!sps->frame_mbs_only_flag)
7133 sps->mb_aff= get_bits1(&s->gb);
7137 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7139 #ifndef ALLOW_INTERLACE
7141 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7143 sps->crop= get_bits1(&s->gb);
7145 sps->crop_left = get_ue_golomb(&s->gb);
7146 sps->crop_right = get_ue_golomb(&s->gb);
7147 sps->crop_top = get_ue_golomb(&s->gb);
7148 sps->crop_bottom= get_ue_golomb(&s->gb);
7149 if(sps->crop_left || sps->crop_top){
7150 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7152 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7153 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7159 sps->crop_bottom= 0;
7162 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7163 if( sps->vui_parameters_present_flag )
7164 decode_vui_parameters(h, sps);
7166 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7167 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7168 sps_id, sps->profile_idc, sps->level_idc,
7170 sps->ref_frame_count,
7171 sps->mb_width, sps->mb_height,
7172 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7173 sps->direct_8x8_inference_flag ? "8B8" : "",
7174 sps->crop_left, sps->crop_right,
7175 sps->crop_top, sps->crop_bottom,
7176 sps->vui_parameters_present_flag ? "VUI" : "",
7177 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* replace any previously stored SPS with the same id */
7181 av_free(h->sps_buffers[sps_id]);
7182 h->sps_buffers[sps_id]= sps;
/* Fill chroma_qp_table[t] so that luma qp i maps to the chroma qp for the
 * given chroma_qp_index_offset 'index' (clipped to [0,51] per the spec). */
7190 build_qp_table(PPS *pps, int t, int index)
7193 for(i = 0; i < 52; i++)
7194 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL: pps_id/sps_id, entropy coding mode,
 * slice groups (FMO, unsupported), reference counts, weighted prediction,
 * qp offsets, deblocking/intra-pred flags, and the optional 8x8-transform
 * extension with per-PPS scaling matrices and second chroma qp offset.
 * The parsed PPS is stored in h->pps_buffers[pps_id].
 * NOTE(review): listing has numbering gaps (elided lines); the FMO table
 * excerpt below is spec text whose comment delimiters fall on elided lines. */
7197 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7198 MpegEncContext * const s = &h->s;
7199 unsigned int pps_id= get_ue_golomb(&s->gb);
7202 if(pps_id >= MAX_PPS_COUNT) {
7203 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7207 pps= av_mallocz(sizeof(PPS));
7210 pps->sps_id= get_ue_golomb_31(&s->gb);
7211 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7212 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7216 pps->cabac= get_bits1(&s->gb);
7217 pps->pic_order_present= get_bits1(&s->gb);
7218 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7219 if(pps->slice_group_count > 1 ){
7220 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7221 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7222 switch(pps->mb_slice_group_map_type){
7225 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7226 | run_length[ i ] |1 |ue(v) |
7231 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7233 | top_left_mb[ i ] |1 |ue(v) |
7234 | bottom_right_mb[ i ] |1 |ue(v) |
7242 | slice_group_change_direction_flag |1 |u(1) |
7243 | slice_group_change_rate_minus1 |1 |ue(v) |
7248 | slice_group_id_cnt_minus1 |1 |ue(v) |
7249 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7251 | slice_group_id[ i ] |1 |u(v) |
7256 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7257 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7258 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7259 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7263 pps->weighted_pred= get_bits1(&s->gb);
7264 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7265 pps->init_qp= get_se_golomb(&s->gb) + 26;
7266 pps->init_qs= get_se_golomb(&s->gb) + 26;
7267 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7268 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7269 pps->constrained_intra_pred= get_bits1(&s->gb);
7270 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7272 pps->transform_8x8_mode= 0;
7273 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7274 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7275 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7277 if(get_bits_count(&s->gb) < bit_length){
7278 pps->transform_8x8_mode= get_bits1(&s->gb);
7279 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7280 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7282 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7285 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7286 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7287 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7288 h->pps.chroma_qp_diff= 1;
7290 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7291 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7292 pps_id, pps->sps_id,
7293 pps->cabac ? "CABAC" : "CAVLC",
7294 pps->slice_group_count,
7295 pps->ref_count[0], pps->ref_count[1],
7296 pps->weighted_pred ? "weighted" : "",
7297 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7298 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7299 pps->constrained_intra_pred ? "CONSTR" : "",
7300 pps->redundant_pic_cnt_present ? "REDU" : "",
7301 pps->transform_8x8_mode ? "8x8DCT" : ""
7305 av_free(h->pps_buffers[pps_id]);
7306 h->pps_buffers[pps_id]= pps;
7314 * Call decode_slice() for each context.
7316 * @param h h264 master context
7317 * @param context_count number of contexts to execute
/*
 * NOTE(review): line-sampled extract — the VDPAU early-return body, the
 * `int i; H264Context *hx;` declarations, braces and the single-context
 * `else` path were dropped. Code kept byte-identical; comments only.
 */
7319 static void execute_decode_slices(H264Context *h, int context_count){
7320 MpegEncContext * const s = &h->s;
7321 AVCodecContext * const avctx= s->avctx;
/* VDPAU hardware decoding: slices are not decoded in software */
7325 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
/* single-threaded fast path: decode directly on the master context */
7327 if(context_count == 1) {
7328 decode_slice(avctx, &h);
/* multi-threaded path: propagate error-recognition settings to each
 * worker context, then run them all via avctx->execute() */
7330 for(i = 1; i < context_count; i++) {
7331 hx = h->thread_context[i];
7332 hx->s.error_recognition = avctx->error_recognition;
7333 hx->s.error_count = 0;
7336 avctx->execute(avctx, (void *)decode_slice,
7337 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7339 /* pull back stuff from slices to master context */
7340 hx = h->thread_context[context_count - 1];
7341 s->mb_x = hx->s.mb_x;
7342 s->mb_y = hx->s.mb_y;
7343 s->dropable = hx->s.dropable;
7344 s->picture_structure = hx->s.picture_structure;
/* accumulate per-thread error counts into the master context */
7345 for(i = 1; i < context_count; i++)
7346 h->s.error_count += h->thread_context[i]->s.error_count;
/*
 * Split the input buffer into NAL units (either AVC length-prefixed or
 * Annex-B start-code delimited, depending on h->is_avc), unescape each
 * one and dispatch by nal_unit_type (slice / DPA-DPB-DPC partitions /
 * SEI / SPS / PPS / AUD / ...). Slices are queued into per-thread
 * contexts and flushed through execute_decode_slices() whenever
 * max_contexts are pending. Returns the number of input bytes consumed
 * (via buf_index) — NOTE(review): the actual `return` line was dropped
 * by the extraction, along with many loop braces, `continue`/`break`
 * statements and several case labels; code kept byte-identical.
 */
7351 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7352 MpegEncContext * const s = &h->s;
7353 AVCodecContext * const avctx= s->avctx;
7355 H264Context *hx; ///< thread context
7356 int context_count = 0;
7358 h->max_contexts = avctx->thread_count;
/* debug hexdump of the first bytes of the buffer (presumably inside an
 * #if 0 / debug guard in the original — TODO confirm) */
7361 for(i=0; i<50; i++){
7362 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* unless decoding in chunks, a new call starts a new access unit */
7365 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7366 h->current_slice = 0;
7367 if (!s->first_field)
7368 s->current_picture_ptr= NULL;
/* ---- main NAL iteration loop (loop header lost in extraction) ---- */
7380 if(buf_index >= buf_size) break;
/* AVC format: read nal_length_size bytes of big-endian NAL size */
7382 for(i = 0; i < h->nal_length_size; i++)
7383 nalsize = (nalsize << 8) | buf[buf_index++];
7384 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7389 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7394 // start code prefix search
7395 for(; buf_index + 3 < buf_size; buf_index++){
7396 // This should always succeed in the first iteration.
7397 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7401 if(buf_index+3 >= buf_size) break;
/* each queued slice gets its own (thread) context */
7406 hx = h->thread_context[context_count];
/* unescape the RBSP (remove emulation-prevention bytes) */
7408 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7409 if (ptr==NULL || dst_length < 0){
/* strip trailing zero bytes before locating the rbsp_stop_one_bit */
7412 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7414 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7416 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7417 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
/* AVC: complain if decode_nal consumed fewer bytes than the declared
 * NAL size; only escalate to ERROR if the leftover bytes are non-zero */
7420 if (h->is_avc && (nalsize != consumed)){
7421 int i, debug_level = AV_LOG_DEBUG;
7422 for (i = consumed; i < nalsize; i++)
7423 if (buf[buf_index+i])
7424 debug_level = AV_LOG_ERROR;
7425 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7429 buf_index += consumed;
/* skip non-reference NALs when hurrying / skipping non-ref frames */
7431 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7432 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7437 switch(hx->nal_unit_type){
/* --- NAL_IDR_SLICE / NAL_SLICE (case labels lost in extraction) --- */
7439 if (h->nal_unit_type != NAL_IDR_SLICE) {
7440 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7443 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7445 init_get_bits(&hx->s.gb, ptr, bit_length);
7447 hx->inter_gb_ptr= &hx->s.gb;
7448 hx->s.data_partitioning = 0;
7450 if((err = decode_slice_header(hx, h)))
/* a recovery-point SEI also makes the picture a usable keyframe */
7453 s->current_picture_ptr->key_frame |=
7454 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7455 (h->sei_recovery_frame_cnt >= 0);
/* decode the slice unless it is being skipped by user settings */
7456 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7457 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7458 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7459 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7460 && avctx->skip_frame < AVDISCARD_ALL){
/* VDPAU path: hand the raw slice bytes (re-prefixed with a start
 * code) to the hardware instead of decoding in software */
7461 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7462 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7463 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7464 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* --- NAL_DPA: slice header + partition A --- */
7470 init_get_bits(&hx->s.gb, ptr, bit_length);
7472 hx->inter_gb_ptr= NULL;
7473 hx->s.data_partitioning = 1;
7475 err = decode_slice_header(hx, h);
/* --- NAL_DPB: intra-coefficient partition --- */
7478 init_get_bits(&hx->intra_gb, ptr, bit_length);
7479 hx->intra_gb_ptr= &hx->intra_gb;
/* --- NAL_DPC: inter-coefficient partition; all three present -> decodable --- */
7482 init_get_bits(&hx->inter_gb, ptr, bit_length);
7483 hx->inter_gb_ptr= &hx->inter_gb;
7485 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7486 && s->context_initialized
7488 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7489 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7490 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7491 && avctx->skip_frame < AVDISCARD_ALL)
/* --- NAL_SEI --- */
7495 init_get_bits(&s->gb, ptr, bit_length);
/* --- NAL_SPS --- */
7499 init_get_bits(&s->gb, ptr, bit_length);
7500 decode_seq_parameter_set(h);
7502 if(s->flags& CODEC_FLAG_LOW_DELAY)
7505 if(avctx->has_b_frames < 2)
7506 avctx->has_b_frames= !s->low_delay;
/* --- NAL_PPS --- */
7509 init_get_bits(&s->gb, ptr, bit_length);
7511 decode_picture_parameter_set(h, bit_length);
/* --- ignored NAL types --- */
7515 case NAL_END_SEQUENCE:
7516 case NAL_END_STREAM:
7517 case NAL_FILLER_DATA:
7519 case NAL_AUXILIARY_SLICE:
7522 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* flush queued slices once all thread contexts are filled */
7525 if(context_count == h->max_contexts) {
7526 execute_decode_slices(h, context_count);
7531 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7533 /* Slice could not be decoded in parallel mode, copy down
7534 * NAL unit stuff to context 0 and restart. Note that
7535 * rbsp_buffer is not transferred, but since we no longer
7536 * run in parallel mode this should not be an issue. */
7537 h->nal_unit_type = hx->nal_unit_type;
7538 h->nal_ref_idc = hx->nal_ref_idc;
/* flush any slices still queued at end of buffer */
7544 execute_decode_slices(h, context_count);
7549 * returns the number of bytes consumed for building the current frame
/*
 * Clamp the consumed-byte position: never report 0 (would stall the
 * caller's parsing loop) and treat "within 10 bytes of the end" as the
 * whole buffer. NOTE(review): the `return pos;` line was dropped by the
 * extraction; code kept byte-identical.
 */
7551 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7552 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7553 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Top-level libavcodec decode callback for H.264.
 * Handles: (1) end-of-stream flush of the delayed-picture queue,
 * (2) one-time parsing of avcC extradata (SPS/PPS) for AVC-in-MP4,
 * (3) decoding the NAL units of this packet, and (4) reordering
 * B-frames into display order via h->delayed_pic before returning the
 * oldest picture in *pict.
 * NOTE(review): line-sampled extract — returns, braces, several `case`
 * labels, `pics=0`/`out_idx` initialisations etc. were dropped by the
 * extraction; code kept byte-identical, comments only.
 */
7558 static int decode_frame(AVCodecContext *avctx,
7559 void *data, int *data_size,
7560 const uint8_t *buf, int buf_size)
7562 H264Context *h = avctx->priv_data;
7563 MpegEncContext *s = &h->s;
7564 AVFrame *pict = data;
7567 s->flags= avctx->flags;
7568 s->flags2= avctx->flags2;
7570 /* end of stream, output what is still in the buffers */
7571 if (buf_size == 0) {
7575 //FIXME factorize this with the output code below
7576 out = h->delayed_pic[0];
/* pick the delayed picture with the smallest poc, stopping the scan at
 * a keyframe / zero-poc boundary */
7578 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7579 if(h->delayed_pic[i]->poc < out->poc){
7580 out = h->delayed_pic[i];
/* compact the queue after removing the output picture */
7584 for(i=out_idx; h->delayed_pic[i]; i++)
7585 h->delayed_pic[i] = h->delayed_pic[i+1];
7588 *data_size = sizeof(AVFrame);
7589 *pict= *(AVFrame*)out;
/* ---- one-time avcC extradata parsing (AVC/MP4 style streams) ---- */
7595 if(h->is_avc && !h->got_avcC) {
7596 int i, cnt, nalsize;
7597 unsigned char *p = avctx->extradata;
7598 if(avctx->extradata_size < 7) {
7599 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7603 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7606 /* sps and pps in the avcC always have length coded with 2 bytes,
7607 so put a fake nal_length_size = 2 while parsing them */
7608 h->nal_length_size = 2;
7609 // Decode sps from avcC
7610 cnt = *(p+5) & 0x1f; // Number of sps
7612 for (i = 0; i < cnt; i++) {
7613 nalsize = AV_RB16(p) + 2;
7614 if(decode_nal_units(h, p, nalsize) < 0) {
7615 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7620 // Decode pps from avcC
7621 cnt = *(p++); // Number of pps
7622 for (i = 0; i < cnt; i++) {
7623 nalsize = AV_RB16(p) + 2;
7624 if(decode_nal_units(h, p, nalsize) != nalsize) {
7625 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7630 // Now store right nal length size, that will be use to parse all other nals
7631 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7632 // Do not reparse avcC
/* Annex-B style extradata: feed it through the normal NAL parser once */
7636 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7637 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
/* ---- decode the actual packet ---- */
7642 buf_index=decode_nal_units(h, buf, buf_size);
7646 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7647 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7648 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* picture complete (or non-chunked mode): finish and queue it */
7652 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7653 Picture *out = s->current_picture_ptr;
7654 Picture *cur = s->current_picture_ptr;
7655 int i, pics, cross_idr, out_of_order, out_idx;
7659 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7660 s->current_picture_ptr->pict_type= s->pict_type;
7662 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7663 ff_vdpau_h264_set_reference_frames(s);
/* apply memory-management control operations and roll poc state */
7666 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7667 h->prev_poc_msb= h->poc_msb;
7668 h->prev_poc_lsb= h->poc_lsb;
7670 h->prev_frame_num_offset= h->frame_num_offset;
7671 h->prev_frame_num= h->frame_num;
7673 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7674 ff_vdpau_h264_picture_complete(s);
7677 * FIXME: Error handling code does not seem to support interlaced
7678 * when slices span multiple rows
7679 * The ff_er_add_slice calls don't work right for bottom
7680 * fields; they cause massive erroneous error concealing
7681 * Error marking covers both fields (top and bottom).
7682 * This causes a mismatched s->error_count
7683 * and a bad error table. Further, the error count goes to
7684 * INT_MAX when called for bottom field, because mb_y is
7685 * past end by one (callers fault) and resync_mb_y != 0
7686 * causes problems for the first MB line, too.
7692 h->sei_recovery_frame_cnt = -1;
7694 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7695 /* Wait for second field. */
7699 cur->repeat_pict = 0;
7701 /* Signal interlacing information externally. */
7702 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7703 if(h->sps.pic_struct_present_flag){
7704 switch (h->sei_pic_struct)
7706 case SEI_PIC_STRUCT_FRAME:
7707 cur->interlaced_frame = 0;
7709 case SEI_PIC_STRUCT_TOP_FIELD:
7710 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7711 case SEI_PIC_STRUCT_TOP_BOTTOM:
7712 case SEI_PIC_STRUCT_BOTTOM_TOP:
7713 cur->interlaced_frame = 1;
7715 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7716 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7717 // Signal the possibility of telecined film externally (pic_struct 5,6)
7718 // From these hints, let the applications decide if they apply deinterlacing.
7719 cur->repeat_pict = 1;
7720 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7722 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7723 // Force progressive here, as doubling interlaced frame is a bad idea.
7724 cur->interlaced_frame = 0;
7725 cur->repeat_pict = 2;
7727 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7728 cur->interlaced_frame = 0;
7729 cur->repeat_pict = 4;
7733 /* Derive interlacing flag from used decoding process. */
7734 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7737 if (cur->field_poc[0] != cur->field_poc[1]){
7738 /* Derive top_field_first from field pocs. */
7739 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7741 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7742 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7743 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7744 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7745 cur->top_field_first = 1;
7747 cur->top_field_first = 0;
7749 /* Most likely progressive */
7750 cur->top_field_first = 0;
7754 //FIXME do something with unavailable reference frames
7756 /* Sort B-frames into display order */
7758 if(h->sps.bitstream_restriction_flag
7759 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7760 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* without bitstream restrictions under strict compliance, assume the
 * worst-case reorder depth */
7764 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7765 && !h->sps.bitstream_restriction_flag){
7766 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7771 while(h->delayed_pic[pics]) pics++;
7773 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* append the new picture, pinning it with DELAYED_PIC_REF so it is not
 * recycled while waiting in the reorder queue */
7775 h->delayed_pic[pics++] = cur;
7776 if(cur->reference == 0)
7777 cur->reference = DELAYED_PIC_REF;
7779 out = h->delayed_pic[0];
7781 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7782 if(h->delayed_pic[i]->poc < out->poc){
7783 out = h->delayed_pic[i];
/* cross_idr: the queue spans an IDR/keyframe boundary, so poc values
 * are not comparable with the last outputed poc */
7786 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7788 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* grow has_b_frames adaptively when out-of-order output is detected */
7790 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7792 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7794 ((!cross_idr && out->poc > h->outputed_poc + 2)
7795 || cur->pict_type == FF_B_TYPE)))
7798 s->avctx->has_b_frames++;
/* pop the chosen picture from the queue once enough delay built up */
7801 if(out_of_order || pics > s->avctx->has_b_frames){
7802 out->reference &= ~DELAYED_PIC_REF;
7803 for(i=out_idx; h->delayed_pic[i]; i++)
7804 h->delayed_pic[i] = h->delayed_pic[i+1];
7806 if(!out_of_order && pics > s->avctx->has_b_frames){
7807 *data_size = sizeof(AVFrame);
7809 h->outputed_poc = out->poc;
7810 *pict= *(AVFrame*)out;
7812 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7817 assert(pict->data[0] || !*data_size);
7818 ff_print_debug_info(s, pict);
7819 //printf("out %d\n", (int)pict->data[0]);
7822 /* Return the Picture timestamp as the frame number */
7823 /* we subtract 1 because it is added on utils.c */
7824 avctx->frame_number = s->picture_number - 1;
7826 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: [0]=top-left, [1]=top, [2]=top-right, [3]=left;
 * a neighbour is available only if it lies inside the frame and belongs
 * to the same slice (slice_table match). [4]/[5] are constant here.
 * NOTE(review): line-sampled extract — the `if(s->mb_y ...)` guard that
 * protects the top-row lookups in the original appears to have been
 * dropped; code kept byte-identical.
 */
7829 static inline void fill_mb_avail(H264Context *h){
7830 MpegEncContext * const s = &h->s;
7831 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7834 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7835 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7836 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7842 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7843 h->mb_avail[4]= 1; //FIXME move out
7844 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self-test (compiled under #ifdef TEST in the original file):
 * exercises the Exp-Golomb put/get round trip, the 4x4 (I)DCT error,
 * the quantizer, and NAL escaping/unescaping. NOTE(review): the
 * `int main(void){` opening, variable declarations, timing macros and
 * most braces were dropped by the extraction; code kept byte-identical.
 */
7852 #define SIZE (COUNT*40)
7858 // int int_temp[10000];
7860 AVCodecContext avctx;
7862 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb round trip: write 0..COUNT-1, read back --- */
7864 init_put_bits(&pb, temp, SIZE);
7865 printf("testing unsigned exp golomb\n");
7866 for(i=0; i<COUNT; i++){
7868 set_ue_golomb(&pb, i);
7869 STOP_TIMER("set_ue_golomb");
7871 flush_put_bits(&pb);
7873 init_get_bits(&gb, temp, 8*SIZE);
7874 for(i=0; i<COUNT; i++){
7877 s= show_bits(&gb, 24);
7880 j= get_ue_golomb(&gb);
7882 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7885 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb round trip, values centred on zero --- */
7889 init_put_bits(&pb, temp, SIZE);
7890 printf("testing signed exp golomb\n");
7891 for(i=0; i<COUNT; i++){
7893 set_se_golomb(&pb, i - COUNT/2);
7894 STOP_TIMER("set_se_golomb");
7896 flush_put_bits(&pb);
7898 init_get_bits(&gb, temp, 8*SIZE);
7899 for(i=0; i<COUNT; i++){
7902 s= show_bits(&gb, 24);
7905 j= get_se_golomb(&gb);
7906 if(j != i - COUNT/2){
7907 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7910 STOP_TIMER("get_se_golomb");
/* --- 4x4 transform: diff-DCT, rescale, IDCT, measure reconstruction error --- */
7914 printf("testing 4x4 (I)DCT\n");
7917 uint8_t src[16], ref[16];
7918 uint64_t error= 0, max_error=0;
7920 for(i=0; i<COUNT; i++){
7922 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7923 for(j=0; j<16; j++){
7924 ref[j]= random()%255;
7925 src[j]= random()%255;
7928 h264_diff_dct_c(block, src, ref, 4);
/* crude dequant approximation (presumably matching the DCT's scale
 * factors — TODO confirm against the original full file) */
7931 for(j=0; j<16; j++){
7932 // printf("%d ", block[j]);
7933 block[j]= block[j]*4;
7934 if(j&1) block[j]= (block[j]*4 + 2)/5;
7935 if(j&4) block[j]= (block[j]*4 + 2)/5;
7939 s->dsp.h264_idct_add(ref, block, 4);
7940 /* for(j=0; j<16; j++){
7941 printf("%d ", ref[j]);
7945 for(j=0; j<16; j++){
7946 int diff= FFABS(src[j] - ref[j]);
7949 max_error= FFMAX(max_error, diff);
7952 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7953 printf("testing quantizer\n");
7954 for(qp=0; qp<52; qp++){
7956 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escape/unescape round trip on random bitstreams --- */
7959 printf("Testing NAL layer\n");
7961 uint8_t bitstream[COUNT];
7962 uint8_t nal[COUNT*2];
7964 memset(&h, 0, sizeof(H264Context));
7966 for(i=0; i<COUNT; i++){
/* random nonzero payload, then punch in some zero runs to force
 * emulation-prevention escaping */
7974 for(j=0; j<COUNT; j++){
7975 bitstream[j]= (random() % 255) + 1;
7978 for(j=0; j<zeros; j++){
7979 int pos= random() % COUNT;
7980 while(bitstream[pos] == 0){
7989 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7991 printf("encoding failed\n");
7995 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7999 if(out_length != COUNT){
8000 printf("incorrect length %d %d\n", out_length, COUNT);
8004 if(consumed != nal_length){
8005 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8009 if(memcmp(bitstream, out, COUNT)){
8010 printf("mismatch\n");
8016 printf("Testing RBSP\n");
/*
 * Codec close callback: release all per-context allocations — the two
 * NAL unescape buffers, the macroblock tables, and every buffered
 * SPS/PPS. av_freep() both frees and NULLs the slots, so a double
 * close is safe. NOTE(review): the MPV_common_end()/return lines of the
 * original were dropped by the extraction; code kept byte-identical.
 */
8024 static av_cold int decode_end(AVCodecContext *avctx)
8026 H264Context *h = avctx->priv_data;
8027 MpegEncContext *s = &h->s;
8030 av_freep(&h->rbsp_buffer[0]);
8031 av_freep(&h->rbsp_buffer[1]);
8032 free_tables(h); //FIXME cleanup init stuff perhaps
8034 for(i = 0; i < MAX_SPS_COUNT; i++)
8035 av_freep(h->sps_buffers + i);
8037 for(i = 0; i < MAX_PPS_COUNT; i++)
8038 av_freep(h->pps_buffers + i);
8042 // memset(h, 0, sizeof(H264Context));
/*
 * Registration entry for the software H.264 decoder.
 * NOTE(review): line-sampled extract — the name/type/id/init/close/
 * decode/flush fields of this AVCodec initializer were dropped; the
 * visible fields are priv_data_size, capabilities and long_name.
 */
8048 AVCodec h264_decoder = {
8052 sizeof(H264Context),
8057 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8059 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
/*
 * Registration entry for the VDPAU hardware-accelerated H.264 decoder,
 * compiled only when VDPAU support is configured. Same layout caveat as
 * h264_decoder above: several initializer fields (and the closing
 * #endif) were dropped by the extraction.
 */
8062 #if CONFIG_H264_VDPAU_DECODER
8063 AVCodec h264_vdpau_decoder = {
8067 sizeof(H264Context),
8072 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8074 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8078 #if CONFIG_SVQ3_DECODER