2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
36 #include "rectangle.h"
37 #include "vdpau_internal.h"
41 #include "x86/h264_i386.h"
48 * Value of Picture.reference when Picture is not a reference picture, but
49 * is held for delayed output.
51 #define DELAYED_PIC_REF 4
// CAVLC lookup tables shared by all decoder instances; the VLC structs point
// into the statically sized table storage below and are built once at init.
53 static VLC coeff_token_vlc[4];
54 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
55 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
57 static VLC chroma_dc_coeff_token_vlc;
58 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
59 static const int chroma_dc_coeff_token_vlc_table_size = 256;
61 static VLC total_zeros_vlc[15];
62 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
63 static const int total_zeros_vlc_tables_size = 512;
65 static VLC chroma_dc_total_zeros_vlc[3];
66 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
67 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
69 static VLC run_vlc[6];
70 static VLC_TYPE run_vlc_tables[6][8][2];
71 static const int run_vlc_tables_size = 8;
74 static VLC_TYPE run7_vlc_table[96][2];
75 static const int run7_vlc_table_size = 96;
// Forward declarations for helpers defined later in this file.
77 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
78 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
79 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
80 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two 16-bit values into one 32-bit word.
 * The operand order depends on host endianness so that the result matches
 * the in-memory layout of two consecutive int16_t values {a, b} when the
 * word is stored through a uint32_t pointer (used for mv/ref cache writes).
 */
#ifndef av_always_inline
#define av_always_inline inline /* fallback if attributes.h was not included */
#endif
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + (a<<16);
#else
   return (a&0xFFFF) + (b<<16);
#endif
}
/* qp % 6 for each quantizer step 0..51, used to index the dequant
 * coefficient tables (the H.264 dequant scale repeats with period 6). */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/* qp / 6 for each quantizer step 0..51: the dequant shift amount grows by
 * one every 6 qp steps (quantizer doubles every 6 steps). */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
// Per-MBAFF-configuration remap of the 4x4-block indices used when reading
// the left neighbour's caches. NOTE(review): the four initializer rows
// (original lines 100-105) are missing from this listing — restore from the
// upstream file before compiling.
99 static const uint8_t left_block_options[4][8]={
// Width of the index into the precomputed CAVLC level-code table below.
106 #define LEVEL_TAB_BITS 8
107 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
// Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
// non-zero coefficient counts, motion vectors, reference indices, mvd and
// B-direct flags) from the already decoded neighbour macroblocks, selecting
// the correct neighbours under MBAFF/PAFF field/frame coding. When
// for_deblock is set, only what the loop filter needs is filled and slice
// boundaries are treated differently (slice_table compared with < 0xFFFF
// instead of == slice_num). NOTE(review): this listing is missing many
// original lines (blanks, closing braces, else branches); the code lines
// below are preserved exactly as found.
109 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
110 MpegEncContext * const s = &h->s;
111 const int mb_xy= h->mb_xy;
112 int topleft_xy, top_xy, topright_xy, left_xy[2];
113 int topleft_type, top_type, topright_type, left_type[2];
114 const uint8_t * left_block;
115 int topleft_partition= -1;
// In a field picture the vertical mb stride doubles, hence the shift.
118 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
120 //FIXME deblocking could skip the intra and nnz parts.
121 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
124 /* Wow, what a mess, why didn't they simplify the interlacing & intra
125 * stuff, I can't imagine that these complex rules are worth it. */
// Default (non-MBAFF) neighbour addressing: plain raster offsets.
127 topleft_xy = top_xy - 1;
128 topright_xy= top_xy + 1;
129 left_xy[1] = left_xy[0] = mb_xy-1;
130 left_block = left_block_options[0];
// MBAFF path: neighbour positions depend on the field/frame flag of each
// macroblock pair and on whether this is the top or bottom mb of its pair.
132 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
133 const int top_pair_xy = pair_xy - s->mb_stride;
134 const int topleft_pair_xy = top_pair_xy - 1;
135 const int topright_pair_xy = top_pair_xy + 1;
136 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
137 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
138 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
139 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
140 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
141 const int bottom = (s->mb_y & 1);
142 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
144 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
145 top_xy -= s->mb_stride;
147 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
155 topright_xy -= s->mb_stride;
157 if (left_mb_field_flag != curr_mb_field_flag) {
158 left_xy[1] = left_xy[0] = pair_xy - 1;
159 if (curr_mb_field_flag) {
160 left_xy[1] += s->mb_stride;
161 left_block = left_block_options[3];
163 left_block= left_block_options[2 - bottom];
// Publish resolved neighbour addresses for later use (deblocking etc.).
168 h->top_mb_xy = top_xy;
169 h->left_mb_xy[0] = left_xy[0];
170 h->left_mb_xy[1] = left_xy[1];
// Deblocking variant: a neighbour counts as available if it belongs to any
// already decoded slice (slice_table < 0xFFFF), not just the current one.
174 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
175 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
176 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
178 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 for(list=0; list<h->list_count; list++){
181 //These values were changed for ease of performing MC; we need to change them back
182 //FIXME maybe we can make MC and loop filter use the same values or prevent
183 //the MC code from changing ref_cache and rather use a temporary array.
184 if(USES_LIST(mb_type,list)){
185 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// Decoding variant: neighbours are available only within the current slice.
195 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
196 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
197 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
198 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
199 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra prediction: compute sample-availability bitmasks. With constrained
// intra prediction only intra neighbours count as available.
201 if(IS_INTRA(mb_type)){
202 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
203 h->topleft_samples_available=
204 h->top_samples_available=
205 h->left_samples_available= 0xFFFF;
206 h->topright_samples_available= 0xEEEA;
208 if(!(top_type & type_mask)){
209 h->topleft_samples_available= 0xB3FF;
210 h->top_samples_available= 0x33FF;
211 h->topright_samples_available= 0x26EA;
213 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
214 if(IS_INTERLACED(mb_type)){
215 if(!(left_type[0] & type_mask)){
216 h->topleft_samples_available&= 0xDFFF;
217 h->left_samples_available&= 0x5FFF;
219 if(!(left_type[1] & type_mask)){
220 h->topleft_samples_available&= 0xFF5F;
221 h->left_samples_available&= 0xFF5F;
224 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
225 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
226 assert(left_xy[0] == left_xy[1]);
227 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
228 h->topleft_samples_available&= 0xDF5F;
229 h->left_samples_available&= 0x5F5F;
233 if(!(left_type[0] & type_mask)){
234 h->topleft_samples_available&= 0xDF5F;
235 h->left_samples_available&= 0x5F5F;
239 if(!(topleft_type & type_mask))
240 h->topleft_samples_available&= 0x7FFF;
242 if(!(topright_type & type_mask))
243 h->topright_samples_available&= 0xFBFF;
// Intra4x4: import the neighbours' 4x4 prediction modes into the cache
// (entries default to `pred` — -1 or DC — when a neighbour is unusable).
245 if(IS_INTRA4x4(mb_type)){
246 if(IS_INTRA4x4(top_type)){
247 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
248 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
249 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
250 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
253 if(!(top_type & type_mask))
258 h->intra4x4_pred_mode_cache[4+8*0]=
259 h->intra4x4_pred_mode_cache[5+8*0]=
260 h->intra4x4_pred_mode_cache[6+8*0]=
261 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 if(IS_INTRA4x4(left_type[i])){
265 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
266 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
269 if(!(left_type[i] & type_mask))
274 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
275 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Non-zero-count cache: pull the neighbours' edge nnz values; unavailable
// neighbours get 64 (magic "unavailable" marker) or 0 for CABAC inter.
291 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
294 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
295 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
296 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
298 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
299 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
301 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
302 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 h->non_zero_count_cache[4+8*0]=
306 h->non_zero_count_cache[5+8*0]=
307 h->non_zero_count_cache[6+8*0]=
308 h->non_zero_count_cache[7+8*0]=
310 h->non_zero_count_cache[1+8*0]=
311 h->non_zero_count_cache[2+8*0]=
313 h->non_zero_count_cache[1+8*3]=
314 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
318 for (i=0; i<2; i++) {
320 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
321 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
322 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 h->non_zero_count_cache[3+8*1 + 2*8*i]=
326 h->non_zero_count_cache[3+8*2 + 2*8*i]=
327 h->non_zero_count_cache[0+8*1 + 8*i]=
328 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CABAC cbp context from the top/left neighbours.
335 h->top_cbp = h->cbp_table[top_xy];
336 } else if(IS_INTRA(mb_type)) {
343 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
344 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter/direct: fill the motion-vector and reference caches from the top,
// left, top-left and top-right neighbours for each active reference list.
358 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 for(list=0; list<h->list_count; list++){
361 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
362 /*if(!h->mv_cache_clean[list]){
363 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
364 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
365 h->mv_cache_clean[list]= 1;
369 h->mv_cache_clean[list]= 0;
371 if(USES_LIST(top_type, list)){
372 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
373 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
378 h->ref_cache[list][scan8[0] + 0 - 1*8]=
379 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
380 h->ref_cache[list][scan8[0] + 2 - 1*8]=
381 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
387 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 int cache_idx = scan8[0] - 1 + i*2*8;
392 if(USES_LIST(left_type[i], list)){
393 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
394 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
395 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
396 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
397 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
398 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 *(uint32_t*)h->mv_cache [list][cache_idx ]=
401 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
402 h->ref_cache[list][cache_idx ]=
403 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Topleft/topright are only needed for full MV prediction, not deblocking
// or spatial direct.
407 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
410 if(USES_LIST(topleft_type, list)){
411 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
412 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
413 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
414 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
417 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
420 if(USES_LIST(topright_type, list)){
421 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
422 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
423 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
424 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
427 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
430 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Mark the cache border entries that never have valid neighbours.
433 h->ref_cache[list][scan8[5 ]+1] =
434 h->ref_cache[list][scan8[7 ]+1] =
435 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
436 h->ref_cache[list][scan8[4 ]] =
437 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
438 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
441 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
442 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC only: motion-vector-difference cache for context modelling.
445 /* XXX beurk, Load mvd */
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
458 if(USES_LIST(left_type[0], list)){
459 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
466 if(USES_LIST(left_type[1], list)){
467 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
474 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
477 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
478 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: per-8x8 direct-mode flags from the top and left neighbours.
480 if(h->slice_type_nos == FF_B_TYPE){
481 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
483 if(IS_DIRECT(top_type)){
484 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
485 }else if(IS_8X8(top_type)){
486 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
487 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
488 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
493 if(IS_DIRECT(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
495 else if(IS_8X8(left_type[0]))
496 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
500 if(IS_DIRECT(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
502 else if(IS_8X8(left_type[1]))
503 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF frame/field rescaling of cached neighbour mvs and refs: the
// MAP_F2F macro body is redefined for each direction before MAP_MVS runs.
511 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
512 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
517 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
520 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// Frame mb reading field neighbours: ref doubles, vertical mv halves.
522 #define MAP_F2F(idx, mb_type)\
523 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
524 h->ref_cache[list][idx] <<= 1;\
525 h->mv_cache[list][idx][1] /= 2;\
526 h->mvd_cache[list][idx][1] /= 2;\
// Field mb reading frame neighbours: the inverse scaling.
531 #define MAP_F2F(idx, mb_type)\
532 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
533 h->ref_cache[list][idx] >>= 1;\
534 h->mv_cache[list][idx][1] <<= 1;\
535 h->mvd_cache[list][idx][1] <<= 1;\
// Used by the loop filter to pick the 4x4 vs 8x8 deblocking path.
545 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
548 static inline void write_back_intra_pred_mode(H264Context *h){
549 const int mb_xy= h->mb_xy;
551 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
552 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
553 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
554 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
555 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
556 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
557 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
561 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Checks whether the top/left neighbours required by each cached intra4x4
// mode are actually available and remaps DC-class modes to variants that
// use only available samples; returns an error for modes that cannot be
// remapped. NOTE(review): loop headers, error returns and closing braces
// from the original are missing from this listing; code kept as found.
563 static inline int check_intra4x4_pred_mode(H264Context *h){
564 MpegEncContext * const s = &h->s;
// Remap tables: index = requested mode, value = substitute mode or -1 = invalid.
565 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
566 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
569 if(!(h->top_samples_available&0x8000)){
571 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
576 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
581 if((h->left_samples_available&0x8888)!=0x8888){
// One availability bit per left 4x4 row.
582 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 if(!(h->left_samples_available&mask[i])){
585 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
590 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 } //FIXME cleanup like next
600 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Checks availability of the top/left neighbours for a 16x16-luma or chroma
// prediction mode and remaps DC-class modes to use only available samples;
// rejects out-of-range or unsatisfiable modes. NOTE(review): range check,
// returns and closing braces are missing from this listing; code kept as found.
602 static inline int check_intra_pred_mode(H264Context *h, int mode){
603 MpegEncContext * const s = &h->s;
// Remap tables: index = requested mode, value = substitute or -1 = invalid.
604 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
605 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
612 if(!(h->top_samples_available&0x8000)){
615 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
620 if((h->left_samples_available&0x8080) != 0x8080){
622 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
// Only one of the two left halves is available: pick the asymmetric DC mode.
623 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
635 * gets the predicted intra4x4 prediction mode.
637 static inline int pred_intra_mode(H264Context *h, int n){
638 const int index8= scan8[n];
639 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
640 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 const int min= FFMIN(left, top);
643 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
645 if(min<0) return DC_PRED;
649 static inline void write_back_non_zero_count(H264Context *h){
650 const int mb_xy= h->mb_xy;
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
670 * gets the predicted number of non-zero coefficients.
671 * @param n block index
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetches the diagonal (top-right, or top-left as fallback) motion vector
// predictor C for block i, handling the MBAFF cases where the neighbour mb
// has a different field/frame coding and the mv/ref must be rescaled on the
// fly via SET_DIAG_MV. Returns the reference index belonging to *C.
// NOTE(review): several conditional lines and closing braces from the
// original are missing from this listing; code kept exactly as found.
686 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
687 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
688 MpegEncContext *s = &h->s;
690 /* there is no consistent mapping of mvs to neighboring locations that will
691 * make mbaff happy, so we can't move all this logic to fill_caches */
693 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Scratch cache slot scan8[0]-2 is used to hold the rescaled mv for *C.
695 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
696 *C = h->mv_cache[list][scan8[0]-2];
699 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
700 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
701 if(IS_INTERLACED(mb_types[topright_xy])){
// Reads the mv/ref at 4x4 position (X4,Y4), rescaling the vertical mv and
// the reference index between field and frame units (MV_OP/REF_OP).
702 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
703 const int x4 = X4, y4 = Y4;\
704 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
705 if(!USES_LIST(mb_type,list))\
706 return LIST_NOT_USED;\
707 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
708 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
709 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
710 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
712 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Fallback to the left neighbour when top-right is unavailable in MBAFF.
715 if(topright_ref == PART_NOT_AVAILABLE
716 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
717 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
720 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
726 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF path: use top-right directly, else fall back to top-left.
732 if(topright_ref != PART_NOT_AVAILABLE){
733 *C= h->mv_cache[list][ i - 8 + part_width ];
736 tprintf(s->avctx, "topright MV not available\n");
738 *C= h->mv_cache[list][ i - 8 - 1 ];
739 return h->ref_cache[list][ i - 8 - 1 ];
744 * gets the predicted MV.
745 * @param n the block index
746 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
747 * @param mx the x component of the predicted motion vector
748 * @param my the y component of the predicted motion vector
// Median motion vector prediction (H.264 8.4.1.3): predicts (mx,my) for
// block n from neighbours A (left), B (top) and C (diagonal). With exactly
// one neighbour matching `ref`, that neighbour's mv is used; otherwise the
// component-wise median. NOTE(review): the single-match branches and
// closing braces from the original are missing; code kept exactly as found.
750 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
751 const int index8= scan8[n];
752 const int top_ref= h->ref_cache[list][ index8 - 8 ];
753 const int left_ref= h->ref_cache[list][ index8 - 1 ];
754 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
755 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 int diagonal_ref, match_count;
759 assert(part_width==1 || part_width==2 || part_width==4);
769 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
// Count how many neighbours use the same reference as this partition.
770 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
771 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
772 if(match_count > 1){ //most common
773 *mx= mid_pred(A[0], B[0], C[0]);
774 *my= mid_pred(A[1], B[1], C[1]);
775 }else if(match_count==1){
779 }else if(top_ref==ref){
// Only the left neighbour exists at all: take A outright (spec special case).
787 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
791 *mx= mid_pred(A[0], B[0], C[0]);
792 *my= mid_pred(A[1], B[1], C[1]);
796 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
800 * gets the directionally predicted 16x8 MV.
801 * @param n the block index
802 * @param mx the x component of the predicted motion vector
803 * @param my the y component of the predicted motion vector
// Directional prediction for 16x8 partitions (H.264 8.4.1.3 special case):
// the top partition prefers the top neighbour B, the bottom partition the
// left neighbour A, when that neighbour uses the same reference; otherwise
// falls through to the generic median predictor. NOTE(review): branch
// bodies and the pred_motion fallthrough's surrounding braces are missing
// from this listing; code kept exactly as found.
805 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// Bottom 16x8 partition: consult the left neighbour of row scan8[8].
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Fallback: generic median prediction over the full-width partition.
831 pred_motion(h, n, 4, list, ref, mx, my);
835 * gets the directionally predicted 8x16 MV.
836 * @param n the block index
837 * @param mx the x component of the predicted motion vector
838 * @param my the y component of the predicted motion vector
// Directional prediction for 8x16 partitions (H.264 8.4.1.3 special case):
// the left partition prefers the left neighbour A, the right partition the
// diagonal neighbour C, when that neighbour uses the same reference;
// otherwise falls through to the generic median predictor. NOTE(review):
// branch bodies and braces are missing from this listing; code kept as found.
840 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Right 8x16 partition: consult the diagonal neighbour of block scan8[4].
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860 if(diagonal_ref == ref){
// Fallback: generic median prediction over the half-width partition.
868 pred_motion(h, n, 2, list, ref, mx, my);
871 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
878 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
879 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
885 pred_motion(h, 0, 4, 0, 0, mx, my);
890 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
891 int poc0 = h->ref_list[0][i].poc;
892 int td = av_clip(poc1 - poc0, -128, 127);
893 if(td == 0 || h->ref_list[0][i].long_ref){
896 int tb = av_clip(poc - poc0, -128, 127);
897 int tx = (16384 + (FFABS(td) >> 1)) / td;
898 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
// Precomputes the temporal-direct distance scale factors for every list-0
// reference of the current slice, plus the per-field variants used by
// MBAFF. NOTE(review): the FRAME_MBAFF conditional and loop variable
// declarations from the original are missing; code kept exactly as found.
902 static inline void direct_dist_scale_factor(H264Context * const h){
903 MpegEncContext * const s = &h->s;
// Field pictures use the POC of the field actually being decoded.
904 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
905 const int poc1 = h->ref_list[1][0].poc;
// MBAFF: separate scale-factor tables per field parity, over 2x the refs
// (field references live at indices 16..16+2*ref_count in ref_list).
907 for(field=0; field<2; field++){
908 const int poc = h->s.current_picture_ptr->field_poc[field];
909 const int poc1 = h->ref_list[1][0].field_poc[field];
910 for(i=0; i < 2*h->ref_count[0]; i++)
911 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
// Frame-level table used by the non-MBAFF (or frame mb) path.
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
// Builds the map from the co-located (list-1 anchor) picture's reference
// indices to the current slice's list indices, matching references by POC.
// Used by temporal direct mode. With mbafi set, maps field references
// (stored at offsets 16..) for MBAFF field macroblocks. NOTE(review):
// POC-matching branches and closing braces from the original are missing
// from this listing; code kept exactly as found.
919 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
920 MpegEncContext * const s = &h->s;
921 Picture * const ref1 = &h->ref_list[1][0];
922 int j, old_ref, rfield;
923 int start= mbafi ? 16 : 0;
924 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
925 int interl= mbafi || s->picture_structure != PICT_FRAME;
927 /* bogus; fills in for missing frames */
928 memset(map[list], 0, sizeof(map[list]));
930 for(rfield=0; rfield<2; rfield++){
931 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
// ref_poc encodes 4*frame_num + reference-field bits; adjust parity for
// interlaced matching.
932 int poc = ref1->ref_poc[colfield][list][old_ref];
936 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
937 poc= (poc&~3) + rfield + 1;
// Find the current-list reference with the same encoded POC.
939 for(j=start; j<end; j++){
940 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
941 int cur_ref= mbafi ? (j-16)^field : j;
942 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and POC encodings)
 * into the Picture itself so future B-frames can resolve co-located
 * references, then builds the col->list0 maps used by direct prediction.
 * Skipped (early return) for non-B slices or spatial direct mode.
 */
952 static inline void direct_ref_list_init(H264Context * const h){
953 MpegEncContext * const s = &h->s;
954 Picture * const ref1 = &h->ref_list[1][0];
955 Picture * const cur = s->current_picture_ptr;
/* sidx selects the field slot of the current/co-located picture */
957 int sidx= (s->picture_structure&1)^1;
958 int ref1sidx= (ref1->reference&1)^1;
960 for(list=0; list<2; list++){
961 cur->ref_count[sidx][list] = h->ref_count[list];
962 for(j=0; j<h->ref_count[list]; j++)
963 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* for frame coding both field slots carry the same lists */
966 if(s->picture_structure == PICT_FRAME){
967 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
968 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
971 cur->mbaff= FRAME_MBAFF;
973 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
976 for(list=0; list<2; list++){
977 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
978 for(field=0; field<2; field++)
979 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Direct-mode motion vector prediction for a B macroblock.
 *
 * Derives *mb_type / h->sub_mb_type and fills h->ref_cache / h->mv_cache
 * for both lists, either by spatial direct prediction (neighbor-min refs,
 * pred_motion(), with zeroing against near-static co-located blocks) or by
 * temporal direct prediction (scaling the co-located list-1 picture's MVs
 * by dist_scale_factor).  The first section resolves the co-located
 * macroblock position/type across the frame/field (PAFF/MBAFF) cases.
 * NOTE(review): this extract is missing lines (else-branches, closing
 * braces, some declarations such as mb_type_col/ref/mv) — comments below
 * describe only what the visible statements establish.
 */
983 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
984 MpegEncContext * const s = &h->s;
985 int b8_stride = h->b8_stride;
986 int b4_stride = h->b_stride;
987 int mb_xy = h->mb_xy;
989 const int16_t (*l1mv0)[2], (*l1mv1)[2];
990 const int8_t *l1ref0, *l1ref1;
991 const int is_b8x8 = IS_8X8(*mb_type);
992 unsigned int sub_mb_type;
995 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* ---- locate the co-located MB and decide sub_mb_type across field/frame combinations ---- */
997 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
998 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
999 int cur_poc = s->current_picture_ptr->poc;
1000 int *col_poc = h->ref_list[1]->field_poc;
/* pick the co-located field whose POC is closer to the current picture */
1001 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1002 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1004 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1005 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1006 mb_xy += s->mb_stride*fieldoff;
1009 }else{ // AFL/AFR/FR/FL -> AFR/FR
1010 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1011 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1012 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1013 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1016 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1017 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1018 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 }else{ // AFR/FR -> AFR/FR
1029 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1030 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1031 /* FIXME save sub mb types from previous frames (or derive from MVs)
1032 * so we know exactly what block size to use */
1033 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1034 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1035 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1036 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1037 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* ---- pointers into the co-located picture's MV and ref-index planes ---- */
1045 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1046 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1047 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1048 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1051 l1ref0 += h->b8_stride;
1052 l1ref1 += h->b8_stride;
1053 l1mv0 += 2*b4_stride;
1054 l1mv1 += 2*b4_stride;
/* ---- spatial direct prediction ---- */
1058 if(h->direct_spatial_mv_pred){
1063 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1065 /* ref = min(neighbors) */
1066 for(list=0; list<2; list++){
1067 int refa = h->ref_cache[list][scan8[0] - 1];
1068 int refb = h->ref_cache[list][scan8[0] - 8];
1069 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1070 if(refc == PART_NOT_AVAILABLE)
1071 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare so negative (unavailable) neighbors lose the min */
1072 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1077 if(ref[0] < 0 && ref[1] < 0){
1078 ref[0] = ref[1] = 0;
1079 mv[0][0] = mv[0][1] =
1080 mv[1][0] = mv[1][1] = 0;
1082 for(list=0; list<2; list++){
1084 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1086 mv[list][0] = mv[list][1] = 0;
/* drop the unused list from the MB/sub-MB types */
1092 *mb_type &= ~MB_TYPE_L1;
1093 sub_mb_type &= ~MB_TYPE_L1;
1094 }else if(ref[0] < 0){
1096 *mb_type &= ~MB_TYPE_L0;
1097 sub_mb_type &= ~MB_TYPE_L0;
/* field/frame mismatch with the co-located MB: per-8x8 handling */
1100 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1101 for(i8=0; i8<4; i8++){
1104 int xy8 = x8+y8*b8_stride;
1105 int xy4 = 3*x8+y8*b4_stride;
1108 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1110 h->sub_mb_type[i8] = sub_mb_type;
1112 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero MVs where the co-located block is (near-)static on ref 0 */
1114 if(!IS_INTRA(mb_type_col[y8])
1115 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1116 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1118 a= pack16to32(mv[0][0],mv[0][1]);
1120 b= pack16to32(mv[1][0],mv[1][1]);
1122 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1126 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1128 }else if(IS_16X16(*mb_type)){
1131 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1132 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1133 if(!IS_INTRA(mb_type_col[0])
1134 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1135 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <=33 had a different list-1 zeroing rule; gate on build id */
1136 && (h->x264_build>33 || !h->x264_build)))){
1138 a= pack16to32(mv[0][0],mv[0][1]);
1140 b= pack16to32(mv[1][0],mv[1][1]);
1142 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1146 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1148 for(i8=0; i8<4; i8++){
1149 const int x8 = i8&1;
1150 const int y8 = i8>>1;
1152 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1154 h->sub_mb_type[i8] = sub_mb_type;
1156 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1158 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1159 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1162 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1163 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1164 && (h->x264_build>33 || !h->x264_build)))){
1165 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1166 if(IS_SUB_8X8(sub_mb_type)){
1167 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1168 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1170 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1172 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 for(i4=0; i4<4; i4++){
1176 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1177 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1179 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1181 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction ---- */
1187 }else{ /* direct temporal mv pred */
1188 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1189 const int *dist_scale_factor = h->dist_scale_factor;
1192 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1193 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1194 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1195 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1197 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* field MB over frame co-located (or vice versa): scale y by 2 */
1200 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1201 /* FIXME assumes direct_8x8_inference == 1 */
1202 int y_shift = 2*!IS_INTERLACED(*mb_type);
1204 for(i8=0; i8<4; i8++){
1205 const int x8 = i8&1;
1206 const int y8 = i8>>1;
1208 const int16_t (*l1mv)[2]= l1mv0;
1210 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1212 h->sub_mb_type[i8] = sub_mb_type;
1214 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 if(IS_INTRA(mb_type_col[y8])){
1216 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1217 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1218 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1222 ref0 = l1ref0[x8 + y8*b8_stride];
1224 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1226 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1229 scale = dist_scale_factor[ref0];
1230 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1233 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1234 int my_col = (mv_col[1]<<y_shift)/2;
/* L0 = scaled colocated MV; L1 = L0 - colocated MV */
1235 int mx = (scale * mv_col[0] + 128) >> 8;
1236 int my = (scale * my_col + 128) >> 8;
1237 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1238 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1244 /* one-to-one mv scaling */
1246 if(IS_16X16(*mb_type)){
1249 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1250 if(IS_INTRA(mb_type_col[0])){
1253 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1254 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1255 const int scale = dist_scale_factor[ref0];
1256 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1258 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1259 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1261 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1262 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1264 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1265 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1266 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1268 for(i8=0; i8<4; i8++){
1269 const int x8 = i8&1;
1270 const int y8 = i8>>1;
1272 const int16_t (*l1mv)[2]= l1mv0;
1274 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1276 h->sub_mb_type[i8] = sub_mb_type;
1277 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col[0])){
1279 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1280 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1281 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1287 ref0 = map_col_to_list0[0][ref0];
1289 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1292 scale = dist_scale_factor[ref0];
1294 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1295 if(IS_SUB_8X8(sub_mb_type)){
1296 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1297 int mx = (scale * mv_col[0] + 128) >> 8;
1298 int my = (scale * mv_col[1] + 128) >> 8;
1299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1302 for(i4=0; i4<4; i4++){
1303 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1304 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1305 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1306 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1307 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1308 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion data from the decode caches (h->mv_cache,
 * h->ref_cache, h->mvd_cache) back into the frame-wide arrays of
 * s->current_picture (motion_val, ref_index) and h->mvd_table /
 * h->direct_table, at the 4x4 (b_xy) and 8x8 (b8_xy) granularities.
 */
1315 static inline void write_back_motion(H264Context *h, int mb_type){
1316 MpegEncContext * const s = &h->s;
1317 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1318 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* mark list 0 unused so deblocking/prediction see a consistent state */
1321 if(!USES_LIST(mb_type, 0))
1322 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1324 for(list=0; list<h->list_count; list++){
1326 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two uint64 halves) per row from the cache */
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC also needs the MV differences stored for context modelling */
1333 if( h->pps.cabac ) {
1334 if(IS_SKIP(mb_type))
1335 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1338 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1344 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1345 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1346 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1347 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1348 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct flags per 8x8 partition, used by B-slice CABAC contexts */
1352 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1353 if(IS_8X8(mb_type)){
1354 uint8_t *direct_table = &h->direct_table[b8_xy];
1355 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1356 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1357 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1363 * Decodes a network abstraction layer unit.
1364 * @param consumed is the number of bytes used as input
1365 * @param length is the length of the array
1366 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1367 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Strips the NAL header byte (setting h->nal_ref_idc / h->nal_unit_type)
 * and removes Annex-B emulation-prevention 0x03 escape bytes
 * (00 00 03 xx -> 00 00 xx).  If no escape byte is present the input
 * buffer is returned directly; otherwise the unescaped RBSP is written to
 * h->rbsp_buffer[] (grown with av_fast_realloc).
 */
1369 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1374 // src[0]&0x80; //forbidden bit
1375 h->nal_ref_idc= src[0]>>5;
1376 h->nal_unit_type= src[0]&0x1F;
1380 for(i=0; i<length; i++)
1381 printf("%2X ", src[i]);
/* fast scan for a 00 00 pattern, 8 or 4 bytes at a time */
1384 #if HAVE_FAST_UNALIGNED
1385 # if HAVE_FAST_64BIT
1387 for(i=0; i+1<length; i+=9){
1388 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1391 for(i=0; i+1<length; i+=5){
1392 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1395 if(i>0 && !src[i]) i--;
1399 for(i=0; i+1<length; i+=2){
1400 if(src[i]) continue;
1401 if(i>0 && src[i-1]==0) i--;
1403 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1405 /* startcode, so we must be past the end */
1413 if(i>=length-1){ //no escaped 0
1414 *dst_length= length;
1415 *consumed= length+1; //+1 for the header
1419 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1420 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1421 dst= h->rbsp_buffer[bufidx];
1427 //printf("decoding esc\n");
/* bulk-copy the escape-free prefix, then copy the rest byte by byte */
1428 memcpy(dst, src, i);
1431 //remove escapes (very rare 1:2^22)
1433 dst[di++]= src[si++];
1434 dst[di++]= src[si++];
1435 }else if(src[si]==0 && src[si+1]==0){
1436 if(src[si+2]==3){ //escape
1441 }else //next start code
1445 dst[di++]= src[si++];
1448 dst[di++]= src[si++];
/* zero padding so over-reading bitstream readers stay in bounds */
1451 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1454 *consumed= si + 1;//+1 for the header
1455 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1460 * identifies the exact end of the bitstream
1461 * @return the length of the trailing, or 0 if damaged
/* Scans backwards from the last byte for the rbsp_stop_one_bit.
 * NOTE(review): most of the body falls in a gap of this extract. */
1463 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1467 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1477 * IDCT transforms the 16 dc values and dequantizes them.
1478 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform of the luma DC coefficients (rows then
 * columns via temp[]), followed by dequantization with qmul.  The offset
 * tables map the 4x4 DC grid onto the DC positions of the 16 4x4 blocks
 * inside the macroblock's coefficient array. */
1480 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1483 int temp[16]; //FIXME check if this is a good idea
1484 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1485 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1487 //memset(block, 64, 2*256);
/* horizontal butterflies into temp[] */
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterflies, then scale by qmul with rounding (+128 >> 8) */
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1509 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1510 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1511 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1512 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1518 * DCT transforms the 16 dc values.
1519 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the luma DC values (encoder path);
 * same butterfly structure as the inverse above but with a >>1 at the
 * output instead of dequantization. */
1521 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1522 // const int qmul= dequant_coeff[qp][0];
1524 int temp[16]; //FIXME check if this is a good idea
1525 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1526 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal butterflies into temp[] */
1529 const int offset= y_offset[i];
1530 const int z0= block[offset+stride*0] + block[offset+stride*4];
1531 const int z1= block[offset+stride*0] - block[offset+stride*4];
1532 const int z2= block[offset+stride*1] - block[offset+stride*5];
1533 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterflies, halved on output */
1542 const int offset= x_offset[i];
1543 const int z0= temp[4*0+i] + temp[4*2+i];
1544 const int z1= temp[4*0+i] - temp[4*2+i];
1545 const int z2= temp[4*1+i] - temp[4*3+i];
1546 const int z3= temp[4*1+i] + temp[4*3+i];
1548 block[stride*0 +offset]= (z0 + z3)>>1;
1549 block[stride*2 +offset]= (z1 + z2)>>1;
1550 block[stride*8 +offset]= (z1 - z2)>>1;
1551 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 Hadamard inverse transform + dequantization of the chroma DC values.
 * The four DC coefficients sit at the DC positions of the two chroma 4x4
 * blocks (hence the stride/xStride addressing into the shared array).
 * NOTE(review): the butterfly lines between load and store fall in a gap
 * of this extract.
 */
1559 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1560 const int stride= 16*2;
1561 const int xStride= 16;
1564 a= block[stride*0 + xStride*0];
1565 b= block[stride*0 + xStride*1];
1566 c= block[stride*1 + xStride*0];
1567 d= block[stride*1 + xStride*1];
1574 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1575 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1576 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1577 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC values (encoder path);
 * mirror of chroma_dc_dequant_idct_c without the qmul scaling.
 * NOTE(review): the butterfly lines between load and store fall in a gap
 * of this extract.
 */
1581 static void chroma_dc_dct_c(DCTELEM *block){
1582 const int stride= 16*2;
1583 const int xStride= 16;
1586 a= block[stride*0 + xStride*0];
1587 b= block[stride*0 + xStride*1];
1588 c= block[stride*1 + xStride*0];
1589 d= block[stride*1 + xStride*1];
1596 block[stride*0 + xStride*0]= (a+c);
1597 block[stride*0 + xStride*1]= (e+b);
1598 block[stride*1 + xStride*0]= (a-c);
1599 block[stride*1 + xStride*1]= (e-b);
1604 * gets the chroma qp.
/* Simple table lookup: the PPS pre-computes the luma->chroma QP mapping
 * per chroma plane index t. */
1606 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1607 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one partition in one direction (one list):
 * computes the quarter-pel luma / eighth-pel chroma source position from
 * the cached MV, falls back to ff_emulated_edge_mc() when the reference
 * area extends outside the picture, and applies the given qpel/chroma
 * MC functions into dest_y/dest_cb/dest_cr.
 */
1610 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1611 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1612 int src_x_offset, int src_y_offset,
1613 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1614 MpegEncContext * const s = &h->s;
/* MV in quarter-pel units, offset to this partition's position */
1615 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1616 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1617 const int luma_xy= (mx&3) + ((my&3)<<2);
1618 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1619 uint8_t * src_cb, * src_cr;
1620 int extra_width= h->emu_edge_width;
1621 int extra_height= h->emu_edge_height;
1623 const int full_mx= mx>>2;
1624 const int full_my= my>>2;
1625 const int pic_width = 16*s->mb_width;
1626 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* subpel interpolation reads 3 extra border pixels */
1628 if(mx&7) extra_width -= 3;
1629 if(my&7) extra_height -= 3;
1631 if( full_mx < 0-extra_width
1632 || full_my < 0-extra_height
1633 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1634 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1635 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1636 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1640 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* non-square partitions do a second call shifted by delta */
1642 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1645 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1648 // chroma offset when predicting from a field of opposite parity
1649 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1650 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1652 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1656 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1657 src_cb= s->edge_emu_buffer;
1659 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1662 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1663 src_cr= s->edge_emu_buffer;
1665 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: list-0 prediction is
 * written with the `put` functions, and when list 1 is also used, its
 * prediction is combined with the `avg` functions (simple bi-prediction
 * average).
 */
1668 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1669 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1670 int x_offset, int y_offset,
1671 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1672 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1673 int list0, int list1){
1674 MpegEncContext * const s = &h->s;
1675 qpel_mc_func *qpix_op= qpix_put;
1676 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to this partition, then make offsets absolute */
1678 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1679 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1680 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1681 x_offset += 8*s->mb_x;
1682 y_offset += 8*(s->mb_y >> MB_FIELD);
1685 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
/* second direction averages on top of the first */
1691 chroma_op= chroma_avg;
1695 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1696 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1697 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1698 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition.  Bidirectional blocks
 * render list 1 into a scratchpad and blend with the biweight functions
 * (implicit weights when use_weight==2, explicit per-list weights/offsets
 * otherwise); unidirectional blocks apply the per-list weight in place.
 */
1702 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1703 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1704 int x_offset, int y_offset,
1705 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1706 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1707 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1708 int list0, int list1){
1709 MpegEncContext * const s = &h->s;
1711 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1712 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1713 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1714 x_offset += 8*s->mb_x;
1715 y_offset += 8*(s->mb_y >> MB_FIELD);
1718 /* don't optimize for luma-only case, since B-frames usually
1719 * use implicit weights => chroma too. */
1720 uint8_t *tmp_cb = s->obmc_scratchpad;
1721 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1722 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1723 int refn0 = h->ref_cache[0][ scan8[n] ];
1724 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list 0 directly into dest, list 1 into the scratch buffers */
1726 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1727 dest_y, dest_cb, dest_cr,
1728 x_offset, y_offset, qpix_put, chroma_put);
1729 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1730 tmp_y, tmp_cb, tmp_cr,
1731 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights sum to 64, log2 denom 5 */
1733 if(h->use_weight == 2){
1734 int weight0 = h->implicit_weight[refn0][refn1];
1735 int weight1 = 64 - weight0;
1736 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1737 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1740 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1741 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1742 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1743 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1745 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1746 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1747 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1748 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* unidirectional: predict, then weight in place */
1751 int list = list1 ? 1 : 0;
1752 int refn = h->ref_cache[list][ scan8[n] ];
1753 Picture *ref= &h->ref_list[list][refn];
1754 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1755 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1756 qpix_put, chroma_put);
1758 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1759 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1760 if(h->use_weight_chroma){
1761 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1762 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1763 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1764 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation:
 * weighted MC is taken for explicit weighting (use_weight==1) or for
 * implicit bidirectional weighting whose weight differs from the plain
 * 32/32 average; everything else goes through mc_part_std().
 */
1769 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1770 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1771 int x_offset, int y_offset,
1772 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1773 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1774 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1775 int list0, int list1){
1776 if((h->use_weight==2 && list0 && list1
1777 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1778 || h->use_weight==1)
1779 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1780 x_offset, y_offset, qpix_put, chroma_put,
1781 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1783 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1784 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1787 static inline void prefetch_motion(H264Context *h, int list){
1788 /* fetch pixels for estimated mv 4 macroblocks ahead
1789 * optimized for 64byte cache lines */
1790 MpegEncContext * const s = &h->s;
1791 const int refn = h->ref_cache[list][scan8[0]];
/* integer-pel position this MB's MV points at, shifted right by 4 MBs */
1793 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1794 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1795 uint8_t **src= h->ref_list[list][refn].data;
1796 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1797 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* both chroma planes are contiguous: one prefetch, stride = cr - cb */
1798 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1799 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for an entire inter macroblock: decodes
 * the partition layout from mb_type (16x16 / 16x8 / 8x16 / 8x8 with
 * sub-partitions) and issues one mc_part() call per partition with the
 * matching qpel/chroma/weight function variants.  Prefetches the list-0
 * reference before and the list-1 reference after the work.
 */
1803 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1805 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1806 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1807 MpegEncContext * const s = &h->s;
1808 const int mb_xy= h->mb_xy;
1809 const int mb_type= s->current_picture.mb_type[mb_xy];
1811 assert(IS_INTER(mb_type));
1813 prefetch_motion(h, 0);
1815 if(IS_16X16(mb_type)){
1816 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1817 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1818 &weight_op[0], &weight_avg[0],
1819 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1820 }else if(IS_16X8(mb_type)){
1821 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1822 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1823 &weight_op[1], &weight_avg[1],
1824 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1825 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1826 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1827 &weight_op[1], &weight_avg[1],
1828 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1829 }else if(IS_8X16(mb_type)){
1830 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1831 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832 &weight_op[2], &weight_avg[2],
1833 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1834 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1835 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1836 &weight_op[2], &weight_avg[2],
1837 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8: each quadrant carries its own sub-partitioning */
1841 assert(IS_8X8(mb_type));
1844 const int sub_mb_type= h->sub_mb_type[i];
1846 int x_offset= (i&1)<<2;
1847 int y_offset= (i&2)<<1;
1849 if(IS_SUB_8X8(sub_mb_type)){
1850 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1851 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1852 &weight_op[3], &weight_avg[3],
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 }else if(IS_SUB_8X4(sub_mb_type)){
1855 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1856 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1857 &weight_op[4], &weight_avg[4],
1858 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1859 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1860 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1861 &weight_op[4], &weight_avg[4],
1862 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863 }else if(IS_SUB_4X8(sub_mb_type)){
1864 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1865 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1866 &weight_op[5], &weight_avg[5],
1867 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1868 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870 &weight_op[5], &weight_avg[5],
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1874 assert(IS_SUB_4X4(sub_mb_type));
1876 int sub_x_offset= x_offset + 2*(j&1);
1877 int sub_y_offset= y_offset + (j&2);
1878 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1879 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1880 &weight_op[6], &weight_avg[6],
1881 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1887 prefetch_motion(h, 1);
/**
 * Precomputes the CAVLC level decoding table: for every suffix length and
 * every LEVEL_TAB_BITS-bit bitstream prefix it stores the decoded level
 * and the number of bits consumed.  Entries whose codeword does not fit in
 * LEVEL_TAB_BITS get an escape marker (value offset by +100) so the slow
 * path can take over.
 */
1890 static av_cold void init_cavlc_level_tab(void){
1891 int suffix_length, mask;
1894 for(suffix_length=0; suffix_length<7; suffix_length++){
1895 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* unary prefix length = number of leading zero bits in the index */
1896 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1897 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* map the unsigned code to a signed level (zig-zag) */
1899 mask= -(level_code&1);
1900 level_code= (((2+level_code)>>1) ^ mask) - mask;
1901 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1902 cavlc_level_tab[suffix_length][i][0]= level_code;
1903 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1904 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1905 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1906 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1908 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1909 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * One-time initialization of all CAVLC VLC tables.
 * Each VLC is backed by a statically allocated table (INIT_VLC_USE_NEW_STATIC),
 * so no dynamic allocation happens here.
 */
1915 static av_cold void decode_init_vlc(void){
1916     static int done = 0;
/* chroma DC coeff_token: 4*5 symbols */
1923         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1924         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1925         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1926                  &chroma_dc_coeff_token_len [0], 1, 1,
1927                  &chroma_dc_coeff_token_bits[0], 1, 1,
1928                  INIT_VLC_USE_NEW_STATIC);
/* luma coeff_token: 4 tables packed back-to-back in coeff_token_vlc_tables */
1932             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1933             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1934             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1935                      &coeff_token_len [i][0], 1, 1,
1936                      &coeff_token_bits[i][0], 1, 1,
1937                      INIT_VLC_USE_NEW_STATIC);
1938             offset += coeff_token_vlc_tables_size[i];
/*
 * This is a one time safety check to make sure that
 * the packed static coeff_token_vlc table sizes
 * were initialized correctly.
 */
1945         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* chroma DC total_zeros: one table per (total_coeff-1), 3 tables */
1948             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1949             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1950             init_vlc(&chroma_dc_total_zeros_vlc[i],
1951                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1952                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1953                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1954                      INIT_VLC_USE_NEW_STATIC);
/* luma total_zeros: 15 tables, one per total_coeff value */
1956         for(i=0; i<15; i++){
1957             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1958             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1959             init_vlc(&total_zeros_vlc[i],
1960                      TOTAL_ZEROS_VLC_BITS, 16,
1961                      &total_zeros_len [i][0], 1, 1,
1962                      &total_zeros_bits[i][0], 1, 1,
1963                      INIT_VLC_USE_NEW_STATIC);
/* run_before: 6 small tables for zeros_left = 1..6 */
1967             run_vlc[i].table = run_vlc_tables[i];
1968             run_vlc[i].table_allocated = run_vlc_tables_size;
1969             init_vlc(&run_vlc[i],
1971                      &run_len [i][0], 1, 1,
1972                      &run_bits[i][0], 1, 1,
1973                      INIT_VLC_USE_NEW_STATIC);
/* run_before for zeros_left > 6 uses a separate, larger table */
1975         run7_vlc.table = run7_vlc_table,
1976         run7_vlc.table_allocated = run7_vlc_table_size;
1977         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1978                  &run_len [6][0], 1, 1,
1979                  &run_bits[6][0], 1, 1,
1980                  INIT_VLC_USE_NEW_STATIC);
1982         init_cavlc_level_tab();
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread scratch buffers of every slice thread context.
 * av_freep() NULLs the pointers, so this is safe to call on a partially
 * allocated context.
 */
1986 static void free_tables(H264Context *h){
1989     av_freep(&h->intra4x4_pred_mode);
1990     av_freep(&h->chroma_pred_mode_table);
1991     av_freep(&h->cbp_table);
1992     av_freep(&h->mvd_table[0]);
1993     av_freep(&h->mvd_table[1]);
1994     av_freep(&h->direct_table);
1995     av_freep(&h->non_zero_count);
1996     av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; invalidate it too */
1997     h->slice_table= NULL;
1999     av_freep(&h->mb2b_xy);
2000     av_freep(&h->mb2b8_xy);
/* per-thread buffers (see context_init / frame_start) */
2002     for(i = 0; i < h->s.avctx->thread_count; i++) {
2003         hx = h->thread_context[i];
2005         av_freep(&hx->top_borders[1]);
2006         av_freep(&hx->top_borders[0]);
2007         av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization tables for all 52 QP values from the PPS
 * scaling matrices.  If the intra and inter scaling matrices are identical,
 * both dequant8_coeff entries share one buffer.  The coefficient layout is
 * transposed when the platform IDCT is not the C reference implementation.
 */
2011 static void init_dequant8_coeff_table(H264Context *h){
2013     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2014     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2015     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2017     for(i=0; i<2; i++ ){
/* share the buffer if intra and inter 8x8 scaling matrices match */
2018         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2019             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2023         for(q=0; q<52; q++){
2024             int shift = div6[q];
/* entry = base dequant coeff * scaling matrix value, scaled by qp/6 */
2027                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2028                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2029                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables for all 52 QP values from the PPS
 * scaling matrices.  Each of the 6 matrices reuses an earlier buffer when
 * its scaling matrix is identical to a previous one.  Layout is transposed
 * when the platform 4x4 IDCT is not the C reference implementation.
 */
2034 static void init_dequant4_coeff_table(H264Context *h){
2036     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2037     for(i=0; i<6; i++ ){
2038         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* deduplicate: reuse buffer j if its scaling matrix equals matrix i */
2040             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2041                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2048         for(q=0; q<52; q++){
2049             int shift = div6[q] + 2;
/* entry = base dequant coeff * scaling matrix value, scaled by qp/6 */
2052                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2053                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2054                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initialize all dequantization tables.  The 8x8 tables are only built when
 * the PPS enables 8x8 transforms.  With lossless transform bypass, QP 0
 * entries are forced to the neutral value 1<<6 (i.e. no dequantization).
 */
2059 static void init_dequant_tables(H264Context *h){
2061     init_dequant4_coeff_table(h);
2062     if(h->pps.transform_8x8_mode)
2063         init_dequant8_coeff_table(h);
2064     if(h->sps.transform_bypass){
/* neutral scale so bypass blocks pass through unchanged */
2067                 h->dequant4_coeff[i][0][x] = 1<<6;
2068         if(h->pps.transform_8x8_mode)
2071                     h->dequant8_coeff[i][0][x] = 1<<6;
 * Allocate per-stream decoding tables; needs width/height (mb_stride and
 * mb_height must be valid).  Returns 0 on success; on allocation failure
 * CHECKED_ALLOCZ jumps to the (not visible here) fail path.
2080 static int alloc_tables(H264Context *h){
2081     MpegEncContext * const s = &h->s;
/* one extra macroblock row as border for neighbour accesses */
2082     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2085     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2087     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2088     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2089     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2091     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2092     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2093     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2094     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset into the base so that
 * out-of-picture neighbour lookups land on valid (-1) entries */
2096     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2097     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock index -> motion-vector block index lookup tables */
2099     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2100     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2101     for(y=0; y<s->mb_height; y++){
2102         for(x=0; x<s->mb_width; x++){
2103             const int mb_xy= x + y*s->mb_stride;
2104             const int b_xy = 4*x + 4*y*h->b_stride;
2105             const int b8_xy= 2*x + 2*y*h->b8_stride;
2107             h->mb2b_xy [mb_xy]= b_xy;
2108             h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start(), once linesize is known */
2112     s->obmc_scratchpad = NULL;
2114     if(!h->dequant4_coeff[0])
2115         init_dequant_tables(h);
 * Mimic alloc_tables(), but for every context thread: share the big tables
 * allocated by the master context instead of reallocating them.  Only the
 * per-thread scratchpad stays private (NULLed here, allocated later).
2126 static void clone_tables(H264Context *dst, H264Context *src){
2127     dst->intra4x4_pred_mode     = src->intra4x4_pred_mode;
2128     dst->non_zero_count         = src->non_zero_count;
2129     dst->slice_table            = src->slice_table;
2130     dst->cbp_table              = src->cbp_table;
2131     dst->mb2b_xy                = src->mb2b_xy;
2132     dst->mb2b8_xy               = src->mb2b8_xy;
2133     dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2134     dst->mvd_table[0]           = src->mvd_table[0];
2135     dst->mvd_table[1]           = src->mvd_table[1];
2136     dst->direct_table           = src->direct_table;
/* per-thread buffer; allocated in frame_start() when linesize is known */
2138     dst->s.obmc_scratchpad = NULL;
2139     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
 * Allocate buffers which are not shared amongst multiple threads.
 * Each top_borders row stores 16 luma + 8 + 8 chroma pixels per macroblock.
2146 static int context_init(H264Context *h){
2147     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2152     return -1; // free_tables will clean up for us
/**
 * Initialization common to the H.264 decoder and related codecs (e.g. SVQ3):
 * basic MpegEncContext fields, prediction functions, DSP functions, and
 * flat (all-16) default scaling matrices.
 */
2155 static av_cold void common_init(H264Context *h){
2156     MpegEncContext * const s = &h->s;
2158     s->width = s->avctx->width;
2159     s->height = s->avctx->height;
2160     s->codec_id= s->avctx->codec->id;
2162     ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1: no PPS-derived dequant tables cached yet */
2164     h->dequant_coeff_pps= -1;
2165     s->unrestricted_mv=1;
2166     s->decode=1; //FIXME
2168     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* default flat scaling matrices (value 16 == unity scaling) */
2170     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2171     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up the H264Context, pick the output pixel
 * format, and handle avcC-style (length-prefixed) extradata if present.
 */
2174 static av_cold int decode_init(AVCodecContext *avctx){
2175     H264Context *h= avctx->priv_data;
2176     MpegEncContext * const s = &h->s;
2178     MPV_decode_defaults(s);
2183     s->out_format = FMT_H264;
2184     s->workaround_bugs= avctx->workaround_bugs;
2187 //    s->decode_mb= ff_h263_decode_mb;
2188     s->quarter_sample = 1;
/* pixel format: SVQ3 uses full-range 4:2:0, VDPAU uses its hwaccel format */
2191     if(avctx->codec_id == CODEC_ID_SVQ3)
2192         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2193     else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2194         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2196         avctx->pix_fmt= PIX_FMT_YUV420P;
/* leading byte 1 indicates avcC (MP4-style) extradata rather than Annex B */
2200     if(avctx->extradata_size > 0 && avctx->extradata &&
2201        *(char *)avctx->extradata == 1){
2208     h->thread_context[0] = h;
/* sentinel initial values so the first real POC always compares correctly */
2209     h->outputed_poc = INT_MIN;
2210     h->prev_poc_msb= 1<<16;
2211     h->sei_recovery_frame_cnt = -1;
2212     h->sei_dpb_output_delay = 0;
2213     h->sei_cpb_removal_delay = -1;
/**
 * Per-frame setup: start the MPV frame, compute block offsets for the
 * current linesizes, allocate per-thread scratchpads, and reset state that
 * MPV_frame_start set incorrectly for H.264 semantics.
 */
2217 static int frame_start(H264Context *h){
2218     MpegEncContext * const s = &h->s;
2221     if(MPV_frame_start(s, s->avctx) < 0)
2223     ff_er_frame_start(s);
/*
 * MPV_frame_start uses pict_type to derive key_frame.
 * This is incorrect for H.264; IDR markings must be used.
 * Zero here; IDR markings per slice in frame or fields are ORed in later.
 * See decode_nal_units().
 */
2230     s->current_picture_ptr->key_frame= 0;
2232     assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-coded offsets; [24..47]: field-coded (doubled
 * vertical stride) offsets, for luma then the two chroma planes */
2234     for(i=0; i<16; i++){
2235         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2236         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2239         h->block_offset[16+i]=
2240         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2241         h->block_offset[24+16+i]=
2242         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
2247     for(i = 0; i < s->avctx->thread_count; i++)
2248         if(!h->thread_context[i]->s.obmc_scratchpad)
2249             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
    /* some macroblocks will be accessed before they're available */
2252     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2253         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2255 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* We mark the current picture as non-reference after allocating it, so
 * that if we break out due to an error it can be released automatically
 * in the next MPV_frame_start().
 * SVQ3 as well as most other codecs have only last/next/current and thus
 * get released even with set reference, besides SVQ3 and others do not
 * mark frames as reference later "naturally". */
2263     if(s->codec_id != CODEC_ID_SVQ3)
2264         s->current_picture_ptr->reference= 0;
/* INT_MAX: POCs not known yet; filled in when the slice headers are parsed */
2266     s->current_picture_ptr->field_poc[0]=
2267     s->current_picture_ptr->field_poc[1]= INT_MAX;
2268     assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row and right-edge samples of the just-decoded macroblock
 * into top_borders[]/left_border[] so the deblocking filter and intra
 * prediction of neighbouring macroblocks can use the unfiltered values.
 * The MBAFF path (simple==0, FRAME_MBAFF) keeps two saved lines per MB pair.
 */
2273 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2274     MpegEncContext * const s = &h->s;
2283         src_cb -= uvlinesize;
2284         src_cr -= uvlinesize;
2286     if(!simple && FRAME_MBAFF){
/* offsets into left_border differ for the top/bottom MB of a pair */
2288             offset = MB_MBAFF ? 1 : 17;
2289             uvoffset= MB_MBAFF ? 1 : 9;
/* save the last line of the MB pair (luma row 15, chroma row 7) */
2291             *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2292             *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2293             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2294                 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2295                 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2300             h->left_border[0]= h->top_borders[0][s->mb_x][15];
2301             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2302                 h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2303                 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2309         top_idx = MB_MBAFF ? 0 : 1;
2311     step= MB_MBAFF ? 2 : 1;
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
2316     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
/* copy the right-edge column into left_border for the next MB */
2317     for(i=1; i<17 - skiplast; i++){
2318         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2321     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2322     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2324     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2325         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2326         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2327         for(i=1; i<9 - skiplast; i++){
2328             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2329             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2331         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2332         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchange (xchg!=0) or restore the border samples of the current
 * macroblock with the saved unfiltered values, so intra prediction sees
 * pre-deblocking neighbour pixels.  Called with xchg=1 before intra
 * prediction and xchg=0 afterwards.
 */
2336 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2337     MpegEncContext * const s = &h->s;
2348     if(!simple && FRAME_MBAFF){
2350             offset = MB_MBAFF ? 1 : 17;
2351             uvoffset= MB_MBAFF ? 1 : 9;
2355         top_idx = MB_MBAFF ? 0 : 1;
2357     step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: only filter inside the slice, so check that the
 * neighbours belong to the same slice */
2360     if(h->deblocking_filter == 2) {
2362         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2363         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2365         deblock_left = (s->mb_x > 0);
2366         deblock_top =  (s->mb_y > !!MB_FIELD);
/* move to the pixel up-left of the MB, where the borders start */
2369     src_y  -=   linesize + 1;
2370     src_cb -= uvlinesize + 1;
2371     src_cr -= uvlinesize + 1;
2373 #define XCHG(a,b,t,xchg)\
/* left border: swap only when deblocking actually crosses this edge */
2380         for(i = !deblock_top; i<16; i++){
2381             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2383         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
/* top border (two 8-byte halves); +1 skips the corner sample */
2387         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2388         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2389         if(s->mb_x+1 < s->mb_width){
2390             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2394     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2396             for(i = !deblock_top; i<8; i++){
2397                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2398                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2400             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2401             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2404             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2405             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * inverse transform + residual add, then deblocking-related bookkeeping.
 * @param simple  when 1, the function is specialized (via av_always_inline)
 *                for the common case: no MBAFF, no gray mode, no SVQ3, no
 *                lossless bypass, no intra PCM.
 */
2410 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2411     MpegEncContext * const s = &h->s;
2412     const int mb_x= s->mb_x;
2413     const int mb_y= s->mb_y;
2414     const int mb_xy= h->mb_xy;
2415     const int mb_type= s->current_picture.mb_type[mb_xy];
2416     uint8_t  *dest_y, *dest_cb, *dest_cr;
2417     int linesize, uvlinesize /*dct_offset*/;
2419     int *block_offset = &h->block_offset[0];
2420     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
2422     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2423     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2424     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers for this MB in the current picture */
2426     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2427     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2428     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2430     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2431     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the strides and use the field block offsets */
2433     if (!simple && MB_FIELD) {
2434         linesize   = h->mb_linesize   = s->linesize * 2;
2435         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2436         block_offset = &h->block_offset[24];
2437         if(mb_y&1){ //FIXME move out of this function?
2438             dest_y -= s->linesize*15;
2439             dest_cb-= s->uvlinesize*7;
2440             dest_cr-= s->uvlinesize*7;
/* rewrite ref indices so the parity bit encodes top/bottom field refs */
2444         for(list=0; list<h->list_count; list++){
2445             if(!USES_LIST(mb_type, list))
2447             if(IS_16X16(mb_type)){
2448                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2449                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2451                 for(i=0; i<16; i+=4){
2452                     int ref = h->ref_cache[list][scan8[i]];
2454                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2460         linesize   = h->mb_linesize   = s->linesize;
2461         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2462 //        dct_offset = s->linesize * 16;
/* intra PCM: raw samples were stored in h->mb; copy them out verbatim */
2465     if (!simple && IS_INTRA_PCM(mb_type)) {
2466         for (i=0; i<16; i++) {
2467             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2469         for (i=0; i<8; i++) {
2470             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2471             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2474         if(IS_INTRA(mb_type)){
/* swap in unfiltered borders so intra prediction is spec-conformant */
2475             if(h->deblocking_filter)
2476                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2478             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2479                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2480                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2483             if(IS_INTRA4x4(mb_type)){
2484                 if(simple || !s->encoding){
2485                     if(IS_8x8DCT(mb_type)){
2486                         if(transform_bypass){
2488                             idct_add = s->dsp.add_pixels8;
2490                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2491                             idct_add    = s->dsp.h264_idct8_add;
/* 8x8 intra: predict each 8x8 block, then add its residual */
2493                         for(i=0; i<16; i+=4){
2494                             uint8_t * const ptr= dest_y + block_offset[i];
2495                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2496                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2497                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2499                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2500                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2501                                                             (h->topright_samples_available<<i)&0x4000, linesize);
2503                                     if(nnz == 1 && h->mb[i*16])
2504                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2506                                         idct_add   (ptr, h->mb + i*16, linesize);
2511                     if(transform_bypass){
2513                         idct_add = s->dsp.add_pixels4;
2515                         idct_dc_add = s->dsp.h264_idct_dc_add;
2516                         idct_add    = s->dsp.h264_idct_add;
/* 4x4 intra: predict and add residual per 4x4 block in scan order */
2518                     for(i=0; i<16; i++){
2519                         uint8_t * const ptr= dest_y + block_offset[i];
2520                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2522                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2523                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2527                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2528                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2529                                 assert(mb_y || linesize <= block_offset[i]);
2530                                 if(!topright_avail){
/* replicate the last available top pixel when top-right is missing */
2531                                     tr= ptr[3 - linesize]*0x01010101;
2532                                     topright= (uint8_t*) &tr;
2534                                     topright= ptr + 4 - linesize;
2538                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2539                             nnz = h->non_zero_count_cache[ scan8[i] ];
2542                                     if(nnz == 1 && h->mb[i*16])
2543                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2545                                         idct_add   (ptr, h->mb + i*16, linesize);
2547                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* 16x16 intra: one full-MB luma prediction plus DC transform */
2554                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2556                     if(!transform_bypass)
2557                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2559                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2561             if(h->deblocking_filter)
2562                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation */
2564             hl_motion(h, dest_y, dest_cb, dest_cr,
2565                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2566                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2567                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for non-4x4-intra macroblocks */
2571         if(!IS_INTRA4x4(mb_type)){
2573                 if(IS_INTRA16x16(mb_type)){
2574                     if(transform_bypass){
2575                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2576                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2578                             for(i=0; i<16; i++){
2579                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2580                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2584                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2586                 }else if(h->cbp&15){
2587                     if(transform_bypass){
2588                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2589                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2590                         for(i=0; i<16; i+=di){
2591                             if(h->non_zero_count_cache[ scan8[i] ]){
2592                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2596                         if(IS_8x8DCT(mb_type)){
2597                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2599                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* SVQ3 path uses its own IDCT+dequant */
2604                 for(i=0; i<16; i++){
2605                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2606                         uint8_t * const ptr= dest_y + block_offset[i];
2607                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (cbp bits 4-5) */
2613         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2614             uint8_t *dest[2] = {dest_cb, dest_cr};
2615             if(transform_bypass){
2616                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2617                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2618                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2620                     idct_add = s->dsp.add_pixels4;
2621                     for(i=16; i<16+8; i++){
2622                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2623                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC uses its own 2x2 transform, per-plane chroma QP */
2627                     chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2628                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2630                     idct_add = s->dsp.h264_idct_add;
2631                     idct_dc_add = s->dsp.h264_idct_dc_add;
2632                     for(i=16; i<16+8; i++){
2633                         if(h->non_zero_count_cache[ scan8[i] ])
2634                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2635                         else if(h->mb[i*16])
2636                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2639                 for(i=16; i<16+8; i++){
2640                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2641                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2642                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2649     if(h->cbp || IS_INTRA(mb_type))
2650         s->dsp.clear_blocks(h->mb);
/* deblocking: save borders, refresh caches/QPs, then filter */
2652     if(h->deblocking_filter) {
2653         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2654         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2655         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2656         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2657         if (!simple && FRAME_MBAFF) {
2658             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2660             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2668 static void hl_decode_mb_simple(H264Context *h){
2669     hl_decode_mb_internal(h, 1);
 * Process a macroblock; this handles edge cases, such as interlacing.
2675 static void av_noinline hl_decode_mb_complex(H264Context *h){
2676     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple (fast) or complex path,
 * depending on the features the current macroblock actually needs.
 */
2679 static void hl_decode_mb(H264Context *h){
2680     MpegEncContext * const s = &h->s;
2681     const int mb_xy= h->mb_xy;
2682     const int mb_type= s->current_picture.mb_type[mb_xy];
/* CONFIG_SMALL builds only one code path to save binary size */
2683     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2686         hl_decode_mb_complex(h);
2687     else hl_decode_mb_simple(h);
/**
 * Turn a frame Picture into a single-field view in place: adjust the data
 * pointers/linesizes of all 4 planes to address only the requested parity
 * and set poc to the corresponding field POC.
 */
2690 static void pic_as_field(Picture *pic, const int parity){
2692     for (i = 0; i < 4; ++i) {
/* bottom field starts one line down; doubling linesize skips the other field */
2693         if (parity == PICT_BOTTOM_FIELD)
2694             pic->data[i] += pic->linesize[i];
2695         pic->reference = parity;
2696         pic->linesize[i] *= 2;
2698     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is a reference of the requested parity,
 * converting it to a field picture when parity is a single field.
 * @return whether src matched the requested parity (visible via `match`)
 */
2701 static int split_field_copy(Picture *dest, Picture *src,
2702                             int parity, int id_add){
2703     int match = !!(src->reference & parity);
2707         if(parity != PICT_FRAME){
2708             pic_as_field(dest, parity);
/* id_add distinguishes same-parity vs opposite-parity entries */
2710             dest->pic_id += id_add;
/**
 * Build part of a default reference list by interleaving pictures of the
 * selected parity (sel) with those of the opposite parity (sel^3), as the
 * spec requires for field reference lists.  pic_id is the long-term index
 * for long refs and frame_num for short refs.
 * @return number of entries written (via `index`, not visible here)
 */
2717 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0] scans for same-parity refs, i[1] for opposite-parity refs */
2721     while(i[0]<len || i[1]<len){
2722         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2724         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2727             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2728             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2731             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2732             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append to sorted[] the pictures from src[] whose POC is on the `dir` side
 * of `limit`, in ascending (dir==0) or descending (dir==1) POC order.
 * Implemented as a repeated selection of the best remaining POC.
 * @return the number of pictures appended (via out_i, not visible here)
 */
2739 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2744     best_poc= dir ? INT_MIN : INT_MAX;
2746     for(i=0; i<len; i++){
2747         const int poc= src[i]->poc;
/* pick the closest POC beyond the current limit, in the given direction */
2748         if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2750             sorted[out_i]= src[i];
2753     if(best_poc == (dir ? INT_MIN : INT_MAX))
2755     limit= sorted[out_i++]->poc - dir;
 * fills the default_ref_list: B slices sort short-term refs around the
 * current POC (past then future for list 0, reversed for list 1) followed
 * by long-term refs; P slices use frame_num order.  List 1 swaps its first
 * two entries if it would otherwise equal list 0.
2763 static int fill_default_ref_list(H264Context *h){
2764     MpegEncContext * const s = &h->s;
2767     if(h->slice_type_nos==FF_B_TYPE){
2768         Picture *sorted[32];
2773             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2775             cur_poc= s->current_picture_ptr->poc;
2777         for(list= 0; list<2; list++){
/* list 0: past refs first (descending), then future (ascending);
 * list 1: the opposite */
2778             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2779             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2781             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2782             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2785             if(len < h->ref_count[list])
2786                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* per spec: if both lists are identical and long enough, swap the first
 * two entries of list 1 */
2790         if(lens[0] == lens[1] && lens[1] > 1){
2791             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2793                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* P/SP slices: short-term refs (frame_num order) then long-term refs */
2796         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2797         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2799         if(len < h->ref_count[0])
2800             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2803     for (i=0; i<h->ref_count[0]; i++) {
2804         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2806     if(h->slice_type_nos==FF_B_TYPE){
2807         for (i=0; i<h->ref_count[1]; i++) {
2808             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2815 static void print_short_term(H264Context *h);
2816 static void print_long_term(H264Context *h);
 * Extract structure information about the picture described by pic_num in
 * the current decoding context (frame or field). Note that pic_num is
 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
 * @param pic_num picture number for which to extract structure information
 * @param structure one of PICT_XXX describing structure of picture
 *                      with pic_num
 * @return frame number (short term) or long term index of picture
 *         described by pic_num
2828 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2829     MpegEncContext * const s = &h->s;
2831     *structure = s->picture_structure;
        /* opposite field */
2835         *structure ^= PICT_FRAME;
/**
 * Parse the ref_pic_list_reordering() syntax and apply it: start from the
 * default reference lists and move the explicitly signalled pictures to the
 * front positions, per H.264 section 8.2.4.3.
 * @return 0 on success, -1 on bitstream errors
 */
2842 static int decode_ref_pic_list_reordering(H264Context *h){
2843     MpegEncContext * const s = &h->s;
2844     int list, index, pic_structure;
2846     print_short_term(h);
2849     for(list=0; list<h->list_count; list++){
/* start from the default list; reordering rearranges it in place */
2850         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2852         if(get_bits1(&s->gb)){
2853             int pred= h->curr_pic_num;
2855             for(index=0; ; index++){
2856                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2857                 unsigned int pic_id;
2859                 Picture *ref = NULL;
/* idc==3 terminates the reordering loop */
2861                 if(reordering_of_pic_nums_idc==3)
2864                 if(index >= h->ref_count[list]){
2865                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2869                 if(reordering_of_pic_nums_idc<3){
2870                     if(reordering_of_pic_nums_idc<2){
/* idc 0/1: short-term ref addressed by abs_diff_pic_num from pred */
2871                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2874                         if(abs_diff_pic_num > h->max_pic_num){
2875                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2879                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2880                         else                                pred+= abs_diff_pic_num;
2881                         pred &= h->max_pic_num - 1;
2883                         frame_num = pic_num_extract(h, pred, &pic_structure);
/* search the short-term list newest-first for a matching frame_num */
2885                         for(i= h->short_ref_count-1; i>=0; i--){
2886                             ref = h->short_ref[i];
2887                             assert(ref->reference);
2888                             assert(!ref->long_ref);
2890                                 ref->frame_num == frame_num &&
2891                                 (ref->reference & pic_structure)
/* idc 2: long-term ref addressed by long_term_pic_idx */
2899                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2901                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2904                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2907                         ref = h->long_ref[long_idx];
2908                         assert(!(ref && !ref->reference));
2909                         if(ref && (ref->reference & pic_structure)){
2910                             ref->pic_id= pic_id;
2911                             assert(ref->long_ref);
2919                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2920                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift entries down and insert the reordered picture at `index` */
2922                         for(i=index; i+1<h->ref_count[list]; i++){
2923                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2926                         for(; i > index; i--){
2927                             h->ref_list[list][i]= h->ref_list[list][i-1];
2929                         h->ref_list[list][index]= *ref;
2931                             pic_as_field(&h->ref_list[list][index], pic_structure);
2935                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* fill any remaining holes so every list entry is usable */
2941     for(list=0; list<h->list_count; list++){
2942         for(index= 0; index < h->ref_count[list]; index++){
2943             if(!h->ref_list[list][index].data[0]){
2944                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2945                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF decoding, derive per-field reference entries (at indices
 * 16+2*i and 16+2*i+1) from each frame reference, duplicating the
 * prediction weights accordingly.
 */
2953 static void fill_mbaff_ref_list(H264Context *h){
2955     for(list=0; list<2; list++){ //FIXME try list_count
2956         for(i=0; i<h->ref_count[list]; i++){
2957             Picture *frame = &h->ref_list[list][i];
2958             Picture *field = &h->ref_list[list][16+2*i];
/* field[0] = top field view of the frame, field[1] = bottom field */
2961                 field[0].linesize[j] <<= 1;
2962             field[0].reference = PICT_TOP_FIELD;
2963             field[0].poc= field[0].field_poc[0];
2964             field[1] = field[0];
2966                 field[1].data[j] += frame->linesize[j];
2967             field[1].reference = PICT_BOTTOM_FIELD;
2968             field[1].poc= field[1].field_poc[1];
/* both field entries inherit the frame's explicit weights/offsets */
2970             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2971             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2973                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2974                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are duplicated along both index axes */
2978     for(j=0; j<h->ref_count[1]; j++){
2979         for(i=0; i<h->ref_count[0]; i++)
2980             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2981         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2982         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the pred_weight_table() syntax (explicit weighted prediction):
 * log2 weight denominators, then per-reference luma and chroma weight/offset
 * pairs for list 0 (and list 1 for B slices).  Missing entries get the
 * default weight (1<<denom) and zero offset.
 */
2986 static int pred_weight_table(H264Context *h){
2987     MpegEncContext * const s = &h->s;
2989     int luma_def, chroma_def;
2992     h->use_weight_chroma= 0;
2993     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2994     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2995     luma_def = 1<<h->luma_log2_weight_denom;
2996     chroma_def = 1<<h->chroma_log2_weight_denom;
2998     for(list=0; list<2; list++){
2999         h->luma_weight_flag[list]   = 0;
3000         h->chroma_weight_flag[list] = 0;
3001         for(i=0; i<h->ref_count[list]; i++){
3002             int luma_weight_flag, chroma_weight_flag;
3004             luma_weight_flag= get_bits1(&s->gb);
3005             if(luma_weight_flag){
3006                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3007                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only a non-default weight/offset actually enables weighting */
3008                 if(   h->luma_weight[list][i] != luma_def
3009                    || h->luma_offset[list][i] != 0) {
3011                     h->luma_weight_flag[list]= 1;
3014                 h->luma_weight[list][i]= luma_def;
3015                 h->luma_offset[list][i]= 0;
3019                 chroma_weight_flag= get_bits1(&s->gb);
3020                 if(chroma_weight_flag){
/* one weight/offset pair per chroma plane (Cb, Cr) */
3023                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3024                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3025                         if(   h->chroma_weight[list][i][j] != chroma_def
3026                            || h->chroma_offset[list][i][j] != 0) {
3027                             h->use_weight_chroma= 1;
3028                             h->chroma_weight_flag[list]= 1;
3034                         h->chroma_weight[list][i][j]= chroma_def;
3035                         h->chroma_offset[list][i][j]= 0;
/* list 1 is only present for B slices */
3040         if(h->slice_type_nos != FF_B_TYPE) break;
3042     h->use_weight= h->use_weight || h->use_weight_chroma;
3046 static void implicit_weight_table(H264Context *h){
/* Initializes implicit weighted bipred (weighted_bipred_idc == 2): derives
 * per-(ref0,ref1) weights from POC distances, clamping to 32/32 outside the
 * legal dist_scale_factor range.
 * NOTE(review): interior lines are elided in this extract. */
3047 MpegEncContext * const s = &h->s;
3049 int cur_poc = s->current_picture_ptr->poc;
3051 for (i = 0; i < 2; i++) {
3052 h->luma_weight_flag[i] = 0;
3053 h->chroma_weight_flag[i] = 0;
/* Special case: single symmetric reference pair around the current POC —
 * implicit weighting degenerates to plain averaging. */
3056 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3057 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3059 h->use_weight_chroma= 0;
3064 h->use_weight_chroma= 2;
3065 h->luma_log2_weight_denom= 5;
3066 h->chroma_log2_weight_denom= 5;
3068 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3069 int poc0 = h->ref_list[0][ref0].poc;
3070 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3071 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor per the H.264 temporal-distance derivation
 * (spec 8.4.2.3.2); values clipped as mandated. */
3072 int td = av_clip(poc1 - poc0, -128, 127);
3074 int tb = av_clip(cur_poc - poc0, -128, 127);
3075 int tx = (16384 + (FFABS(td) >> 1)) / td;
3076 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3077 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3078 h->implicit_weight[ref0][ref1] = 32;
3080 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3082 h->implicit_weight[ref0][ref1] = 32;
3088 * Mark a picture as no longer needed for reference. The refmask
3089 * argument allows unreferencing of individual fields or the whole frame.
3090 * If the picture becomes entirely unreferenced, but is being held for
3091 * display purposes, it is marked as such.
3092 * @param refmask mask of fields to unreference; the mask is bitwise
3093 * ANDed with the reference marking of pic
3094 * @return non-zero if pic becomes entirely unreferenced (except possibly
3095 * for display purposes) zero if one of the fields remains in
3098 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* See the doc comment above: clears reference bits per refmask; if the
 * picture is fully unreferenced but still queued in delayed_pic[], keep it
 * alive by tagging it DELAYED_PIC_REF.
 * NOTE(review): interior lines are elided in this extract. */
3100 if (pic->reference &= refmask) {
3103 for(i = 0; h->delayed_pic[i]; i++)
3104 if(pic == h->delayed_pic[i]){
3105 pic->reference=DELAYED_PIC_REF;
3113 * instantaneous decoder refresh.
3115 static void idr(H264Context *h){
/* Instantaneous decoder refresh: drop all long-term and short-term
 * references and reset frame-number tracking state.
 * NOTE(review): interior lines are elided in this extract. */
3118 for(i=0; i<16; i++){
3119 remove_long(h, i, 0);
3121 assert(h->long_ref_count==0);
3123 for(i=0; i<h->short_ref_count; i++){
3124 unreference_pic(h, h->short_ref[i], 0);
3125 h->short_ref[i]= NULL;
3127 h->short_ref_count=0;
3128 h->prev_frame_num= 0;
3129 h->prev_frame_num_offset= 0;
3134 /* forget old pics after a seek */
3135 static void flush_dpb(AVCodecContext *avctx){
/* avcodec flush callback: discard delayed-output pictures and reference
 * state after a seek, then delegate to the generic mpegvideo flush.
 * NOTE(review): interior lines are elided in this extract. */
3136 H264Context *h= avctx->priv_data;
3138 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3139 if(h->delayed_pic[i])
3140 h->delayed_pic[i]->reference= 0;
3141 h->delayed_pic[i]= NULL;
/* INT_MIN marks "no picture output yet" for the POC-ordered output logic. */
3143 h->outputed_poc= INT_MIN;
3145 if(h->s.current_picture_ptr)
3146 h->s.current_picture_ptr->reference= 0;
3147 h->s.first_field= 0;
/* Reset SEI-derived state so stale timing/recovery info is not reused. */
3148 h->sei_recovery_frame_cnt = -1;
3149 h->sei_dpb_output_delay = 0;
3150 h->sei_cpb_removal_delay = -1;
3151 ff_mpeg_flush(avctx);
3155 * Find a Picture in the short term reference list by frame number.
3156 * @param frame_num frame number to search for
3157 * @param idx the index into h->short_ref where returned picture is found
3158 * undefined if no picture found.
3159 * @return pointer to the found picture, or NULL if no pic with the provided
3160 * frame number is found
3162 static Picture * find_short(H264Context *h, int frame_num, int *idx){
/* Linear search of short_ref[] for a picture with the given frame_num;
 * see the doc comment above for the *idx out-parameter contract.
 * NOTE(review): interior lines are elided in this extract. */
3163 MpegEncContext * const s = &h->s;
3166 for(i=0; i<h->short_ref_count; i++){
3167 Picture *pic= h->short_ref[i];
3168 if(s->avctx->debug&FF_DEBUG_MMCO)
3169 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3170 if(pic->frame_num == frame_num) {
3179 * Remove a picture from the short term reference list by its index in
3180 * that list. This does no checking on the provided index; it is assumed
3181 * to be valid. Other list entries are shifted down.
3182 * @param i index into h->short_ref of picture to remove.
3184 static void remove_short_at_index(H264Context *h, int i){
/* Drop entry i from short_ref[] and close the gap; index is trusted (see
 * doc comment above), the assert only guards debug builds. */
3185 assert(i >= 0 && i < h->short_ref_count);
3186 h->short_ref[i]= NULL;
/* memmove (not memcpy): source and destination ranges overlap. */
3187 if (--h->short_ref_count)
3188 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3193 * @return the removed picture or NULL if an error occurs
3195 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
/* Find a short-term reference by frame_num, unreference the fields selected
 * by ref_mask, and drop it from the list if it became fully unreferenced.
 * NOTE(review): interior lines are elided in this extract. */
3196 MpegEncContext * const s = &h->s;
3200 if(s->avctx->debug&FF_DEBUG_MMCO)
3201 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3203 pic = find_short(h, frame_num, &i);
3205 if(unreference_pic(h, pic, ref_mask))
3206 remove_short_at_index(h, i);
3213 * Remove a picture from the long term reference list by its index in
3215 * @return the removed picture or NULL if an error occurs
3217 static Picture * remove_long(H264Context *h, int i, int ref_mask){
/* Unreference long_ref[i] per ref_mask; if fully unreferenced, clear its
 * long_ref flag and vacate the slot.
 * NOTE(review): interior lines are elided in this extract. */
3220 pic= h->long_ref[i];
3222 if(unreference_pic(h, pic, ref_mask)){
3223 assert(h->long_ref[i]->long_ref == 1);
3224 h->long_ref[i]->long_ref= 0;
3225 h->long_ref[i]= NULL;
3226 h->long_ref_count--;
3234 * print short term list
3236 static void print_short_term(H264Context *h) {
/* Debug-only dump of the short-term reference list (FF_DEBUG_MMCO gated). */
3238 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3239 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3240 for(i=0; i<h->short_ref_count; i++){
3241 Picture *pic= h->short_ref[i];
3242 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3248 * print long term list
3250 static void print_long_term(H264Context *h) {
/* Debug-only dump of all 16 long-term reference slots (FF_DEBUG_MMCO gated). */
3252 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3253 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3254 for(i = 0; i < 16; i++){
3255 Picture *pic= h->long_ref[i];
3257 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3264 * Executes the reference picture marking (memory management control operations).
3266 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
/* Applies the decoded memory-management control operations (MMCOs) to the
 * short/long reference lists, handles the sliding-window default, field-pair
 * bookkeeping, and enforces the SPS ref_frame_count limit.
 * NOTE(review): interior lines are elided in this extract; comments describe
 * only the visible code. */
3267 MpegEncContext * const s = &h->s;
3269 int current_ref_assigned=0;
3272 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3273 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3275 for(i=0; i<mmco_count; i++){
3276 int structure, frame_num;
3277 if(s->avctx->debug&FF_DEBUG_MMCO)
3278 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* For short-term ops, resolve the pic_num to a frame_num/structure pair and
 * look the picture up before dispatching on the opcode. */
3280 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3281 || mmco[i].opcode == MMCO_SHORT2LONG){
3282 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3283 pic = find_short(h, frame_num, &j);
3285 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3286 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3287 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3292 switch(mmco[i].opcode){
3293 case MMCO_SHORT2UNUSED:
3294 if(s->avctx->debug&FF_DEBUG_MMCO)
3295 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3296 remove_short(h, frame_num, structure ^ PICT_FRAME);
3298 case MMCO_SHORT2LONG:
/* Evict any different picture occupying the target long-term slot first. */
3299 if (h->long_ref[mmco[i].long_arg] != pic)
3300 remove_long(h, mmco[i].long_arg, 0);
3302 remove_short_at_index(h, j);
3303 h->long_ref[ mmco[i].long_arg ]= pic;
3304 if (h->long_ref[ mmco[i].long_arg ]){
3305 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3306 h->long_ref_count++;
3309 case MMCO_LONG2UNUSED:
3310 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3311 pic = h->long_ref[j];
3313 remove_long(h, j, structure ^ PICT_FRAME);
3314 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3315 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3318 // Comment below left from previous code as it is an interesting note.
3319 /* First field in pair is in short term list or
3320 * at a different long term index.
3321 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3322 * Report the problem and keep the pair where it is,
3323 * and mark this field valid.
/* MMCO_LONG: mark the current picture as long-term at the given index. */
3326 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3327 remove_long(h, mmco[i].long_arg, 0);
3329 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3330 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3331 h->long_ref_count++;
3334 s->current_picture_ptr->reference |= s->picture_structure;
3335 current_ref_assigned=1;
3337 case MMCO_SET_MAX_LONG:
3338 assert(mmco[i].long_arg <= 16);
3339 // just remove the long term which index is greater than new max
3340 for(j = mmco[i].long_arg; j<16; j++){
3341 remove_long(h, j, 0);
/* MMCO_RESET (visible tail): empty both lists and reset the current
 * picture's POC/frame_num — NOTE(review): the case label itself is elided
 * in this extract. */
3345 while(h->short_ref_count){
3346 remove_short(h, h->short_ref[0]->frame_num, 0);
3348 for(j = 0; j < 16; j++) {
3349 remove_long(h, j, 0);
3351 s->current_picture_ptr->poc=
3352 s->current_picture_ptr->field_poc[0]=
3353 s->current_picture_ptr->field_poc[1]=
3357 s->current_picture_ptr->frame_num= 0;
3363 if (!current_ref_assigned) {
3364 /* Second field of complementary field pair; the first field of
3365 * which is already referenced. If short referenced, it
3366 * should be first entry in short_ref. If not, it must exist
3367 * in long_ref; trying to put it on the short list here is an
3368 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3370 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3371 /* Just mark the second field valid */
3372 s->current_picture_ptr->reference = PICT_FRAME;
3373 } else if (s->current_picture_ptr->long_ref) {
3374 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3375 "assignment for second field "
3376 "in complementary field pair "
3377 "(first field is long term)\n");
3379 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3381 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* Default path: push the current picture to the head of short_ref. */
3384 if(h->short_ref_count)
3385 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3387 h->short_ref[0]= s->current_picture_ptr;
3388 h->short_ref_count++;
3389 s->current_picture_ptr->reference |= s->picture_structure;
3393 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3395 /* We have too many reference frames, probably due to corrupted
3396 * stream. Need to discard one frame. Prevents overrun of the
3397 * short_ref and long_ref buffers.
3399 av_log(h->s.avctx, AV_LOG_ERROR,
3400 "number of reference frames exceeds max (probably "
3401 "corrupt input), discarding one\n");
3403 if (h->long_ref_count && !h->short_ref_count) {
3404 for (i = 0; i < 16; ++i)
3409 remove_long(h, i, 0);
/* Otherwise discard the oldest short-term reference (list tail). */
3411 pic = h->short_ref[h->short_ref_count - 1];
3412 remove_short(h, pic->frame_num, 0);
3416 print_short_term(h);
3421 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
/* Parses dec_ref_pic_marking() from the slice header into h->mmco[]:
 * IDR handling, the adaptive MMCO list, or the implicit sliding-window
 * operation when the reference buffer is full.
 * NOTE(review): interior lines are elided in this extract. */
3422 MpegEncContext * const s = &h->s;
3426 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3427 s->broken_link= get_bits1(gb) -1;
3429 h->mmco[0].opcode= MMCO_LONG;
3430 h->mmco[0].long_arg= 0;
3434 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3435 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3436 MMCOOpcode opcode= get_ue_golomb_31(gb);
3438 h->mmco[i].opcode= opcode;
3439 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, wrapped to max_pic_num. */
3440 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3441 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3442 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3446 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3447 unsigned int long_arg= get_ue_golomb_31(gb);
/* Long index >= 16 is only legal as a per-field LONG2UNUSED in field pics. */
3448 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3449 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3452 h->mmco[i].long_arg= long_arg;
3455 if(opcode > (unsigned)MMCO_LONG){
3456 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3459 if(opcode == MMCO_END)
3464 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* Sliding window: buffer full -> synthesize SHORT2UNUSED for the oldest
 * short-term reference (two ops when decoding fields). */
3466 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3467 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3468 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3469 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3471 if (FIELD_PICTURE) {
3472 h->mmco[0].short_pic_num *= 2;
3473 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3474 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3484 static int init_poc(H264Context *h){
/* Computes the picture order count for the current picture per spec 8.2.1,
 * covering all three poc_type modes, and stores the per-field and frame POC.
 * NOTE(review): interior lines are elided in this extract. */
3485 MpegEncContext * const s = &h->s;
3486 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3488 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped -> bump the offset by one full frame_num cycle. */
3490 h->frame_num_offset= h->prev_frame_num_offset;
3491 if(h->frame_num < h->prev_frame_num)
3492 h->frame_num_offset += max_frame_num;
3494 if(h->sps.poc_type==0){
/* Type 0: reconstruct poc_msb from the transmitted poc_lsb and the
 * previous msb/lsb pair (wrap detection in both directions). */
3495 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3497 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3498 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3499 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3500 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3502 h->poc_msb = h->prev_poc_msb;
3503 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3505 field_poc[1] = h->poc_msb + h->poc_lsb;
3506 if(s->picture_structure == PICT_FRAME)
3507 field_poc[1] += h->delta_poc_bottom;
3508 }else if(h->sps.poc_type==1){
/* Type 1: POC derived from frame_num via the SPS offset-for-ref-frame
 * cycle table plus transmitted deltas. */
3509 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3512 if(h->sps.poc_cycle_length != 0)
3513 abs_frame_num = h->frame_num_offset + h->frame_num;
3517 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3520 expected_delta_per_poc_cycle = 0;
3521 for(i=0; i < h->sps.poc_cycle_length; i++)
3522 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3524 if(abs_frame_num > 0){
3525 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3526 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3528 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3529 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3530 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3534 if(h->nal_ref_idc == 0)
3535 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3537 field_poc[0] = expectedpoc + h->delta_poc[0];
3538 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3540 if(s->picture_structure == PICT_FRAME)
3541 field_poc[1] += h->delta_poc[1];
/* Type 2 (visible tail): POC follows decoding order, 2*frame counter. */
3543 int poc= 2*(h->frame_num_offset + h->frame_num);
/* Store per-field POCs only for the fields actually present; the frame POC
 * is the minimum of the two field POCs. */
3552 if(s->picture_structure != PICT_BOTTOM_FIELD)
3553 s->current_picture_ptr->field_poc[0]= field_poc[0];
3554 if(s->picture_structure != PICT_TOP_FIELD)
3555 s->current_picture_ptr->field_poc[1]= field_poc[1];
3556 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3563 * initialize scan tables
3565 static void init_scan_tables(H264Context *h){
/* Builds the per-context zigzag/field scan tables. When the IDCT in use is
 * not the plain C one, coefficients are stored permuted, so the scans are
 * remapped with the matching permutation (T macros below).
 * NOTE(review): interior lines are elided in this extract. */
3566 MpegEncContext * const s = &h->s;
3568 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3569 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3570 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3572 for(i=0; i<16; i++){
/* 4x4 permutation: swap the 2-bit row/column halves of the index. */
3573 #define T(x) (x>>2) | ((x<<2) & 0xF)
3574 h->zigzag_scan[i] = T(zigzag_scan[i]);
3575 h-> field_scan[i] = T( field_scan[i]);
3579 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3580 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3581 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3582 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3583 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3585 for(i=0; i<64; i++){
/* 8x8 permutation: swap the 3-bit row/column halves of the index. */
3586 #define T(x) (x>>3) | ((x&7)<<3)
3587 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3588 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3589 h->field_scan8x8[i] = T(field_scan8x8[i]);
3590 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* The *_q0 pointers select unpermuted tables for lossless (transform
 * bypass) blocks, or alias the context tables otherwise. */
3594 if(h->sps.transform_bypass){ //FIXME same ugly
3595 h->zigzag_scan_q0 = zigzag_scan;
3596 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3597 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3598 h->field_scan_q0 = field_scan;
3599 h->field_scan8x8_q0 = field_scan8x8;
3600 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3602 h->zigzag_scan_q0 = h->zigzag_scan;
3603 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3604 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3605 h->field_scan_q0 = h->field_scan;
3606 h->field_scan8x8_q0 = h->field_scan8x8;
3607 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3612 * Replicates H264 "master" context to thread contexts.
3614 static void clone_slice(H264Context *dst, H264Context *src)
/* Copies the per-frame state a slice thread needs from the master context:
 * current-picture pointers, POC/frame_num history, and the reference lists.
 * Shallow copies only — the Picture pointers remain shared with the master. */
3616 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3617 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3618 dst->s.current_picture = src->s.current_picture;
3619 dst->s.linesize = src->s.linesize;
3620 dst->s.uvlinesize = src->s.uvlinesize;
3621 dst->s.first_field = src->s.first_field;
3623 dst->prev_poc_msb = src->prev_poc_msb;
3624 dst->prev_poc_lsb = src->prev_poc_lsb;
3625 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3626 dst->prev_frame_num = src->prev_frame_num;
3627 dst->short_ref_count = src->short_ref_count;
3629 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3630 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3631 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3632 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3634 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3635 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3639 * decodes a slice header.
3640 * This will also call MPV_common_init() and frame_start() as needed.
3642 * @param h h264context
3643 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3645 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3647 static int decode_slice_header(H264Context *h, H264Context *h0){
/* See the doc comment above. Parses one slice header, activating the
 * referenced PPS/SPS, (re)initializing the MpegEncContext when dimensions
 * change, managing field pairing, reference lists, weights, ref-pic marking
 * and deblocking parameters.
 * NOTE(review): this extract elides many interior lines (error returns,
 * else-branches, closing braces); comments describe only the visible code. */
3648 MpegEncContext * const s = &h->s;
3649 MpegEncContext * const s0 = &h0->s;
3650 unsigned int first_mb_in_slice;
3651 unsigned int pps_id;
3652 int num_ref_idx_active_override_flag;
3653 unsigned int slice_type, tmp, i, j;
3654 int default_ref_list_done = 0;
3655 int last_pic_structure;
3657 s->dropable= h->nal_ref_idc == 0;
/* Non-reference frames may use the cheaper 2-tap qpel with CODEC_FLAG2_FAST. */
3659 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3660 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3661 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3663 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3664 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3667 first_mb_in_slice= get_ue_golomb(&s->gb);
3669 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3670 h0->current_slice = 0;
3671 if (!s0->first_field)
3672 s->current_picture_ptr= NULL;
3675 slice_type= get_ue_golomb_31(&s->gb);
3677 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3682 h->slice_type_fixed=1;
3684 h->slice_type_fixed=0;
3686 slice_type= golomb_to_pict_type[ slice_type ];
/* I slices (and repeats of the previous slice type) can keep the already
 * built default reference list. */
3687 if (slice_type == FF_I_TYPE
3688 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3689 default_ref_list_done = 1;
3691 h->slice_type= slice_type;
3692 h->slice_type_nos= slice_type & 3;
3694 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3695 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3696 av_log(h->s.avctx, AV_LOG_ERROR,
3697 "B picture before any references, skipping\n");
/* Activate the referenced PPS and its SPS; both must already be parsed. */
3701 pps_id= get_ue_golomb(&s->gb);
3702 if(pps_id>=MAX_PPS_COUNT){
3703 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3706 if(!h0->pps_buffers[pps_id]) {
3707 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3710 h->pps= *h0->pps_buffers[pps_id];
3712 if(!h0->sps_buffers[h->pps.sps_id]) {
3713 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3716 h->sps = *h0->sps_buffers[h->pps.sps_id];
3718 if(h == h0 && h->dequant_coeff_pps != pps_id){
3719 h->dequant_coeff_pps = pps_id;
3720 init_dequant_tables(h);
/* Derive picture geometry from the SPS (macroblock units, cropping). */
3723 s->mb_width= h->sps.mb_width;
3724 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3726 h->b_stride= s->mb_width*4;
3727 h->b8_stride= s->mb_width*2;
3729 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3730 if(h->sps.frame_mbs_only_flag)
3731 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3733 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3735 if (s->context_initialized
3736 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3738 return -1; // width / height changed during parallelized decoding
3740 flush_dpb(s->avctx);
3743 if (!s->context_initialized) {
3745 return -1; // we can't (re-)initialize context during parallel decoding
3746 if (MPV_common_init(s) < 0)
3750 init_scan_tables(h);
/* Set up one H264Context per slice thread, sharing the MpegEncContext
 * thread contexts allocated by MPV_common_init(). */
3753 for(i = 1; i < s->avctx->thread_count; i++) {
3755 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3756 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3757 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3760 init_scan_tables(c);
3764 for(i = 0; i < s->avctx->thread_count; i++)
3765 if(context_init(h->thread_context[i]) < 0)
3768 s->avctx->width = s->width;
3769 s->avctx->height = s->height;
3770 s->avctx->sample_aspect_ratio= h->sps.sar;
3771 if(!s->avctx->sample_aspect_ratio.den)
3772 s->avctx->sample_aspect_ratio.den = 1;
3774 if(h->sps.timing_info_present_flag){
3775 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Workaround for old x264 builds that wrote half the correct time_scale. */
3776 if(h->x264_build > 0 && h->x264_build < 44)
3777 s->avctx->time_base.den *= 2;
3778 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3779 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3783 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* Determine picture structure (frame / top field / bottom field / MBAFF). */
3786 h->mb_aff_frame = 0;
3787 last_pic_structure = s0->picture_structure;
3788 if(h->sps.frame_mbs_only_flag){
3789 s->picture_structure= PICT_FRAME;
3791 if(get_bits1(&s->gb)) { //field_pic_flag
3792 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3794 s->picture_structure= PICT_FRAME;
3795 h->mb_aff_frame = h->sps.mb_aff;
3798 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3800 if(h0->current_slice == 0){
/* Fill frame_num gaps by synthesizing reference frames so later POC and
 * ref-list logic stays consistent. */
3801 while(h->frame_num != h->prev_frame_num &&
3802 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3803 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3805 h->prev_frame_num++;
3806 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3807 s->current_picture_ptr->frame_num= h->prev_frame_num;
3808 execute_ref_pic_marking(h, NULL, 0);
3811 /* See if we have a decoded first field looking for a pair... */
3812 if (s0->first_field) {
3813 assert(s0->current_picture_ptr);
3814 assert(s0->current_picture_ptr->data[0]);
3815 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3817 /* figure out if we have a complementary field pair */
3818 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3820 * Previous field is unmatched. Don't display it, but let it
3821 * remain for reference if marked as such.
3823 s0->current_picture_ptr = NULL;
3824 s0->first_field = FIELD_PICTURE;
3827 if (h->nal_ref_idc &&
3828 s0->current_picture_ptr->reference &&
3829 s0->current_picture_ptr->frame_num != h->frame_num) {
3831 * This and previous field were reference, but had
3832 * different frame_nums. Consider this field first in
3833 * pair. Throw away previous field except for reference
3836 s0->first_field = 1;
3837 s0->current_picture_ptr = NULL;
3840 /* Second field in complementary pair */
3841 s0->first_field = 0;
3846 /* Frame or first field in a potentially complementary pair */
3847 assert(!s0->current_picture_ptr);
3848 s0->first_field = FIELD_PICTURE;
3851 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3852 s0->first_field = 0;
3859 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3861 assert(s->mb_num == s->mb_width * s->mb_height);
3862 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3863 first_mb_in_slice >= s->mb_num){
3864 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3867 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3868 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3869 if (s->picture_structure == PICT_BOTTOM_FIELD)
3870 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3871 assert(s->mb_y < s->mb_height);
/* pic_num space is doubled (+1 for the current parity) in field pictures. */
3873 if(s->picture_structure==PICT_FRAME){
3874 h->curr_pic_num= h->frame_num;
3875 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3877 h->curr_pic_num= 2*h->frame_num + 1;
3878 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3881 if(h->nal_unit_type == NAL_IDR_SLICE){
3882 get_ue_golomb(&s->gb); /* idr_pic_id */
/* POC-related slice-header syntax for poc_type 0 and 1. */
3885 if(h->sps.poc_type==0){
3886 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3888 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3889 h->delta_poc_bottom= get_se_golomb(&s->gb);
3893 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3894 h->delta_poc[0]= get_se_golomb(&s->gb);
3896 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3897 h->delta_poc[1]= get_se_golomb(&s->gb);
3902 if(h->pps.redundant_pic_cnt_present){
3903 h->redundant_pic_count= get_ue_golomb(&s->gb);
3906 //set defaults, might be overridden a few lines later
3907 h->ref_count[0]= h->pps.ref_count[0];
3908 h->ref_count[1]= h->pps.ref_count[1];
3910 if(h->slice_type_nos != FF_I_TYPE){
3911 if(h->slice_type_nos == FF_B_TYPE){
3912 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3914 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3916 if(num_ref_idx_active_override_flag){
3917 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3918 if(h->slice_type_nos==FF_B_TYPE)
3919 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Unsigned trick: catches both 0 and >32 after the -1. */
3921 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3922 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3923 h->ref_count[0]= h->ref_count[1]= 1;
3927 if(h->slice_type_nos == FF_B_TYPE)
/* Build/reorder reference lists, then wire last/next picture pointers for
 * the legacy mpegvideo code paths. */
3934 if(!default_ref_list_done){
3935 fill_default_ref_list(h);
3938 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3941 if(h->slice_type_nos!=FF_I_TYPE){
3942 s->last_picture_ptr= &h->ref_list[0][0];
3943 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3945 if(h->slice_type_nos==FF_B_TYPE){
3946 s->next_picture_ptr= &h->ref_list[1][0];
3947 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3950 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3951 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3952 pred_weight_table(h);
3953 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3954 implicit_weight_table(h);
3957 for (i = 0; i < 2; i++) {
3958 h->luma_weight_flag[i] = 0;
3959 h->chroma_weight_flag[i] = 0;
3964 decode_ref_pic_marking(h0, &s->gb);
3967 fill_mbaff_ref_list(h);
3969 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3970 direct_dist_scale_factor(h);
3971 direct_ref_list_init(h);
3973 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3974 tmp = get_ue_golomb_31(&s->gb);
3976 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3979 h->cabac_init_idc= tmp;
3982 h->last_qscale_diff = 0;
3983 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3985 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3989 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3990 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3991 //FIXME qscale / qp ... stuff
3992 if(h->slice_type == FF_SP_TYPE){
3993 get_bits1(&s->gb); /* sp_for_switch_flag */
3995 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3996 get_se_golomb(&s->gb); /* slice_qs_delta */
/* Deblocking filter parameters; internal convention 1<->0 swapped vs the
 * spec's disable_deblocking_filter_idc. */
3999 h->deblocking_filter = 1;
4000 h->slice_alpha_c0_offset = 0;
4001 h->slice_beta_offset = 0;
4002 if( h->pps.deblocking_filter_parameters_present ) {
4003 tmp= get_ue_golomb_31(&s->gb);
4005 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4008 h->deblocking_filter= tmp;
4009 if(h->deblocking_filter < 2)
4010 h->deblocking_filter^= 1; // 1<->0
4012 if( h->deblocking_filter ) {
4013 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4014 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4018 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4019 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4020 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4021 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4022 h->deblocking_filter= 0;
4024 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4025 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4026 /* Cheat slightly for speed:
4027 Do not bother to deblock across slices. */
4028 h->deblocking_filter = 2;
4030 h0->max_contexts = 1;
4031 if(!h0->single_decode_warning) {
4032 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4033 h0->single_decode_warning = 1;
4036 return 1; // deblocking switched inside frame
/* NOTE(review): the '?' bit-count below is inside a disabled/unfinished
 * region in the original source (slice-group support is not implemented). */
4041 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4042 slice_group_change_cycle= get_bits(&s->gb, ?);
4045 h0->last_slice_type = slice_type;
4046 h->slice_num = ++h0->current_slice;
4047 if(h->slice_num >= MAX_SLICES){
4048 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* Build the ref2frm lookup: packs frame_num and reference flags per ref. */
4052 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4056 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4057 +(h->ref_list[j][i].reference&3);
4060 for(i=16; i<48; i++)
4061 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4062 +(h->ref_list[j][i].reference&3);
4065 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4066 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4068 s->avctx->refs= h->sps.ref_frame_count;
4070 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4071 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4073 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4075 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4076 pps_id, h->frame_num,
4077 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4078 h->ref_count[0], h->ref_count[1],
4080 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4082 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4083 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Reads a CAVLC level_prefix code: a unary count of leading zero bits
 * before the first 1-bit, read with the raw OPEN_READER/UPDATE_CACHE
 * bitstream macros for speed.  log = 32 - av_log2(buf) locates the first
 * set bit in the 32-bit cache; the av_log below prints log-1 as "lpr",
 * i.e. the prefix value.
 * NOTE(review): this excerpt elides lines (the declarations of buf/log,
 * the TRACE #ifdef around the debug prints, and the final return). */
4093 static inline int get_level_prefix(GetBitContext *gb){
4097 OPEN_READER(re, gb);
4098 UPDATE_CACHE(re, gb);
4099 buf=GET_CACHE(re, gb);
/* Position (1-based from MSB) of the highest set bit in the cache. */
4101 log= 32 - av_log2(buf);
/* Debug trace of the consumed bits — presumably compiled only under TRACE. */
4103 print_bin(buf>>(32-log), log);
4104 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* Consume the zeros plus the terminating 1-bit. */
4107 LAST_SKIP_BITS(re, gb, log);
4108 CLOSE_READER(re, gb);
/* Returns nonzero if the 8x8 transform may be used for the current MB:
 * tests all four sub_mb_type entries at once by reading them as one
 * uint64_t and masking the partition-shape bits replicated into each
 * 16-bit lane (the 0x0001000100010001ULL multiplier).  Any 16x8/8x16/8x8
 * sub-partition forbids the 8x8 DCT; when direct_8x8_inference_flag is
 * set, DIRECT sub-blocks are additionally acceptable (first branch).
 * NOTE(review): the cast assumes sub_mb_type is 4 contiguous 16-bit
 * entries; an else line and closing brace are elided from this excerpt. */
4113 static inline int get_dct8x8_allowed(H264Context *h){
4114 if(h->sps.direct_8x8_inference_flag)
4115 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4117 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4121 * decodes a residual block.
4122 * @param n block index
4123 * @param scantable scantable
4124 * @param max_coeff number of coefficients in the block
4125 * @return <0 if an error occurred
/* CAVLC residual block decoder (coeff_token, trailing ones, levels,
 * total_zeros, run_before), writing dezigzagged coefficients into block[].
 * @param n      block index (CHROMA_DC/LUMA_DC get dedicated VLC tables)
 * @param qmul   dequant table, or NULL to store raw levels (DC blocks)
 * @param max_coeff  number of coefficients in the block (4/15/16)
 * NOTE(review): this excerpt elides lines (several else branches, local
 * declarations such as level[], prefix, mask, and the final returns). */
4127 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4128 MpegEncContext * const s = &h->s;
/* Maps the predicted nonzero count (0..16) to one of 4 coeff_token tables. */
4129 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4131 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4133 //FIXME put trailing_onex into the context
/* --- coeff_token: table choice depends on block kind and on the
 *     predicted nonzero count of neighbouring blocks. --- */
4135 if(n == CHROMA_DC_BLOCK_INDEX){
4136 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4137 total_coeff= coeff_token>>2;
4139 if(n == LUMA_DC_BLOCK_INDEX){
4140 total_coeff= pred_non_zero_count(h, 0);
4141 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4142 total_coeff= coeff_token>>2;
4144 total_coeff= pred_non_zero_count(h, n);
4145 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4146 total_coeff= coeff_token>>2;
4147 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4151 //FIXME set last_non_zero?
/* Corrupt stream check: more coefficients than the block can hold. */
4155 if(total_coeff > (unsigned)max_coeff) {
4156 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4160 trailing_ones= coeff_token&3;
4161 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4162 assert(total_coeff<=16);
/* --- trailing ones: peek 3 sign bits, consume only trailing_ones of
 *     them; each entry becomes +1 or -1. --- */
4164 i = show_bits(gb, 3);
4165 skip_bits(gb, trailing_ones);
4166 level[0] = 1-((i&4)>>1);
4167 level[1] = 1-((i&2) );
4168 level[2] = 1-((i&1)<<1);
/* --- first non-trailing-one level, via the cavlc_level_tab fast path;
 *     level_code >= 100 marks a table escape (long prefix). --- */
4170 if(trailing_ones<total_coeff) {
4172 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4173 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4174 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4176 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4177 if(level_code >= 100){
4178 prefix= level_code - 100;
4179 if(prefix == LEVEL_TAB_BITS)
4180 prefix += get_level_prefix(gb);
4182 //first coefficient has suffix_length equal to 0 or 1
4183 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4185 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4187 level_code= (prefix<<suffix_length); //part
4188 }else if(prefix==14){
4190 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4192 level_code= prefix + get_bits(gb, 4); //part
4194 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4195 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
/* prefix >= 16 case: widen the escape range (see H.264 9.2.2.1). */
4197 level_code += (1<<(prefix-3))-4096;
/* If fewer than 3 trailing ones, levels start at magnitude 2. */
4200 if(trailing_ones < 3) level_code += 2;
/* Branchless unsigned->signed mapping: even codes positive, odd negative. */
4203 mask= -(level_code&1);
4204 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4206 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4209 if(level_code + 3U > 6U)
4211 level[trailing_ones]= level_code;
4214 //remaining coefficients have suffix_length > 0
4215 for(i=trailing_ones+1;i<total_coeff;i++) {
/* Thresholds at which suffix_length is bumped for subsequent levels. */
4216 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4217 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4218 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4220 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4221 if(level_code >= 100){
4222 prefix= level_code - 100;
4223 if(prefix == LEVEL_TAB_BITS){
4224 prefix += get_level_prefix(gb);
4227 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4229 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4231 level_code += (1<<(prefix-3))-4096;
4233 mask= -(level_code&1);
4234 level_code= (((2+level_code)>>1) ^ mask) - mask;
4236 level[i]= level_code;
/* Grow suffix_length once the decoded magnitude passes the limit. */
4238 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
/* --- total_zeros: skipped when the block is already full. --- */
4243 if(total_coeff == max_coeff)
4246 if(n == CHROMA_DC_BLOCK_INDEX)
4247 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4249 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* --- placement: walk from the last coefficient backwards, reading a
 *     run_before VLC between coefficients while zeros remain.  Two
 *     parallel loops: raw store (qmul==NULL path) vs dequantized. --- */
4252 coeff_num = zeros_left + total_coeff - 1;
4253 j = scantable[coeff_num];
4255 block[j] = level[0];
4256 for(i=1;i<total_coeff;i++) {
4259 else if(zeros_left < 7){
4260 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4262 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4264 zeros_left -= run_before;
4265 coeff_num -= 1 + run_before;
4266 j= scantable[ coeff_num ];
/* Dequantized variant: qmul[j] scale with rounding, >>6 fixed point. */
4271 block[j] = (level[0] * qmul[j] + 32)>>6;
4272 for(i=1;i<total_coeff;i++) {
4275 else if(zeros_left < 7){
4276 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4278 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4280 zeros_left -= run_before;
4281 coeff_num -= 1 + run_before;
4282 j= scantable[ coeff_num ];
4284 block[j]= (level[i] * qmul[j] + 32)>>6;
/* Error path: run_before codes consumed more zeros than were signalled. */
4289 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* For a skipped MBAFF pair (where no field flag is coded), predicts the
 * field/frame decoding flag from the left neighbour, falling back to the
 * top neighbour — each used only if it belongs to the current slice.
 * NOTE(review): the final fallback (neither neighbour available) is on a
 * line elided from this excerpt. */
4296 static void predict_field_decoding_flag(H264Context *h){
4297 MpegEncContext * const s = &h->s;
4298 const int mb_xy= h->mb_xy;
4299 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4300 ? s->current_picture.mb_type[mb_xy-1]
4301 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4302 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4304 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4308 * decodes a P_SKIP or B_SKIP macroblock
/* Decodes a P_SKIP or B_SKIP macroblock: no residual (all nonzero counts
 * cleared), motion derived entirely by prediction — direct prediction for
 * B slices, P-skip MV prediction for P slices — then written back.
 * NOTE(review): some lines (mb_type initialization, closing braces) are
 * elided from this excerpt. */
4310 static void decode_mb_skip(H264Context *h){
4311 MpegEncContext * const s = &h->s;
4312 const int mb_xy= h->mb_xy;
/* Skipped MB has no coefficients: clear stored and cached nonzero counts. */
4315 memset(h->non_zero_count[mb_xy], 0, 16);
4316 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4319 mb_type|= MB_TYPE_INTERLACED;
4321 if( h->slice_type_nos == FF_B_TYPE )
4323 // just for fill_caches. pred_direct_motion will set the real mb_type
4324 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4326 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4327 pred_direct_motion(h, &mb_type);
4328 mb_type|= MB_TYPE_SKIP;
/* P-skip path: 16x16 list-0 prediction with predicted MV, ref index 0. */
4333 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4335 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4336 pred_pskip_motion(h, &mx, &my);
4337 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4338 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4341 write_back_motion(h, mb_type);
4342 s->current_picture.mb_type[mb_xy]= mb_type;
4343 s->current_picture.qscale_table[mb_xy]= s->qscale;
4344 h->slice_table[ mb_xy ]= h->slice_num;
/* Remembered so the next MB can apply MBAFF skip-pair handling. */
4345 h->prev_mb_skipped= 1;
4349 * decodes a macroblock
4350 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Decodes one macroblock in CAVLC (non-CABAC) mode: skip run, mb_type,
 * intra prediction modes or inter references/MVs, coded block pattern,
 * dquant, and all residual blocks via decode_residual().
 * Returns 0 on success, <0 on bitstream error (error paths are partly
 * elided from this excerpt).
 * NOTE(review): this excerpt elides many lines — else branches, closing
 * braces, several local declarations and returns. */
4352 static int decode_mb_cavlc(H264Context *h){
4353 MpegEncContext * const s = &h->s;
4355 int partition_count;
4356 unsigned int mb_type, cbp;
4357 int dct8x8_allowed= h->pps.transform_8x8_mode;
4359 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4361 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4362 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run handling (P/B slices only): a pending run decrements
 *     once per MB; MBAFF top MBs need a field flag (read or predicted). */
4364 if(h->slice_type_nos != FF_I_TYPE){
4365 if(s->mb_skip_run==-1)
4366 s->mb_skip_run= get_ue_golomb(&s->gb);
4368 if (s->mb_skip_run--) {
4369 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4370 if(s->mb_skip_run==0)
4371 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4373 predict_field_decoding_flag(h);
4380 if( (s->mb_y&1) == 0 )
4381 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4384 h->prev_mb_skipped= 0;
/* --- mb_type: ue(v) index remapped through per-slice-type tables;
 *     out-of-range P/B indices fall through to the intra tables. --- */
4386 mb_type= get_ue_golomb(&s->gb);
4387 if(h->slice_type_nos == FF_B_TYPE){
4389 partition_count= b_mb_type_info[mb_type].partition_count;
4390 mb_type= b_mb_type_info[mb_type].type;
4393 goto decode_intra_mb;
4395 }else if(h->slice_type_nos == FF_P_TYPE){
4397 partition_count= p_mb_type_info[mb_type].partition_count;
4398 mb_type= p_mb_type_info[mb_type].type;
4401 goto decode_intra_mb;
4404 assert(h->slice_type_nos == FF_I_TYPE);
4405 if(h->slice_type == FF_SI_TYPE && mb_type)
4409 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
/* Intra table carries implied cbp and 16x16 prediction mode. */
4413 cbp= i_mb_type_info[mb_type].cbp;
4414 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4415 mb_type= i_mb_type_info[mb_type].type;
4419 mb_type |= MB_TYPE_INTERLACED;
4421 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: byte-aligned raw samples copied straight into h->mb. --- */
4423 if(IS_INTRA_PCM(mb_type)){
4426 // We assume these blocks are very rare so we do not optimize it.
4427 align_get_bits(&s->gb);
4429 // The pixels are stored in the same order as levels in h->mb array.
4430 for(x=0; x < (CHROMA ? 384 : 256); x++){
4431 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4434 // In deblocking, the quantizer is 0
4435 s->current_picture.qscale_table[mb_xy]= 0;
4436 // All coeffs are present
4437 memset(h->non_zero_count[mb_xy], 16, 16);
4439 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MB: double ref counts so indices address field refs;
 * undone at the bottom of the function (>>= 1). */
4444 h->ref_count[0] <<= 1;
4445 h->ref_count[1] <<= 1;
4448 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
4451 if(IS_INTRA(mb_type)){
4453 // init_top_left_availability(h);
4454 if(IS_INTRA4x4(mb_type)){
4457 if(dct8x8_allowed && get_bits1(&s->gb)){
4458 mb_type |= MB_TYPE_8x8DCT;
4462 // fill_intra4x4_pred_table(h);
4463 for(i=0; i<16; i+=di){
4464 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag == 0: 3-bit remaining mode follows. */
4466 if(!get_bits1(&s->gb)){
4467 const int rem_mode= get_bits(&s->gb, 3);
4468 mode = rem_mode + (rem_mode >= mode);
4472 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4474 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4476 write_back_intra_pred_mode(h);
4477 if( check_intra4x4_pred_mode(h) < 0)
4480 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4481 if(h->intra16x16_pred_mode < 0)
4485 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4488 h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitioned inter MB: sub_mb_types, refs, then MVs. --- */
4490 }else if(partition_count==4){
4491 int i, j, sub_partition_count[4], list, ref[2][4];
4493 if(h->slice_type_nos == FF_B_TYPE){
4495 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4496 if(h->sub_mb_type[i] >=13){
4497 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4500 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4501 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4503 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4504 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4505 pred_direct_motion(h, &mb_type);
/* Mark interior positions unavailable so later prediction does not
 * use direct-filled values as spatial neighbours. */
4506 h->ref_cache[0][scan8[4]] =
4507 h->ref_cache[1][scan8[4]] =
4508 h->ref_cache[0][scan8[12]] =
4509 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4512 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4514 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4515 if(h->sub_mb_type[i] >=4){
4516 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4519 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4520 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per 8x8 block; 1-bit shortcut when only 2 refs. */
4524 for(list=0; list<h->list_count; list++){
4525 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4527 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4528 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4532 }else if(ref_count == 2){
4533 tmp= get_bits1(&s->gb)^1;
4535 tmp= get_ue_golomb_31(&s->gb);
4537 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4550 dct8x8_allowed = get_dct8x8_allowed(h);
4552 for(list=0; list<h->list_count; list++){
4554 if(IS_DIRECT(h->sub_mb_type[i])) {
4555 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4558 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4559 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4561 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4562 const int sub_mb_type= h->sub_mb_type[i];
4563 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4564 for(j=0; j<sub_partition_count[i]; j++){
4566 const int index= 4*i + block_width*j;
4567 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4568 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
/* MV = predictor + signed-Golomb-coded difference. */
4569 mx += get_se_golomb(&s->gb);
4570 my += get_se_golomb(&s->gb);
4571 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the MV over the sub-partition's 4x4 cache cells. */
4573 if(IS_SUB_8X8(sub_mb_type)){
4575 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4577 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4578 }else if(IS_SUB_8X4(sub_mb_type)){
4579 mv_cache[ 1 ][0]= mx;
4580 mv_cache[ 1 ][1]= my;
4581 }else if(IS_SUB_4X8(sub_mb_type)){
4582 mv_cache[ 8 ][0]= mx;
4583 mv_cache[ 8 ][1]= my;
4585 mv_cache[ 0 ][0]= mx;
4586 mv_cache[ 0 ][1]= my;
4589 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4595 }else if(IS_DIRECT(mb_type)){
4596 pred_direct_motion(h, &mb_type);
4597 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- non-partitioned inter MB: 16x16 / 16x8 / 8x16 shapes; for each,
 *     first pass reads ref indices, second pass reads MVs. --- */
4599 int list, mx, my, i;
4600 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4601 if(IS_16X16(mb_type)){
4602 for(list=0; list<h->list_count; list++){
4604 if(IS_DIR(mb_type, 0, list)){
4605 if(h->ref_count[list]==1){
4607 }else if(h->ref_count[list]==2){
4608 val= get_bits1(&s->gb)^1;
4610 val= get_ue_golomb_31(&s->gb);
4611 if(val >= h->ref_count[list]){
4612 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4617 val= LIST_NOT_USED&0xFF;
4618 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4620 for(list=0; list<h->list_count; list++){
4622 if(IS_DIR(mb_type, 0, list)){
4623 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4624 mx += get_se_golomb(&s->gb);
4625 my += get_se_golomb(&s->gb);
4626 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4628 val= pack16to32(mx,my);
4631 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4634 else if(IS_16X8(mb_type)){
4635 for(list=0; list<h->list_count; list++){
4638 if(IS_DIR(mb_type, i, list)){
4639 if(h->ref_count[list] == 1){
4641 }else if(h->ref_count[list] == 2){
4642 val= get_bits1(&s->gb)^1;
4644 val= get_ue_golomb_31(&s->gb);
4645 if(val >= h->ref_count[list]){
4646 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4651 val= LIST_NOT_USED&0xFF;
4652 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4655 for(list=0; list<h->list_count; list++){
4658 if(IS_DIR(mb_type, i, list)){
4659 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4660 mx += get_se_golomb(&s->gb);
4661 my += get_se_golomb(&s->gb);
4662 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4664 val= pack16to32(mx,my);
4667 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4671 assert(IS_8X16(mb_type));
4672 for(list=0; list<h->list_count; list++){
4675 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4676 if(h->ref_count[list]==1){
4678 }else if(h->ref_count[list]==2){
4679 val= get_bits1(&s->gb)^1;
4681 val= get_ue_golomb_31(&s->gb);
4682 if(val >= h->ref_count[list]){
4683 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4688 val= LIST_NOT_USED&0xFF;
4689 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4692 for(list=0; list<h->list_count; list++){
4695 if(IS_DIR(mb_type, i, list)){
4696 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4697 mx += get_se_golomb(&s->gb);
4698 my += get_se_golomb(&s->gb);
4699 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4701 val= pack16to32(mx,my);
4704 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4710 if(IS_INTER(mb_type))
4711 write_back_motion(h, mb_type);
/* --- coded_block_pattern (skipped for Intra16x16, which implies it). --- */
4713 if(!IS_INTRA16x16(mb_type)){
4714 cbp= get_ue_golomb(&s->gb);
4716 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4721 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4722 else cbp= golomb_to_inter_cbp [cbp];
/* _gray variants: presumably the monochrome (no chroma) mapping. */
4724 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4725 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with luma residual. */
4730 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4731 if(get_bits1(&s->gb)){
4732 mb_type |= MB_TYPE_8x8DCT;
4733 h->cbp_table[mb_xy]= cbp;
4736 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: pick scan tables (field vs frame, and a q0 variant
 *     when qscale==0), apply dquant, then decode per-block. --- */
4738 if(cbp || IS_INTRA16x16(mb_type)){
4739 int i8x8, i4x4, chroma_idx;
4741 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4742 const uint8_t *scan, *scan8x8, *dc_scan;
4744 // fill_non_zero_count_cache(h);
4746 if(IS_INTERLACED(mb_type)){
4747 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4748 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4749 dc_scan= luma_dc_field_scan;
4751 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4752 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4753 dc_scan= luma_dc_zigzag_scan;
4756 dquant= get_se_golomb(&s->gb);
4758 if( dquant > 25 || dquant < -26 ){
4759 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 per the spec's mb_qp_delta accumulation. */
4763 s->qscale += dquant;
4764 if(((unsigned)s->qscale) > 51){
4765 if(s->qscale<0) s->qscale+= 52;
4766 else s->qscale-= 52;
4769 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4770 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
/* Intra16x16: separate DC block (16 coeffs) then 15-coeff AC blocks. */
4771 if(IS_INTRA16x16(mb_type)){
4772 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4773 return -1; //FIXME continue if partitioned and other return -1 too
4776 assert((cbp&15) == 0 || (cbp&15) == 15);
4779 for(i8x8=0; i8x8<4; i8x8++){
4780 for(i4x4=0; i4x4<4; i4x4++){
4781 const int index= i4x4 + 4*i8x8;
4782 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4788 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Non-16x16 luma: per-8x8 as signalled in cbp; 8x8 DCT blocks are
 * decoded as 4 interleaved 4x4 scans into one 64-coeff buffer. */
4791 for(i8x8=0; i8x8<4; i8x8++){
4792 if(cbp & (1<<i8x8)){
4793 if(IS_8x8DCT(mb_type)){
4794 DCTELEM *buf = &h->mb[64*i8x8];
4796 for(i4x4=0; i4x4<4; i4x4++){
4797 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4798 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4801 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4802 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4804 for(i4x4=0; i4x4<4; i4x4++){
4805 const int index= i4x4 + 4*i8x8;
4807 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4813 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4814 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma: two DC blocks (4 coeffs, no dequant here) then AC blocks. */
4820 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4821 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4827 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4828 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4829 for(i4x4=0; i4x4<4; i4x4++){
4830 const int index= 16 + 4*chroma_idx + i4x4;
4831 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4837 uint8_t * const nnz= &h->non_zero_count_cache[0];
4838 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4839 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residual at all: clear every cached nonzero count. */
4842 uint8_t * const nnz= &h->non_zero_count_cache[0];
4843 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4844 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4845 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4847 s->current_picture.qscale_table[mb_xy]= s->qscale;
4848 write_back_non_zero_count(h);
/* Undo the MBAFF field-MB ref-count doubling done above. */
4851 h->ref_count[0] >>= 1;
4852 h->ref_count[1] >>= 1;
/* Decodes the MBAFF mb_field_decoding_flag with CABAC.  The context
 * (0..2) counts how many of the left / above MB-pair neighbours, when in
 * the current slice, are interlaced; mb_y is rounded down to the top of
 * the MB pair.  State offset 70 is the base for this syntax element. */
4858 static int decode_cabac_field_decoding_flag(H264Context *h) {
4859 MpegEncContext * const s = &h->s;
4860 const int mb_x = s->mb_x;
4861 const int mb_y = s->mb_y & ~1;
4862 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4863 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4865 unsigned int ctx = 0;
4867 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4870 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4874 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decodes an intra mb_type with CABAC: returns 0 for I_4x4, 25 for I_PCM
 * (terminate bin), otherwise 1..24 encoding I_16x16 with cbp_luma,
 * cbp_chroma and prediction-mode bins.  For intra slices the first bin's
 * context depends on whether the left/top neighbours are non-I4x4;
 * for inter slices (intra_slice==0) a fixed state is used instead.
 * NOTE(review): some lines (ctx increments, else/braces) are elided. */
4877 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4878 uint8_t *state= &h->cabac_state[ctx_base];
4882 MpegEncContext * const s = &h->s;
4883 const int mba_xy = h->left_mb_xy[0];
4884 const int mbb_xy = h->top_mb_xy;
4886 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4888 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4890 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4891 return 0; /* I4x4 */
4894 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4895 return 0; /* I4x4 */
4898 if( get_cabac_terminate( &h->cabac ) )
4899 return 25; /* PCM */
/* I_16x16 sub-type assembled from cbp/pred-mode bins (12/4+4/2/1 weights). */
4901 mb_type = 1; /* I16x16 */
4902 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4903 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4904 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4905 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4906 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decodes a B-slice mb_type with CABAC.  First bin (ctx from non-direct
 * neighbours) selects B_Direct_16x16; then a prefix tree over states
 * 27+3..27+5 yields B_L0/L1_16x16, the 4-bit "bits" codes (including the
 * escape 13 -> intra types via decode_cabac_intra_mb_type), B_8x8, or a
 * 5-bit code for the bi-prediction 16x8/8x16 variants.
 * NOTE(review): ctx increments and some braces are elided here. */
4910 static int decode_cabac_mb_type_b( H264Context *h ) {
4911 MpegEncContext * const s = &h->s;
4913 const int mba_xy = h->left_mb_xy[0];
4914 const int mbb_xy = h->top_mb_xy;
4917 assert(h->slice_type_nos == FF_B_TYPE);
4919 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4921 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4924 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4925 return 0; /* B_Direct_16x16 */
4927 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4928 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4931 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4932 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4933 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4934 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4936 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4937 else if( bits == 13 ) {
4938 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4939 } else if( bits == 14 )
4940 return 11; /* B_L1_L0_8x16 */
4941 else if( bits == 15 )
4942 return 22; /* B_8x8 */
/* 5th bin refines the remaining bi-predictive partition codes. */
4944 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4945 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* Decodes mb_skip_flag with CABAC.  Context = number of non-skipped
 * neighbours (left, above) in the current slice; base state is 11 for P
 * slices and (per the branch below) shifted for B slices.  The MBAFF
 * path adjusts neighbour addresses so field/frame pairs line up.
 * NOTE(review): several lines (mba_xy init, ctx increments, braces) are
 * elided from this excerpt. */
4948 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4949 MpegEncContext * const s = &h->s;
4953 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4954 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4957 && h->slice_table[mba_xy] == h->slice_num
4958 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
4959 mba_xy += s->mb_stride;
4961 mbb_xy = mb_xy - s->mb_stride;
4963 && h->slice_table[mbb_xy] == h->slice_num
4964 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4965 mbb_xy -= s->mb_stride;
4967 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
/* Non-MBAFF: above neighbour is one row up (two rows in field pictures). */
4969 int mb_xy = h->mb_xy;
4971 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4974 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4976 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4979 if( h->slice_type_nos == FF_B_TYPE )
4981 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decodes an intra4x4 prediction mode with CABAC: state 68 is
 * prev_intra4x4_pred_mode_flag (use the predicted mode); otherwise three
 * bins from state 69 build rem_intra4x4_pred_mode (LSB first), adjusted
 * upward when >= the predicted mode so the predicted value is excluded.
 * NOTE(review): the early return for the predicted mode is elided. */
4984 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4987 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4990 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4991 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4992 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4994 if( mode >= pred_mode )
/* Decodes intra_chroma_pred_mode with CABAC: context (0..2) counts
 * neighbours in this slice with a nonzero chroma mode, then a truncated
 * unary code over state 64+3 selects modes 1..3 (0 on the first zero bin).
 * NOTE(review): ctx increments and the final returns are elided. */
5000 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5001 const int mba_xy = h->left_mb_xy[0];
5002 const int mbb_xy = h->top_mb_xy;
5006 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5007 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5010 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5013 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5016 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5018 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Decodes the 4-bit luma coded_block_pattern with CABAC.  Each 8x8
 * block's bin uses a context built from the already-decoded bits of this
 * MB and the cached cbp of the left/top neighbour (-1, i.e. all-ones,
 * when the neighbour is outside the slice).  Base state offset is 73. */
5024 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5025 int cbp_b, cbp_a, ctx, cbp = 0;
5027 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5028 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5030 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5031 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5032 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5033 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5034 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5035 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5036 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5037 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* Decodes the chroma part of the coded_block_pattern with CABAC
 * (0 = none, 1 = DC only, 2 = DC+AC).  Neighbour chroma cbp values
 * (bits 4-5 of the cached cbp) drive the contexts; base state 77.
 * NOTE(review): the early "return 0" after the first bin is elided. */
5040 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5044 cbp_a = (h->left_cbp>>4)&0x03;
5045 cbp_b = (h-> top_cbp>>4)&0x03;
5048 if( cbp_a > 0 ) ctx++;
5049 if( cbp_b > 0 ) ctx += 2;
5050 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* Second bin distinguishes DC-only (1) from DC+AC (2). */
5054 if( cbp_a == 2 ) ctx++;
5055 if( cbp_b == 2 ) ctx += 2;
5056 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta with CABAC as a unary code (state base 60; first
 * context depends on whether the previous MB had a nonzero delta), then
 * maps the unary value to a signed delta: odd -> positive, even ->
 * negative, with a cap at 102 to stop corrupt-stream infinite loops.
 * NOTE(review): val init/increment and ctx updates are elided here. */
5058 static int decode_cabac_mb_dqp( H264Context *h) {
5059 int ctx= h->last_qscale_diff != 0;
5062 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5065 if(val > 102) //prevent infinite loop
5070 return (val + 1)>>1 ;
5072 return -((val + 1)>>1);
/* Decodes a P-slice sub_mb_type with CABAC: a small binary tree over
 * states 21..23 selecting among P_L0_8x8 / 8x4 / 4x8 / 4x4.
 * NOTE(review): the leaf return statements are elided in this excerpt. */
5074 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5075 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5077 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5079 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes a B-slice sub_mb_type with CABAC over states 36..39:
 * 0 = B_Direct_8x8, 1/2 = B_L0/L1_8x8, 11/12 = B_L1/Bi_4x4, and the
 * remaining codes built from a 2-bit suffix added to a base ("type").
 * NOTE(review): the type base assignment and final return are elided. */
5083 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5085 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5086 return 0; /* B_Direct_8x8 */
5087 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5088 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5090 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5091 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5092 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5095 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5096 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag with CABAC; the context (state base
 * 399) is the precomputed count of neighbouring MBs using the 8x8 DCT. */
5100 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5101 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes ref_idx with CABAC: context derived from the left/above cached
 * ref indices (in B slices a positive neighbour ref counts only if it is
 * not direct-predicted), then a unary code over state base 54, capped at
 * 32 as a corruption guard.
 * NOTE(review): ctx construction, ref increment and return are elided. */
5104 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5105 int refa = h->ref_cache[list][scan8[n] - 1];
5106 int refb = h->ref_cache[list][scan8[n] - 8];
5110 if( h->slice_type_nos == FF_B_TYPE) {
5111 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5113 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5122 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5125 if(ref >= 32 /*h->ref_list[list]*/){
/* Decodes one motion-vector-difference component (l: 0=x, 1=y) with
 * CABAC: context from the summed neighbour |mvd| (thresholds 2 and 32),
 * a truncated unary prefix (up to 9 bins), then for large values an
 * exp-Golomb-style bypass suffix with an overflow guard, and finally a
 * bypass-coded sign.  ctxbase 40 for x, 47 for y components.
 * NOTE(review): mvd init, ctx stepping and k handling are elided. */
5132 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5133 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5134 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5135 int ctxbase = (l == 0) ? 40 : 47;
5137 int ctx = (amvd>2) + (amvd>32);
5139 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5144 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* Exponential bypass part for mvd >= 9. */
5152 while( get_cabac_bypass( &h->cabac ) ) {
5156 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5161 if( get_cabac_bypass( &h->cabac ) )
/* Sign bit: negate via bypass-sign decode. */
5165 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Builds the coded_block_flag context for a residual category `cat`:
 * fetches the left (nza) and top (nzb) neighbour nonzero flags — from
 * the cached cbp bits for DC categories, from non_zero_count_cache for
 * AC/4x4 categories — and returns ctx + 4*cat into the state table.
 * NOTE(review): the is_dc branch structure and the ctx computation from
 * nza/nzb are partly elided in this excerpt. */
5168 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5174 nza = h->left_cbp&0x100;
5175 nzb = h-> top_cbp&0x100;
5177 nza = (h->left_cbp>>(6+idx))&0x01;
5178 nzb = (h-> top_cbp>>(6+idx))&0x01;
5181 assert(cat == 1 || cat == 2 || cat == 4);
5182 nza = h->non_zero_count_cache[scan8[idx] - 1];
5183 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5192 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset used for the
 * CABAC last_significant_coeff_flag of 8x8 blocks; declared as an
 * asm-visible constant (DECLARE_ASM_CONST) so the x86 CABAC assembly in
 * h264_i386.h can reference it directly. */
5195 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5196 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5197 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5198 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5199 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5202 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* Decodes the CABAC-coded residual coefficients of one transform block
 * into block[]. cat selects the block category (see the comment table
 * further down), n is the block index, scantable the coefficient scan
 * order, qmul the dequant table (NULL for DC blocks), max_coeff the
 * coefficient count and is_dc selects the DC specialization (constant
 * in the _dc/_nondc wrappers, so the always_inline worker is pruned at
 * compile time).
 * NOTE(review): this excerpt is missing interior lines (array closers,
 * some #else/#endif branches); code text kept byte-identical. */
/* CABAC context offsets for significant_coeff_flag, indexed [MB_FIELD][cat]. */
5203 static const int significant_coeff_flag_offset[2][6] = {
5204 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5205 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
/* CABAC context offsets for last_significant_coeff_flag. */
5207 static const int last_coeff_flag_offset[2][6] = {
5208 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5209 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
/* CABAC context offsets for coeff_abs_level_minus1, indexed by cat. */
5211 static const int coeff_abs_level_m1_offset[6] = {
5212 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* Per-scan-position context increments for 8x8 blocks ([frame/field]). */
5214 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5215 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5216 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5217 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5218 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5219 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5220 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5221 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5222 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5224 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5225 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5226 * map node ctx => cabac ctx for level=1 */
5227 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5228 /* map node ctx => cabac ctx for level>1 */
5229 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5230 static const uint8_t coeff_abs_level_transition[2][8] = {
5231 /* update node ctx after decoding a level=1 */
5232 { 1, 2, 3, 3, 4, 5, 6, 7 },
5233 /* update node ctx after decoding a level>1 */
5234 { 4, 4, 4, 4, 5, 6, 7, 7 }
5240 int coeff_count = 0;
5243 uint8_t *significant_coeff_ctx_base;
5244 uint8_t *last_coeff_ctx_base;
5245 uint8_t *abs_level_m1_ctx_base;
/* When enabled, work on a local copy of the hot CABAC fields so the
 * compiler can keep them in registers/on the stack. */
5248 #define CABAC_ON_STACK
5250 #ifdef CABAC_ON_STACK
5253 cc.range = h->cabac.range;
5254 cc.low = h->cabac.low;
5255 cc.bytestream= h->cabac.bytestream;
5257 #define CC &h->cabac
5261 /* cat: 0-> DC 16x16 n = 0
5262 * 1-> AC 16x16 n = luma4x4idx
5263 * 2-> Luma4x4 n = luma4x4idx
5264 * 3-> DC Chroma n = iCbCr
5265 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5266 * 5-> Luma8x8 n = 4 * luma8x8idx
5269 /* read coded block flag */
5270 if( is_dc || cat != 5 ) {
5271 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* No coded coefficients: clear the nnz cache entry, flush the local
 * CABAC state back and return early (the return itself is among the
 * missing lines of this excerpt). */
5273 h->non_zero_count_cache[scan8[n]] = 0;
5275 #ifdef CABAC_ON_STACK
5276 h->cabac.range = cc.range ;
5277 h->cabac.low = cc.low ;
5278 h->cabac.bytestream= cc.bytestream;
5284 significant_coeff_ctx_base = h->cabac_state
5285 + significant_coeff_flag_offset[MB_FIELD][cat];
5286 last_coeff_ctx_base = h->cabac_state
5287 + last_coeff_flag_offset[MB_FIELD][cat];
5288 abs_level_m1_ctx_base = h->cabac_state
5289 + coeff_abs_level_m1_offset[cat];
/* Significance map: collect scan positions of nonzero coefficients
 * into index[]; cat 5 (luma 8x8) uses its own context tables. */
5291 if( !is_dc && cat == 5 ) {
5292 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5293 for(last= 0; last < coefs; last++) { \
5294 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5295 if( get_cabac( CC, sig_ctx )) { \
5296 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5297 index[coeff_count++] = last; \
5298 if( get_cabac( CC, last_ctx ) ) { \
5304 if( last == max_coeff -1 ) {\
5305 index[coeff_count++] = last;\
5307 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5308 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5309 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5311 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5313 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5315 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5318 assert(coeff_count > 0);
/* Record coded-block info: 0x100 marks the DC bit, 0x40<<n a luma 8x8;
 * nnz cache gets the coefficient count for deblocking/prediction. */
5322 h->cbp_table[h->mb_xy] |= 0x100;
5324 h->cbp_table[h->mb_xy] |= 0x40 << n;
5327 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5329 assert( cat == 1 || cat == 2 || cat == 4 );
5330 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Decode coefficient levels in reverse scan order; node_ctx tracks the
 * abs-level context state machine, the sign comes via bypass coding,
 * and qmul (when non-NULL) dequantizes with rounding (+32)>>6. */
5335 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5337 int j= scantable[index[--coeff_count]];
5339 if( get_cabac( CC, ctx ) == 0 ) {
5340 node_ctx = coeff_abs_level_transition[0][node_ctx];
5342 block[j] = get_cabac_bypass_sign( CC, -1);
5344 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5348 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5349 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* Unary part of abs level up to 14, then bypass-coded Exp-Golomb
 * escape for levels >= 15. */
5351 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5355 if( coeff_abs >= 15 ) {
5357 while( get_cabac_bypass( CC ) ) {
5363 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5369 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5371 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5374 } while( coeff_count );
/* Flush the local CABAC state back into the decoder context. */
5375 #ifdef CABAC_ON_STACK
5376 h->cabac.range = cc.range ;
5377 h->cabac.low = cc.low ;
5378 h->cabac.bytestream= cc.bytestream;
5384 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* DC wrapper: calls the always-inline worker with is_dc=1 so the
 * compiler emits a DC-only specialization. */
5385 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5388 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* Non-DC wrapper: calls the always-inline worker with is_dc=0 so the
 * compiler emits an AC/luma specialization. */
5389 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5393 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* Dispatcher: cat 0 (luma DC) and cat 3 (chroma DC) are the DC
 * categories, everything else is non-DC.
 * NOTE(review): both alternatives of what is presumably a size-vs-speed
 * preprocessor conditional are visible below; the #if/#else/#endif
 * lines themselves are missing from this excerpt — confirm against the
 * full file. */
5395 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5397 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5398 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5402 static inline void compute_mb_neighbors(H264Context *h)
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * Defaults to the simple frame layout, then adjusts for MBAFF pairs
 * and field pictures (the enclosing FRAME_MBAFF branch heads are
 * missing from this excerpt). */
5404 MpegEncContext * const s = &h->s;
5405 const int mb_xy = h->mb_xy;
5406 h->top_mb_xy = mb_xy - s->mb_stride;
5407 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are addressed via the macroblock pair above/left. */
5409 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5410 const int top_pair_xy = pair_xy - s->mb_stride;
5411 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5412 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5413 const int curr_mb_field_flag = MB_FIELD;
5414 const int bottom = (s->mb_y & 1);
5416 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5417 h->top_mb_xy -= s->mb_stride;
/* Left neighbour is the pair MB when the field flags differ in the
 * right way (note the intentional !flag == flag comparison). */
5419 if (!left_mb_field_flag == curr_mb_field_flag) {
5420 h->left_mb_xy[0] = pair_xy - 1;
5422 } else if (FIELD_PICTURE) {
5423 h->top_mb_xy -= s->mb_stride;
5429 * Decodes one macroblock from a CABAC-coded slice.
5430 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5432 static int decode_mb_cabac(H264Context *h) {
/* Decodes one CABAC macroblock: skip flags, mb_type, intra prediction
 * modes or inter references/motion vectors, CBP, qp delta and the
 * residual coefficient blocks. Returns 0 on success, -1 on bitstream
 * error.
 * NOTE(review): many original lines are missing from this excerpt
 * (closing braces, some declarations and branch heads); the remaining
 * code is kept byte-identical. */
5433 MpegEncContext * const s = &h->s;
5435 int mb_type, partition_count, cbp = 0;
5436 int dct8x8_allowed= h->pps.transform_8x8_mode;
5438 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5440 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* P/B slices: decode mb_skip_flag, with MBAFF pair look-ahead. */
5441 if( h->slice_type_nos != FF_I_TYPE ) {
5443 /* a skipped mb needs the aff flag from the following mb */
5444 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5445 predict_field_decoding_flag(h);
5446 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5447 skip = h->next_mb_skipped;
5449 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5450 /* read skip flags */
5452 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5453 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5454 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5455 if(!h->next_mb_skipped)
5456 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB bookkeeping (the early return is among the
 * lines missing from this excerpt). */
5461 h->cbp_table[mb_xy] = 0;
5462 h->chroma_pred_mode_table[mb_xy] = 0;
5463 h->last_qscale_diff = 0;
5470 if( (s->mb_y&1) == 0 )
5472 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5475 h->prev_mb_skipped = 0;
5477 compute_mb_neighbors(h);
/* mb_type decoding, per slice type (B, P/SP, then I/SI fall-through). */
5479 if( h->slice_type_nos == FF_B_TYPE ) {
5480 mb_type = decode_cabac_mb_type_b( h );
5482 partition_count= b_mb_type_info[mb_type].partition_count;
5483 mb_type= b_mb_type_info[mb_type].type;
5486 goto decode_intra_mb;
5488 } else if( h->slice_type_nos == FF_P_TYPE ) {
5489 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5491 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5492 /* P_L0_D16x16, P_8x8 */
5493 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5495 /* P_L0_D8x16, P_L0_D16x8 */
5496 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5498 partition_count= p_mb_type_info[mb_type].partition_count;
5499 mb_type= p_mb_type_info[mb_type].type;
5501 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5502 goto decode_intra_mb;
5505 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5506 if(h->slice_type == FF_SI_TYPE && mb_type)
5508 assert(h->slice_type_nos == FF_I_TYPE);
5510 partition_count = 0;
5511 cbp= i_mb_type_info[mb_type].cbp;
5512 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5513 mb_type= i_mb_type_info[mb_type].type;
5516 mb_type |= MB_TYPE_INTERLACED;
5518 h->slice_table[ mb_xy ]= h->slice_num;
/* IPCM macroblock: raw samples follow in the bitstream; locate the
 * byte position, copy the samples and re-init the CABAC decoder. */
5520 if(IS_INTRA_PCM(mb_type)) {
5523 // We assume these blocks are very rare so we do not optimize it.
5524 // FIXME The two following lines get the bitstream position in the cabac
5525 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5526 ptr= h->cabac.bytestream;
5527 if(h->cabac.low&0x1) ptr--;
5529 if(h->cabac.low&0x1FF) ptr--;
5532 // The pixels are stored in the same order as levels in h->mb array.
5533 memcpy(h->mb, ptr, 256); ptr+=256;
5535 memcpy(h->mb+128, ptr, 128); ptr+=128;
5538 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5540 // All blocks are present
5541 h->cbp_table[mb_xy] = 0x1ef;
5542 h->chroma_pred_mode_table[mb_xy] = 0;
5543 // In deblocking, the quantizer is 0
5544 s->current_picture.qscale_table[mb_xy]= 0;
5545 // All coeffs are present
5546 memset(h->non_zero_count[mb_xy], 16, 16);
5547 s->current_picture.mb_type[mb_xy]= mb_type;
5548 h->last_qscale_diff = 0;
/* MBAFF addresses references per field: temporarily double the ref
 * counts (undone at the end of the function). */
5553 h->ref_count[0] <<= 1;
5554 h->ref_count[1] <<= 1;
5557 fill_caches(h, mb_type, 0);
/* Intra MB: 4x4 (or 8x8-DCT) luma prediction modes, or validated
 * 16x16 mode, plus the chroma prediction mode. */
5559 if( IS_INTRA( mb_type ) ) {
5561 if( IS_INTRA4x4( mb_type ) ) {
5562 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5563 mb_type |= MB_TYPE_8x8DCT;
5564 for( i = 0; i < 16; i+=4 ) {
5565 int pred = pred_intra_mode( h, i );
5566 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5567 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5570 for( i = 0; i < 16; i++ ) {
5571 int pred = pred_intra_mode( h, i );
5572 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5574 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5577 write_back_intra_pred_mode(h);
5578 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5580 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5581 if( h->intra16x16_pred_mode < 0 ) return -1;
5584 h->chroma_pred_mode_table[mb_xy] =
5585 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5587 pred_mode= check_intra_pred_mode( h, pred_mode );
5588 if( pred_mode < 0 ) return -1;
5589 h->chroma_pred_mode= pred_mode;
/* 8x8-partitioned inter MB: sub_mb_types, then references, then MVs. */
5591 } else if( partition_count == 4 ) {
5592 int i, j, sub_partition_count[4], list, ref[2][4];
5594 if( h->slice_type_nos == FF_B_TYPE ) {
5595 for( i = 0; i < 4; i++ ) {
5596 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5597 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5598 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5600 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5601 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5602 pred_direct_motion(h, &mb_type);
5603 h->ref_cache[0][scan8[4]] =
5604 h->ref_cache[1][scan8[4]] =
5605 h->ref_cache[0][scan8[12]] =
5606 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5607 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5608 for( i = 0; i < 4; i++ )
5609 if( IS_DIRECT(h->sub_mb_type[i]) )
5610 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5614 for( i = 0; i < 4; i++ ) {
5615 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5616 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5617 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference index per 8x8 partition, validated against ref_count. */
5621 for( list = 0; list < h->list_count; list++ ) {
5622 for( i = 0; i < 4; i++ ) {
5623 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5624 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5625 if( h->ref_count[list] > 1 ){
5626 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5627 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5628 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5636 h->ref_cache[list][ scan8[4*i]+1 ]=
5637 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5642 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vector deltas per sub-partition; mv/mvd caches are filled
 * according to the sub-partition shape (8x8/8x4/4x8/4x4). */
5644 for(list=0; list<h->list_count; list++){
5646 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5647 if(IS_DIRECT(h->sub_mb_type[i])){
5648 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5652 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5653 const int sub_mb_type= h->sub_mb_type[i];
5654 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5655 for(j=0; j<sub_partition_count[i]; j++){
5658 const int index= 4*i + block_width*j;
5659 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5660 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5661 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5663 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5664 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5665 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5667 if(IS_SUB_8X8(sub_mb_type)){
5669 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5671 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5674 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5676 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5677 }else if(IS_SUB_8X4(sub_mb_type)){
5678 mv_cache[ 1 ][0]= mx;
5679 mv_cache[ 1 ][1]= my;
5681 mvd_cache[ 1 ][0]= mx - mpx;
5682 mvd_cache[ 1 ][1]= my - mpy;
5683 }else if(IS_SUB_4X8(sub_mb_type)){
5684 mv_cache[ 8 ][0]= mx;
5685 mv_cache[ 8 ][1]= my;
5687 mvd_cache[ 8 ][0]= mx - mpx;
5688 mvd_cache[ 8 ][1]= my - mpy;
5690 mv_cache[ 0 ][0]= mx;
5691 mv_cache[ 0 ][1]= my;
5693 mvd_cache[ 0 ][0]= mx - mpx;
5694 mvd_cache[ 0 ][1]= my - mpy;
/* Unpredicted list: zero the 8x8 region of mv/mvd caches. */
5697 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5698 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5699 p[0] = p[1] = p[8] = p[9] = 0;
5700 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* B-direct MB: motion is fully derived, no mvd is coded. */
5704 } else if( IS_DIRECT(mb_type) ) {
5705 pred_direct_motion(h, &mb_type);
5706 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5707 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5708 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16 / 16x8 / 8x16 inter MB: refs then MVs per partition. */
5710 int list, mx, my, i, mpx, mpy;
5711 if(IS_16X16(mb_type)){
5712 for(list=0; list<h->list_count; list++){
5713 if(IS_DIR(mb_type, 0, list)){
5715 if(h->ref_count[list] > 1){
5716 ref= decode_cabac_mb_ref(h, list, 0);
5717 if(ref >= (unsigned)h->ref_count[list]){
5718 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5723 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5725 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5727 for(list=0; list<h->list_count; list++){
5728 if(IS_DIR(mb_type, 0, list)){
5729 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5731 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5732 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5733 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5735 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5736 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5738 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5741 else if(IS_16X8(mb_type)){
5742 for(list=0; list<h->list_count; list++){
5744 if(IS_DIR(mb_type, i, list)){
5746 if(h->ref_count[list] > 1){
5747 ref= decode_cabac_mb_ref( h, list, 8*i );
5748 if(ref >= (unsigned)h->ref_count[list]){
5749 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5754 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5756 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5759 for(list=0; list<h->list_count; list++){
5761 if(IS_DIR(mb_type, i, list)){
5762 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5763 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5764 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5765 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5767 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5768 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5770 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5771 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5776 assert(IS_8X16(mb_type));
5777 for(list=0; list<h->list_count; list++){
5779 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5781 if(h->ref_count[list] > 1){
5782 ref= decode_cabac_mb_ref( h, list, 4*i );
5783 if(ref >= (unsigned)h->ref_count[list]){
5784 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5789 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5791 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5794 for(list=0; list<h->list_count; list++){
5796 if(IS_DIR(mb_type, i, list)){
5797 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5798 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5799 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5801 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5802 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5803 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5805 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5806 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5813 if( IS_INTER( mb_type ) ) {
5814 h->chroma_pred_mode_table[mb_xy] = 0;
5815 write_back_motion( h, mb_type );
/* Coded block pattern (explicit except for intra16x16 which carries
 * it in the mb_type table). */
5818 if( !IS_INTRA16x16( mb_type ) ) {
5819 cbp = decode_cabac_mb_cbp_luma( h );
5821 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5824 h->cbp_table[mb_xy] = h->cbp = cbp;
5826 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5827 if( decode_cabac_mb_transform_size( h ) )
5828 mb_type |= MB_TYPE_8x8DCT;
5830 s->current_picture.mb_type[mb_xy]= mb_type;
/* Residuals: pick scan tables, decode qp delta, then luma and chroma
 * coefficient blocks via decode_cabac_residual(). */
5832 if( cbp || IS_INTRA16x16( mb_type ) ) {
5833 const uint8_t *scan, *scan8x8, *dc_scan;
5834 const uint32_t *qmul;
5837 if(IS_INTERLACED(mb_type)){
5838 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5839 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5840 dc_scan= luma_dc_field_scan;
5842 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5843 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5844 dc_scan= luma_dc_zigzag_scan;
5847 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5848 if( dqp == INT_MIN ){
5849 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into the legal 0..51 range after adding dqp. */
5853 if(((unsigned)s->qscale) > 51){
5854 if(s->qscale<0) s->qscale+= 52;
5855 else s->qscale-= 52;
5857 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5858 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5860 if( IS_INTRA16x16( mb_type ) ) {
5862 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5863 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5866 qmul = h->dequant4_coeff[0][s->qscale];
5867 for( i = 0; i < 16; i++ ) {
5868 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5869 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5872 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5876 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5877 if( cbp & (1<<i8x8) ) {
5878 if( IS_8x8DCT(mb_type) ) {
5879 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5880 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5882 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5883 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5884 const int index = 4*i8x8 + i4x4;
5885 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5887 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5888 //STOP_TIMER("decode_residual")
5892 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5893 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (always 4 coeffs), then chroma AC when signalled by cbp. */
5900 for( c = 0; c < 2; c++ ) {
5901 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5902 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5908 for( c = 0; c < 2; c++ ) {
5909 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5910 for( i = 0; i < 4; i++ ) {
5911 const int index = 16 + 4 * c + i;
5912 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5913 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5917 uint8_t * const nnz= &h->non_zero_count_cache[0];
5918 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5919 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No coded coefficients at all: clear the whole nnz cache. */
5922 uint8_t * const nnz= &h->non_zero_count_cache[0];
5923 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5924 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5925 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5926 h->last_qscale_diff = 0;
5929 s->current_picture.qscale_table[mb_xy]= s->qscale;
5930 write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling done before fill_caches(). */
5933 h->ref_count[0] >>= 1;
5934 h->ref_count[1] >>= 1;
5941 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblocks one vertical luma edge. bS<4 uses the tc0-clipped normal
 * filter, otherwise the strong intra filter (the bS<4/else branch
 * heads and the tc[] declaration are missing from this excerpt). */
5942 const int index_a = qp + h->slice_alpha_c0_offset;
5943 const int alpha = (alpha_table+52)[index_a];
5944 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5948 tc[0] = (tc0_table+52)[index_a][bS[0]];
5949 tc[1] = (tc0_table+52)[index_a][bS[1]];
5950 tc[2] = (tc0_table+52)[index_a][bS[2]];
5951 tc[3] = (tc0_table+52)[index_a][bS[3]];
5952 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5954 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5957 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblocks one vertical chroma edge; chroma uses tc0+1 as the clip
 * value. Branch heads are missing from this excerpt. */
5958 const int index_a = qp + h->slice_alpha_c0_offset;
5959 const int alpha = (alpha_table+52)[index_a];
5960 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5964 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5965 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5966 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5967 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5968 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5970 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5974 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
/* Deblocks the vertical luma MB edge in the MBAFF case, row by row,
 * since bS and qp can differ per field. bS has 8 entries (4 per
 * field); qp[0]/qp[1] are the two field QPs.
 * NOTE(review): several lines (declarations, branch heads, closing
 * braces) are missing from this excerpt; code kept byte-identical. */
5976 for( i = 0; i < 16; i++, pix += stride) {
5982 int bS_index = (i >> 1);
5985 bS_index |= (i & 1);
5988 if( bS[bS_index] == 0 ) {
/* Select the field QP for this row and derive alpha/beta thresholds. */
5992 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5993 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5994 alpha = (alpha_table+52)[index_a];
5995 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal (tc0-clipped) filter for bS < 4. */
5997 if( bS[bS_index] < 4 ) {
5998 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5999 const int p0 = pix[-1];
6000 const int p1 = pix[-2];
6001 const int p2 = pix[-3];
6002 const int q0 = pix[0];
6003 const int q1 = pix[1];
6004 const int q2 = pix[2];
6006 if( FFABS( p0 - q0 ) < alpha &&
6007 FFABS( p1 - p0 ) < beta &&
6008 FFABS( q1 - q0 ) < beta ) {
6012 if( FFABS( p2 - p0 ) < beta ) {
6013 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6016 if( FFABS( q2 - q0 ) < beta ) {
6017 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6021 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6022 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6023 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6024 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter for bS == 4 (intra edges). */
6027 const int p0 = pix[-1];
6028 const int p1 = pix[-2];
6029 const int p2 = pix[-3];
6031 const int q0 = pix[0];
6032 const int q1 = pix[1];
6033 const int q2 = pix[2];
6035 if( FFABS( p0 - q0 ) < alpha &&
6036 FFABS( p1 - p0 ) < beta &&
6037 FFABS( q1 - q0 ) < beta ) {
6039 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6040 if( FFABS( p2 - p0 ) < beta)
6042 const int p3 = pix[-4];
6044 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6045 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6046 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6049 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6051 if( FFABS( q2 - q0 ) < beta)
6053 const int q3 = pix[3];
6055 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6056 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6057 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6060 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6064 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6065 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6067 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6072 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
/* Deblocks the vertical chroma MB edge in the MBAFF case, row by row
 * (8 chroma rows). Same structure as the luma variant but with the
 * 2-tap chroma filters and tc0+1 clipping.
 * NOTE(review): some lines (bS_index computation, braces) are missing
 * from this excerpt; code kept byte-identical. */
6074 for( i = 0; i < 8; i++, pix += stride) {
6082 if( bS[bS_index] == 0 ) {
6086 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6087 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6088 alpha = (alpha_table+52)[index_a];
6089 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter for bS < 4: clip delta to +-(tc0+1). */
6091 if( bS[bS_index] < 4 ) {
6092 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6093 const int p0 = pix[-1];
6094 const int p1 = pix[-2];
6095 const int q0 = pix[0];
6096 const int q1 = pix[1];
6098 if( FFABS( p0 - q0 ) < alpha &&
6099 FFABS( p1 - p0 ) < beta &&
6100 FFABS( q1 - q0 ) < beta ) {
6101 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6103 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6104 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6105 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter for bS == 4. */
6108 const int p0 = pix[-1];
6109 const int p1 = pix[-2];
6110 const int q0 = pix[0];
6111 const int q1 = pix[1];
6113 if( FFABS( p0 - q0 ) < alpha &&
6114 FFABS( p1 - p0 ) < beta &&
6115 FFABS( q1 - q0 ) < beta ) {
6117 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6118 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6119 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6125 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblocks one horizontal luma edge (v_loop_filter); same structure
 * as filter_mb_edgev. Branch heads are missing from this excerpt. */
6126 const int index_a = qp + h->slice_alpha_c0_offset;
6127 const int alpha = (alpha_table+52)[index_a];
6128 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6132 tc[0] = (tc0_table+52)[index_a][bS[0]];
6133 tc[1] = (tc0_table+52)[index_a][bS[1]];
6134 tc[2] = (tc0_table+52)[index_a][bS[2]];
6135 tc[3] = (tc0_table+52)[index_a][bS[3]];
6136 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6138 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6142 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblocks one horizontal chroma edge; chroma uses tc0+1 clipping.
 * Branch heads are missing from this excerpt. */
6143 const int index_a = qp + h->slice_alpha_c0_offset;
6144 const int alpha = (alpha_table+52)[index_a];
6145 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6149 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6150 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6151 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6152 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6153 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6155 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6159 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
/* Fast-path deblocking for one macroblock: computes boundary strengths
 * with dsp shortcuts and calls the edge filters directly; falls back to
 * the generic filter_mb() when its preconditions do not hold.
 * NOTE(review): the tail of this function (the 8x8DCT edge loop after
 * line 6254) is missing from this excerpt; code kept byte-identical. */
6160 MpegEncContext * const s = &h->s;
6161 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6163 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Fall back to the slow path on picture borders, per-MB chroma qp
 * diffs, missing dsp helper, or cross-slice edges with filter mode 2. */
6167 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6168 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6169 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6170 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6171 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6174 assert(!FRAME_MBAFF);
/* Average the QPs across the left/top edges, as the spec requires. */
6176 mb_type = s->current_picture.mb_type[mb_xy];
6177 qp = s->current_picture.qscale_table[mb_xy];
6178 qp0 = s->current_picture.qscale_table[mb_xy-1];
6179 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6180 qpc = get_chroma_qp( h, 0, qp );
6181 qpc0 = get_chroma_qp( h, 0, qp0 );
6182 qpc1 = get_chroma_qp( h, 0, qp1 );
6183 qp0 = (qp + qp0 + 1) >> 1;
6184 qp1 = (qp + qp1 + 1) >> 1;
6185 qpc0 = (qpc + qpc0 + 1) >> 1;
6186 qpc1 = (qpc + qpc1 + 1) >> 1;
6187 qp_thresh = 15 - h->slice_alpha_c0_offset;
/* All QPs below threshold: alpha/beta are zero, nothing to filter. */
6188 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6189 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: fixed bS (4 on MB edges, 3 inside; 3 on the top edge of
 * field pictures), so filter the edges directly. */
6192 if( IS_INTRA(mb_type) ) {
6193 int16_t bS4[4] = {4,4,4,4};
6194 int16_t bS3[4] = {3,3,3,3};
6195 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6196 if( IS_8x8DCT(mb_type) ) {
6197 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6198 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6199 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6200 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6202 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6203 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6204 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6205 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6206 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6207 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6208 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6209 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6211 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6212 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6213 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6214 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6215 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6216 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6217 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6218 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per edge with the dsp helper, viewing each
 * 4-entry bS row as one 64-bit word for the fast all-equal checks. */
6221 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6222 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6224 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6226 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6228 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6229 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6230 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6231 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6233 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6234 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6235 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6236 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* MB edges against intra neighbours always get bS 4 (3 for the top
 * edge of field pictures). */
6238 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6239 bSv[0][0] = 0x0004000400040004ULL;
6240 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6241 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* Filter each non-zero edge; dir 0 = vertical edges, dir 1 =
 * horizontal; edge 0 uses the averaged cross-MB QP. */
6243 #define FILTER(hv,dir,edge)\
6244 if(bSv[dir][edge]) {\
6245 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6247 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6248 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6254 } else if( IS_8x8DCT(mb_type) ) {
// Apply the in-loop deblocking filter to one macroblock in one direction.
// dir==0 filters vertical edges (neighbour = left MB, mb_xy-1);
// dir==1 filters horizontal edges (neighbour = top MB, h->top_mb_xy).
// Boundary strengths (bS) are derived from intra status, coded residuals
// (non_zero_count) and reference/motion-vector differences, then the
// per-edge luma/chroma filter helpers are invoked.
6274 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6275 MpegEncContext * const s = &h->s;
6277 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6278 const int mbm_type = s->current_picture.mb_type[mbm_xy];
// Per-slice reference-index -> frame translation tables for this MB and for
// the neighbour MB (which may belong to a different slice).
// NOTE(review): the +20/+2 offsets appear to skip MBAFF padding entries — confirm.
6279 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6280 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
// 0xFFFF in slice_table marks an unavailable neighbour: start at edge 1 and
// skip the MB-boundary edge entirely.
6281 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6283 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6284 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6285 // how often to recheck mv-based bS when iterating between edges
6286 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6287 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6288 // how often to recheck mv-based bS when iterating along each edge
6289 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6291 if (first_vertical_edge_done) {
// deblocking_filter==2: do not filter across slice boundaries.
6295 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
// Frame MB above an interlaced MB pair: the shared horizontal edge must be
// filtered once per field, with doubled line strides.
6298 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6299 && !IS_INTERLACED(mb_type)
6300 && IS_INTERLACED(mbm_type)
6302 // This is a special case in the norm where the filtering must
6303 // be done twice (one each of the field) even if we are in a
6304 // frame macroblock.
6306 static const int nnz_idx[4] = {4,5,6,3};
6307 unsigned int tmp_linesize = 2 * linesize;
6308 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6309 int mbn_xy = mb_xy - 2 * s->mb_stride;
6314 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6315 if( IS_INTRA(mb_type) ||
6316 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6317 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6319 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6320 for( i = 0; i < 4; i++ ) {
6321 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6322 mbn_nnz[nnz_idx[i]] != 0 )
6328 // Do not use s->qscale as luma quantizer because it has not the same
6329 // value in IPCM macroblocks.
6330 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6331 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6332 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6333 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6334 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6335 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6336 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6337 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Main per-edge loop: edge 0 is the MB boundary (neighbour = mbm),
// edges 1..3 are interior edges of this MB.
6344 for( edge = start; edge < edges; edge++ ) {
6345 /* mbn_xy: neighbor macroblock */
6346 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6347 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6348 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
// 8x8 transform MBs have no coded 4x4 interior edges: skip odd edges.
6352 if( (edge&1) && IS_8x8DCT(mb_type) )
6355 if( IS_INTRA(mb_type) ||
6356 IS_INTRA(mbn_type) ) {
6359 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6360 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6369 bS[0] = bS[1] = bS[2] = bS[3] = value;
6374 if( edge & mask_edge ) {
6375 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6378 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6379 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6382 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6383 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6384 int bn_idx= b_idx - (dir ? 8:1);
// v becomes nonzero if references differ or any MV component differs by
// >= 1 luma sample horizontally (4 quarter-pels) / mvy_limit vertically.
6387 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6388 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6389 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6390 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
// B-slices: also try the cross-list (l vs ln) pairing before declaring a
// motion discontinuity.
6393 if(h->slice_type_nos == FF_B_TYPE && v){
6395 for( l = 0; !v && l < 2; l++ ) {
6397 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6398 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6399 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6403 bS[0] = bS[1] = bS[2] = bS[3] = v;
// Slow path: compute bS independently for each of the 4 positions along
// the edge.
6409 for( i = 0; i < 4; i++ ) {
6410 int x = dir == 0 ? edge : i;
6411 int y = dir == 0 ? i : edge;
6412 int b_idx= 8 + 4 + x + 8*y;
6413 int bn_idx= b_idx - (dir ? 8:1);
6415 if( h->non_zero_count_cache[b_idx] |
6416 h->non_zero_count_cache[bn_idx] ) {
6422 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6423 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6424 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6425 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6431 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6433 for( l = 0; l < 2; l++ ) {
6435 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6436 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6437 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
// All four strengths zero => nothing to filter on this edge.
6446 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6451 // Do not use s->qscale as luma quantizer because it has not the same
6452 // value in IPCM macroblocks.
6453 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6454 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6455 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6456 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// Chroma is subsampled: only every second luma edge has a chroma edge.
6458 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6459 if( (edge&1) == 0 ) {
6460 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6461 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6462 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6463 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6466 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6467 if( (edge&1) == 0 ) {
6468 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6469 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6470 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6471 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Full (non-fast-path) deblocking of one macroblock: early-skip on low QP,
// NNZ fixups for CAVLC+8x8 transform, the MBAFF first-vertical-edge special
// case, then filter_mb_dir() for the vertical (dir 0) and horizontal (dir 1)
// edges.
6477 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6478 MpegEncContext * const s = &h->s;
6479 const int mb_xy= mb_x + mb_y*s->mb_stride;
6480 const int mb_type = s->current_picture.mb_type[mb_xy];
// Field MBs use a tighter vertical MV threshold (2 = half a field line pair).
6481 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6482 int first_vertical_edge_done = 0;
6485 //for sufficiently low qp, filtering wouldn't do anything
6486 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6488 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6489 int qp = s->current_picture.qscale_table[mb_xy];
6491 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6492 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6497 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6498 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6499 int top_type, left_type[2];
6500 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6501 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6502 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// Rebuild the edge rows/columns of non_zero_count_cache from the per-8x8
// cbp bits of the neighbours when they used the 8x8 transform.
6504 if(IS_8x8DCT(top_type)){
6505 h->non_zero_count_cache[4+8*0]=
6506 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6507 h->non_zero_count_cache[6+8*0]=
6508 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6510 if(IS_8x8DCT(left_type[0])){
6511 h->non_zero_count_cache[3+8*1]=
6512 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6514 if(IS_8x8DCT(left_type[1])){
6515 h->non_zero_count_cache[3+8*3]=
6516 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6519 if(IS_8x8DCT(mb_type)){
6520 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6521 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6523 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6524 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6526 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6527 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6529 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6530 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6535 // left mb is in picture
6536 && h->slice_table[mb_xy-1] != 0xFFFF
6537 // and current and left pair do not have the same interlaced type
6538 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6539 // and left mb is in the same slice if deblocking_filter == 2
6540 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6541 /* First vertical edge is different in MBAFF frames
6542 * There are 8 different bS to compute and 2 different Qp
6544 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6545 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6550 int mb_qp, mbn0_qp, mbn1_qp;
6552 first_vertical_edge_done = 1;
6554 if( IS_INTRA(mb_type) )
6555 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6557 for( i = 0; i < 8; i++ ) {
// Pick which of the two left-pair MBs each of the 8 strengths refers to,
// depending on whether the current MB is a field or frame MB.
6558 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6560 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6562 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6563 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6564 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6566 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// Average the QPs of the current MB and each left-pair MB for luma and
// both chroma planes (two QP sets: one per neighbour MB).
6573 mb_qp = s->current_picture.qscale_table[mb_xy];
6574 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6575 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6576 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6577 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6578 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6579 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6580 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6581 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6582 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6583 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6584 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6585 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6588 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6589 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6590 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6591 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6592 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// Filter both directions; when the MBAFF special case above already handled
// the first vertical edge, tell dir 0 to skip it.
6596 for( dir = 0; dir < 2; dir++ )
6597 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6599 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6600 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
// Decode one slice: either the CABAC path (init context states, loop
// decode_mb_cabac + hl_decode_mb until the terminate bit) or the CAVLC path
// (loop decode_mb_cavlc until the bitstream is exhausted).  Error-resilience
// slices are reported to the error concealer via ff_er_add_slice().
//
// Fix applied in review: three get_bits_count() call sites passed the
// GetBitContext by value (one of them additionally contained garbled '?'
// characters: "s->?gb" / "s->gb?."); every other call site in this function
// uses &s->gb / s->gb. — made them consistent.
6604 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6605 H264Context *h = *(void**)arg;
6606 MpegEncContext * const s = &h->s;
// Partitioned frames only report AC errors/ends to the error concealer.
6607 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6611 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6612 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6614 if( h->pps.cabac ) {
// CABAC data starts byte-aligned after the slice header.
6618 align_get_bits( &s->gb );
6621 ff_init_cabac_states( &h->cabac);
6622 ff_init_cabac_decoder( &h->cabac,
6623 s->gb.buffer + get_bits_count(&s->gb)/8,
6624 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6625 /* calculate pre-state */
6626 for( i= 0; i < 460; i++ ) {
6628 if( h->slice_type_nos == FF_I_TYPE )
6629 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6631 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
// Pack (state, MPS) into one byte: <=63 means MPS==0, >=64 means MPS==1.
6634 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6636 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6641 int ret = decode_mb_cabac(h);
6643 //STOP_TIMER("decode_mb_cabac")
6645 if(ret>=0) hl_decode_mb(h);
6647 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6650 ret = decode_mb_cabac(h);
6652 if(ret>=0) hl_decode_mb(h);
6655 eos = get_cabac_terminate( &h->cabac );
// Allow up to 2 bytes of overread before declaring a bitstream error.
6657 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6658 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6659 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6663 if( ++s->mb_x >= s->mb_width ) {
6665 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6667 if(FIELD_OR_MBAFF_PICTURE) {
6672 if( eos || s->mb_y >= s->mb_height ) {
6673 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6674 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6681 int ret = decode_mb_cavlc(h);
6683 if(ret>=0) hl_decode_mb(h);
6685 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6687 ret = decode_mb_cavlc(h);
6689 if(ret>=0) hl_decode_mb(h);
6694 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6695 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6700 if(++s->mb_x >= s->mb_width){
6702 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6704 if(FIELD_OR_MBAFF_PICTURE) {
6707 if(s->mb_y >= s->mb_height){
6708 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// Slice is valid only if it ends exactly at the end of the bit buffer.
6710 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6711 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6715 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// mb_skip_run may legitimately read past the nominal bit count, hence the
// <=0 guard before treating buffer exhaustion as slice end.
6722 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6723 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6724 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6725 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6729 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6738 for(;s->mb_y < s->mb_height; s->mb_y++){
6739 for(;s->mb_x < s->mb_width; s->mb_x++){
6740 int ret= decode_mb(h);
6745 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6746 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6751 if(++s->mb_x >= s->mb_width){
6753 if(++s->mb_y >= s->mb_height){
6754 if(get_bits_count(&s->gb) == s->gb.size_in_bits){
6755 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6759 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6766 if(get_bits_count(&s->gb) >= s->gb.size_in_bits){
6767 if(get_bits_count(&s->gb) == s->gb.size_in_bits){
6768 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6772 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6779 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6782 return -1; //not reached
// Parse a "picture timing" SEI message: CPB/DPB delays (when HRD parameters
// are present in the SPS) and pic_struct plus the optional per-field clock
// timestamps.  Timestamp fields are skipped, not stored.
6785 static int decode_picture_timing(H264Context *h){
6786 MpegEncContext * const s = &h->s;
6787 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6788 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6789 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6791 if(h->sps.pic_struct_present_flag){
6792 unsigned int i, num_clock_ts;
6793 h->sei_pic_struct = get_bits(&s->gb, 4);
// Reject reserved pic_struct values above FRAME_TRIPLING.
6795 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6798 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6800 for (i = 0 ; i < num_clock_ts ; i++){
6801 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6802 unsigned int full_timestamp_flag;
6803 skip_bits(&s->gb, 2); /* ct_type */
6804 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6805 skip_bits(&s->gb, 5); /* counting_type */
6806 full_timestamp_flag = get_bits(&s->gb, 1);
6807 skip_bits(&s->gb, 1); /* discontinuity_flag */
6808 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6809 skip_bits(&s->gb, 8); /* n_frames */
6810 if(full_timestamp_flag){
6811 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6812 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6813 skip_bits(&s->gb, 5); /* hours_value 0..23 */
// Without full_timestamp_flag, each time component is individually gated.
6815 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6816 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6817 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6818 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6819 if(get_bits(&s->gb, 1)) /* hours_flag */
6820 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6824 if(h->sps.time_offset_length > 0)
6825 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
// Parse an "unregistered user data" SEI message.  The 16-byte UUID plus the
// start of the payload is captured into user_data[] and scanned for the x264
// version banner to set h->x264_build (used elsewhere for bug workarounds).
// Remaining payload bytes beyond the local buffer are skipped.
6832 static int decode_unregistered_user_data(H264Context *h, int size){
6833 MpegEncContext * const s = &h->s;
6834 uint8_t user_data[16+256];
6840 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6841 user_data[i]= get_bits(&s->gb, 8);
// NOTE(review): sscanf/av_log treat user_data+16 as a C string — the
// NUL termination is not visible in this view; confirm it is written
// between the read loop and this point.
6845 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6846 if(e==1 && build>=0)
6847 h->x264_build= build;
6849 if(s->avctx->debug & FF_DEBUG_BUGS)
6850 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Skip whatever part of the payload did not fit in user_data[].
6853 skip_bits(&s->gb, 8);
// Parse a "recovery point" SEI message: store the recovery frame count and
// skip the three flag fields that this decoder does not use.
6858 static int decode_recovery_point(H264Context *h){
6859 MpegEncContext * const s = &h->s;
6861 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6862 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
// Parse all SEI messages in the current NAL unit.  Each message carries a
// type and size, both coded as sequences of 0xFF bytes plus a final byte;
// unknown types are skipped whole.
6867 static int decode_sei(H264Context *h){
6868 MpegEncContext * const s = &h->s;
6870 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// Accumulate type: each 0xFF byte adds 255, the first non-0xFF byte ends it.
6875 type+= show_bits(&s->gb, 8);
6876 }while(get_bits(&s->gb, 8) == 255);
// Size is coded the same way.
6880 size+= show_bits(&s->gb, 8);
6881 }while(get_bits(&s->gb, 8) == 255);
6884 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6885 if(decode_picture_timing(h) < 0)
6888 case SEI_TYPE_USER_DATA_UNREGISTERED:
6889 if(decode_unregistered_user_data(h, size) < 0)
6892 case SEI_TYPE_RECOVERY_POINT:
6893 if(decode_recovery_point(h) < 0)
// Unknown SEI type: skip its payload.
6897 skip_bits(&s->gb, 8*size);
6900 //FIXME check bits here
6901 align_get_bits(&s->gb);
// Parse HRD (hypothetical reference decoder) parameters from the VUI.
// Per-CPB bitrate/size values are read and discarded; only the delay field
// lengths and time_offset_length are stored in the SPS.
6907 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6908 MpegEncContext * const s = &h->s;
6910 cpb_count = get_ue_golomb_31(&s->gb) + 1;
// cpb_cnt_minus1 must be in 0..31.
6912 if(cpb_count > 32U){
6913 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6917 get_bits(&s->gb, 4); /* bit_rate_scale */
6918 get_bits(&s->gb, 4); /* cpb_size_scale */
6919 for(i=0; i<cpb_count; i++){
6920 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6921 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6922 get_bits1(&s->gb); /* cbr_flag */
6924 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6925 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6926 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6927 sps->time_offset_length = get_bits(&s->gb, 5);
// Parse the VUI (video usability information) section of an SPS: sample
// aspect ratio, video signal/colour description (skipped), timing info,
// HRD parameters, pic_struct flag and bitstream restrictions.  Most fields
// not needed by the decoder are read and discarded.
6932 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6933 MpegEncContext * const s = &h->s;
6934 int aspect_ratio_info_present_flag;
6935 unsigned int aspect_ratio_idc;
6936 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6938 if( aspect_ratio_info_present_flag ) {
6939 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries an explicit num/den pair; other idc values index
// the predefined pixel_aspect table.
6940 if( aspect_ratio_idc == EXTENDED_SAR ) {
6941 sps->sar.num= get_bits(&s->gb, 16);
6942 sps->sar.den= get_bits(&s->gb, 16);
6943 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6944 sps->sar= pixel_aspect[aspect_ratio_idc];
6946 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6953 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6955 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6956 get_bits1(&s->gb); /* overscan_appropriate_flag */
6959 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6960 get_bits(&s->gb, 3); /* video_format */
6961 get_bits1(&s->gb); /* video_full_range_flag */
6962 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6963 get_bits(&s->gb, 8); /* colour_primaries */
6964 get_bits(&s->gb, 8); /* transfer_characteristics */
6965 get_bits(&s->gb, 8); /* matrix_coefficients */
6969 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6970 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6971 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6974 sps->timing_info_present_flag = get_bits1(&s->gb);
6975 if(sps->timing_info_present_flag){
6976 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6977 sps->time_scale = get_bits_long(&s->gb, 32);
6978 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6981 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6982 if(sps->nal_hrd_parameters_present_flag)
6983 if(decode_hrd_parameters(h, sps) < 0)
6985 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6986 if(sps->vcl_hrd_parameters_present_flag)
6987 if(decode_hrd_parameters(h, sps) < 0)
6989 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6990 get_bits1(&s->gb); /* low_delay_hrd_flag */
6991 sps->pic_struct_present_flag = get_bits1(&s->gb);
6993 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6994 if(sps->bitstream_restriction_flag){
6995 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6996 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6997 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6998 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6999 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7000 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7001 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// Sanity-limit num_reorder_frames (DPB cannot exceed 16 frames).
7003 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7004 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
// Parse one scaling list (size 16 or 64) in zigzag order with delta coding.
// If the list is absent, copy fallback_list; if the first delta yields 0,
// use the JVT default list instead.  A zero delta repeats the last value.
7012 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7013 const uint8_t *jvt_list, const uint8_t *fallback_list){
7014 MpegEncContext * const s = &h->s;
7015 int i, last = 8, next = 8;
7016 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7017 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7018 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7020 for(i=0;i<size;i++){
// Deltas are signed Exp-Golomb, accumulated modulo 256.
7022 next = (last + get_se_golomb(&s->gb)) & 0xff;
7023 if(!i && !next){ /* matrix not written, we use the preset one */
7024 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7027 last = factors[scan[i]] = next ? next : last;
// Parse the full set of scaling matrices for an SPS or PPS.  Fallbacks
// follow the spec's inheritance rules: a PPS falls back to the SPS matrices
// when the SPS transmitted any; otherwise (and for the SPS itself) the
// defaults apply, and each later list can fall back to the previous one.
7031 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7032 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7033 MpegEncContext * const s = &h->s;
7034 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7035 const uint8_t *fallback[4] = {
7036 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7037 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7038 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7039 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// seq/pic_scaling_matrix_present_flag
7041 if(get_bits1(&s->gb)){
7042 sps->scaling_matrix_present |= is_sps;
7043 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7044 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7045 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7046 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7047 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7048 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only when the 8x8 transform can be used.
7049 if(is_sps || pps->transform_8x8_mode){
7050 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7051 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// Parse a sequence parameter set NAL unit into a freshly allocated SPS and
// store it in h->sps_buffers[sps_id].  Performs range validation on sps_id,
// POC parameters, reference frame count, dimensions and cropping.
7056 static inline int decode_seq_parameter_set(H264Context *h){
7057 MpegEncContext * const s = &h->s;
7058 int profile_idc, level_idc;
7059 unsigned int sps_id;
7063 profile_idc= get_bits(&s->gb, 8);
7064 get_bits1(&s->gb); //constraint_set0_flag
7065 get_bits1(&s->gb); //constraint_set1_flag
7066 get_bits1(&s->gb); //constraint_set2_flag
7067 get_bits1(&s->gb); //constraint_set3_flag
7068 get_bits(&s->gb, 4); // reserved
7069 level_idc= get_bits(&s->gb, 8);
7070 sps_id= get_ue_golomb_31(&s->gb);
7072 if(sps_id >= MAX_SPS_COUNT) {
7073 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7076 sps= av_mallocz(sizeof(SPS));
7080 sps->profile_idc= profile_idc;
7081 sps->level_idc= level_idc;
// Default all scaling factors to flat 16 (no scaling).
7083 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7084 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7085 sps->scaling_matrix_present = 0;
7087 if(sps->profile_idc >= 100){ //high profile
7088 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7089 if(sps->chroma_format_idc == 3)
7090 sps->residual_color_transform_flag = get_bits1(&s->gb);
7091 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7092 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7093 sps->transform_bypass = get_bits1(&s->gb);
7094 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// Non-high profiles are always 4:2:0.
7096 sps->chroma_format_idc= 1;
7099 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7100 sps->poc_type= get_ue_golomb_31(&s->gb);
7102 if(sps->poc_type == 0){ //FIXME #define
7103 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7104 } else if(sps->poc_type == 1){//FIXME #define
7105 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7106 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7107 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7108 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7110 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7111 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7115 for(i=0; i<sps->poc_cycle_length; i++)
7116 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7117 }else if(sps->poc_type != 2){
7118 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7122 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7123 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7124 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7127 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7128 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7129 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7130 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7131 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7132 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7136 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7137 if(!sps->frame_mbs_only_flag)
7138 sps->mb_aff= get_bits1(&s->gb);
7142 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7144 #ifndef ALLOW_INTERLACE
7146 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7148 sps->crop= get_bits1(&s->gb);
7150 sps->crop_left = get_ue_golomb(&s->gb);
7151 sps->crop_right = get_ue_golomb(&s->gb);
7152 sps->crop_top = get_ue_golomb(&s->gb);
7153 sps->crop_bottom= get_ue_golomb(&s->gb);
7154 if(sps->crop_left || sps->crop_top){
7155 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7157 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7158 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7164 sps->crop_bottom= 0;
7167 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7168 if( sps->vui_parameters_present_flag )
7169 decode_vui_parameters(h, sps);
7171 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7172 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7173 sps_id, sps->profile_idc, sps->level_idc,
7175 sps->ref_frame_count,
7176 sps->mb_width, sps->mb_height,
7177 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7178 sps->direct_8x8_inference_flag ? "8B8" : "",
7179 sps->crop_left, sps->crop_right,
7180 sps->crop_top, sps->crop_bottom,
7181 sps->vui_parameters_present_flag ? "VUI" : "",
7182 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// Replace any previously stored SPS with the same id.
7186 av_free(h->sps_buffers[sps_id]);
7187 h->sps_buffers[sps_id]= sps;
// Build one per-PPS luma-QP -> chroma-QP lookup table (t selects Cb or Cr):
// each luma QP 0..51 maps through chroma_qp[] after applying the PPS chroma
// QP index offset, clamped to the valid 0..51 range.
7195 build_qp_table(PPS *pps, int t, int index)
7198 for(i = 0; i < 52; i++)
7199 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
// Parse a picture parameter set NAL unit into a freshly allocated PPS and
// store it in h->pps_buffers[pps_id].  Validates pps_id/sps_id and the
// reference counts; FMO (slice_group_count > 1) is not supported.
7202 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7203 MpegEncContext * const s = &h->s;
7204 unsigned int pps_id= get_ue_golomb(&s->gb);
7207 if(pps_id >= MAX_PPS_COUNT) {
7208 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7212 pps= av_mallocz(sizeof(PPS));
7215 pps->sps_id= get_ue_golomb_31(&s->gb);
// The referenced SPS must already have been decoded.
7216 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7217 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7221 pps->cabac= get_bits1(&s->gb);
7222 pps->pic_order_present= get_bits1(&s->gb);
7223 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7224 if(pps->slice_group_count > 1 ){
7225 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7226 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
// The spec-syntax tables below document the FMO fields that would need to
// be parsed here for each map type.
7227 switch(pps->mb_slice_group_map_type){
7230 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7231 | run_length[ i ] |1 |ue(v) |
7236 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7238 | top_left_mb[ i ] |1 |ue(v) |
7239 | bottom_right_mb[ i ] |1 |ue(v) |
7247 | slice_group_change_direction_flag |1 |u(1) |
7248 | slice_group_change_rate_minus1 |1 |ue(v) |
7253 | slice_group_id_cnt_minus1 |1 |ue(v) |
7254 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7256 | slice_group_id[ i ] |1 |u(v) |
7261 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7262 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7263 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7264 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7268 pps->weighted_pred= get_bits1(&s->gb);
7269 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7270 pps->init_qp= get_se_golomb(&s->gb) + 26;
7271 pps->init_qs= get_se_golomb(&s->gb) + 26;
7272 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7273 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7274 pps->constrained_intra_pred= get_bits1(&s->gb);
7275 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7277 pps->transform_8x8_mode= 0;
7278 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7279 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7280 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// Optional trailing fields (present only if bits remain in the RBSP).
7282 if(get_bits_count(&s->gb) < bit_length){
7283 pps->transform_8x8_mode= get_bits1(&s->gb);
7284 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7285 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7287 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7290 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7291 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7292 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7293 h->pps.chroma_qp_diff= 1;
7295 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7296 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7297 pps_id, pps->sps_id,
7298 pps->cabac ? "CABAC" : "CAVLC",
7299 pps->slice_group_count,
7300 pps->ref_count[0], pps->ref_count[1],
7301 pps->weighted_pred ? "weighted" : "",
7302 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7303 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7304 pps->constrained_intra_pred ? "CONSTR" : "",
7305 pps->redundant_pic_cnt_present ? "REDU" : "",
7306 pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previously stored PPS with the same id.
7310 av_free(h->pps_buffers[pps_id]);
7311 h->pps_buffers[pps_id]= pps;
7319  * Call decode_slice() for each context.
7321  * @param h h264 master context
7322  * @param context_count number of contexts to execute
7324 static void execute_decode_slices(H264Context *h, int context_count){
7325     MpegEncContext * const s = &h->s;
7326     AVCodecContext * const avctx= s->avctx;
     /* VDPAU hardware decoding bypasses the software slice decoder */
7330     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
     /* single context: decode directly in the caller's thread */
7332     if(context_count == 1) {
7333         decode_slice(avctx, &h);
     /* multiple contexts: seed each slave context's error state, then run
      * all of them through avctx->execute() */
7335         for(i = 1; i < context_count; i++) {
7336             hx = h->thread_context[i];
7337             hx->s.error_recognition = avctx->error_recognition;
7338             hx->s.error_count = 0;
7341         avctx->execute(avctx, (void *)decode_slice,
7342                        (void **)h->thread_context, NULL, context_count, sizeof(void*));
7344         /* pull back stuff from slices to master context */
7345         hx = h->thread_context[context_count - 1];
7346         s->mb_x = hx->s.mb_x;
7347         s->mb_y = hx->s.mb_y;
7348         s->dropable = hx->s.dropable;
7349         s->picture_structure = hx->s.picture_structure;
     /* accumulate per-thread error counts into the master context */
7350         for(i = 1; i < context_count; i++)
7351             h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units and dispatch each one by type.
 * Handles both AVC (length-prefixed, h->is_avc) and Annex-B (start-code
 * delimited) framing.  Slice NALs are distributed across up to
 * h->max_contexts thread contexts and flushed in batches through
 * execute_decode_slices(). */
7356 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7357     MpegEncContext * const s = &h->s;
7358     AVCodecContext * const avctx= s->avctx;
7360     H264Context *hx; ///< thread context
7361     int context_count = 0;
7363     h->max_contexts = avctx->thread_count;
     /* debug dump of the first input bytes */
7366         for(i=0; i<50; i++){
7367             av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* unless fed partial frames (CHUNKS), each call starts a new picture */
7370     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7371         h->current_slice = 0;
7372         if (!s->first_field)
7373             s->current_picture_ptr= NULL;
7385             if(buf_index >= buf_size) break;
     /* AVC framing: read the big-endian NAL size prefix */
7387             for(i = 0; i < h->nal_length_size; i++)
7388                 nalsize = (nalsize << 8) | buf[buf_index++];
7389             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7394                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7399             // start code prefix search
7400             for(; buf_index + 3 < buf_size; buf_index++){
7401                 // This should always succeed in the first iteration.
7402                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7406             if(buf_index+3 >= buf_size) break;
7411         hx = h->thread_context[context_count];
     /* unescape the RBSP (remove emulation-prevention bytes) */
7413         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7414         if (ptr==NULL || dst_length < 0){
     /* strip trailing zero bytes before computing the bit length */
7417         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7419         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7421         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7422             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
     /* AVC: a size/consumed mismatch is only an error if the leftover
      * bytes are non-zero (zero padding is tolerated at DEBUG level) */
7425         if (h->is_avc && (nalsize != consumed)){
7426             int i, debug_level = AV_LOG_DEBUG;
7427             for (i = consumed; i < nalsize; i++)
7428                 if (buf[buf_index+i])
7429                     debug_level = AV_LOG_ERROR;
7430             av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7434         buf_index += consumed;
     /* skip non-reference NALs when hurrying or discarding non-refs */
7436         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7437            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7442         switch(hx->nal_unit_type){
     /* IDR slice: must not be mixed with non-IDR slices in one picture */
7444             if (h->nal_unit_type != NAL_IDR_SLICE) {
7445                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7448             idr(h); //FIXME ensure we don't loose some frames if there is reordering
7450             init_get_bits(&hx->s.gb, ptr, bit_length);
7452             hx->inter_gb_ptr= &hx->s.gb;
7453             hx->s.data_partitioning = 0;
7455             if((err = decode_slice_header(hx, h)))
     /* a picture is a key frame if it is IDR or covered by SEI recovery */
7458             s->current_picture_ptr->key_frame |=
7459                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
7460                     (h->sei_recovery_frame_cnt >= 0);
7461             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7462                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7463                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7464                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7465                && avctx->skip_frame < AVDISCARD_ALL){
     /* VDPAU path: forward the raw NAL (re-prefixed with a start code)
      * to the hardware instead of decoding it here */
7466                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7467                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7468                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7469                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
     /* data-partitioned slices: partition A carries the header */
7475             init_get_bits(&hx->s.gb, ptr, bit_length);
7477             hx->inter_gb_ptr= NULL;
7478             hx->s.data_partitioning = 1;
7480             err = decode_slice_header(hx, h);
7483             init_get_bits(&hx->intra_gb, ptr, bit_length);
7484             hx->intra_gb_ptr= &hx->intra_gb;
7487             init_get_bits(&hx->inter_gb, ptr, bit_length);
7488             hx->inter_gb_ptr= &hx->inter_gb;
7490             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7491                && s->context_initialized
7493                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7494                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7495                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7496                && avctx->skip_frame < AVDISCARD_ALL)
7500             init_get_bits(&s->gb, ptr, bit_length);
7504             init_get_bits(&s->gb, ptr, bit_length);
7505             decode_seq_parameter_set(h);
7507             if(s->flags& CODEC_FLAG_LOW_DELAY)
7510             if(avctx->has_b_frames < 2)
7511                 avctx->has_b_frames= !s->low_delay;
7514             init_get_bits(&s->gb, ptr, bit_length);
7516             decode_picture_parameter_set(h, bit_length);
7520         case NAL_END_SEQUENCE:
7521         case NAL_END_STREAM:
7522         case NAL_FILLER_DATA:
7524         case NAL_AUXILIARY_SLICE:
7527             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
     /* batch full: run the queued slice contexts now */
7530         if(context_count == h->max_contexts) {
7531             execute_decode_slices(h, context_count);
7536             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7538             /* Slice could not be decoded in parallel mode, copy down
7539              * NAL unit stuff to context 0 and restart. Note that
7540              * rbsp_buffer is not transferred, but since we no longer
7541              * run in parallel mode this should not be an issue. */
7542             h->nal_unit_type = hx->nal_unit_type;
7543             h->nal_ref_idc   = hx->nal_ref_idc;
     /* flush any slices still queued at end of buffer */
7549         execute_decode_slices(h, context_count);
7554  * returns the number of bytes consumed for building the current frame
/* Clamp the parser position to a sane value before reporting it. */
7556 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7557     if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7558     if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level decode callback: consume one access unit from buf, decode its
 * NAL units, finish the current picture, and emit a frame in display order
 * through the delayed-picture reorder buffer.
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @return number of input bytes consumed, or negative on error */
7563 static int decode_frame(AVCodecContext *avctx,
7564                              void *data, int *data_size,
7565                              const uint8_t *buf, int buf_size)
7567     H264Context *h = avctx->priv_data;
7568     MpegEncContext *s = &h->s;
7569     AVFrame *pict = data;
7572     s->flags= avctx->flags;
7573     s->flags2= avctx->flags2;
7575    /* end of stream, output what is still in the buffers */
7576     if (buf_size == 0) {
7580 //FIXME factorize this with the output code below
     /* drain: pick the delayed picture with the smallest POC */
7581         out = h->delayed_pic[0];
7583         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7584             if(h->delayed_pic[i]->poc < out->poc){
7585                 out = h->delayed_pic[i];
     /* remove the chosen picture from the delay queue */
7589         for(i=out_idx; h->delayed_pic[i]; i++)
7590             h->delayed_pic[i] = h->delayed_pic[i+1];
7593             *data_size = sizeof(AVFrame);
7594             *pict= *(AVFrame*)out;
     /* first call with AVC extradata: parse the avcC configuration record */
7600     if(h->is_avc && !h->got_avcC) {
7601         int i, cnt, nalsize;
7602         unsigned char *p = avctx->extradata;
7603         if(avctx->extradata_size < 7) {
7604             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7608             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7611         /* sps and pps in the avcC always have length coded with 2 bytes,
7612            so put a fake nal_length_size = 2 while parsing them */
7613         h->nal_length_size = 2;
7614         // Decode sps from avcC
7615         cnt = *(p+5) & 0x1f; // Number of sps
7617         for (i = 0; i < cnt; i++) {
7618             nalsize = AV_RB16(p) + 2;
7619             if(decode_nal_units(h, p, nalsize) < 0) {
7620                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7625         // Decode pps from avcC
7626         cnt = *(p++); // Number of pps
7627         for (i = 0; i < cnt; i++) {
7628             nalsize = AV_RB16(p) + 2;
7629             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7630                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7635         // Now store right nal length size, that will be use to parse all other nals
7636         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7637         // Do not reparse avcC
     /* Annex-B extradata (non-AVC): decode it once as ordinary NAL units */
7641     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7642         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7647     buf_index=decode_nal_units(h, buf, buf_size);
7651     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7652         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7653         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* picture (or final chunk of it) fully decoded: finish it */
7657     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7658         Picture *out = s->current_picture_ptr;
7659         Picture *cur = s->current_picture_ptr;
7660         int i, pics, cross_idr, out_of_order, out_idx;
7664         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7665         s->current_picture_ptr->pict_type= s->pict_type;
7667         if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7668             ff_vdpau_h264_set_reference_frames(s);
     /* apply MMCO/sliding-window reference marking, save POC state */
7671             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7672         h->prev_poc_msb= h->poc_msb;
7673         h->prev_poc_lsb= h->poc_lsb;
7675         h->prev_frame_num_offset= h->frame_num_offset;
7676         h->prev_frame_num= h->frame_num;
7678         if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7679             ff_vdpau_h264_picture_complete(s);
7682          * FIXME: Error handling code does not seem to support interlaced
7683          * when slices span multiple rows
7684          * The ff_er_add_slice calls don't work right for bottom
7685          * fields; they cause massive erroneous error concealing
7686          * Error marking covers both fields (top and bottom).
7687          * This causes a mismatched s->error_count
7688          * and a bad error table. Further, the error count goes to
7689          * INT_MAX when called for bottom field, because mb_y is
7690          * past end by one (callers fault) and resync_mb_y != 0
7691          * causes problems for the first MB line, too.
     /* reset per-picture SEI state for the next access unit */
7697         h->sei_recovery_frame_cnt = -1;
7698         h->sei_dpb_output_delay = 0;
7699         h->sei_cpb_removal_delay = -1;
7701         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7702             /* Wait for second field. */
7706             cur->repeat_pict = 0;
7708             /* Signal interlacing information externally. */
7709             /* Prioritize picture timing SEI information over used decoding process if it exists. */
7710             if(h->sps.pic_struct_present_flag){
7711                 switch (h->sei_pic_struct)
7713                 case SEI_PIC_STRUCT_FRAME:
7714                     cur->interlaced_frame = 0;
7716                 case SEI_PIC_STRUCT_TOP_FIELD:
7717                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7718                 case SEI_PIC_STRUCT_TOP_BOTTOM:
7719                 case SEI_PIC_STRUCT_BOTTOM_TOP:
7720                     cur->interlaced_frame = 1;
7722                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7723                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7724                     // Signal the possibility of telecined film externally (pic_struct 5,6)
7725                     // From these hints, let the applications decide if they apply deinterlacing.
7726                     cur->repeat_pict = 1;
7727                     cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7729                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7730                     // Force progressive here, as doubling interlaced frame is a bad idea.
7731                     cur->interlaced_frame = 0;
7732                     cur->repeat_pict = 2;
7734                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7735                     cur->interlaced_frame = 0;
7736                     cur->repeat_pict = 4;
7740                 /* Derive interlacing flag from used decoding process. */
7741                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7744             if (cur->field_poc[0] != cur->field_poc[1]){
7745                 /* Derive top_field_first from field pocs. */
7746                 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7748                 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7749                     /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7750                     if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7751                       || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7752                         cur->top_field_first = 1;
7754                         cur->top_field_first = 0;
7756                     /* Most likely progressive */
7757                     cur->top_field_first = 0;
7761     //FIXME do something with unavailable reference frames
7763         /* Sort B-frames into display order */
7765         if(h->sps.bitstream_restriction_flag
7766            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7767             s->avctx->has_b_frames = h->sps.num_reorder_frames;
     /* without bitstream restrictions, strict mode assumes worst-case delay */
7771         if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7772            && !h->sps.bitstream_restriction_flag){
7773             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7778         while(h->delayed_pic[pics]) pics++;
7780         assert(pics <= MAX_DELAYED_PIC_COUNT);
     /* append the finished picture; keep it referenced while delayed */
7782         h->delayed_pic[pics++] = cur;
7783         if(cur->reference == 0)
7784             cur->reference = DELAYED_PIC_REF;
     /* pick the lowest-POC delayed picture as the output candidate */
7786         out = h->delayed_pic[0];
7788         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7789             if(h->delayed_pic[i]->poc < out->poc){
7790                 out = h->delayed_pic[i];
7793         cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7795         out_of_order = !cross_idr && out->poc < h->outputed_poc;
7797         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
     /* grow the assumed reorder delay when output order violations show up */
7799         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7801                  ((!cross_idr && out->poc > h->outputed_poc + 2)
7802                   || cur->pict_type == FF_B_TYPE)))
7805             s->avctx->has_b_frames++;
     /* pop the output picture from the delay queue */
7808         if(out_of_order || pics > s->avctx->has_b_frames){
7809             out->reference &= ~DELAYED_PIC_REF;
7810             for(i=out_idx; h->delayed_pic[i]; i++)
7811                 h->delayed_pic[i] = h->delayed_pic[i+1];
7813         if(!out_of_order && pics > s->avctx->has_b_frames){
7814             *data_size = sizeof(AVFrame);
7816                 h->outputed_poc = out->poc;
7817             *pict= *(AVFrame*)out;
7819             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7824     assert(pict->data[0] || !*data_size);
7825     ff_print_debug_info(s, pict);
7826 //printf("out %d\n", (int)pict->data[0]);
7829     /* Return the Picture timestamp as the frame number */
7830     /* we subtract 1 because it is added on utils.c */
7831     avctx->frame_number = s->picture_number - 1;
7833     return get_consumed_bytes(s, buf_index, buf_size);
/* Compute neighbour-macroblock availability for the current MB position:
 * a neighbour is "available" only if it lies inside the picture and belongs
 * to the same slice (same slice_num in h->slice_table).
 * Indices: 0=top-left, 1=top, 2=top-right, 3=left; 4/5 are constants. */
7836 static inline void fill_mb_avail(H264Context *h){
7837     MpegEncContext * const s = &h->s;
7838     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7841         h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7842         h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7843         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7849     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7850     h->mb_avail[4]= 1; //FIXME move out
7851     h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness (built only for testing): exercises the Exp-Golomb
 * coders, the 4x4 (I)DCT round trip, the quantizer, and the NAL
 * escape/unescape layer. */
7859 #define SIZE (COUNT*40)
7865 //    int int_temp[10000];
7867     AVCodecContext avctx;
7869     dsputil_init(&dsp, &avctx);
     /* --- unsigned Exp-Golomb: write COUNT codes, read them back --- */
7871     init_put_bits(&pb, temp, SIZE);
7872     printf("testing unsigned exp golomb\n");
7873     for(i=0; i<COUNT; i++){
7875         set_ue_golomb(&pb, i);
7876         STOP_TIMER("set_ue_golomb");
7878     flush_put_bits(&pb);
7880     init_get_bits(&gb, temp, 8*SIZE);
7881     for(i=0; i<COUNT; i++){
7884         s= show_bits(&gb, 24);
7887         j= get_ue_golomb(&gb);
7889             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7892         STOP_TIMER("get_ue_golomb");
     /* --- signed Exp-Golomb: same round trip, centred around zero --- */
7896     init_put_bits(&pb, temp, SIZE);
7897     printf("testing signed exp golomb\n");
7898     for(i=0; i<COUNT; i++){
7900         set_se_golomb(&pb, i - COUNT/2);
7901         STOP_TIMER("set_se_golomb");
7903     flush_put_bits(&pb);
7905     init_get_bits(&gb, temp, 8*SIZE);
7906     for(i=0; i<COUNT; i++){
7909         s= show_bits(&gb, 24);
7912         j= get_se_golomb(&gb);
7913         if(j != i - COUNT/2){
7914             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7917         STOP_TIMER("get_se_golomb");
     /* --- 4x4 DCT/IDCT round trip on random blocks, accumulate error --- */
7921     printf("testing 4x4 (I)DCT\n");
7924         uint8_t src[16], ref[16];
7925         uint64_t error= 0, max_error=0;
7927         for(i=0; i<COUNT; i++){
7929 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7930             for(j=0; j<16; j++){
7931                 ref[j]= random()%255;
7932                 src[j]= random()%255;
7935             h264_diff_dct_c(block, src, ref, 4);
7938             for(j=0; j<16; j++){
7939 //                printf("%d ", block[j]);
7940                 block[j]= block[j]*4;
7941                 if(j&1) block[j]= (block[j]*4 + 2)/5;
7942                 if(j&4) block[j]= (block[j]*4 + 2)/5;
7946             s->dsp.h264_idct_add(ref, block, 4);
7947 /*            for(j=0; j<16; j++){
7948                 printf("%d ", ref[j]);
7952             for(j=0; j<16; j++){
7953                 int diff= FFABS(src[j] - ref[j]);
7956                 max_error= FFMAX(max_error, diff);
7959         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7960     printf("testing quantizer\n");
7961     for(qp=0; qp<52; qp++){
7963             src1_block[i]= src2_block[i]= random()%255;
     /* --- NAL layer: escape a random bitstream, unescape, compare --- */
7966     printf("Testing NAL layer\n");
7968     uint8_t bitstream[COUNT];
7969     uint8_t nal[COUNT*2];
7971     memset(&h, 0, sizeof(H264Context));
7973     for(i=0; i<COUNT; i++){
7981         for(j=0; j<COUNT; j++){
7982             bitstream[j]= (random() % 255) + 1;
7985         for(j=0; j<zeros; j++){
7986             int pos= random() % COUNT;
7987             while(bitstream[pos] == 0){
7996         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7998             printf("encoding failed\n");
8002         out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8006         if(out_length != COUNT){
8007             printf("incorrect length %d %d\n", out_length, COUNT);
8011         if(consumed != nal_length){
8012             printf("incorrect consumed length %d %d\n", nal_length, consumed);
8016         if(memcmp(bitstream, out, COUNT)){
8017             printf("mismatch\n");
8023     printf("Testing RBSP\n");
/* Codec close callback: free per-context RBSP buffers, decoding tables,
 * and all stored SPS/PPS parameter sets. */
8031 static av_cold int decode_end(AVCodecContext *avctx)
8033     H264Context *h = avctx->priv_data;
8034     MpegEncContext *s = &h->s;
8037     av_freep(&h->rbsp_buffer[0]);
8038     av_freep(&h->rbsp_buffer[1]);
8039     free_tables(h); //FIXME cleanup init stuff perhaps
     /* release every parameter set slot (av_freep also NULLs the pointers) */
8041     for(i = 0; i < MAX_SPS_COUNT; i++)
8042         av_freep(h->sps_buffers + i);
8044     for(i = 0; i < MAX_PPS_COUNT; i++)
8045         av_freep(h->pps_buffers + i);
8049 //    memset(h, 0, sizeof(H264Context));
/* Registration entry for the software H.264 decoder. */
8055 AVCodec h264_decoder = {
8059     sizeof(H264Context),
8064     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8066     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
/* Registration entry for the VDPAU hardware-accelerated variant,
 * compiled in only when CONFIG_H264_VDPAU_DECODER is set. */
8069 #if CONFIG_H264_VDPAU_DECODER
8070 AVCodec h264_vdpau_decoder = {
8074     sizeof(H264Context),
8079     CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8081     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8085 #if CONFIG_SVQ3_DECODER