2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* Static VLC decoders and their backing tables for CAVLC residual decoding.
 * The table sizes match the code lengths of the corresponding VLC tables in
 * the H.264 specification (coeff_token, total_zeros, run_before). */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations: SVQ3 dequant/IDCT helpers, the per-macroblock
 * deblocking filters, and reference-list maintenance. */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into one 32-bit word; the endian branch keeps the
 * in-memory byte order identical on both big- and little-endian hosts.
 * NOTE(review): the #else/#endif and closing brace are missing from this
 * extraction — only the two return expressions are visible. */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* Lookup tables over the QP range 0..51: ff_rem6[q] == q % 6 and
 * ff_div6[q] == q / 6, used to split a quantizer into shift and scale parts. */
89 const uint8_t ff_rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 const uint8_t ff_div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Per-MBAFF-configuration remapping of the left neighbour's 4x4 block
 * indices; the initializer rows are missing from this extraction. */
97 static const int left_block_options[4][8]={
/* Fills the per-macroblock neighbour caches (intra pred modes, non-zero
 * counts, cbp, motion vectors, reference indices, mvd, direct flags) from the
 * already-decoded top/left/topleft/topright macroblocks, handling the MBAFF
 * frame/field neighbour remapping. Called both for decoding and (with
 * for_deblock set) for the loop filter.
 * NOTE(review): many interleaving source lines are missing from this
 * extraction (the embedded original line numbers jump); the visible text is
 * not a complete function body — do not edit logic from this view alone. */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour indices: straight grid adjacency. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF path: neighbours are located via the macroblock *pair*, and shifted
 * by one stride depending on the frame/field coding of each pair. */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
/* Remember the resolved neighbour indices for later passes (loop filter). */
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Deblocking variant: a neighbour counts as available if its slice_table
 * entry is any valid slice (< 255), not only the current slice. */
181 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
/* Decoding variant: a neighbour is only usable if it belongs to the same
 * slice as the current macroblock. */
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra sample-availability bitmasks: start from "all available" and clear
 * bits for missing or (with constrained_intra_pred) inter neighbours. */
209 if(IS_INTRA(mb_type)){
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
221 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
222 h->topleft_samples_available&= 0xDF5F;
223 h->left_samples_available&= 0x5F5F;
227 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
228 h->topleft_samples_available&= 0x7FFF;
230 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
231 h->topright_samples_available&= 0xFBFF;
/* Seed the 4x4 intra prediction mode cache from the bottom row of the top
 * neighbour and the right column of the left neighbour(s). */
233 if(IS_INTRA4x4(mb_type)){
234 if(IS_INTRA4x4(top_type)){
235 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
236 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
237 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
238 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
241 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
246 h->intra4x4_pred_mode_cache[4+8*0]=
247 h->intra4x4_pred_mode_cache[5+8*0]=
248 h->intra4x4_pred_mode_cache[6+8*0]=
249 h->intra4x4_pred_mode_cache[7+8*0]= pred;
252 if(IS_INTRA4x4(left_type[i])){
253 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
254 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
257 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
262 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
263 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
278 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Non-zero coefficient count cache: copy from the top neighbour when
 * available, otherwise fall back to 0 (CABAC inter) or 64 (unavailable). */
280 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
281 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
282 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
283 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
285 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
286 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
288 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
289 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
292 h->non_zero_count_cache[4+8*0]=
293 h->non_zero_count_cache[5+8*0]=
294 h->non_zero_count_cache[6+8*0]=
295 h->non_zero_count_cache[7+8*0]=
297 h->non_zero_count_cache[1+8*0]=
298 h->non_zero_count_cache[2+8*0]=
300 h->non_zero_count_cache[1+8*3]=
301 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
305 for (i=0; i<2; i++) {
307 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
308 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
309 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
310 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
312 h->non_zero_count_cache[3+8*1 + 2*8*i]=
313 h->non_zero_count_cache[3+8*2 + 2*8*i]=
314 h->non_zero_count_cache[0+8*1 + 8*i]=
315 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Coded-block-pattern of the top/left neighbours, used by CABAC context
 * derivation; bit layout of cbp_table assumed per the rest of the decoder. */
322 h->top_cbp = h->cbp_table[top_xy];
323 } else if(IS_INTRA(mb_type)) {
330 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
331 } else if(IS_INTRA(mb_type)) {
337 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
340 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector / reference index caches for inter and direct macroblocks. */
345 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
347 for(list=0; list<h->list_count; list++){
348 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
349 /*if(!h->mv_cache_clean[list]){
350 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
351 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
352 h->mv_cache_clean[list]= 1;
356 h->mv_cache_clean[list]= 0;
/* Top row of the caches comes from the bottom b/b8 row of the top MB. */
358 if(USES_LIST(top_type, list)){
359 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
360 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
361 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
362 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
363 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
364 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
365 h->ref_cache[list][scan8[0] + 0 - 1*8]=
366 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
367 h->ref_cache[list][scan8[0] + 2 - 1*8]=
368 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
370 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
371 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
372 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
373 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
374 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left column: two entries per left MB half, remapped through left_block. */
378 int cache_idx = scan8[0] - 1 + i*2*8;
379 if(USES_LIST(left_type[i], list)){
380 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
381 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
382 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
383 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
384 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
385 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
387 *(uint32_t*)h->mv_cache [list][cache_idx ]=
388 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
389 h->ref_cache[list][cache_idx ]=
390 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
394 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
/* Topleft corner; topleft_partition selects the middle partition in the one
 * MBAFF case flagged above. */
397 if(USES_LIST(topleft_type, list)){
398 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
399 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
400 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
401 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
403 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
404 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
407 if(USES_LIST(topright_type, list)){
408 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
409 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
410 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
411 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
413 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
414 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
417 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Mark the cache cells that have no spatial neighbour as not available. */
420 h->ref_cache[list][scan8[5 ]+1] =
421 h->ref_cache[list][scan8[7 ]+1] =
422 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
423 h->ref_cache[list][scan8[4 ]] =
424 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
425 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
426 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
427 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
428 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
429 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
432 /* XXX beurk, Load mvd */
433 if(USES_LIST(top_type, list)){
434 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
435 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
436 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
437 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
438 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
440 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
441 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
442 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
443 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
445 if(USES_LIST(left_type[0], list)){
446 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
447 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
450 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
451 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
453 if(USES_LIST(left_type[1], list)){
454 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
458 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
461 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
462 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
463 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
464 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
465 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flags of the neighbours, for CABAC contexts. */
467 if(h->slice_type_nos == FF_B_TYPE){
468 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
470 if(IS_DIRECT(top_type)){
471 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
472 }else if(IS_8X8(top_type)){
473 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
474 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
475 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
477 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
480 if(IS_DIRECT(left_type[0]))
481 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
482 else if(IS_8X8(left_type[0]))
483 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
485 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
487 if(IS_DIRECT(left_type[1]))
488 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
489 else if(IS_8X8(left_type[1]))
490 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
492 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of cached refs/mvs, applied via MAP_F2F over
 * every neighbour cache cell listed here. */
498 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
499 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
500 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
501 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
502 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
503 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
504 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
505 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
506 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
507 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* Frame neighbour of a field MB: double the ref index, halve vertical mv. */
509 #define MAP_F2F(idx, mb_type)\
510 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
511 h->ref_cache[list][idx] <<= 1;\
512 h->mv_cache[list][idx][1] /= 2;\
513 h->mvd_cache[list][idx][1] /= 2;\
/* Field neighbour of a frame MB: inverse scaling of the above. */
518 #define MAP_F2F(idx, mb_type)\
519 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
520 h->ref_cache[list][idx] >>= 1;\
521 h->mv_cache[list][idx][1] <<= 1;\
522 h->mvd_cache[list][idx][1] <<= 1;\
532 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copies the current macroblock's bottom-row / right-column 4x4 intra
 * prediction modes from the cache back into the per-MB array, so the next
 * macroblocks can read them as neighbours. Entries 0-3 are the bottom row,
 * 4-6 the right column (entry 7 written outside this visible span, if at all
 * — TODO confirm against the full source). */
535 static inline void write_back_intra_pred_mode(H264Context *h){
536 const int mb_xy= h->mb_xy;
538 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
539 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
540 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
541 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
542 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
543 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
544 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
548 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Remaps cached 4x4 intra modes through the top[]/left[] substitution tables
 * when the corresponding samples are unavailable; a negative mapped status
 * is reported as an error (error-return lines not visible in this view). */
550 static inline int check_intra4x4_pred_mode(H264Context *h){
551 MpegEncContext * const s = &h->s;
552 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
553 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
556 if(!(h->top_samples_available&0x8000)){
558 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
560 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
563 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
568 if(!(h->left_samples_available&0x8000)){
570 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
572 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
581 } //FIXME cleanup like next
584 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for whole-MB (16x16 / chroma 8x8)
 * prediction modes; an out-of-range mode is an error. The remapped-mode
 * return statements are outside this visible span. */
586 static inline int check_intra_pred_mode(H264Context *h, int mode){
587 MpegEncContext * const s = &h->s;
588 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
589 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
592 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
596 if(!(h->top_samples_available&0x8000)){
599 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
604 if(!(h->left_samples_available&0x8000)){
607 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
616 * gets the predicted intra4x4 prediction mode.
/* Prediction is min(left, top) per the H.264 spec; a negative neighbour
 * (unavailable) forces DC_PRED. The non-DC return path is outside this view. */
618 static inline int pred_intra_mode(H264Context *h, int n){
619 const int index8= scan8[n];
620 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
621 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
622 const int min= FFMIN(left, top);
624 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
626 if(min<0) return DC_PRED;
/* Writes the current macroblock's non-zero coefficient counts from the
 * scratch cache back to the per-MB array: luma bottom row / right column
 * first, then the chroma entries (indices 7-12). */
630 static inline void write_back_non_zero_count(H264Context *h){
631 const int mb_xy= h->mb_xy;
633 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
634 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
635 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
636 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
637 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
638 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
639 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
641 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
642 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
643 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
645 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
646 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
647 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
651 * gets the predicted number of non-zero coefficients.
652 * @param n block index
/* Averages the left and top neighbours' counts (rounding up); the line
 * computing `i = left + top` is missing from this view. The value 64 marks
 * "unavailable" in the cache, hence the i<64 guard before averaging. */
654 static inline int pred_non_zero_count(H264Context *h, int n){
655 const int index8= scan8[n];
656 const int left= h->non_zero_count_cache[index8 - 1];
657 const int top = h->non_zero_count_cache[index8 - 8];
660 if(i<64) i= (i+1)>>1;
662 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" candidate (top-right, or top-left as fallback) for motion
 * vector prediction and returns its reference index. The MBAFF special cases
 * rescale a neighbour mv fetched from the opposite frame/field coding via
 * SET_DIAG_MV, storing it in the scratch cell mv_cache[scan8[0]-2]. */
667 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
668 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
669 MpegEncContext *s = &h->s;
671 /* there is no consistent mapping of mvs to neighboring locations that will
672 * make mbaff happy, so we can't move all this logic to fill_caches */
674 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
676 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
677 *C = h->mv_cache[list][scan8[0]-2];
680 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
681 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
682 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV reads an mv from an arbitrary 4x4 position (x4,y4) of the
 * current picture and applies MV_OP / REF_OP to convert between frame and
 * field units. */
683 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
684 const int x4 = X4, y4 = Y4;\
685 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
686 if(!USES_LIST(mb_type,list))\
687 return LIST_NOT_USED;\
688 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
689 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
690 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
691 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
693 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
696 if(topright_ref == PART_NOT_AVAILABLE
697 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
698 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
700 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
701 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
704 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
706 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
707 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF fallback: use the cached top-right if available, otherwise the
 * top-left neighbour (spec 8.4.1.3 substitution). */
713 if(topright_ref != PART_NOT_AVAILABLE){
714 *C= h->mv_cache[list][ i - 8 + part_width ];
717 tprintf(s->avctx, "topright MV not available\n");
719 *C= h->mv_cache[list][ i - 8 - 1 ];
720 return h->ref_cache[list][ i - 8 - 1 ];
725 * gets the predicted MV.
726 * @param n the block index
727 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
728 * @param mx the x component of the predicted motion vector
729 * @param my the y component of the predicted motion vector
/* Median motion vector prediction (spec 8.4.1.3): median of A (left),
 * B (top), C (diagonal) when more than one neighbour shares the target ref;
 * otherwise the single matching neighbour, or A when only the left exists.
 * Several branch bodies are missing from this view. */
731 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
732 const int index8= scan8[n];
733 const int top_ref= h->ref_cache[list][ index8 - 8 ];
734 const int left_ref= h->ref_cache[list][ index8 - 1 ];
735 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
736 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
738 int diagonal_ref, match_count;
740 assert(part_width==1 || part_width==2 || part_width==4);
750 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
751 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
752 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
753 if(match_count > 1){ //most common
754 *mx= mid_pred(A[0], B[0], C[0]);
755 *my= mid_pred(A[1], B[1], C[1]);
756 }else if(match_count==1){
760 }else if(top_ref==ref){
768 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
772 *mx= mid_pred(A[0], B[0], C[0]);
773 *my= mid_pred(A[1], B[1], C[1]);
777 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
781 * gets the directionally predicted 16x8 MV.
782 * @param n the block index
783 * @param mx the x component of the predicted motion vector
784 * @param my the y component of the predicted motion vector
/* 16x8 partitions: top partition predicts from B (top), bottom from A (left)
 * when the neighbour's reference matches; otherwise fall through to the
 * generic median predictor. Guard conditions are missing from this view. */
786 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
788 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
789 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
791 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
799 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
800 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
802 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
812 pred_motion(h, n, 4, list, ref, mx, my);
816 * gets the directionally predicted 8x16 MV.
817 * @param n the block index
818 * @param mx the x component of the predicted motion vector
819 * @param my the y component of the predicted motion vector
/* 8x16 partitions: left partition predicts from A (left), right from the
 * diagonal C candidate when its reference matches; otherwise the generic
 * median predictor. Guard conditions are missing from this view. */
821 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
823 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
824 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
826 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
837 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
839 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
841 if(diagonal_ref == ref){
849 pred_motion(h, n, 2, list, ref, mx, my);
/* P-skip motion prediction (spec 8.4.1.1): the mv is forced to (0,0) when a
 * neighbour is unavailable or a ref-0 neighbour has a zero mv; the zeroing
 * branch body is missing from this view. Otherwise the generic predictor
 * with ref 0 and a 16x16 partition is used. */
852 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
853 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
854 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
856 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
858 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
859 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
860 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
866 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct distance scale factors (spec 8.4.1.2.3):
 * per list-0 reference, scale = clip((tb*tx + 32) >> 6) with
 * tx = (16384 + |td|/2) / td, where td/tb are clipped POC distances.
 * Long-term references and td==0 get the neutral factor 256. The trailing
 * loop duplicates the factors per field (MBAFF); its guard is not visible. */
871 static inline void direct_dist_scale_factor(H264Context * const h){
872 MpegEncContext * const s = &h->s;
873 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
874 const int poc1 = h->ref_list[1][0].poc;
876 for(i=0; i<h->ref_count[0]; i++){
877 int poc0 = h->ref_list[0][i].poc;
878 int td = av_clip(poc1 - poc0, -128, 127);
879 if(td == 0 || h->ref_list[0][i].long_ref){
880 h->dist_scale_factor[i] = 256;
882 int tb = av_clip(poc - poc0, -128, 127);
883 int tx = (16384 + (FFABS(td) >> 1)) / td;
884 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
888 for(i=0; i<h->ref_count[0]; i++){
889 h->dist_scale_factor_field[2*i] =
890 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Prepares data for temporal direct mode: records the current picture's
 * ref counts and POC-encoded reference identities (4*frame_num + reference
 * bits), then builds map_col_to_list0[] mapping each reference of the
 * colocated (list-1[0]) picture to the matching index in our list 0, with 0
 * as a fallback for missing frames. The trailing loop expands the map per
 * field; its guard is not visible in this view. */
894 static inline void direct_ref_list_init(H264Context * const h){
895 MpegEncContext * const s = &h->s;
896 Picture * const ref1 = &h->ref_list[1][0];
897 Picture * const cur = s->current_picture_ptr;
899 int sidx= s->picture_structure&1;
900 int ref1sidx= ref1->reference&1;
901 for(list=0; list<2; list++){
902 cur->ref_count[sidx][list] = h->ref_count[list];
903 for(j=0; j<h->ref_count[list]; j++)
904 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
906 if(s->picture_structure == PICT_FRAME){
907 memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
908 memcpy(cur->ref_poc [0], cur->ref_poc [1], sizeof(cur->ref_poc [0]));
910 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
912 for(list=0; list<2; list++){
913 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
914 int poc = ref1->ref_poc[ref1sidx][list][i];
915 if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
916 poc= (poc&~3) + s->picture_structure;
917 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
918 for(j=0; j<h->ref_count[list]; j++)
919 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
920 h->map_col_to_list0[list][i] = j;
926 for(list=0; list<2; list++){
927 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
928 j = h->map_col_to_list0[list][i];
929 h->map_col_to_list0_field[list][2*i] = 2*j;
930 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Predicts motion vectors and reference indices for a B-direct macroblock
 * (both spatial and temporal direct modes), filling mv_cache/ref_cache and
 * possibly refining *mb_type / sub_mb_type from the co-located list1 MB.
 * @param mb_type in/out: macroblock type, partition flags are added here
 * NOTE(review): many original lines are missing from this excerpt
 * (declarations of mb_type_col/i8/list/ref/mv, several braces and else
 * branches); comments below are limited to what the visible code shows.
 */
936 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
937 MpegEncContext * const s = &h->s;
938 int b8_stride = h->b8_stride;
939 int b4_stride = h->b_stride;
940 int mb_xy = h->mb_xy;
942 const int16_t (*l1mv0)[2], (*l1mv1)[2];   /* co-located L0/L1 motion vectors */
943 const int8_t *l1ref0, *l1ref1;            /* co-located L0/L1 reference indices */
944 const int is_b8x8 = IS_8X8(*mb_type);
945 unsigned int sub_mb_type;
948 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- locate the co-located macroblock, handling field/frame mismatches --- */
950 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
951 if(h->ref_list[1][0].reference == PICT_FRAME){ // AFL/AFR/FR/FL -> AFL
952 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL
953 int cur_poc = s->current_picture_ptr->poc;
954 int *col_poc = h->ref_list[1]->field_poc;
/* pick the co-located field whose POC is closer to the current picture */
955 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
956 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
959 }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
960 int fieldoff= 2*(h->ref_list[1][0].reference)-3;  /* +1 or -1 row of MBs */
961 mb_xy += s->mb_stride*fieldoff;
964 }else{ // AFL/AFR/FR/FL -> AFR/FR
965 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
/* one field MB maps onto a vertical pair of frame MBs */
966 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
967 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
968 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
971 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
972 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
973 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
975 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
976 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
978 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
979 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
981 }else{ // AFR/FR -> AFR/FR
984 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
985 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
986 /* FIXME save sub mb types from previous frames (or derive from MVs)
987 * so we know exactly what block size to use */
988 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
989 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
990 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
991 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
992 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
994 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
995 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* pointers into the co-located picture's motion/reference arrays */
1000 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1001 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1002 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1003 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* presumably selects the bottom half of the co-located pair — the
 * guarding condition is not visible in this excerpt; confirm upstream */
1006 l1ref0 += h->b8_stride;
1007 l1ref1 += h->b8_stride;
1008 l1mv0 += 2*b4_stride;
1009 l1mv1 += 2*b4_stride;
/* ====================== spatial direct mode ====================== */
1013 if(h->direct_spatial_mv_pred){
1018 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1020 /* ref = min(neighbors) */
1021 for(list=0; list<2; list++){
1022 int refa = h->ref_cache[list][scan8[0] - 1];
1023 int refb = h->ref_cache[list][scan8[0] - 8];
1024 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1025 if(refc == PART_NOT_AVAILABLE)
1026 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned min: negative (unavailable) refs become huge and lose */
1027 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1032 if(ref[0] < 0 && ref[1] < 0){
/* no neighbor has a reference: both refs become 0 with zero MVs */
1033 ref[0] = ref[1] = 0;
1034 mv[0][0] = mv[0][1] =
1035 mv[1][0] = mv[1][1] = 0;
1037 for(list=0; list<2; list++){
1039 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1041 mv[list][0] = mv[list][1] = 0;
/* drop the unused list from the partition type */
1047 *mb_type &= ~MB_TYPE_L1;
1048 sub_mb_type &= ~MB_TYPE_L1;
1049 }else if(ref[0] < 0){
1051 *mb_type &= ~MB_TYPE_L0;
1052 sub_mb_type &= ~MB_TYPE_L0;
/* field/frame mismatch with the co-located MB: work per 8x8 block */
1055 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1056 for(i8=0; i8<4; i8++){
1059 int xy8 = x8+y8*b8_stride;
1060 int xy4 = 3*x8+y8*b4_stride;
1063 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1065 h->sub_mb_type[i8] = sub_mb_type;
1067 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1068 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* co-located block is "stationary" (|mv| <= 1 against ref 0): zero MVs */
1069 if(!IS_INTRA(mb_type_col[y8])
1070 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1071 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1073 a= pack16to32(mv[0][0],mv[0][1]);
1075 b= pack16to32(mv[1][0],mv[1][1]);
1077 a= pack16to32(mv[0][0],mv[0][1]);
1078 b= pack16to32(mv[1][0],mv[1][1]);
1080 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1081 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1083 }else if(IS_16X16(*mb_type)){
/* whole-MB case: one ref/MV pair per list covers the 16x16 block */
1086 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1087 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1088 if(!IS_INTRA(mb_type_col[0])
1089 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1090 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <= 33 had a different (buggy) stationary test; mirror it */
1091 && (h->x264_build>33 || !h->x264_build)))){
1093 a= pack16to32(mv[0][0],mv[0][1]);
1095 b= pack16to32(mv[1][0],mv[1][1]);
1097 a= pack16to32(mv[0][0],mv[0][1]);
1098 b= pack16to32(mv[1][0],mv[1][1]);
1100 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1101 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1103 for(i8=0; i8<4; i8++){
1104 const int x8 = i8&1;
1105 const int y8 = i8>>1;
1107 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1109 h->sub_mb_type[i8] = sub_mb_type;
1111 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1112 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1113 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1114 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* per-8x8 stationary test; l1mv selects whichever co-located list hit ref 0 */
1117 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1118 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1119 && (h->x264_build>33 || !h->x264_build)))){
1120 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1121 if(IS_SUB_8X8(sub_mb_type)){
1122 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1123 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1125 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1127 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1130 for(i4=0; i4<4; i4++){
1131 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1132 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1134 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1136 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1142 }else{ /* direct temporal mv pred */
1143 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1144 const int *dist_scale_factor = h->dist_scale_factor;
/* field MB in MBAFF frame: use the field variants of the lookup tables */
1146 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1147 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1148 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1149 dist_scale_factor = h->dist_scale_factor_field;
1151 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1152 /* FIXME assumes direct_8x8_inference == 1 */
1153 int y_shift = 2*!IS_INTERLACED(*mb_type);  /* scale MVs between field/frame coords */
1154 int ref_shift= FRAME_MBAFF ? y_shift : 1;
1156 for(i8=0; i8<4; i8++){
1157 const int x8 = i8&1;
1158 const int y8 = i8>>1;
1160 const int16_t (*l1mv)[2]= l1mv0;
1162 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1164 h->sub_mb_type[i8] = sub_mb_type;
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1167 if(IS_INTRA(mb_type_col[y8])){
/* intra co-located block: ref 0 with zero MVs */
1168 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1169 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1170 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 ref0 = l1ref0[x8 + y8*b8_stride];
1176 ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
1178 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
1181 scale = dist_scale_factor[ref0];
1182 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1185 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1186 int my_col = (mv_col[1]<<y_shift)/2;   /* re-scale vertical component */
/* temporal scaling: mv = (scale * mv_col + 128) >> 8, per the spec */
1187 int mx = (scale * mv_col[0] + 128) >> 8;
1188 int my = (scale * my_col + 128) >> 8;
1189 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1190 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1196 /* one-to-one mv scaling */
1198 if(IS_16X16(*mb_type)){
1201 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1202 if(IS_INTRA(mb_type_col[0])){
1205 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1206 : map_col_to_list0[1][l1ref1[0]];
1207 const int scale = dist_scale_factor[ref0];
1208 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1210 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1211 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
/* L1 MV is the scaled L0 MV minus the co-located MV */
1213 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1214 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1216 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1217 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1218 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1220 for(i8=0; i8<4; i8++){
1221 const int x8 = i8&1;
1222 const int y8 = i8>>1;
1224 const int16_t (*l1mv)[2]= l1mv0;
1226 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1228 h->sub_mb_type[i8] = sub_mb_type;
1229 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1230 if(IS_INTRA(mb_type_col[0])){
1231 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1232 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1233 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1237 ref0 = l1ref0[x8 + y8*b8_stride];
1239 ref0 = map_col_to_list0[0][ref0];
1241 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]];
1244 scale = dist_scale_factor[ref0];
1246 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1247 if(IS_SUB_8X8(sub_mb_type)){
1248 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1249 int mx = (scale * mv_col[0] + 128) >> 8;
1250 int my = (scale * mv_col[1] + 128) >> 8;
1251 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1252 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1254 for(i4=0; i4<4; i4++){
/* 4x4 granularity: scale each co-located 4x4 MV individually */
1255 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1256 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1257 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1259 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1260 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the decoded motion data for the current macroblock from the
 * per-MB caches (mv_cache/ref_cache/mvd_cache) back into the picture-wide
 * arrays (motion_val, ref_index, mvd_table, direct_table).
 * NOTE(review): some lines (loop headers over y, a few braces/else
 * branches) are missing from this excerpt.
 */
1267 static inline void write_back_motion(H264Context *h, int mb_type){
1268 MpegEncContext * const s = &h->s;
1269 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;   /* 4x4-block coords */
1270 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;  /* 8x8-block coords */
/* mark list0 as unused for the whole MB if this mb_type has no L0 refs */
1273 if(!USES_LIST(mb_type, 0))
1274 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1276 for(list=0; list<h->list_count; list++){
1278 if(!USES_LIST(mb_type, list))
/* copy the 4x4 MVs row by row, two MVs (one uint64) at a time */
1282 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1283 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1285 if( h->pps.cabac ) {
/* CABAC context needs MV differences too; skipped MBs store zeros */
1286 if(IS_SKIP(mb_type))
1287 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1290 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1291 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* write the four 8x8 reference indices */
1296 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1297 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1298 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1299 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1300 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B slices with CABAC track per-8x8 direct flags for context modeling */
1304 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1305 if(IS_8X8(mb_type)){
1306 uint8_t *direct_table = &h->direct_table[b8_xy];
1307 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1308 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1309 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1315 * Decodes a network abstraction layer unit.
1316 * @param consumed is the number of bytes used as input
1317 * @param length is the length of the array
1318 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1319 * @returns decoded bytes, might be src+1 if no escapes
/* Strips emulation-prevention bytes (0x03 after 00 00) from a NAL unit and
 * parses the NAL header (nal_ref_idc, nal_unit_type). Returns a pointer to
 * the unescaped payload, which may alias src+1 when no escapes are present.
 * NOTE(review): loop headers and the early-return paths around lines
 * 1341-1363 are missing from this excerpt. */
1321 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1326 // src[0]&0x80; //forbidden bit
1327 h->nal_ref_idc= src[0]>>5;       /* 2-bit importance indicator */
1328 h->nal_unit_type= src[0]&0x1F;   /* 5-bit NAL unit type */
1332 for(i=0; i<length; i++)
1333 printf("%2X ", src[i]);
/* scan every other byte for a zero: any 00 00 0x pattern must start there */
1335 for(i=0; i+1<length; i+=2){
1336 if(src[i]) continue;
1337 if(i>0 && src[i-1]==0) i--;
1338 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1340 /* startcode, so we must be past the end */
1347 if(i>=length-1){ //no escaped 0
1348 *dst_length= length;
1349 *consumed= length+1; //+1 for the header
1353 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1354 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1355 dst= h->rbsp_buffer[bufidx];
1361 //printf("decoding esc\n");
1364 //remove escapes (very rare 1:2^22)
1365 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1366 if(src[si+2]==3){ //escape
1371 }else //next start code
/* default path: plain copy of one payload byte */
1375 dst[di++]= src[si++];
1379 *consumed= si + 1;//+1 for the header
1380 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1385 * identifies the exact end of the bitstream
1386 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit at the end of the bitstream.
 * NOTE(review): the body between the declaration and the return is
 * missing from this excerpt; only the trace line is visible. */
1388 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1392 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1402 * IDCT transforms the 16 dc values and dequantizes them.
1403 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform + dequantization of the 16 luma DC
 * coefficients (Intra16x16 MBs). Operates in place on `block`, whose DC
 * values sit at the strided offsets given by x_offset/y_offset.
 * NOTE(review): loop headers and the temp[] stores (lines ~1411-1427)
 * are missing from this excerpt. */
1405 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1408 int temp[16]; //FIXME check if this is a good idea
1409 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1410 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1412 //memset(block, 64, 2*256);
/* horizontal pass: butterfly over rows of DC values */
1415 const int offset= y_offset[i];
1416 const int z0= block[offset+stride*0] + block[offset+stride*4];
1417 const int z1= block[offset+stride*0] - block[offset+stride*4];
1418 const int z2= block[offset+stride*1] - block[offset+stride*5];
1419 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass: butterfly over columns, then dequant with rounding */
1428 const int offset= x_offset[i];
1429 const int z0= temp[4*0+i] + temp[4*2+i];
1430 const int z1= temp[4*0+i] - temp[4*2+i];
1431 const int z2= temp[4*1+i] - temp[4*3+i];
1432 const int z3= temp[4*1+i] + temp[4*3+i];
1434 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1435 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1436 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1437 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1443 * DCT transforms the 16 dc values.
1444 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
 * mirror of h264_luma_dc_dequant_idct_c without the dequant step, with a
 * final >>1 scaling. Operates in place on `block`.
 * NOTE(review): loop headers and temp[] stores are missing from this
 * excerpt. */
1446 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1447 // const int qmul= dequant_coeff[qp][0];
1449 int temp[16]; //FIXME check if this is a good idea
1450 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1451 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first butterfly pass over the strided DC positions */
1454 const int offset= y_offset[i];
1455 const int z0= block[offset+stride*0] + block[offset+stride*4];
1456 const int z1= block[offset+stride*0] - block[offset+stride*4];
1457 const int z2= block[offset+stride*1] - block[offset+stride*5];
1458 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass, halving the result */
1467 const int offset= x_offset[i];
1468 const int z0= temp[4*0+i] + temp[4*2+i];
1469 const int z1= temp[4*0+i] - temp[4*2+i];
1470 const int z2= temp[4*1+i] - temp[4*3+i];
1471 const int z3= temp[4*1+i] + temp[4*3+i];
1473 block[stride*0 +offset]= (z0 + z3)>>1;
1474 block[stride*2 +offset]= (z1 + z2)>>1;
1475 block[stride*8 +offset]= (z1 - z2)>>1;
1476 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the four chroma DC
 * coefficients, in place. `e` (the a+b sum used below) is computed on a
 * line missing from this excerpt. */
1484 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1485 const int stride= 16*2;
1486 const int xStride= 16;
/* load the 2x2 DC block */
1489 a= block[stride*0 + xStride*0];
1490 b= block[stride*0 + xStride*1];
1491 c= block[stride*1 + xStride*0];
1492 d= block[stride*1 + xStride*1];
/* 2x2 butterfly, then dequantize with >>7 scaling */
1499 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1500 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1501 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1502 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the four chroma DC coefficients
 * (encoder side), in place; no scaling. `e` is computed on a line missing
 * from this excerpt. */
1506 static void chroma_dc_dct_c(DCTELEM *block){
1507 const int stride= 16*2;
1508 const int xStride= 16;
/* load the 2x2 DC block */
1511 a= block[stride*0 + xStride*0];
1512 b= block[stride*0 + xStride*1];
1513 c= block[stride*1 + xStride*0];
1514 d= block[stride*1 + xStride*1];
/* 2x2 butterfly */
1521 block[stride*0 + xStride*0]= (a+c);
1522 block[stride*0 + xStride*1]= (e+b);
1523 block[stride*1 + xStride*0]= (a-c);
1524 block[stride*1 + xStride*1]= (e-b);
1529 * gets the chroma qp.
/* Maps a luma qscale to the chroma QP via the PPS lookup table;
 * t selects the Cb/Cr table (chroma_qp_index_offset differs per plane). */
1531 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1532 return h->pps.chroma_qp_table[t][qscale];
1535 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1536 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Quantizes a block of coefficients (encoder side) using the table for
 * `qscale`, with intra/inter bias. When separate_dc is set the DC
 * coefficient is quantized with a shifted table/bias. Returns the index of
 * the last non-zero coefficient.
 * NOTE(review): several control-flow lines (loop headers, negative-level
 * branches, zeroing of small levels) are missing from this excerpt. */
1537 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1539 const int * const quant_table= quant_coeff[qscale];
1540 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold trick: |level| <= threshold1 quantizes to 0, tested with one
 * unsigned compare against threshold2 */
1541 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1542 const unsigned int threshold2= (threshold1<<1);
/* separate DC path, variant with QUANT_SHIFT-2 (larger effective step) */
1548 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1549 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1550 const unsigned int dc_threshold2= (dc_threshold1<<1);
1552 int level= block[0]*quant_coeff[qscale+18][0];
1553 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1555 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1558 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1561 // last_non_zero = i;
/* separate DC path, variant with QUANT_SHIFT+1 (finer effective step) */
1566 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1567 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1568 const unsigned int dc_threshold2= (dc_threshold1<<1);
1570 int level= block[0]*quant_table[0];
1571 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1573 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1576 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1579 // last_non_zero = i;
/* AC coefficients, walked in scan order */
1592 const int j= scantable[i];
1593 int level= block[j]*quant_table[j];
1595 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1596 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1597 if(((unsigned)(level+threshold1))>threshold2){
1599 level= (bias + level)>>QUANT_SHIFT;
1602 level= (bias - level)>>QUANT_SHIFT;
1611 return last_non_zero;
/**
 * Performs motion compensation for one partition in one direction (list):
 * fetches the luma and chroma prediction from `pic` at the cached MV,
 * using edge emulation when the MV points outside the picture.
 * @param square if true, one qpix_op call covers the block; otherwise a
 *               second call at +delta completes it
 * @param chroma_height chroma block height in pixels
 * NOTE(review): a few lines (emu flag setup, MBAFF chroma condition
 * around line 1654) are missing from this excerpt.
 */
1614 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1615 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1616 int src_x_offset, int src_y_offset,
1617 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1618 MpegEncContext * const s = &h->s;
/* MVs are in quarter-pel units; offsets come in already multiplied by 8 */
1619 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1620 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1621 const int luma_xy= (mx&3) + ((my&3)<<2);   /* qpel phase selects the filter */
1622 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1623 uint8_t * src_cb, * src_cr;
1624 int extra_width= h->emu_edge_width;
1625 int extra_height= h->emu_edge_height;
1627 const int full_mx= mx>>2;
1628 const int full_my= my>>2;
1629 const int pic_width = 16*s->mb_width;
1630 const int pic_height = 16*s->mb_height >> MB_FIELD;
1632 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* sub-pel filtering reads 3 extra pixels on the interpolated side(s) */
1635 if(mx&7) extra_width -= 3;
1636 if(my&7) extra_height -= 3;
1638 if( full_mx < 0-extra_width
1639 || full_my < 0-extra_height
1640 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1641 || full_my + 16/*FIXME*/ > pic_height + extra_height){
/* copy + pad the source region so the filter never reads out of bounds */
1642 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1643 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1647 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1649 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1652 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1655 // chroma offset when predicting from a field of opposite parity
1656 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1657 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1659 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1660 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1663 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1664 src_cb= s->edge_emu_buffer;
1666 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1669 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1670 src_cr= s->edge_emu_buffer;
1672 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: runs mc_dir_part for
 * list0 and/or list1, switching from `put` to `avg` operators after the
 * first direction so bi-prediction averages the two predictions.
 */
1675 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1676 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1677 int x_offset, int y_offset,
1678 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1679 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1680 int list0, int list1){
1681 MpegEncContext * const s = &h->s;
1682 qpel_mc_func *qpix_op= qpix_put;
1683 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to the partition's position inside the MB */
1685 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1686 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1687 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1688 x_offset += 8*s->mb_x;
1689 y_offset += 8*(s->mb_y >> MB_FIELD);
1692 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1693 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1694 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1695 qpix_op, chroma_op);
/* second direction averages into the first prediction
 * (the matching qpix_op= qpix_avg line is not visible in this excerpt) */
1698 chroma_op= chroma_avg;
1702 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1703 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1704 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1705 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bi-directional: both
 * predictions are fetched (second into the obmc scratchpad) and combined
 * with implicit (use_weight==2) or explicit weights. Uni-directional:
 * prediction is fetched then scaled in place with the list's weights.
 */
1709 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1710 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1711 int x_offset, int y_offset,
1712 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1713 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1714 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1715 int list0, int list1){
1716 MpegEncContext * const s = &h->s;
/* advance dest pointers to the partition's position inside the MB */
1718 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1719 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1720 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1721 x_offset += 8*s->mb_x;
1722 y_offset += 8*(s->mb_y >> MB_FIELD);
1725 /* don't optimize for luma-only case, since B-frames usually
1726 * use implicit weights => chroma too. */
1727 uint8_t *tmp_cb = s->obmc_scratchpad;
1728 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1729 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1730 int refn0 = h->ref_cache[0][ scan8[n] ];
1731 int refn1 = h->ref_cache[1][ scan8[n] ];
/* fetch list0 into dest, list1 into the scratchpad */
1733 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1734 dest_y, dest_cb, dest_cr,
1735 x_offset, y_offset, qpix_put, chroma_put);
1736 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1737 tmp_y, tmp_cb, tmp_cr,
1738 x_offset, y_offset, qpix_put, chroma_put);
1740 if(h->use_weight == 2){
/* implicit weights: per-ref-pair weight with denom 5, weights sum to 64 */
1741 int weight0 = h->implicit_weight[refn0][refn1];
1742 int weight1 = 64 - weight0;
1743 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1744 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1745 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weights: per-list weight/offset from the slice header */
1747 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1748 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1749 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1750 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1751 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1752 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1753 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1755 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional case: predict, then weight in place */
1758 int list = list1 ? 1 : 0;
1759 int refn = h->ref_cache[list][ scan8[n] ];
1760 Picture *ref= &h->ref_list[list][refn];
1761 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1762 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1763 qpix_put, chroma_put);
1765 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1766 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1767 if(h->use_weight_chroma){
1768 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1769 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1770 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1771 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation.
 * Weighted MC is used for explicit weighting (use_weight==1) or implicit
 * weighting when the implicit weight is not the trivial 32/32 split.
 */
1776 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1777 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1778 int x_offset, int y_offset,
1779 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1780 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1781 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1782 int list0, int list1){
1783 if((h->use_weight==2 && list0 && list1
1784 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1785 || h->use_weight==1)
/* weight_op/weight_avg index 0 = luma op, index 3 = chroma op */
1786 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1787 x_offset, y_offset, qpix_put, chroma_put,
1788 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1790 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1791 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1794 static inline void prefetch_motion(H264Context *h, int list){
1795 /* fetch pixels for estimated mv 4 macroblocks ahead
1796 * optimized for 64byte cache lines */
1797 MpegEncContext * const s = &h->s;
1798 const int refn = h->ref_cache[list][scan8[0]];
/* convert the cached qpel MV to full-pel and offset 4 MBs to the right */
1800 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1801 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1802 uint8_t **src= h->ref_list[list][refn].data;
1803 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1804 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb and Cr are assumed contiguous: stride src[2]-src[1] covers both */
1805 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1806 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Top-level inter prediction for one macroblock: decomposes the MB type
 * into its partitions (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and
 * calls mc_part for each, selecting the matching qpel/chroma operators and
 * weight function slots per partition size.
 */
1810 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1811 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1812 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1813 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1814 MpegEncContext * const s = &h->s;
1815 const int mb_xy= h->mb_xy;
1816 const int mb_type= s->current_picture.mb_type[mb_xy];
1818 assert(IS_INTER(mb_type));
/* prefetch list0 before compensation, list1 after (see end of function) */
1820 prefetch_motion(h, 0);
1822 if(IS_16X16(mb_type)){
1823 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1824 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1825 &weight_op[0], &weight_avg[0],
1826 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1827 }else if(IS_16X8(mb_type)){
/* two 16x8 halves; delta=8 advances to the second 8-line strip */
1828 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1829 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1830 &weight_op[1], &weight_avg[1],
1831 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1832 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1833 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1834 &weight_op[1], &weight_avg[1],
1835 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1836 }else if(IS_8X16(mb_type)){
/* two 8x16 halves; delta is a row offset in bytes for the lower 8x8 */
1837 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1838 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1839 &weight_op[2], &weight_avg[2],
1840 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1841 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1842 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1843 &weight_op[2], &weight_avg[2],
1844 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1848 assert(IS_8X8(mb_type));
/* per-8x8 sub-partitions (loop header not visible in this excerpt) */
1851 const int sub_mb_type= h->sub_mb_type[i];
1853 int x_offset= (i&1)<<2;
1854 int y_offset= (i&2)<<1;
1856 if(IS_SUB_8X8(sub_mb_type)){
1857 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1858 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1859 &weight_op[3], &weight_avg[3],
1860 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1861 }else if(IS_SUB_8X4(sub_mb_type)){
1862 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1863 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1864 &weight_op[4], &weight_avg[4],
1865 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1866 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1867 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1868 &weight_op[4], &weight_avg[4],
1869 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1870 }else if(IS_SUB_4X8(sub_mb_type)){
1871 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1872 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1873 &weight_op[5], &weight_avg[5],
1874 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1875 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1876 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1877 &weight_op[5], &weight_avg[5],
1878 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1881 assert(IS_SUB_4X4(sub_mb_type));
1883 int sub_x_offset= x_offset + 2*(j&1);
1884 int sub_y_offset= y_offset + (j&2);
1885 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1886 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1887 &weight_op[6], &weight_avg[6],
1888 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1894 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC VLC tables (coeff_token,
 * total_zeros, run_before and their chroma-DC variants), pointing each
 * VLC at its preallocated static table and building it with
 * INIT_VLC_USE_NEW_STATIC so no heap allocation occurs.
 * NOTE(review): the `done` guard check and some loop headers are missing
 * from this excerpt.
 */
1897 static av_cold void decode_init_vlc(void){
1898 static int done = 0;   /* guards against re-initialization */
1905 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1906 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1907 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1908 &chroma_dc_coeff_token_len [0], 1, 1,
1909 &chroma_dc_coeff_token_bits[0], 1, 1,
1910 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token tables are packed back-to-back; offset walks them */
1914 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1915 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1916 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1917 &coeff_token_len [i][0], 1, 1,
1918 &coeff_token_bits[i][0], 1, 1,
1919 INIT_VLC_USE_NEW_STATIC);
1920 offset += coeff_token_vlc_tables_size[i];
1923 * This is a one time safety check to make sure that
1924 * the packed static coeff_token_vlc table sizes
1925 * were initialized correctly.
1927 assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
1930 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1931 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1932 init_vlc(&chroma_dc_total_zeros_vlc[i],
1933 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1934 &chroma_dc_total_zeros_len [i][0], 1, 1,
1935 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1936 INIT_VLC_USE_NEW_STATIC);
1938 for(i=0; i<15; i++){
1939 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1940 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1941 init_vlc(&total_zeros_vlc[i],
1942 TOTAL_ZEROS_VLC_BITS, 16,
1943 &total_zeros_len [i][0], 1, 1,
1944 &total_zeros_bits[i][0], 1, 1,
1945 INIT_VLC_USE_NEW_STATIC);
1949 run_vlc[i].table = run_vlc_tables[i];
1950 run_vlc[i].table_allocated = run_vlc_tables_size;
1951 init_vlc(&run_vlc[i],
1953 &run_len [i][0], 1, 1,
1954 &run_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
/* run_before with zeros_left > 6 uses its own, larger table */
1957 run7_vlc.table = run7_vlc_table,
1958 run7_vlc.table_allocated = run7_vlc_table_size;
1959 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1960 &run_len [6][0], 1, 1,
1961 &run_bits[6][0], 1, 1,
1962 INIT_VLC_USE_NEW_STATIC);
// Free all per-context tables allocated by alloc_tables()/context_init(),
// plus the cached SPS/PPS buffers and the per-thread scratch buffers.
// av_freep() also NULLs each pointer, so a later re-alloc is safe.
1966 static void free_tables(H264Context *h){
1969     av_freep(&h->intra4x4_pred_mode);
1970     av_freep(&h->chroma_pred_mode_table);
1971     av_freep(&h->cbp_table);
1972     av_freep(&h->mvd_table[0]);
1973     av_freep(&h->mvd_table[1]);
1974     av_freep(&h->direct_table);
1975     av_freep(&h->non_zero_count);
1976     av_freep(&h->slice_table_base);
// slice_table is an offset view into slice_table_base, never freed itself.
1977     h->slice_table= NULL;
1979     av_freep(&h->mb2b_xy);
1980     av_freep(&h->mb2b8_xy);
1982     for(i = 0; i < MAX_SPS_COUNT; i++)
1983         av_freep(h->sps_buffers + i);
1985     for(i = 0; i < MAX_PPS_COUNT; i++)
1986         av_freep(h->pps_buffers + i);
// Per-thread buffers (top_borders, obmc scratchpad) are owned by each
// thread context and freed here centrally.
1988     for(i = 0; i < h->s.avctx->thread_count; i++) {
1989         hx = h->thread_context[i];
1991         av_freep(&hx->top_borders[1]);
1992         av_freep(&hx->top_borders[0]);
1993         av_freep(&hx->s.obmc_scratchpad);
// Precompute the 8x8 dequantization tables for all 52 QP values, scaled
// by the PPS 8x8 scaling matrices.  If both matrices are identical the
// second table aliases the first instead of being recomputed.
1997 static void init_dequant8_coeff_table(H264Context *h){
// Transposed layout is needed when a SIMD idct8 implementation is in use.
1999     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2000     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2001     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2003     for(i=0; i<2; i++ ){
// Share buffer 0 when intra and inter 8x8 scaling matrices match.
2004         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2005             h->dequant8_coeff[1] = h->dequant8_buffer[0];
// qp = 6*shift + idx: dequant scale grows by 2x every 6 QP steps.
2009         for(q=0; q<52; q++){
2010             int shift = ff_div6[q];
2011             int idx = ff_rem6[q];
2013                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2014                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2015                     h->pps.scaling_matrix8[i][x]) << shift;
// Precompute the 4x4 dequantization tables (6 matrices: intra/inter for
// Y, Cb, Cr) for all 52 QP values, scaled by the PPS 4x4 scaling
// matrices.  Matrices with identical scaling lists share one buffer.
2020 static void init_dequant4_coeff_table(H264Context *h){
2022     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2023     for(i=0; i<6; i++ ){
2024         h->dequant4_coeff[i] = h->dequant4_buffer[i];
// Alias an earlier buffer when the scaling lists are byte-identical.
2026             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2027                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
// For 4x4 the base shift is ff_div6[q]+2 (scale doubles every 6 QP steps).
2034         for(q=0; q<52; q++){
2035             int shift = ff_div6[q] + 2;
2036             int idx = ff_rem6[q];
2038                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2039                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2040                     h->pps.scaling_matrix4[i][x]) << shift;
// Build all dequant tables for the current PPS; the 8x8 tables only if
// 8x8 transforms are enabled.  With transform bypass, QP 0 entries are
// forced to the neutral scale (1<<6) so bypass blocks pass unscaled.
2045 static void init_dequant_tables(H264Context *h){
2047     init_dequant4_coeff_table(h);
2048     if(h->pps.transform_8x8_mode)
2049         init_dequant8_coeff_table(h);
2050     if(h->sps.transform_bypass){
2053                 h->dequant4_coeff[i][0][x] = 1<<6;
2054         if(h->pps.transform_8x8_mode)
2057                 h->dequant8_coeff[i][0][x] = 1<<6;
2064 * needs width/height
// Allocate the per-stream macroblock tables (prediction modes, nnz, CBP,
// mvd, direct, slice map) and the mb -> b/b8 index lookup tables.
// Requires width/height to be known.  Returns 0 on success; on
// allocation failure CHECKED_ALLOCZ jumps to a cleanup path (elided).
2066 static int alloc_tables(H264Context *h){
2067     MpegEncContext * const s = &h->s;
// +1 row of macroblocks as guard space for edge accesses.
2068     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2071     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2073     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2074     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2075     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2077     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2078     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2079     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2080     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// Slice map starts as all -1 (no slice); slice_table is offset past the
// guard band so out-of-picture neighbours read the -1 sentinel.
2082     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
2083     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2085     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2086     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// Map each macroblock address to its 4x4 (b) and 8x8 (b8) grid origin.
2087     for(y=0; y<s->mb_height; y++){
2088         for(x=0; x<s->mb_width; x++){
2089             const int mb_xy= x + y*s->mb_stride;
2090             const int b_xy = 4*x + 4*y*h->b_stride;
2091             const int b8_xy= 2*x + 2*y*h->b8_stride;
2093             h->mb2b_xy [mb_xy]= b_xy;
2094             h->mb2b8_xy[mb_xy]= b8_xy;
// Scratchpad is linesize-dependent; allocated lazily in frame_start().
2098     s->obmc_scratchpad = NULL;
2100     if(!h->dequant4_coeff[0])
2101         init_dequant_tables(h);
2110 * Mimic alloc_tables(), but for every context thread.
// Share the tables allocated by alloc_tables() with a slice-thread
// context: plain pointer copies, no ownership transfer (free_tables()
// on the master frees them once).  Per-thread state is reset separately.
2112 static void clone_tables(H264Context *dst, H264Context *src){
2113     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2114     dst->non_zero_count           = src->non_zero_count;
2115     dst->slice_table              = src->slice_table;
2116     dst->cbp_table                = src->cbp_table;
2117     dst->mb2b_xy                  = src->mb2b_xy;
2118     dst->mb2b8_xy                 = src->mb2b8_xy;
2119     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2120     dst->mvd_table[0]             = src->mvd_table[0];
2121     dst->mvd_table[1]             = src->mvd_table[1];
2122     dst->direct_table             = src->direct_table;
// Scratchpad is per-thread; allocated lazily in frame_start().
2124     dst->s.obmc_scratchpad = NULL;
2125     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2130 * Allocate buffers which are not shared amongst multiple threads.
// Allocate the buffers that are private to each slice-thread context
// (deblock/intra top borders: 16 luma + 8 + 8 chroma bytes per MB column).
// Returns 0 on success, -1 on allocation failure.
2132 static int context_init(H264Context *h){
2133     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2134     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2138     return -1; // free_tables will clean up for us
// One-time initialization shared by decoder (and encoder) setup:
// geometry from the AVCodecContext, prediction function pointers, and
// flat (all-16) default scaling matrices until a PPS overrides them.
2141 static av_cold void common_init(H264Context *h){
2142     MpegEncContext * const s = &h->s;
2144     s->width = s->avctx->width;
2145     s->height = s->avctx->height;
2146     s->codec_id= s->avctx->codec->id;
2148     ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 marks the dequant tables as not yet built for any PPS.
2150     h->dequant_coeff_pps= -1;
2151     s->unrestricted_mv=1;
2152     s->decode=1; //FIXME
// Flat default scaling lists (value 16 == unity scaling).
2154     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2155     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// AVCodec.init callback: set up the MpegEncContext defaults, pixel
// format, and (if extradata starts with byte 1) AVCC-style extradata
// handling.  SVQ3 reuses this decoder and selects YUVJ420P.
2158 static av_cold int decode_init(AVCodecContext *avctx){
2159     H264Context *h= avctx->priv_data;
2160     MpegEncContext * const s = &h->s;
2162     MPV_decode_defaults(s);
2167     s->out_format = FMT_H264;
2168     s->workaround_bugs= avctx->workaround_bugs;
2171 //    s->decode_mb= ff_h263_decode_mb;
2172     s->quarter_sample = 1;
2175     if(avctx->codec_id == CODEC_ID_SVQ3)
2176         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2178         avctx->pix_fmt= PIX_FMT_YUV420P;
// Leading byte 1 marks AVCC (length-prefixed) extradata rather than
// Annex-B start codes.
2182     if(avctx->extradata_size > 0 && avctx->extradata &&
2183        *(char *)avctx->extradata == 1){
2190     h->thread_context[0] = h;
// No picture output yet: sentinel so the first POC always advances it.
2191     h->outputed_poc = INT_MIN;
// Per-frame setup: start the MPV frame and error resilience, compute
// the per-block destination offsets, lazily allocate per-thread
// scratchpads, and reset reference/POC bookkeeping for the new picture.
// Returns 0 on success, negative on MPV_frame_start failure.
2195 static int frame_start(H264Context *h){
2196     MpegEncContext * const s = &h->s;
2199     if(MPV_frame_start(s, s->avctx) < 0)
2201     ff_er_frame_start(s);
2203      * MPV_frame_start uses pict_type to derive key_frame.
2204      * This is incorrect for H.264; IDR markings must be used.
2205      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2206      * See decode_nal_units().
2208     s->current_picture_ptr->key_frame= 0;
2210     assert(s->linesize && s->uvlinesize);
// block_offset[0..23]: frame-mode offsets; [24..47]: field-mode offsets
// (double the line stride), for luma then the two chroma planes.
2212     for(i=0; i<16; i++){
2213         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2214         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2217         h->block_offset[16+i]=
2218         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2219         h->block_offset[24+16+i]=
2220         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2223     /* can't be in alloc_tables because linesize isn't known there.
2224      * FIXME: redo bipred weight to not require extra buffer? */
2225     for(i = 0; i < s->avctx->thread_count; i++)
2226         if(!h->thread_context[i]->s.obmc_scratchpad)
2227             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2229     /* some macroblocks will be accessed before they're available */
2230     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2231         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2233 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2235     // We mark the current picture as non-reference after allocating it, so
2236     // that if we break out due to an error it can be released automatically
2237     // in the next MPV_frame_start().
2238     // SVQ3 as well as most other codecs have only last/next/current and thus
2239     // get released even with set reference, besides SVQ3 and others do not
2240     // mark frames as reference later "naturally".
2241     if(s->codec_id != CODEC_ID_SVQ3)
2242         s->current_picture_ptr->reference= 0;
// Field POCs start at INT_MAX so real values from the slice header win.
2244     s->current_picture_ptr->field_poc[0]=
2245     s->current_picture_ptr->field_poc[1]= INT_MAX;
2246     assert(s->current_picture_ptr->long_ref==0);
// Save this macroblock's bottom row (into top_borders for the MB below)
// and right-edge column (into left_border for the MB to the right)
// before the deblocking filter overwrites them.  MBAFF pairs use
// different offsets/steps because two lines per pair are preserved.
2251 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2252     MpegEncContext * const s = &h->s;
2261     src_cb -= uvlinesize;
2262     src_cr -= uvlinesize;
2264     if(!simple && FRAME_MBAFF){
2266             offset = MB_MBAFF ? 1 : 17;
2267             uvoffset= MB_MBAFF ? 1 : 9;
// Copy the MB's last luma/chroma rows into the top-border cache.
2269                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2270                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2271                 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2272                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2273                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2278             h->left_border[0]= h->top_borders[0][s->mb_x][15];
2279             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2280                 h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2281                 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2287         top_idx = MB_MBAFF ? 0 : 1;
2289     step= MB_MBAFF ? 2 : 1;
2292     // There are two lines saved, the line above the top macroblock of a pair,
2293     // and the line above the bottom macroblock
2294     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2295     for(i=1; i<17 - skiplast; i++){
2296         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2299     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2300     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2302     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
// left_border layout: [0..33] luma, [34..51] Cb, [52..] Cr (18 each).
2303         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2304         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2305         for(i=1; i<9 - skiplast; i++){
2306             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2307             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2309         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2310         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Swap (xchg=1) or restore (xchg=0) the cached top/left border samples
// with the picture edges around the current MB, so intra prediction sees
// the pre-deblocking neighbour pixels.  With deblocking_filter==2 only
// same-slice neighbours are exchanged.
2314 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2315     MpegEncContext * const s = &h->s;
2326     if(!simple && FRAME_MBAFF){
2328             offset = MB_MBAFF ? 1 : 17;
2329             uvoffset= MB_MBAFF ? 1 : 9;
2333         top_idx = MB_MBAFF ? 0 : 1;
2335     step= MB_MBAFF ? 2 : 1;
// Mode 2: deblock only inside a slice, so check slice membership;
// otherwise a picture-boundary test suffices.
2338     if(h->deblocking_filter == 2) {
2340         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2341         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2343         deblock_left = (s->mb_x > 0);
2344         deblock_top =  (s->mb_y > 0);
// Step back one row/column so the pointers address the border pixels.
2347     src_y  -=   linesize + 1;
2348     src_cb -= uvlinesize + 1;
2349     src_cr -= uvlinesize + 1;
2351 #define XCHG(a,b,t,xchg)\
2358         for(i = !deblock_top; i<16; i++){
2359             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2361         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2365         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2366         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// Also exchange the top-right corner needed by 4x4 prediction modes.
2367         if(s->mb_x+1 < s->mb_width){
2368             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2372     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2374             for(i = !deblock_top; i<8; i++){
2375                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2376                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2378             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2379             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2382             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2383             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// Reconstruct one macroblock: intra prediction or motion compensation,
// then the inverse transform and (optionally) the deblocking filter.
// 'simple' is a compile-time template flag: when 1, the MBAFF / PCM /
// gray / non-H.264 (SVQ3) paths are compiled out to produce a fast
// common-case variant (see hl_decode_mb_simple/complex below).
2388 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2389     MpegEncContext * const s = &h->s;
2390     const int mb_x= s->mb_x;
2391     const int mb_y= s->mb_y;
2392     const int mb_xy= h->mb_xy;
2393     const int mb_type= s->current_picture.mb_type[mb_xy];
2394     uint8_t  *dest_y, *dest_cb, *dest_cr;
2395     int linesize, uvlinesize /*dct_offset*/;
2397     int *block_offset = &h->block_offset[0];
2398     const unsigned int bottom = mb_y & 1;
// Transform bypass (lossless) is only active at QP 0 with the SPS flag.
2399     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2400     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2401     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Destination pointers for this MB in the current picture planes.
2403     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2404     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2405     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2407     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2408     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblocks use doubled strides and the field block offsets;
// the bottom field additionally starts one line down.
2410     if (!simple && MB_FIELD) {
2411         linesize   = h->mb_linesize = s->linesize * 2;
2412         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2413         block_offset = &h->block_offset[24];
2414         if(mb_y&1){ //FIXME move out of this function?
2415             dest_y -= s->linesize*15;
2416             dest_cb-= s->uvlinesize*7;
2417             dest_cr-= s->uvlinesize*7;
// Re-map reference indices for MBAFF so field parity is encoded in them.
2421         for(list=0; list<h->list_count; list++){
2422             if(!USES_LIST(mb_type, list))
2424             if(IS_16X16(mb_type)){
2425                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2426                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2428                 for(i=0; i<16; i+=4){
2429                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2430                     int ref = h->ref_cache[list][scan8[i]];
2432                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2438         linesize   = h->mb_linesize = s->linesize;
2439         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2440 //        dct_offset = s->linesize * 16;
// Select the idct variants once: bypass adds residuals directly,
// otherwise 8x8 or 4x4 transforms depending on the MB type.
2443     if(transform_bypass){
2445         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2446     }else if(IS_8x8DCT(mb_type)){
2447         idct_dc_add = s->dsp.h264_idct8_dc_add;
2448         idct_add = s->dsp.h264_idct8_add;
2450         idct_dc_add = s->dsp.h264_idct_dc_add;
2451         idct_add = s->dsp.h264_idct_add;
// I_PCM: raw samples were parsed into h->mb; just copy them out.
2454     if (!simple && IS_INTRA_PCM(mb_type)) {
2455         for (i=0; i<16; i++) {
2456             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2458         for (i=0; i<8; i++) {
2459             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2460             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
// Intra: temporarily restore pre-deblock borders, predict, transform.
2463         if(IS_INTRA(mb_type)){
2464             if(h->deblocking_filter)
2465                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2467             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2468                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2469                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2472             if(IS_INTRA4x4(mb_type)){
2473                 if(simple || !s->encoding){
2474                     if(IS_8x8DCT(mb_type)){
2475                         for(i=0; i<16; i+=4){
2476                             uint8_t * const ptr= dest_y + block_offset[i];
2477                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2478                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
2479                             h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2480                                                    (h->topright_samples_available<<i)&0x4000, linesize);
// nnz==1 with only the DC coeff set: cheaper DC-only idct.
2482                                 if(nnz == 1 && h->mb[i*16])
2483                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2485                                     idct_add(ptr, h->mb + i*16, linesize);
2489                     for(i=0; i<16; i++){
2490                         uint8_t * const ptr= dest_y + block_offset[i];
2492                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// Modes needing top-right samples replicate the last top pixel when the
// neighbour is unavailable.
2495                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2496                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2497                             assert(mb_y || linesize <= block_offset[i]);
2498                             if(!topright_avail){
2499                                 tr= ptr[3 - linesize]*0x01010101;
2500                                 topright= (uint8_t*) &tr;
2502                                 topright= ptr + 4 - linesize;
2506                         h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2507                         nnz = h->non_zero_count_cache[ scan8[i] ];
2510                                 if(nnz == 1 && h->mb[i*16])
2511                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2513                                     idct_add(ptr, h->mb + i*16, linesize);
2515                                 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra16x16: full-plane prediction plus the Hadamard DC transform.
2520                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2522                     if(!transform_bypass)
2523                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2525                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2527             if(h->deblocking_filter)
2528                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// Inter: motion compensation via the qpel/chroma/weighted-pred tables.
2530             hl_motion(h, dest_y, dest_cb, dest_cr,
2531                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2532                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2533                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add luma residuals (Intra4x4 already added them during prediction).
2537         if(!IS_INTRA4x4(mb_type)){
2539                 if(IS_INTRA16x16(mb_type)){
2540                     for(i=0; i<16; i++){
2541                         if(h->non_zero_count_cache[ scan8[i] ])
2542                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2543                         else if(h->mb[i*16])
2544                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2547                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2548                     for(i=0; i<16; i+=di){
2549                         int nnz = h->non_zero_count_cache[ scan8[i] ];
2551                             if(nnz==1 && h->mb[i*16])
2552                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2554                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2559                 for(i=0; i<16; i++){
2560                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2561                         uint8_t * const ptr= dest_y + block_offset[i];
2562                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residuals: DC transform first, then per-block 4x4 idct adds.
2568         if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2569             uint8_t *dest[2] = {dest_cb, dest_cr};
2570             if(transform_bypass){
2571                 idct_add = idct_dc_add = s->dsp.add_pixels4;
2573                 idct_add = s->dsp.h264_idct_add;
2574                 idct_dc_add = s->dsp.h264_idct_dc_add;
2575                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2576                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2579                 for(i=16; i<16+8; i++){
2580                     if(h->non_zero_count_cache[ scan8[i] ])
2581                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2582                     else if(h->mb[i*16])
2583                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2586                 for(i=16; i<16+8; i++){
2587                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2588                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2589                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking: back up borders first, re-derive caches and chroma QPs.
2595     if(h->deblocking_filter) {
2596         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2597         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2598         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2599         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2600         if (!simple && FRAME_MBAFF) {
2601             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2603             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2609 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast-path specialization: simple=1 compiles out MBAFF/PCM/gray/SVQ3
// handling in hl_decode_mb_internal (which is av_always_inline).
2611 static void hl_decode_mb_simple(H264Context *h){
2612     hl_decode_mb_internal(h, 1);
2616 * Process a macroblock; this handles edge cases, such as interlacing.
// Full-featured specialization (interlacing, PCM, gray, SVQ3);
// av_noinline keeps this rare path out of the hot caller.
2618 static void av_noinline hl_decode_mb_complex(H264Context *h){
2619     hl_decode_mb_internal(h, 0);
// Dispatch macroblock reconstruction to the simple or complex variant
// based on features of the current MB/stream (MBAFF, field coding, PCM,
// non-H.264 codec, gray mode, encoding, or size-optimized builds).
2622 static void hl_decode_mb(H264Context *h){
2623     MpegEncContext * const s = &h->s;
2624     const int mb_xy= h->mb_xy;
2625     const int mb_type= s->current_picture.mb_type[mb_xy];
2626     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2627                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
// Encoder may skip reconstruction entirely.
2629     if(ENABLE_H264_ENCODER && !s->decode)
2633         hl_decode_mb_complex(h);
2634     else hl_decode_mb_simple(h);
// Convert a frame Picture into a single-field view in place: double the
// line strides, shift data pointers one line down for the bottom field,
// set reference to the field parity, and pick the matching field POC.
2637 static void pic_as_field(Picture *pic, const int parity){
2639     for (i = 0; i < 4; ++i) {
2640         if (parity == PICT_BOTTOM_FIELD)
2641             pic->data[i] += pic->linesize[i];
2642         pic->reference = parity;
2643         pic->linesize[i] *= 2;
2645     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
// Copy 'src' into 'dest' if its reference marking includes the requested
// parity; for field parities the copy is converted to a field view and
// pic_id is adjusted by id_add.  Returns non-zero when the copy was made.
2648 static int split_field_copy(Picture *dest, Picture *src,
2649                             int parity, int id_add){
2650     int match = !!(src->reference & parity);
2654         if(parity != PICT_FRAME){
2655             pic_as_field(dest, parity);
2657             dest->pic_id += id_add;
// Build a default reference list from 'in', alternating fields of the
// selected parity ('sel') with the opposite parity, as the spec requires
// for field reference list initialization.  pic_id becomes the long-term
// index (is_long) or frame_num.  Returns the number of entries written.
2664 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
// i[0]/i[1] scan independently for same-parity / opposite-parity fields.
2668     while(i[0]<len || i[1]<len){
2669         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2671         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2674             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2675             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2678             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2679             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
// Append to 'sorted' the pictures from 'src' whose POC is beyond 'limit'
// in the given direction (dir=0: descending POCs below limit; dir=1:
// ascending POCs above it).  Selection-sort style: each pass picks the
// next-best POC.  Returns the number of pictures appended.
2686 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2691         best_poc= dir ? INT_MIN : INT_MAX;
2693         for(i=0; i<len; i++){
2694             const int poc= src[i]->poc;
2695             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2697                 sorted[out_i]= src[i];
// Sentinel unchanged means no candidate was found: done.
2700         if(best_poc == (dir ? INT_MIN : INT_MAX))
2702         limit= sorted[out_i++]->poc - dir;
2708 * fills the default_ref_list.
// Build the default reference picture lists for the current slice.
// B slices: short-term refs sorted by POC distance (past-first for L0,
// future-first for L1) followed by long-term refs; L1 gets its first two
// entries swapped if it would otherwise equal L0.  P slices: a single
// list of short-term then long-term refs.
2710 static int fill_default_ref_list(H264Context *h){
2711     MpegEncContext * const s = &h->s;
2714     if(h->slice_type_nos==FF_B_TYPE){
2715         Picture *sorted[32];
2720             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2722             cur_poc= s->current_picture_ptr->poc;
2724         for(list= 0; list<2; list++){
// L0: past refs first (descending), then future; L1 the reverse.
2725             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2726             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2728             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2729             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2732             if(len < h->ref_count[list])
2733                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
// Per spec, if L1 would be identical to L0, swap its first two entries.
2737         if(lens[0] == lens[1] && lens[1] > 1){
2738             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2740                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2743         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2744         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2746         if(len < h->ref_count[0])
2747             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2750     for (i=0; i<h->ref_count[0]; i++) {
2751         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2753     if(h->slice_type_nos==FF_B_TYPE){
2754         for (i=0; i<h->ref_count[1]; i++) {
2755             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2762 static void print_short_term(H264Context *h);
2763 static void print_long_term(H264Context *h);
2766 * Extract structure information about the picture described by pic_num in
2767 * the current decoding context (frame or field). Note that pic_num is
2768 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2769 * @param pic_num picture number for which to extract structure information
2770 * @param structure one of PICT_XXX describing structure of picture
2772 * @return frame number (short term) or long term index of picture
2773 * described by pic_num
// Split a pic_num into (frame_num or long-term index, field structure).
// In field decoding, pic_nums interleave same-parity and opposite-parity
// fields; the low bit (tested on a line elided here) selects which, and
// the opposite field flips the structure relative to the current one.
2775 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2776     MpegEncContext * const s = &h->s;
2778     *structure = s->picture_structure;
2781         /* opposite field */
2782         *structure ^= PICT_FRAME;
// Parse the ref_pic_list_reordering syntax and apply it: start from the
// default lists, then for each reordering command locate the referenced
// short-term (by frame_num delta) or long-term (by index) picture and
// move it to the current position, shifting the remainder down.
// Returns 0 on success, -1 on bitstream errors.
2789 static int decode_ref_pic_list_reordering(H264Context *h){
2790     MpegEncContext * const s = &h->s;
2791     int list, index, pic_structure;
2793     print_short_term(h);
2796     for(list=0; list<h->list_count; list++){
2797         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_l0/l1
2799         if(get_bits1(&s->gb)){
2800             int pred= h->curr_pic_num;
2802             for(index=0; ; index++){
2803                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2804                 unsigned int pic_id;
2806                 Picture *ref = NULL;
// idc==3 terminates the reordering command list.
2808                 if(reordering_of_pic_nums_idc==3)
2811                 if(index >= h->ref_count[list]){
2812                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2816                 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term, pic_num predicted by +/- abs_diff_pic_num.
2817                     if(reordering_of_pic_nums_idc<2){
2818                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2821                         if(abs_diff_pic_num > h->max_pic_num){
2822                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2826                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2827                         else                                pred+= abs_diff_pic_num;
// Modular wrap within max_pic_num (a power of two).
2828                         pred &= h->max_pic_num - 1;
2830                         frame_num = pic_num_extract(h, pred, &pic_structure);
2832                         for(i= h->short_ref_count-1; i>=0; i--){
2833                             ref = h->short_ref[i];
2834                             assert(ref->reference);
2835                             assert(!ref->long_ref);
2837                                 ref->frame_num == frame_num &&
2838                                 (ref->reference & pic_structure)
// idc 2: long-term reference selected by long_term_pic_idx.
2846                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2848                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2851                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2854                         ref = h->long_ref[long_idx];
2855                         assert(!(ref && !ref->reference));
2856                         if(ref && (ref->reference & pic_structure)){
2857                             ref->pic_id= pic_id;
2858                             assert(ref->long_ref);
2866                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2867                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Insert the found ref at 'index', shifting intervening entries down.
2869                         for(i=index; i+1<h->ref_count[list]; i++){
2870                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2873                         for(; i > index; i--){
2874                             h->ref_list[list][i]= h->ref_list[list][i-1];
2876                         h->ref_list[list][index]= *ref;
2878                             pic_as_field(&h->ref_list[list][index], pic_structure);
2882                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Fill any remaining holes with the current picture as a last resort.
2888     for(list=0; list<h->list_count; list++){
2889         for(index= 0; index < h->ref_count[list]; index++){
2890             if(!h->ref_list[list][index].data[0]){
2891                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2892                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2897     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2898         direct_dist_scale_factor(h);
2899     direct_ref_list_init(h);
// For MBAFF frames, derive field reference entries from each frame
// reference: ref_list slots [16+2*i] / [16+2*i+1] become the top and
// bottom field views, and the weighted-prediction tables are duplicated
// for those field entries.
2903 static void fill_mbaff_ref_list(H264Context *h){
2905     for(list=0; list<2; list++){ //FIXME try list_count
2906         for(i=0; i<h->ref_count[list]; i++){
2907             Picture *frame = &h->ref_list[list][i];
2908             Picture *field = &h->ref_list[list][16+2*i];
// field[0]: top field — doubled strides, same data pointers.
2911                 field[0].linesize[j] <<= 1;
2912             field[0].reference = PICT_TOP_FIELD;
// field[1]: bottom field — same but shifted one (original) line down.
2913             field[1] = field[0];
2915                 field[1].data[j] += frame->linesize[j];
2916             field[1].reference = PICT_BOTTOM_FIELD;
2918             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2919             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2921                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2922                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2926     for(j=0; j<h->ref_count[1]; j++){
2927         for(i=0; i<h->ref_count[0]; i++)
2928             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2929         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2930         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parse the explicit weighted-prediction table from the slice header:
// log2 denominators, then per-reference luma/chroma weight+offset pairs
// (defaults applied when the per-ref flag is 0).  Sets h->use_weight /
// use_weight_chroma when any value deviates from the defaults.
2934 static int pred_weight_table(H264Context *h){
2935     MpegEncContext * const s = &h->s;
2937     int luma_def, chroma_def;
2940     h->use_weight_chroma= 0;
2941     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2942     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is 1.0 in fixed point: 1 << log2_denom.
2943     luma_def = 1<<h->luma_log2_weight_denom;
2944     chroma_def = 1<<h->chroma_log2_weight_denom;
2946     for(list=0; list<2; list++){
2947         for(i=0; i<h->ref_count[list]; i++){
2948             int luma_weight_flag, chroma_weight_flag;
2950             luma_weight_flag= get_bits1(&s->gb);
2951             if(luma_weight_flag){
2952                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2953                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2954                 if(   h->luma_weight[list][i] != luma_def
2955                    || h->luma_offset[list][i] != 0)
2958                 h->luma_weight[list][i]= luma_def;
2959                 h->luma_offset[list][i]= 0;
2963                 chroma_weight_flag= get_bits1(&s->gb);
2964                 if(chroma_weight_flag){
2967                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2968                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2969                         if(   h->chroma_weight[list][i][j] != chroma_def
2970                            || h->chroma_offset[list][i][j] != 0)
2971                             h->use_weight_chroma= 1;
2976                         h->chroma_weight[list][i][j]= chroma_def;
2977                         h->chroma_offset[list][i][j]= 0;
// Only B slices carry a second (list 1) weight table.
2982         if(h->slice_type_nos != FF_B_TYPE) break;
2984     h->use_weight= h->use_weight || h->use_weight_chroma;
// Compute implicit B-slice bi-prediction weights from POC distances
// (H.264 spec 8.4.2.3.2).  Weights fall back to 32/32 (equal) when the
// refs surround the current POC symmetrically or the scale factor is
// out of range.  use_weight==2 marks implicit (as opposed to explicit).
2988 static void implicit_weight_table(H264Context *h){
2989     MpegEncContext * const s = &h->s;
2991     int cur_poc = s->current_picture_ptr->poc;
// Symmetric single-ref case: implicit weighting degenerates to average.
2993     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
2994        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2996         h->use_weight_chroma= 0;
3001     h->use_weight_chroma= 2;
3002     h->luma_log2_weight_denom= 5;
3003     h->chroma_log2_weight_denom= 5;
3005     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3006         int poc0 = h->ref_list[0][ref0].poc;
3007         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3008             int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/dist_scale_factor follow the spec's temporal scaling formulas.
3009             int td = av_clip(poc1 - poc0, -128, 127);
3011                 int tb = av_clip(cur_poc - poc0, -128, 127);
3012                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3013                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3014                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3015                     h->implicit_weight[ref0][ref1] = 32;
3017                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3019                 h->implicit_weight[ref0][ref1] = 32;
3025 * Mark a picture as no longer needed for reference. The refmask
3026 * argument allows unreferencing of individual fields or the whole frame.
3027 * If the picture becomes entirely unreferenced, but is being held for
3028 * display purposes, it is marked as such.
3029 * @param refmask mask of fields to unreference; the mask is bitwise
3030 * anded with the reference marking of pic
3031 * @return non-zero if pic becomes entirely unreferenced (except possibly
3032 * for display purposes), zero if one of the fields remains in
3035 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// NOTE(review): the declaration of i and the return statements fall in gaps
// of this excerpt.
// Keep only the reference-marking bits selected by refmask; if any bit
// survives, at least one field is still a reference.
3037 if (pic->reference &= refmask) {
// Fully unreferenced: if the picture is still queued for display output,
// mark it DELAYED_PIC_REF so it is kept alive until displayed.
3040 for(i = 0; h->delayed_pic[i]; i++)
3041 if(pic == h->delayed_pic[i]){
3042 pic->reference=DELAYED_PIC_REF;
3050 * instantaneous decoder refresh.
3052 static void idr(H264Context *h){
// Drop all long-term references (an IDR invalidates the whole DPB
// reference state).
3055 for(i=0; i<16; i++){
3056 remove_long(h, i, 0);
3058 assert(h->long_ref_count==0);
// Drop all short-term references as well.
3060 for(i=0; i<h->short_ref_count; i++){
3061 unreference_pic(h, h->short_ref[i], 0);
3062 h->short_ref[i]= NULL;
3064 h->short_ref_count=0;
// Reset frame-number tracking so POC/frame_num derivation restarts cleanly.
3065 h->prev_frame_num= 0;
3066 h->prev_frame_num_offset= 0;
3071 /* forget old pics after a seek */
3072 static void flush_dpb(AVCodecContext *avctx){
3073 H264Context *h= avctx->priv_data;
// Unreference and forget every picture waiting in the delayed-output queue.
3075 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3076 if(h->delayed_pic[i])
3077 h->delayed_pic[i]->reference= 0;
3078 h->delayed_pic[i]= NULL;
// INT_MIN marks "no picture output yet" for the POC ordering logic.
3080 h->outputed_poc= INT_MIN;
3082 if(h->s.current_picture_ptr)
3083 h->s.current_picture_ptr->reference= 0;
3084 h->s.first_field= 0;
// Let the generic MPEG layer release its buffers too.
3085 ff_mpeg_flush(avctx);
3089 * Find a Picture in the short term reference list by frame number.
3090 * @param frame_num frame number to search for
3091 * @param idx the index into h->short_ref where returned picture is found
3092 * undefined if no picture found.
3093 * @return pointer to the found picture, or NULL if no pic with the provided
3094 * frame number is found
3096 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3097 MpegEncContext * const s = &h->s;
// Linear scan of the short-term list; the list is small (<= 16 entries)
// so no faster lookup is needed.
3100 for(i=0; i<h->short_ref_count; i++){
3101 Picture *pic= h->short_ref[i];
3102 if(s->avctx->debug&FF_DEBUG_MMCO)
3103 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
// NOTE(review): the match body (storing i via *idx and returning pic)
// falls in a gap of this excerpt.
3104 if(pic->frame_num == frame_num) {
3113 * Remove a picture from the short term reference list by its index in
3114 * that list. This does no checking on the provided index; it is assumed
3115 * to be valid. Other list entries are shifted down.
3116 * @param i index into h->short_ref of picture to remove.
3118 static void remove_short_at_index(H264Context *h, int i){
3119 assert(i >= 0 && i < h->short_ref_count);
3120 h->short_ref[i]= NULL;
// Close the hole by shifting the tail of the list down one slot;
// nothing to move when the removed entry was the last one.
3121 if (--h->short_ref_count)
3122 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3127 * @return the removed picture or NULL if an error occurs
3129 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3130 MpegEncContext * const s = &h->s;
3134 if(s->avctx->debug&FF_DEBUG_MMCO)
3135 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
// Locate the picture by frame number, then drop the requested field
// reference bits; only delist it when no field reference remains.
3137 pic = find_short(h, frame_num, &i);
3139 if(unreference_pic(h, pic, ref_mask))
3140 remove_short_at_index(h, i);
3147 * Remove a picture from the long term reference list by its index in
3149 * @return the removed picture or NULL if an error occurs
3151 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3154 pic= h->long_ref[i];
// Only evict the entry from the long-term table once no field reference
// bit survives the mask (unreference_pic returns non-zero in that case).
3156 if(unreference_pic(h, pic, ref_mask)){
3157 assert(h->long_ref[i]->long_ref == 1);
3158 h->long_ref[i]->long_ref= 0;
3159 h->long_ref[i]= NULL;
3160 h->long_ref_count--;
3168 * print short term list
3170 static void print_short_term(H264Context *h) {
// Debug-only dump of the short-term reference list; gated on the MMCO
// debug flag so it costs nothing in normal operation.
3172 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3173 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3174 for(i=0; i<h->short_ref_count; i++){
3175 Picture *pic= h->short_ref[i];
3176 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3182 * print long term list
3184 static void print_long_term(H264Context *h) {
// Debug-only dump of all 16 long-term slots (the table is sparse, hence
// the fixed-size walk rather than a count-bounded one).
3186 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3187 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3188 for(i = 0; i < 16; i++){
3189 Picture *pic= h->long_ref[i];
3191 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3198 * Executes the reference picture marking (memory management control operations).
3200 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3201 MpegEncContext * const s = &h->s;
3203 int current_ref_assigned=0;
// NOTE(review): several declarations and case terminators fall in gaps of
// this excerpt; the visible control flow is incomplete.
3206 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3207 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
// Apply each memory-management control operation in bitstream order.
3209 for(i=0; i<mmco_count; i++){
3210 int structure, frame_num;
3211 if(s->avctx->debug&FF_DEBUG_MMCO)
3212 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// For the two short-term opcodes, resolve the picture up front so both
// cases can share the lookup and the error report.
3214 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3215 || mmco[i].opcode == MMCO_SHORT2LONG){
3216 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3217 pic = find_short(h, frame_num, &j);
3219 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3220 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3221 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3226 switch(mmco[i].opcode){
3227 case MMCO_SHORT2UNUSED:
3228 if(s->avctx->debug&FF_DEBUG_MMCO)
3229 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// Only the field complementary to 'structure' (or the whole frame) is
// unreferenced, hence the XOR with PICT_FRAME.
3230 remove_short(h, frame_num, structure ^ PICT_FRAME);
3232 case MMCO_SHORT2LONG:
// Evict any different picture occupying the target long-term slot first.
3233 if (h->long_ref[mmco[i].long_arg] != pic)
3234 remove_long(h, mmco[i].long_arg, 0);
3236 remove_short_at_index(h, j);
3237 h->long_ref[ mmco[i].long_arg ]= pic;
3238 if (h->long_ref[ mmco[i].long_arg ]){
3239 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3240 h->long_ref_count++;
3243 case MMCO_LONG2UNUSED:
3244 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3245 pic = h->long_ref[j];
3247 remove_long(h, j, structure ^ PICT_FRAME);
3248 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3249 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
// Comment below left from previous code as it is an interesting note.
3253 /* First field in pair is in short term list or
3254 * at a different long term index.
3255 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3256 * Report the problem and keep the pair where it is,
3257 * and mark this field valid.
// (MMCO_LONG) Assign the current picture to the requested long-term slot,
// evicting a different occupant if necessary.
3260 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3261 remove_long(h, mmco[i].long_arg, 0);
3263 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3264 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3265 h->long_ref_count++;
3268 s->current_picture_ptr->reference |= s->picture_structure;
3269 current_ref_assigned=1;
3271 case MMCO_SET_MAX_LONG:
3272 assert(mmco[i].long_arg <= 16);
3273 // just remove the long term which index is greater than new max
3274 for(j = mmco[i].long_arg; j<16; j++){
3275 remove_long(h, j, 0);
// (MMCO_RESET) Empty both reference lists and restart POC/frame_num at 0.
3279 while(h->short_ref_count){
3280 remove_short(h, h->short_ref[0]->frame_num, 0);
3282 for(j = 0; j < 16; j++) {
3283 remove_long(h, j, 0);
3285 s->current_picture_ptr->poc=
3286 s->current_picture_ptr->field_poc[0]=
3287 s->current_picture_ptr->field_poc[1]=
3291 s->current_picture_ptr->frame_num= 0;
// If no MMCO made the current picture a reference, apply the default
// sliding-window marking.
3297 if (!current_ref_assigned) {
3298 /* Second field of complementary field pair; the first field of
3299 * which is already referenced. If short referenced, it
3300 * should be first entry in short_ref. If not, it must exist
3301 * in long_ref; trying to put it on the short list here is an
3302 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3304 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3305 /* Just mark the second field valid */
3306 s->current_picture_ptr->reference = PICT_FRAME;
3307 } else if (s->current_picture_ptr->long_ref) {
3308 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3309 "assignment for second field "
3310 "in complementary field pair "
3311 "(first field is long term)\n");
// Guard against a duplicate frame_num already on the short list.
3313 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3315 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// Insert the current picture at the head of the short-term list
// (the list is kept in decoding order, newest first).
3318 if(h->short_ref_count)
3319 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3321 h->short_ref[0]= s->current_picture_ptr;
3322 h->short_ref_count++;
3323 s->current_picture_ptr->reference |= s->picture_structure;
3327 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3329 /* We have too many reference frames, probably due to corrupted
3330 * stream. Need to discard one frame. Prevents overrun of the
3331 * short_ref and long_ref buffers.
3333 av_log(h->s.avctx, AV_LOG_ERROR,
3334 "number of reference frames exceeds max (probably "
3335 "corrupt input), discarding one\n");
// Prefer dropping a long-term ref only when no short-term ref exists;
// otherwise drop the oldest short-term reference.
3337 if (h->long_ref_count && !h->short_ref_count) {
3338 for (i = 0; i < 16; ++i)
3343 remove_long(h, i, 0);
3345 pic = h->short_ref[h->short_ref_count - 1];
3346 remove_short(h, pic->frame_num, 0);
3350 print_short_term(h);
3355 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3356 MpegEncContext * const s = &h->s;
// Parse dec_ref_pic_marking() from the slice header into h->mmco[].
// IDR slices carry their own two flags instead of an MMCO list.
3360 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3361 s->broken_link= get_bits1(gb) -1;
// long_term_reference_flag set -> keep the IDR picture as long-term idx 0.
3363 h->mmco[0].opcode= MMCO_LONG;
3364 h->mmco[0].long_arg= 0;
3368 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3369 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3370 MMCOOpcode opcode= get_ue_golomb(gb);
3372 h->mmco[i].opcode= opcode;
3373 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num.
3374 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3375 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3376 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3380 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3381 unsigned int long_arg= get_ue_golomb(gb);
// Long-term indices are limited to 16 frames (32 fields, LONG2UNUSED only).
3382 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3383 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3386 h->mmco[i].long_arg= long_arg;
3389 if(opcode > (unsigned)MMCO_LONG){
3390 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3393 if(opcode == MMCO_END)
// Sliding-window mode: synthesize an MMCO that drops the oldest
// short-term ref once the DPB is full. The second-field condition avoids
// dropping a ref while its pair is still being decoded.
3398 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3400 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3401 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3402 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3403 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Field pictures: unreference both fields (pic nums 2n and 2n+1).
3405 if (FIELD_PICTURE) {
3406 h->mmco[0].short_pic_num *= 2;
3407 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3408 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3418 static int init_poc(H264Context *h){
3419 MpegEncContext * const s = &h->s;
3420 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3422 Picture *cur = s->current_picture_ptr;
// frame_num wrapped -> advance the offset by one modulus period.
3424 h->frame_num_offset= h->prev_frame_num_offset;
3425 if(h->frame_num < h->prev_frame_num)
3426 h->frame_num_offset += max_frame_num;
// POC type 0: poc_lsb is coded per slice; derive poc_msb by detecting
// lsb wrap-around relative to the previous picture (spec 8.2.1.1).
3428 if(h->sps.poc_type==0){
3429 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3431 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3432 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3433 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3434 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3436 h->poc_msb = h->prev_poc_msb;
3437 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3439 field_poc[1] = h->poc_msb + h->poc_lsb;
3440 if(s->picture_structure == PICT_FRAME)
3441 field_poc[1] += h->delta_poc_bottom;
// POC type 1: POC is derived from frame_num via the SPS-coded expected
// delta pattern (spec 8.2.1.2).
3442 }else if(h->sps.poc_type==1){
3443 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3446 if(h->sps.poc_cycle_length != 0)
3447 abs_frame_num = h->frame_num_offset + h->frame_num;
3451 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3454 expected_delta_per_poc_cycle = 0;
3455 for(i=0; i < h->sps.poc_cycle_length; i++)
3456 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3458 if(abs_frame_num > 0){
3459 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3460 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3462 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3463 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3464 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3468 if(h->nal_ref_idc == 0)
3469 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3471 field_poc[0] = expectedpoc + h->delta_poc[0];
3472 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3474 if(s->picture_structure == PICT_FRAME)
3475 field_poc[1] += h->delta_poc[1];
// POC type 2: POC follows decoding order directly.
3477 int poc= 2*(h->frame_num_offset + h->frame_num);
// Store per-field POCs only for the field(s) this picture contains;
// the picture POC is the smaller of the two field POCs.
3486 if(s->picture_structure != PICT_BOTTOM_FIELD)
3487 s->current_picture_ptr->field_poc[0]= field_poc[0];
3488 if(s->picture_structure != PICT_TOP_FIELD)
3489 s->current_picture_ptr->field_poc[1]= field_poc[1];
3490 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3497 * initialize scan tables
3499 static void init_scan_tables(H264Context *h){
3500 MpegEncContext * const s = &h->s;
// With the plain C IDCT the canonical scan order is used; SIMD IDCTs use
// a permuted coefficient layout, so the scans are permuted to match.
3502 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3503 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3504 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3506 for(i=0; i<16; i++){
// 4x4 permutation: swap the two 2-bit halves of the index.
3507 #define T(x) (x>>2) | ((x<<2) & 0xF)
3508 h->zigzag_scan[i] = T(zigzag_scan[i]);
3509 h-> field_scan[i] = T( field_scan[i]);
3513 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3514 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3515 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3516 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3517 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3519 for(i=0; i<64; i++){
// 8x8 permutation: swap row and column (3 bits each).
3520 #define T(x) (x>>3) | ((x&7)<<3)
3521 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3522 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3523 h->field_scan8x8[i] = T(field_scan8x8[i]);
3524 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// Lossless (transform-bypass) blocks always use the unpermuted scans,
// since no IDCT (and hence no permutation) is applied to them.
3528 if(h->sps.transform_bypass){ //FIXME same ugly
3529 h->zigzag_scan_q0 = zigzag_scan;
3530 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3531 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3532 h->field_scan_q0 = field_scan;
3533 h->field_scan8x8_q0 = field_scan8x8;
3534 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3536 h->zigzag_scan_q0 = h->zigzag_scan;
3537 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3538 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3539 h->field_scan_q0 = h->field_scan;
3540 h->field_scan8x8_q0 = h->field_scan8x8;
3541 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3546 * Replicates H264 "master" context to thread contexts.
3548 static void clone_slice(H264Context *dst, H264Context *src)
// Shallow-copy the per-frame state a slice-decoding thread needs from the
// master context. Pointer members (picture, ref lists) are shared, not
// duplicated; the caller is responsible for the lifetime of 'src'.
3550 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3551 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3552 dst->s.current_picture = src->s.current_picture;
3553 dst->s.linesize = src->s.linesize;
3554 dst->s.uvlinesize = src->s.uvlinesize;
3555 dst->s.first_field = src->s.first_field;
3557 dst->prev_poc_msb = src->prev_poc_msb;
3558 dst->prev_poc_lsb = src->prev_poc_lsb;
3559 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3560 dst->prev_frame_num = src->prev_frame_num;
3561 dst->short_ref_count = src->short_ref_count;
3563 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3564 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3565 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3566 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3568 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3569 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3573 * decodes a slice header.
3574 * This will also call MPV_common_init() and frame_start() as needed.
3576 * @param h h264context
3577 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3579 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3581 static int decode_slice_header(H264Context *h, H264Context *h0){
3582 MpegEncContext * const s = &h->s;
3583 MpegEncContext * const s0 = &h0->s;
3584 unsigned int first_mb_in_slice;
3585 unsigned int pps_id;
3586 int num_ref_idx_active_override_flag;
3587 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3588 unsigned int slice_type, tmp, i, j;
3589 int default_ref_list_done = 0;
3590 int last_pic_structure;
// NOTE(review): this excerpt is missing many original lines (error
// returns, else branches, closing braces); read with the full file.
3592 s->dropable= h->nal_ref_idc == 0;
// Non-reference slices may use the cheaper 2-tap chroma-style qpel when
// CODEC_FLAG2_FAST is set (output is not bit-exact then).
3594 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3595 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3596 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3598 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3599 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3602 first_mb_in_slice= get_ue_golomb(&s->gb);
// In CHUNKS mode a slice with first_mb==0 starts a new picture.
3604 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3605 h0->current_slice = 0;
3606 if (!s0->first_field)
3607 s->current_picture_ptr= NULL;
3610 slice_type= get_ue_golomb(&s->gb);
3612 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type values 5..9 mean "fixed for the whole picture" (value - 5).
3617 h->slice_type_fixed=1;
3619 h->slice_type_fixed=0;
3621 slice_type= slice_type_map[ slice_type ];
// Default ref list can be reused for I slices and for repeated slice
// types within the same picture.
3622 if (slice_type == FF_I_TYPE
3623 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3624 default_ref_list_done = 1;
3626 h->slice_type= slice_type;
3627 h->slice_type_nos= slice_type & 3;
3629 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3630 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3631 av_log(h->s.avctx, AV_LOG_ERROR,
3632 "B picture before any references, skipping\n");
// Resolve the PPS, then the SPS it points at; both must exist.
3636 pps_id= get_ue_golomb(&s->gb);
3637 if(pps_id>=MAX_PPS_COUNT){
3638 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3641 if(!h0->pps_buffers[pps_id]) {
3642 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3645 h->pps= *h0->pps_buffers[pps_id];
3647 if(!h0->sps_buffers[h->pps.sps_id]) {
3648 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3651 h->sps = *h0->sps_buffers[h->pps.sps_id];
// Dequant tables depend on the PPS; rebuild only in the master context
// and only when the PPS actually changed.
3653 if(h == h0 && h->dequant_coeff_pps != pps_id){
3654 h->dequant_coeff_pps = pps_id;
3655 init_dequant_tables(h);
3658 s->mb_width= h->sps.mb_width;
3659 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3661 h->b_stride= s->mb_width*4;
3662 h->b8_stride= s->mb_width*2;
// Derive display size from macroblock size minus the SPS cropping.
3664 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3665 if(h->sps.frame_mbs_only_flag)
3666 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3668 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3670 if (s->context_initialized
3671 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3673 return -1; // width / height changed during parallelized decoding
3677 if (!s->context_initialized) {
3679 return -1; // we cant (re-)initialize context during parallel decoding
3680 if (MPV_common_init(s) < 0)
3684 init_scan_tables(h);
// Allocate and initialize one H264Context per additional slice thread,
// sharing the MpegEncContext the generic layer already set up.
3687 for(i = 1; i < s->avctx->thread_count; i++) {
3689 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3690 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3691 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3694 init_scan_tables(c);
3698 for(i = 0; i < s->avctx->thread_count; i++)
3699 if(context_init(h->thread_context[i]) < 0)
3702 s->avctx->width = s->width;
3703 s->avctx->height = s->height;
3704 s->avctx->sample_aspect_ratio= h->sps.sar;
3705 if(!s->avctx->sample_aspect_ratio.den)
3706 s->avctx->sample_aspect_ratio.den = 1;
3708 if(h->sps.timing_info_present_flag){
// time_base numerator doubled because H.264 timing is in field units.
3709 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Workaround: old x264 (<44) wrote timing off by a factor of two.
3710 if(h->x264_build > 0 && h->x264_build < 44)
3711 s->avctx->time_base.den *= 2;
3712 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3713 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3717 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// Determine picture structure (frame / top field / bottom field / MBAFF).
3720 h->mb_aff_frame = 0;
3721 last_pic_structure = s0->picture_structure;
3722 if(h->sps.frame_mbs_only_flag){
3723 s->picture_structure= PICT_FRAME;
3725 if(get_bits1(&s->gb)) { //field_pic_flag
3726 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3728 s->picture_structure= PICT_FRAME;
3729 h->mb_aff_frame = h->sps.mb_aff;
3732 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3734 if(h0->current_slice == 0){
// Fill gaps in frame_num (lost pictures) with synthesized references so
// later frames have something to predict from.
3735 while(h->frame_num != h->prev_frame_num &&
3736 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3737 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3739 h->prev_frame_num++;
3740 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3741 s->current_picture_ptr->frame_num= h->prev_frame_num;
3742 execute_ref_pic_marking(h, NULL, 0);
3745 /* See if we have a decoded first field looking for a pair... */
3746 if (s0->first_field) {
3747 assert(s0->current_picture_ptr);
3748 assert(s0->current_picture_ptr->data[0]);
3749 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3751 /* figure out if we have a complementary field pair */
3752 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3754 * Previous field is unmatched. Don't display it, but let it
3755 * remain for reference if marked as such.
3757 s0->current_picture_ptr = NULL;
3758 s0->first_field = FIELD_PICTURE;
3761 if (h->nal_ref_idc &&
3762 s0->current_picture_ptr->reference &&
3763 s0->current_picture_ptr->frame_num != h->frame_num) {
3765 * This and previous field were reference, but had
3766 * different frame_nums. Consider this field first in
3767 * pair. Throw away previous field except for reference
3770 s0->first_field = 1;
3771 s0->current_picture_ptr = NULL;
3774 /* Second field in complementary pair */
3775 s0->first_field = 0;
3780 /* Frame or first field in a potentially complementary pair */
3781 assert(!s0->current_picture_ptr);
3782 s0->first_field = FIELD_PICTURE;
3785 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3786 s0->first_field = 0;
3793 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3795 assert(s->mb_num == s->mb_width * s->mb_height);
3796 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3797 first_mb_in_slice >= s->mb_num){
3798 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3801 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3802 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3803 if (s->picture_structure == PICT_BOTTOM_FIELD)
3804 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3805 assert(s->mb_y < s->mb_height);
// Field pictures use per-field pic numbers (2n / 2n+1) and a doubled range.
3807 if(s->picture_structure==PICT_FRAME){
3808 h->curr_pic_num= h->frame_num;
3809 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3811 h->curr_pic_num= 2*h->frame_num + 1;
3812 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3815 if(h->nal_unit_type == NAL_IDR_SLICE){
3816 get_ue_golomb(&s->gb); /* idr_pic_id */
// Parse the POC-related slice-header syntax for the active poc_type.
3819 if(h->sps.poc_type==0){
3820 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3822 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3823 h->delta_poc_bottom= get_se_golomb(&s->gb);
3827 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3828 h->delta_poc[0]= get_se_golomb(&s->gb);
3830 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3831 h->delta_poc[1]= get_se_golomb(&s->gb);
3836 if(h->pps.redundant_pic_cnt_present){
3837 h->redundant_pic_count= get_ue_golomb(&s->gb);
3840 //set defaults, might be overridden a few lines later
3841 h->ref_count[0]= h->pps.ref_count[0];
3842 h->ref_count[1]= h->pps.ref_count[1];
3844 if(h->slice_type_nos != FF_I_TYPE){
3845 if(h->slice_type_nos == FF_B_TYPE){
3846 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3848 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3850 if(num_ref_idx_active_override_flag){
3851 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3852 if(h->slice_type_nos==FF_B_TYPE)
3853 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Unsigned trick: -1 also catches ref_count == 0 after corruption.
3855 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3856 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3857 h->ref_count[0]= h->ref_count[1]= 1;
3861 if(h->slice_type_nos == FF_B_TYPE)
// Build / reorder reference lists, then the optional weight tables.
3868 if(!default_ref_list_done){
3869 fill_default_ref_list(h);
3872 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3875 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3876 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3877 pred_weight_table(h);
3878 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3879 implicit_weight_table(h);
3884 decode_ref_pic_marking(h0, &s->gb);
3887 fill_mbaff_ref_list(h);
3889 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3890 tmp = get_ue_golomb(&s->gb);
3892 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3895 h->cabac_init_idc= tmp;
3898 h->last_qscale_diff = 0;
3899 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3901 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3905 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3906 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3907 //FIXME qscale / qp ... stuff
3908 if(h->slice_type == FF_SP_TYPE){
3909 get_bits1(&s->gb); /* sp_for_switch_flag */
3911 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3912 get_se_golomb(&s->gb); /* slice_qs_delta */
// Deblocking filter parameters; note disable_deblocking_filter_idc uses
// 1 = off, so values 0/1 are flipped into an on/off flag below.
3915 h->deblocking_filter = 1;
3916 h->slice_alpha_c0_offset = 0;
3917 h->slice_beta_offset = 0;
3918 if( h->pps.deblocking_filter_parameters_present ) {
3919 tmp= get_ue_golomb(&s->gb);
3921 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3924 h->deblocking_filter= tmp;
3925 if(h->deblocking_filter < 2)
3926 h->deblocking_filter^= 1; // 1<->0
3928 if( h->deblocking_filter ) {
3929 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3930 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3934 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3935 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3936 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3937 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3938 h->deblocking_filter= 0;
// Cross-slice deblocking (type 1) cannot be parallelized; either cheat
// (FAST flag) or fall back to sequential slice decoding.
3940 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3941 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3942 /* Cheat slightly for speed:
3943 Do not bother to deblock across slices. */
3944 h->deblocking_filter = 2;
3946 h0->max_contexts = 1;
3947 if(!h0->single_decode_warning) {
3948 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3949 h0->single_decode_warning = 1;
3952 return 1; // deblocking switched inside frame
// NOTE(review): the FMO line below appears to come from a disabled
// (#if 0) region in the original file; the '?' placeholder is not live code.
3957 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3958 slice_group_change_cycle= get_bits(&s->gb, ?);
3961 h0->last_slice_type = slice_type;
3962 h->slice_num = ++h0->current_slice;
// Precompute ref-index -> frame-number mapping used by the deblocker.
3965 int *ref2frm= h->ref2frm[h->slice_num&15][j];
3969 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3970 +(h->ref_list[j][i].reference&3);
3973 for(i=16; i<48; i++)
3974 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3975 +(h->ref_list[j][i].reference&3);
3978 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3979 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3981 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3982 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3984 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3986 av_get_pict_type_char(h->slice_type),
3987 pps_id, h->frame_num,
3988 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3989 h->ref_count[0], h->ref_count[1],
3991 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
3993 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
3994 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4004 static inline int get_level_prefix(GetBitContext *gb){
// Reads a unary-coded level_prefix: counts leading zero bits up to the
// first 1 bit using the raw bitstream-reader cache macros.
4008 OPEN_READER(re, gb);
4009 UPDATE_CACHE(re, gb);
4010 buf=GET_CACHE(re, gb);
// Position of the first set bit from the MSB; log = zeros + 1.
4012 log= 32 - av_log2(buf);
4014 print_bin(buf>>(32-log), log);
4015 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the zeros plus the terminating 1 bit.
4018 LAST_SKIP_BITS(re, gb, log);
4019 CLOSE_READER(re, gb);
4024 static inline int get_dct8x8_allowed(H264Context *h){
// NOTE(review): the loop header and return fall in gaps of this excerpt.
// The 8x8 transform requires every sub-partition to be at least 8x8
// (direct sub-MBs additionally need direct_8x8_inference_flag).
4027 if(!IS_SUB_8X8(h->sub_mb_type[i])
4028 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4035 * decodes a residual block.
4036 * @param n block index
4037 * @param scantable scantable
4038 * @param max_coeff number of coefficients in the block
4039 * @return <0 if an error occurred
4041 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4042 MpegEncContext * const s = &h->s;
// Maps the predicted non-zero count (0..16) to one of the 4 coeff_token
// VLC tables chosen by nC, per the CAVLC spec.
4043 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4045 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4047 //FIXME put trailing_onex into the context
// coeff_token packs (total_coeff<<2)|trailing_ones; chroma DC uses its
// own dedicated table, other blocks pick a table from the predicted nC.
4049 if(n == CHROMA_DC_BLOCK_INDEX){
4050 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4051 total_coeff= coeff_token>>2;
4053 if(n == LUMA_DC_BLOCK_INDEX){
4054 total_coeff= pred_non_zero_count(h, 0);
4055 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4056 total_coeff= coeff_token>>2;
4058 total_coeff= pred_non_zero_count(h, n);
4059 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4060 total_coeff= coeff_token>>2;
4061 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4065 //FIXME set last_non_zero?
4069 if(total_coeff > (unsigned)max_coeff) {
4070 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4074 trailing_ones= coeff_token&3;
4075 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4076 assert(total_coeff<=16);
// Trailing ones are coded as sign bits only (value is +/-1).
4078 for(i=0; i<trailing_ones; i++){
4079 level[i]= 1 - 2*get_bits1(gb);
// First non-trailing-one level: its suffix length depends on context.
4083 int level_code, mask;
4084 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4085 int prefix= get_level_prefix(gb);
4087 //first coefficient has suffix_length equal to 0 or 1
4088 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4090 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4092 level_code= (prefix<<suffix_length); //part
4093 }else if(prefix==14){
4095 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4097 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape coding with a (prefix-3)-bit suffix.
4099 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4100 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4102 level_code += (1<<(prefix-3))-4096;
// If fewer than 3 trailing ones were coded, |level| >= 2, so shift codes.
4105 if(trailing_ones < 3) level_code += 2;
// Branchless sign decode: even codes are positive, odd negative.
4110 mask= -(level_code&1);
4111 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4114 //remaining coefficients have suffix_length > 0
4115 for(;i<total_coeff;i++) {
4116 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4117 prefix = get_level_prefix(gb);
4119 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4121 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4123 level_code += (1<<(prefix-3))-4096;
4125 mask= -(level_code&1);
4126 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// Adapt suffix_length upward as level magnitudes grow.
4127 if(level_code > suffix_limit[suffix_length])
// total_zeros is only coded when the block is not completely full.
4132 if(total_coeff == max_coeff)
4135 if(n == CHROMA_DC_BLOCK_INDEX)
4136 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4138 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Place levels back-to-front; two near-identical loops: without (DC) and
// with (AC) dequantization by qmul.
4141 coeff_num = zeros_left + total_coeff - 1;
4142 j = scantable[coeff_num];
4144 block[j] = level[0];
4145 for(i=1;i<total_coeff;i++) {
4148 else if(zeros_left < 7){
4149 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4151 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4153 zeros_left -= run_before;
4154 coeff_num -= 1 + run_before;
4155 j= scantable[ coeff_num ];
// Dequantized variant: (level * qmul + 32) >> 6 rounds to nearest.
4160 block[j] = (level[0] * qmul[j] + 32)>>6;
4161 for(i=1;i<total_coeff;i++) {
4164 else if(zeros_left < 7){
4165 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4167 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4169 zeros_left -= run_before;
4170 coeff_num -= 1 + run_before;
4171 j= scantable[ coeff_num ];
4173 block[j]= (level[i] * qmul[j] + 32)>>6;
// Negative zeros_left means the run_before values overran the block.
4178 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts the MBAFF field/frame decoding flag for a macroblock whose own
 * flag is not coded (e.g. both MBs of a skipped pair), by inheriting the
 * interlaced bit from the left neighbour if it is in the current slice,
 * otherwise from the top neighbour.
 * NOTE(review): the trailing ": 0" default of the conditional chain is not
 * visible here — confirm against the full file.
 */
4185 static void predict_field_decoding_flag(H264Context *h){
4186 MpegEncContext * const s = &h->s;
4187 const int mb_xy= h->mb_xy;
/* left neighbour first, then the MB above (mb_xy - mb_stride) */
4188 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4189 ? s->current_picture.mb_type[mb_xy-1]
4190 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4191 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
/* both mb_mbaff and mb_field_decoding_flag follow the neighbour's interlaced bit */
4193 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4197 * decodes a P_SKIP or B_SKIP macroblock
4199 static void decode_mb_skip(H264Context *h){
4200 MpegEncContext * const s = &h->s;
4201 const int mb_xy= h->mb_xy;
/* a skipped MB carries no residual: clear all non-zero-count state */
4204 memset(h->non_zero_count[mb_xy], 0, 16);
4205 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4208 mb_type|= MB_TYPE_INTERLACED;
/* B_SKIP: motion comes from direct-mode prediction */
4210 if( h->slice_type_nos == FF_B_TYPE )
4212 // just for fill_caches. pred_direct_motion will set the real mb_type
4213 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4215 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4216 pred_direct_motion(h, &mb_type);
4217 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: 16x16 L0 prediction with the P-skip predicted MV and ref 0 */
4222 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4224 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4225 pred_pskip_motion(h, &mx, &my);
4226 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4227 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit motion/type/qscale for deblocking and neighbour prediction */
4230 write_back_motion(h, mb_type);
4231 s->current_picture.mb_type[mb_xy]= mb_type;
4232 s->current_picture.qscale_table[mb_xy]= s->qscale;
4233 h->slice_table[ mb_xy ]= h->slice_num;
4234 h->prev_mb_skipped= 1;
4238 * decodes a macroblock
4239 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one macroblock from a CAVLC-coded slice: skip-run handling,
 * mb_type, intra prediction modes or inter motion data, CBP, delta-QP
 * and the residual blocks. The parse order follows the H.264 syntax
 * exactly; statements must not be reordered.
 * @return 0 on success, a negative value on a bitstream error
 */
4241 static int decode_mb_cavlc(H264Context *h){
4242 MpegEncContext * const s = &h->s;
4244 int partition_count;
4245 unsigned int mb_type, cbp;
4246 int dct8x8_allowed= h->pps.transform_8x8_mode;
4248 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4250 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4252 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4253 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* ---- skip handling (P/B slices only; I slices have no mb_skip_run) ---- */
4255 if(h->slice_type_nos != FF_I_TYPE){
4256 if(s->mb_skip_run==-1)
4257 s->mb_skip_run= get_ue_golomb(&s->gb);
4259 if (s->mb_skip_run--) {
/* MBAFF: the field flag of a skipped top MB may come from the pair's
 * explicitly coded flag or has to be predicted from neighbours */
4260 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4261 if(s->mb_skip_run==0)
4262 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4264 predict_field_decoding_flag(h);
4271 if( (s->mb_y&1) == 0 )
4272 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4275 h->prev_mb_skipped= 0;
/* ---- mb_type: slice-type specific remapping via the *_mb_type_info tables ---- */
4277 mb_type= get_ue_golomb(&s->gb);
4278 if(h->slice_type_nos == FF_B_TYPE){
4280 partition_count= b_mb_type_info[mb_type].partition_count;
4281 mb_type= b_mb_type_info[mb_type].type;
4284 goto decode_intra_mb;
4286 }else if(h->slice_type_nos == FF_P_TYPE){
4288 partition_count= p_mb_type_info[mb_type].partition_count;
4289 mb_type= p_mb_type_info[mb_type].type;
4292 goto decode_intra_mb;
4295 assert(h->slice_type_nos == FF_I_TYPE);
4296 if(h->slice_type == FF_SI_TYPE && mb_type)
4300 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4304 cbp= i_mb_type_info[mb_type].cbp;
4305 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4306 mb_type= i_mb_type_info[mb_type].type;
4310 mb_type |= MB_TYPE_INTERLACED;
4312 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- I_PCM: raw samples, bitstream is byte-aligned first ---- */
4314 if(IS_INTRA_PCM(mb_type)){
4317 // We assume these blocks are very rare so we do not optimize it.
4318 align_get_bits(&s->gb);
4320 // The pixels are stored in the same order as levels in h->mb array.
4321 for(x=0; x < (CHROMA ? 384 : 256); x++){
4322 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4325 // In deblocking, the quantizer is 0
4326 s->current_picture.qscale_table[mb_xy]= 0;
4327 // All coeffs are present
4328 memset(h->non_zero_count[mb_xy], 16, 16);
4330 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MBs address twice as many reference fields */
4335 h->ref_count[0] <<= 1;
4336 h->ref_count[1] <<= 1;
4339 fill_caches(h, mb_type, 0);
/* ---- intra prediction modes ---- */
4342 if(IS_INTRA(mb_type)){
4344 // init_top_left_availability(h);
4345 if(IS_INTRA4x4(mb_type)){
4348 if(dct8x8_allowed && get_bits1(&s->gb)){
4349 mb_type |= MB_TYPE_8x8DCT;
4353 // fill_intra4x4_pred_table(h);
4354 for(i=0; i<16; i+=di){
4355 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag==0: read rem_mode and skip over the predicted one */
4357 if(!get_bits1(&s->gb)){
4358 const int rem_mode= get_bits(&s->gb, 3);
4359 mode = rem_mode + (rem_mode >= mode);
4363 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4365 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4367 write_back_intra_pred_mode(h);
4368 if( check_intra4x4_pred_mode(h) < 0)
4371 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4372 if(h->intra16x16_pred_mode < 0)
4376 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4379 h->chroma_pred_mode= pred_mode;
/* ---- inter, 8x8 partitions: sub_mb_type, ref idx, then MVs ---- */
4381 }else if(partition_count==4){
4382 int i, j, sub_partition_count[4], list, ref[2][4];
4384 if(h->slice_type_nos == FF_B_TYPE){
4386 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4387 if(h->sub_mb_type[i] >=13){
4388 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4391 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4392 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4394 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4395 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4396 pred_direct_motion(h, &mb_type);
/* mark interior positions unavailable so MV prediction ignores direct 8x8s */
4397 h->ref_cache[0][scan8[4]] =
4398 h->ref_cache[1][scan8[4]] =
4399 h->ref_cache[0][scan8[12]] =
4400 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4403 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4405 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4406 if(h->sub_mb_type[i] >=4){
4407 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4410 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4411 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per list and per 8x8 block */
4415 for(list=0; list<h->list_count; list++){
4416 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4418 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4419 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4420 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4422 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4434 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences; each sub-partition predicts then adds mvd */
4436 for(list=0; list<h->list_count; list++){
4438 if(IS_DIRECT(h->sub_mb_type[i])) {
4439 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4442 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4443 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4445 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4446 const int sub_mb_type= h->sub_mb_type[i];
4447 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4448 for(j=0; j<sub_partition_count[i]; j++){
4450 const int index= 4*i + block_width*j;
4451 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4452 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4453 mx += get_se_golomb(&s->gb);
4454 my += get_se_golomb(&s->gb);
4455 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV over the cache cells covered by the sub-partition shape */
4457 if(IS_SUB_8X8(sub_mb_type)){
4459 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4461 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4462 }else if(IS_SUB_8X4(sub_mb_type)){
4463 mv_cache[ 1 ][0]= mx;
4464 mv_cache[ 1 ][1]= my;
4465 }else if(IS_SUB_4X8(sub_mb_type)){
4466 mv_cache[ 8 ][0]= mx;
4467 mv_cache[ 8 ][1]= my;
4469 mv_cache[ 0 ][0]= mx;
4470 mv_cache[ 0 ][1]= my;
4473 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* ---- B direct 16x16 ---- */
4479 }else if(IS_DIRECT(mb_type)){
4480 pred_direct_motion(h, &mb_type);
4481 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- inter 16x16 / 16x8 / 8x16: refs for all partitions, then MVs ---- */
4483 int list, mx, my, i;
4484 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4485 if(IS_16X16(mb_type)){
4486 for(list=0; list<h->list_count; list++){
4488 if(IS_DIR(mb_type, 0, list)){
4489 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4490 if(val >= h->ref_count[list]){
4491 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4495 val= LIST_NOT_USED&0xFF;
4496 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4498 for(list=0; list<h->list_count; list++){
4500 if(IS_DIR(mb_type, 0, list)){
4501 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4502 mx += get_se_golomb(&s->gb);
4503 my += get_se_golomb(&s->gb);
4504 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4506 val= pack16to32(mx,my);
4509 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4512 else if(IS_16X8(mb_type)){
4513 for(list=0; list<h->list_count; list++){
4516 if(IS_DIR(mb_type, i, list)){
4517 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4518 if(val >= h->ref_count[list]){
4519 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4523 val= LIST_NOT_USED&0xFF;
4524 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4527 for(list=0; list<h->list_count; list++){
4530 if(IS_DIR(mb_type, i, list)){
4531 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4532 mx += get_se_golomb(&s->gb);
4533 my += get_se_golomb(&s->gb);
4534 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4536 val= pack16to32(mx,my);
4539 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4543 assert(IS_8X16(mb_type));
4544 for(list=0; list<h->list_count; list++){
4547 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4548 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4549 if(val >= h->ref_count[list]){
4550 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4554 val= LIST_NOT_USED&0xFF;
4555 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4558 for(list=0; list<h->list_count; list++){
4561 if(IS_DIR(mb_type, i, list)){
4562 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4563 mx += get_se_golomb(&s->gb);
4564 my += get_se_golomb(&s->gb);
4565 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4567 val= pack16to32(mx,my);
4570 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4576 if(IS_INTER(mb_type))
4577 write_back_motion(h, mb_type);
/* ---- coded block pattern (not present for Intra16x16, which implies it) ---- */
4579 if(!IS_INTRA16x16(mb_type)){
4580 cbp= get_ue_golomb(&s->gb);
4582 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4587 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4588 else cbp= golomb_to_inter_cbp [cbp];
4590 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4591 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with luma residual */
4596 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4597 if(get_bits1(&s->gb)){
4598 mb_type |= MB_TYPE_8x8DCT;
4599 h->cbp_table[mb_xy]= cbp;
4602 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residuals: dquant, then luma DC/AC and chroma DC/AC blocks ---- */
4604 if(cbp || IS_INTRA16x16(mb_type)){
4605 int i8x8, i4x4, chroma_idx;
4607 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4608 const uint8_t *scan, *scan8x8, *dc_scan;
4610 // fill_non_zero_count_cache(h);
4612 if(IS_INTERLACED(mb_type)){
4613 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4614 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4615 dc_scan= luma_dc_field_scan;
4617 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4618 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4619 dc_scan= luma_dc_zigzag_scan;
4622 dquant= get_se_golomb(&s->gb);
4624 if( dquant > 25 || dquant < -26 ){
4625 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's mb_qp_delta semantics */
4629 s->qscale += dquant;
4630 if(((unsigned)s->qscale) > 51){
4631 if(s->qscale<0) s->qscale+= 52;
4632 else s->qscale-= 52;
4635 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4636 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4637 if(IS_INTRA16x16(mb_type)){
4638 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4639 return -1; //FIXME continue if partitioned and other return -1 too
4642 assert((cbp&15) == 0 || (cbp&15) == 15);
/* Intra16x16 AC: 15 coeffs per 4x4 block, DC handled above (scan + 1) */
4645 for(i8x8=0; i8x8<4; i8x8++){
4646 for(i4x4=0; i4x4<4; i4x4++){
4647 const int index= i4x4 + 4*i8x8;
4648 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4654 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* non-Intra16x16 luma: per-8x8 block as signalled by the low 4 cbp bits */
4657 for(i8x8=0; i8x8<4; i8x8++){
4658 if(cbp & (1<<i8x8)){
4659 if(IS_8x8DCT(mb_type)){
4660 DCTELEM *buf = &h->mb[64*i8x8];
4662 for(i4x4=0; i4x4<4; i4x4++){
4663 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4664 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4667 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4668 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4670 for(i4x4=0; i4x4<4; i4x4++){
4671 const int index= i4x4 + 4*i8x8;
4673 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4679 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4680 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (2x2, no dequant table here) then chroma AC */
4686 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4687 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4693 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4694 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4695 for(i4x4=0; i4x4<4; i4x4++){
4696 const int index= 16 + 4*chroma_idx + i4x4;
4697 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4703 uint8_t * const nnz= &h->non_zero_count_cache[0];
4704 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4705 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4708 uint8_t * const nnz= &h->non_zero_count_cache[0];
4709 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4710 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4711 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4713 s->current_picture.qscale_table[mb_xy]= s->qscale;
4714 write_back_non_zero_count(h);
/* undo the MBAFF field ref_count doubling applied above */
4717 h->ref_count[0] >>= 1;
4718 h->ref_count[1] >>= 1;
/**
 * Decodes the CABAC mb_field_decoding_flag for the current MB pair.
 * The context (0..2) counts how many of the left and top neighbouring
 * pairs in the same slice are field-coded.
 */
4724 static int decode_cabac_field_decoding_flag(H264Context *h) {
4725 MpegEncContext * const s = &h->s;
4726 const int mb_x = s->mb_x;
/* address the top MB of the pair (mb_y rounded down to even) */
4727 const int mb_y = s->mb_y & ~1;
4728 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4729 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4731 unsigned int ctx = 0;
4733 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4736 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
/* cabac_state[70..72] are the mb_field_decoding_flag contexts */
4740 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base    first context index for this slice type's intra tree
 * @param intra_slice nonzero in I slices (neighbour-derived context is used)
 * @return 0 for I_4x4, 25 for I_PCM, otherwise 1..24 encoding the
 *         I16x16 prediction mode / cbp combination
 */
4743 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4744 uint8_t *state= &h->cabac_state[ctx_base];
4748 MpegEncContext * const s = &h->s;
4749 const int mba_xy = h->left_mb_xy[0];
4750 const int mbb_xy = h->top_mb_xy;
/* context increments when a same-slice neighbour is not I_4x4 */
4752 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4754 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4756 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4757 return 0; /* I4x4 */
4760 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4761 return 0; /* I4x4 */
/* terminate bin distinguishes PCM from I16x16 */
4764 if( get_cabac_terminate( &h->cabac ) )
4765 return 25; /* PCM */
4767 mb_type = 1; /* I16x16 */
4768 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4769 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4770 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
/* two final bins select the 16x16 luma prediction mode */
4771 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4772 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC for the current slice type.
 * I slices delegate entirely to decode_cabac_intra_mb_type(); P and B
 * slices decode their inter mb_type trees and fall back to the intra
 * decoder (with a type offset) for intra MBs in inter slices.
 */
4776 static int decode_cabac_mb_type( H264Context *h ) {
4777 MpegEncContext * const s = &h->s;
4779 if( h->slice_type_nos == FF_I_TYPE ) {
4780 return decode_cabac_intra_mb_type(h, 3, 1);
4781 } else if( h->slice_type_nos == FF_P_TYPE ) {
4782 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4784 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4785 /* P_L0_D16x16, P_8x8 */
4786 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4788 /* P_L0_D8x16, P_L0_D16x8 */
4789 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra MB inside a P slice: intra tree starts at context 17, types at +5 */
4792 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4794 } else if( h->slice_type_nos == FF_B_TYPE ) {
4795 const int mba_xy = h->left_mb_xy[0];
4796 const int mbb_xy = h->top_mb_xy;
/* context from non-direct same-slice neighbours */
4800 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4802 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4805 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4806 return 0; /* B_Direct_16x16 */
4808 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4809 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining bi-predictive / split types */
4812 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4813 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4814 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4815 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4817 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4818 else if( bits == 13 ) {
/* intra MB inside a B slice: intra tree at context 32, types at +23 */
4819 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4820 } else if( bits == 14 )
4821 return 11; /* B_L1_L0_8x16 */
4822 else if( bits == 15 )
4823 return 22; /* B_8x8 */
4825 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4826 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4828 /* TODO SI/SP frames? */
/**
 * Decodes the CABAC mb_skip_flag at position (mb_x, mb_y).
 * Context 0..2 counts same-slice neighbours that are NOT skipped;
 * B slices use a separate context set (offset +13 via ctx += 13,
 * selecting cabac_state[24..26] instead of [11..13]).
 */
4833 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4834 MpegEncContext * const s = &h->s;
4838 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4839 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* pick the neighbour MB of the pair that matches the current field parity */
4842 && h->slice_table[mba_xy] == h->slice_num
4843 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4844 mba_xy += s->mb_stride;
4846 mbb_xy = mb_xy - s->mb_stride;
4848 && h->slice_table[mbb_xy] == h->slice_num
4849 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4850 mbb_xy -= s->mb_stride;
4852 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4854 int mb_xy = h->mb_xy;
4856 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4859 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4861 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4864 if( h->slice_type_nos == FF_B_TYPE )
4866 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes a CABAC intra4x4 prediction mode.
 * If the first bin is set the predicted mode is used as-is; otherwise a
 * 3-bit rem_intra4x4_pred_mode is read (LSB first) and adjusted so the
 * predicted mode is skipped over.
 */
4869 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4872 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4875 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4876 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4877 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so all 9 modes stay reachable */
4879 if( mode >= pred_mode )
/**
 * Decodes the CABAC intra chroma prediction mode (0..3) as a truncated
 * unary code; the first bin's context depends on whether the neighbours'
 * stored chroma_pred_mode is nonzero.
 */
4885 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4886 const int mba_xy = h->left_mb_xy[0];
4887 const int mbb_xy = h->top_mb_xy;
4891 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4892 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4895 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4898 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
/* remaining bins of the truncated unary code share context 64+3 */
4901 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4903 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC, one bin per
 * 8x8 block. Each bin's context derives from whether the corresponding
 * left/top 8x8 block (from the neighbour MB or from already-decoded bits
 * of this cbp) was coded; -1 (all bits set) is used when the neighbour
 * is outside the slice.
 */
4909 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4910 int cbp_b, cbp_a, ctx, cbp = 0;
4912 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4913 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
/* cabac_state[73..76]: cbp_luma contexts; bit order is the raster order
 * of the four 8x8 luma blocks */
4915 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4916 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4917 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4918 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4919 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4920 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4921 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4922 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma part of the coded_block_pattern with CABAC:
 * 0 = no chroma coefficients, 1 = DC only, 2 = DC + AC.
 * Neighbour chroma cbp values (bits 4-5 of the stored cbp) drive the
 * context of each of the two bins.
 */
4925 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4929 cbp_a = (h->left_cbp>>4)&0x03;
4930 cbp_b = (h-> top_cbp>>4)&0x03;
4933 if( cbp_a > 0 ) ctx++;
4934 if( cbp_b > 0 ) ctx += 2;
4935 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin (contexts 77+4..77+7) decides DC-only vs DC+AC */
4939 if( cbp_a == 2 ) ctx++;
4940 if( cbp_b == 2 ) ctx += 2;
4941 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC as a unary code (contexts 60..63),
 * then unfolds the unsigned value into the signed delta. The first
 * bin's context depends on whether the previous MB had a nonzero delta.
 */
4943 static int decode_cabac_mb_dqp( H264Context *h) {
4947 if( h->last_qscale_diff != 0 )
4950 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4956 if(val > 102) //prevent infinite loop
/* odd val => positive delta, even => negative (zigzag unfold) */
4963 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type (8x8, 8x4, 4x8 or 4x4) with CABAC,
 * using contexts 21..23; returns the sub_mb_type index.
 */
4965 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4966 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4968 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4970 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type with CABAC (contexts 36..39);
 * returns the sub_mb_type index (0 = B_Direct_8x8, up to the 4x4 types).
 */
4974 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4976 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4977 return 0; /* B_Direct_8x8 */
4978 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4979 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4981 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4982 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4983 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* two more suffix bins refine the remaining 8x4/4x8/bi types */
4986 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4987 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; context 399..401 is selected by how
 * many neighbours already use the 8x8 transform. */
4991 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4992 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx for block n of the given list with CABAC, as a unary
 * code over contexts 54+ctx. Neighbour ref indices build the initial
 * context; in B slices direct-predicted neighbours are excluded.
 * On overflow (>= 32) an error is logged and 0 returned.
 */
4995 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4996 int refa = h->ref_cache[list][scan8[n] - 1];
4997 int refb = h->ref_cache[list][scan8[n] - 8];
5001 if( h->slice_type_nos == FF_B_TYPE) {
/* direct-mode neighbours do not contribute to the context */
5002 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5004 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5013 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5019 if(ref >= 32 /*h->ref_list[list]*/){
5020 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5021 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component (l: 0 = x, 1 = y) with
 * CABAC: unary prefix up to 9, Exp-Golomb (k=3) bypass suffix, then a
 * bypass sign bit. The initial context comes from the magnitude of the
 * neighbouring mvd values.
 */
5027 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5028 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5029 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5030 int ctxbase = (l == 0) ? 40 : 47;
5035 else if( amvd > 32 )
5040 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary part, capped at 9 before switching to the bypass suffix */
5045 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5053 while( get_cabac_bypass( &h->cabac ) ) {
5057 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5062 if( get_cabac_bypass( &h->cabac ) )
5066 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context for block category 'cat' and
 * block index 'idx': ctx = nza_coded + 2*nzb_coded + 4*cat, where the
 * neighbour coded-ness comes either from the stored cbp bits (DC cases)
 * or from the non_zero_count_cache (AC/luma cases).
 */
5069 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* DC blocks: neighbour coded flags live in bits of left_cbp/top_cbp */
5075 nza = h->left_cbp&0x100;
5076 nzb = h-> top_cbp&0x100;
5078 nza = (h->left_cbp>>(6+idx))&0x01;
5079 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC: neighbours at scan8[16+idx] offsets in the nnz cache */
5083 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5084 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5086 assert(cat == 1 || cat == 2);
5087 nza = h->non_zero_count_cache[scan8[idx] - 1];
5088 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5098 return ctx + 4 * cat;
/* Context offset of the last_significant_coeff_flag for each of the 63
 * scan positions of an 8x8 block (spec Table 9-43 style mapping);
 * byte-aligned so the asm significance decoder can index it directly. */
5101 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5102 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5103 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5104 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5105 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one CABAC-coded residual block: coded_block_flag, the
 * significance map, then coefficient levels and signs in reverse scan
 * order, with optional dequantization via qmul.
 * 'is_dc' is a compile-time constant so the DC/non-DC specializations
 * fold the branches away.
 */
5108 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* context base offsets per block category, [frame/field][cat] */
5109 static const int significant_coeff_flag_offset[2][6] = {
5110 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5111 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5113 static const int last_coeff_flag_offset[2][6] = {
5114 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5115 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5117 static const int coeff_abs_level_m1_offset[6] = {
5118 227+0, 227+10, 227+20, 227+30, 227+39, 426
5120 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5121 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5122 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5123 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5124 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5125 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5126 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5127 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5128 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5130 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5131 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5132 * map node ctx => cabac ctx for level=1 */
5133 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5134 /* map node ctx => cabac ctx for level>1 */
5135 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5136 static const uint8_t coeff_abs_level_transition[2][8] = {
5137 /* update node ctx after decoding a level=1 */
5138 { 1, 2, 3, 3, 4, 5, 6, 7 },
5139 /* update node ctx after decoding a level>1 */
5140 { 4, 4, 4, 4, 5, 6, 7, 7 }
5146 int coeff_count = 0;
5149 uint8_t *significant_coeff_ctx_base;
5150 uint8_t *last_coeff_ctx_base;
5151 uint8_t *abs_level_m1_ctx_base;
/* work on a stack copy of the CABAC state: helps register allocation */
5154 #define CABAC_ON_STACK
5156 #ifdef CABAC_ON_STACK
5159 cc.range = h->cabac.range;
5160 cc.low = h->cabac.low;
5161 cc.bytestream= h->cabac.bytestream;
5163 #define CC &h->cabac
5167 /* cat: 0-> DC 16x16 n = 0
5168 * 1-> AC 16x16 n = luma4x4idx
5169 * 2-> Luma4x4 n = luma4x4idx
5170 * 3-> DC Chroma n = iCbCr
5171 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5172 * 5-> Luma8x8 n = 4 * luma8x8idx
5175 /* read coded block flag */
5176 if( is_dc || cat != 5 ) {
5177 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* block entirely zero: record nnz=0 and write back the CABAC state */
5180 h->non_zero_count_cache[scan8[16+n]] = 0;
5182 h->non_zero_count_cache[scan8[n]] = 0;
5185 #ifdef CABAC_ON_STACK
5186 h->cabac.range = cc.range ;
5187 h->cabac.low = cc.low ;
5188 h->cabac.bytestream= cc.bytestream;
5194 significant_coeff_ctx_base = h->cabac_state
5195 + significant_coeff_flag_offset[MB_FIELD][cat];
5196 last_coeff_ctx_base = h->cabac_state
5197 + last_coeff_flag_offset[MB_FIELD][cat];
5198 abs_level_m1_ctx_base = h->cabac_state
5199 + coeff_abs_level_m1_offset[cat];
/* significance map: which scan positions carry a nonzero coefficient */
5201 if( !is_dc && cat == 5 ) {
5202 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5203 for(last= 0; last < coefs; last++) { \
5204 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5205 if( get_cabac( CC, sig_ctx )) { \
5206 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5207 index[coeff_count++] = last; \
5208 if( get_cabac( CC, last_ctx ) ) { \
5214 if( last == max_coeff -1 ) {\
5215 index[coeff_count++] = last;\
5217 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5218 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5219 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5221 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5223 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5225 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5228 assert(coeff_count > 0);
/* record coded-ness: cbp_table bits for DC, nnz cache otherwise */
5232 h->cbp_table[h->mb_xy] |= 0x100;
5234 h->cbp_table[h->mb_xy] |= 0x40 << n;
5237 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5239 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5241 assert( cat == 1 || cat == 2 );
5242 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* levels, decoded from the last significant coefficient backwards */
5247 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5249 int j= scantable[index[--coeff_count]];
5251 if( get_cabac( CC, ctx ) == 0 ) {
5252 node_ctx = coeff_abs_level_transition[0][node_ctx];
5254 block[j] = get_cabac_bypass_sign( CC, -1);
5256 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5260 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5261 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* |level|-1: unary up to 14, then Exp-Golomb (k=0) bypass escape */
5263 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5267 if( coeff_abs >= 15 ) {
5269 while( get_cabac_bypass( CC ) ) {
5275 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5281 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5283 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5286 } while( coeff_count );
5287 #ifdef CABAC_ON_STACK
5288 h->cabac.range = cc.range ;
5289 h->cabac.low = cc.low ;
5290 h->cabac.bytestream= cc.bytestream;
/* Thin wrappers over decode_cabac_residual_internal(): with CONFIG_SMALL
 * unset, two specializations (is_dc folded to a constant) keep the hot
 * paths branch-free; otherwise a single shared instantiation is used.
 * Categories 0 (luma DC) and 3 (chroma DC) are the DC cases. */
5295 #ifndef CONFIG_SMALL
5296 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5297 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5300 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5301 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5305 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5307 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5309 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5310 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the top/left neighbour macroblock indices (h->top_mb_xy,
 * h->left_mb_xy[]) for the current MB. The simple frame-coded case is the
 * straight -stride / -1 offsets; MBAFF pairs need corrections depending on
 * whether the current, top and left MB pairs are frame- or field-coded. */
5314 static inline void compute_mb_neighbors(H264Context *h)
5316 MpegEncContext * const s = &h->s;
5317 const int mb_xy = h->mb_xy;
// default neighbours for the non-MBAFF layout
5318 h->top_mb_xy = mb_xy - s->mb_stride;
5319 h->left_mb_xy[0] = mb_xy - 1;
// MBAFF: work in units of MB pairs (pair_xy addresses the top MB of the pair)
5321 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5322 const int top_pair_xy = pair_xy - s->mb_stride;
5323 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5324 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5325 const int curr_mb_frame_flag = !MB_FIELD;
5326 const int bottom = (s->mb_y & 1);
5328 ? !curr_mb_frame_flag // bottom macroblock
5329 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5331 h->top_mb_xy -= s->mb_stride;
// left neighbour must come from the pair's top MB when coding modes differ
5333 if (left_mb_frame_flag != curr_mb_frame_flag) {
5334 h->left_mb_xy[0] = pair_xy - 1;
5336 } else if (FIELD_PICTURE) {
5337 h->top_mb_xy -= s->mb_stride;
5343 * decodes a macroblock with CABAC entropy coding
5344 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5346 static int decode_mb_cabac(H264Context *h) {
5347 MpegEncContext * const s = &h->s;
5349 int mb_type, partition_count, cbp = 0;
5350 int dct8x8_allowed= h->pps.transform_8x8_mode;
5352 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5354 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5356 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* ---- skip flag handling (P/B slices only; I slices have no skip) ---- */
5357 if( h->slice_type_nos != FF_I_TYPE ) {
5359 /* a skipped mb needs the aff flag from the following mb */
5360 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5361 predict_field_decoding_flag(h);
5362 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5363 skip = h->next_mb_skipped;
5365 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5366 /* read skip flags */
5368 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5369 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
// peek at the bottom MB of the pair: its skip flag decides whether the
// field/frame flag must be predicted rather than decoded
5370 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5371 if(h->next_mb_skipped)
5372 predict_field_decoding_flag(h);
5374 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
// skipped MB: no residual, no chroma mode, no qp delta
5379 h->cbp_table[mb_xy] = 0;
5380 h->chroma_pred_mode_table[mb_xy] = 0;
5381 h->last_qscale_diff = 0;
5388 if( (s->mb_y&1) == 0 )
5390 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5393 h->prev_mb_skipped = 0;
/* ---- macroblock type ---- */
5395 compute_mb_neighbors(h);
5396 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5397 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
// map the raw CABAC mb_type index through the per-slice-type tables;
// indices beyond the inter range fall through to the intra table
5401 if( h->slice_type_nos == FF_B_TYPE ) {
5403 partition_count= b_mb_type_info[mb_type].partition_count;
5404 mb_type= b_mb_type_info[mb_type].type;
5407 goto decode_intra_mb;
5409 } else if( h->slice_type_nos == FF_P_TYPE ) {
5411 partition_count= p_mb_type_info[mb_type].partition_count;
5412 mb_type= p_mb_type_info[mb_type].type;
5415 goto decode_intra_mb;
5418 if(h->slice_type == FF_SI_TYPE && mb_type)
5420 assert(h->slice_type_nos == FF_I_TYPE);
5422 partition_count = 0;
5423 cbp= i_mb_type_info[mb_type].cbp;
5424 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5425 mb_type= i_mb_type_info[mb_type].type;
5428 mb_type |= MB_TYPE_INTERLACED;
5430 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- IPCM: raw samples follow; restart the CABAC decoder after them ---- */
5432 if(IS_INTRA_PCM(mb_type)) {
5435 // We assume these blocks are very rare so we do not optimize it.
5436 // FIXME The two following lines get the bitstream position in the cabac
5437 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5438 ptr= h->cabac.bytestream;
5439 if(h->cabac.low&0x1) ptr--;
5441 if(h->cabac.low&0x1FF) ptr--;
5444 // The pixels are stored in the same order as levels in h->mb array.
5445 memcpy(h->mb, ptr, 256); ptr+=256;
5447 memcpy(h->mb+128, ptr, 128); ptr+=128;
5450 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5452 // All blocks are present
5453 h->cbp_table[mb_xy] = 0x1ef;
5454 h->chroma_pred_mode_table[mb_xy] = 0;
5455 // In deblocking, the quantizer is 0
5456 s->current_picture.qscale_table[mb_xy]= 0;
5457 // All coeffs are present
5458 memset(h->non_zero_count[mb_xy], 16, 16);
5459 s->current_picture.mb_type[mb_xy]= mb_type;
5460 h->last_qscale_diff = 0;
// MBAFF: reference counts are doubled (field refs); undone at the end
5465 h->ref_count[0] <<= 1;
5466 h->ref_count[1] <<= 1;
5469 fill_caches(h, mb_type, 0);
/* ---- prediction-mode / motion parsing, by macroblock class ---- */
5471 if( IS_INTRA( mb_type ) ) {
5473 if( IS_INTRA4x4( mb_type ) ) {
5474 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5475 mb_type |= MB_TYPE_8x8DCT;
// 8x8 transform: one pred mode per 8x8 block, replicated over its 4x4s
5476 for( i = 0; i < 16; i+=4 ) {
5477 int pred = pred_intra_mode( h, i );
5478 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5479 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5482 for( i = 0; i < 16; i++ ) {
5483 int pred = pred_intra_mode( h, i );
5484 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5486 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5489 write_back_intra_pred_mode(h);
5490 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5492 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5493 if( h->intra16x16_pred_mode < 0 ) return -1;
5496 h->chroma_pred_mode_table[mb_xy] =
5497 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5499 pred_mode= check_intra_pred_mode( h, pred_mode );
5500 if( pred_mode < 0 ) return -1;
5501 h->chroma_pred_mode= pred_mode;
5503 } else if( partition_count == 4 ) {
// 8x8 sub-macroblock partitioning: parse sub types, refs, then MVs
5504 int i, j, sub_partition_count[4], list, ref[2][4];
5506 if( h->slice_type_nos == FF_B_TYPE ) {
5507 for( i = 0; i < 4; i++ ) {
5508 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5509 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5510 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5512 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5513 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5514 pred_direct_motion(h, &mb_type);
5515 h->ref_cache[0][scan8[4]] =
5516 h->ref_cache[1][scan8[4]] =
5517 h->ref_cache[0][scan8[12]] =
5518 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5519 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5520 for( i = 0; i < 4; i++ )
5521 if( IS_DIRECT(h->sub_mb_type[i]) )
5522 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5526 for( i = 0; i < 4; i++ ) {
5527 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5528 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5529 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices per 8x8 block (only coded when more than one ref)
5533 for( list = 0; list < h->list_count; list++ ) {
5534 for( i = 0; i < 4; i++ ) {
5535 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5536 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5537 if( h->ref_count[list] > 1 )
5538 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5544 h->ref_cache[list][ scan8[4*i]+1 ]=
5545 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5550 dct8x8_allowed = get_dct8x8_allowed(h);
// motion vectors: one per sub-partition; mvd is CABAC-coded relative
// to the median prediction (mpx, mpy)
5552 for(list=0; list<h->list_count; list++){
5554 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5555 if(IS_DIRECT(h->sub_mb_type[i])){
5556 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5560 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5561 const int sub_mb_type= h->sub_mb_type[i];
5562 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5563 for(j=0; j<sub_partition_count[i]; j++){
5566 const int index= 4*i + block_width*j;
5567 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5568 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5569 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5571 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5572 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5573 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// replicate mv/mvd across the 4x4 cells the sub-partition covers
5575 if(IS_SUB_8X8(sub_mb_type)){
5577 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5579 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5582 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5584 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5585 }else if(IS_SUB_8X4(sub_mb_type)){
5586 mv_cache[ 1 ][0]= mx;
5587 mv_cache[ 1 ][1]= my;
5589 mvd_cache[ 1 ][0]= mx - mpx;
5590 mvd_cache[ 1 ][1]= my - mpy;
5591 }else if(IS_SUB_4X8(sub_mb_type)){
5592 mv_cache[ 8 ][0]= mx;
5593 mv_cache[ 8 ][1]= my;
5595 mvd_cache[ 8 ][0]= mx - mpx;
5596 mvd_cache[ 8 ][1]= my - mpy;
5598 mv_cache[ 0 ][0]= mx;
5599 mv_cache[ 0 ][1]= my;
5601 mvd_cache[ 0 ][0]= mx - mpx;
5602 mvd_cache[ 0 ][1]= my - mpy;
// unused list for this 8x8: zero mv/mvd cache entries
5605 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5606 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5607 p[0] = p[1] = p[8] = p[9] = 0;
5608 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5612 } else if( IS_DIRECT(mb_type) ) {
5613 pred_direct_motion(h, &mb_type);
5614 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5615 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5616 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// 16x16 / 16x8 / 8x16 inter partitions
5618 int list, mx, my, i, mpx, mpy;
5619 if(IS_16X16(mb_type)){
5620 for(list=0; list<h->list_count; list++){
5621 if(IS_DIR(mb_type, 0, list)){
5622 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5623 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5625 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5627 for(list=0; list<h->list_count; list++){
5628 if(IS_DIR(mb_type, 0, list)){
5629 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5631 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5632 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5633 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5635 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5636 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5638 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5641 else if(IS_16X8(mb_type)){
5642 for(list=0; list<h->list_count; list++){
5644 if(IS_DIR(mb_type, i, list)){
5645 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5646 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5648 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5651 for(list=0; list<h->list_count; list++){
5653 if(IS_DIR(mb_type, i, list)){
5654 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5655 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5656 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5657 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5659 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5660 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5662 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5663 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5668 assert(IS_8X16(mb_type));
5669 for(list=0; list<h->list_count; list++){
5671 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5672 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5673 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5675 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5678 for(list=0; list<h->list_count; list++){
5680 if(IS_DIR(mb_type, i, list)){
5681 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5682 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5683 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5685 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5686 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5687 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5689 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5690 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5697 if( IS_INTER( mb_type ) ) {
5698 h->chroma_pred_mode_table[mb_xy] = 0;
5699 write_back_motion( h, mb_type );
/* ---- coded block pattern ---- */
5702 if( !IS_INTRA16x16( mb_type ) ) {
5703 cbp = decode_cabac_mb_cbp_luma( h );
5705 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5708 h->cbp_table[mb_xy] = h->cbp = cbp;
5710 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5711 if( decode_cabac_mb_transform_size( h ) )
5712 mb_type |= MB_TYPE_8x8DCT;
5714 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residual coefficients ---- */
5716 if( cbp || IS_INTRA16x16( mb_type ) ) {
5717 const uint8_t *scan, *scan8x8, *dc_scan;
5718 const uint32_t *qmul;
// pick field or frame scan order; the *_q0 tables are the qscale==0 variants
5721 if(IS_INTERLACED(mb_type)){
5722 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5723 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5724 dc_scan= luma_dc_field_scan;
5726 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5727 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5728 dc_scan= luma_dc_zigzag_scan;
5731 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5732 if( dqp == INT_MIN ){
5733 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec's mb_qp_delta semantics
5737 if(((unsigned)s->qscale) > 51){
5738 if(s->qscale<0) s->qscale+= 52;
5739 else s->qscale-= 52;
5741 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5742 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5744 if( IS_INTRA16x16( mb_type ) ) {
5746 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5747 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5750 qmul = h->dequant4_coeff[0][s->qscale];
5751 for( i = 0; i < 16; i++ ) {
5752 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5753 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5756 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// non-intra16x16 luma: one cbp bit per 8x8 block
5760 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5761 if( cbp & (1<<i8x8) ) {
5762 if( IS_8x8DCT(mb_type) ) {
5763 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5764 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5766 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5767 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5768 const int index = 4*i8x8 + i4x4;
5769 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5771 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5772 //STOP_TIMER("decode_residual")
5776 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5777 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC (cat 3), then chroma AC (cat 4) per the cbp>>4 bits
5784 for( c = 0; c < 2; c++ ) {
5785 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5786 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5792 for( c = 0; c < 2; c++ ) {
5793 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5794 for( i = 0; i < 4; i++ ) {
5795 const int index = 16 + 4 * c + i;
5796 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5797 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5801 uint8_t * const nnz= &h->non_zero_count_cache[0];
5802 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5803 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// no residual at all: clear the whole nnz cache and the qp delta
5806 uint8_t * const nnz= &h->non_zero_count_cache[0];
5807 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5808 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5809 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+9 ] = 0;
5810 h->last_qscale_diff = 0;
5813 s->current_picture.qscale_table[mb_xy]= s->qscale;
5814 write_back_non_zero_count(h);
// undo the MBAFF ref-count doubling done above
5817 h->ref_count[0] >>= 1;
5818 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (16 pixels tall).
 * bS<4 edges go through the DSP tc0-clipped filter; bS==4 (intra edge)
 * uses the strong filter written out inline below (H.264 spec 8.7). */
5825 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5827 const int index_a = qp + h->slice_alpha_c0_offset;
// alpha/beta tables are offset by 52 so negative qp+offset indexes stay valid
5828 const int alpha = (alpha_table+52)[index_a];
5829 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5834 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5835 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5837 /* 16px edge length, because bS=4 is triggered by being at
5838 * the edge of an intra MB, so all 4 bS are the same */
5839 for( d = 0; d < 16; d++ ) {
// p0..p2 are left of the edge, q0..q2 right of it
5840 const int p0 = pix[-1];
5841 const int p1 = pix[-2];
5842 const int p2 = pix[-3];
5844 const int q0 = pix[0];
5845 const int q1 = pix[1];
5846 const int q2 = pix[2];
// filterSamplesFlag: skip this row unless the edge looks like a blocking
// artifact rather than a real image edge
5848 if( FFABS( p0 - q0 ) < alpha &&
5849 FFABS( p1 - p0 ) < beta &&
5850 FFABS( q1 - q0 ) < beta ) {
5852 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5853 if( FFABS( p2 - p0 ) < beta)
5855 const int p3 = pix[-4];
5856 /* strong filter: modify p0..p2 */
5857 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5858 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5859 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5862 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5864 if( FFABS( q2 - q0 ) < beta)
5866 const int q3 = pix[3];
5867 /* strong filter: modify q0..q2 */
5868 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5869 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5870 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5873 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant: only p0/q0 are touched */
5877 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5878 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5880 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge. Unlike luma, the DSP handles both the
 * normal (tc-clipped) and the intra (bS==4) case; note the chroma tc is
 * tc0 + 1 per the spec. */
5886 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5888 const int index_a = qp + h->slice_alpha_c0_offset;
5889 const int alpha = (alpha_table+52)[index_a];
5890 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5895 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5896 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5898 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks the leftmost vertical luma edge of an MBAFF macroblock pair:
 * 8 bS values and 2 QPs (one per neighbouring field/frame MB), processed
 * row by row in C since rows alternate between the two parameter sets. */
5902 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5904 for( i = 0; i < 16; i++, pix += stride) {
5910 int bS_index = (i >> 1);
5913 bS_index |= (i & 1);
5916 if( bS[bS_index] == 0 ) {
// which of the two QPs applies depends on field vs frame coding
5920 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5921 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5922 alpha = (alpha_table+52)[index_a];
5923 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (tc0-clipped) filter for bS 1..3 */
5925 if( bS[bS_index] < 4 ) {
5926 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5927 const int p0 = pix[-1];
5928 const int p1 = pix[-2];
5929 const int p2 = pix[-3];
5930 const int q0 = pix[0];
5931 const int q1 = pix[1];
5932 const int q2 = pix[2];
5934 if( FFABS( p0 - q0 ) < alpha &&
5935 FFABS( p1 - p0 ) < beta &&
5936 FFABS( q1 - q0 ) < beta ) {
// p1/q1 are only filtered when their side is smooth enough
5940 if( FFABS( p2 - p0 ) < beta ) {
5941 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5944 if( FFABS( q2 - q0 ) < beta ) {
5945 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5949 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5950 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5951 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5952 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter for bS==4 (intra edge) */
5955 const int p0 = pix[-1];
5956 const int p1 = pix[-2];
5957 const int p2 = pix[-3];
5959 const int q0 = pix[0];
5960 const int q1 = pix[1];
5961 const int q2 = pix[2];
5963 if( FFABS( p0 - q0 ) < alpha &&
5964 FFABS( p1 - p0 ) < beta &&
5965 FFABS( q1 - q0 ) < beta ) {
5967 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5968 if( FFABS( p2 - p0 ) < beta)
5970 const int p3 = pix[-4];
5971 /* strong: modify p0..p2 */
5972 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5973 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5974 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5977 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5979 if( FFABS( q2 - q0 ) < beta)
5981 const int q3 = pix[3];
5982 /* strong: modify q0..q2 */
5983 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5984 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5985 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5988 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant: only p0/q0 are touched */
5992 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5993 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5995 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: leftmost vertical chroma
 * edge of an MBAFF pair, 8 rows, 8 bS values and 2 QPs. Chroma only ever
 * filters p0/q0. */
6000 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6002 for( i = 0; i < 8; i++, pix += stride) {
6010 if( bS[bS_index] == 0 ) {
6014 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6015 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6016 alpha = (alpha_table+52)[index_a];
6017 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filter, tc = tc0 + 1 for chroma */
6019 if( bS[bS_index] < 4 ) {
6020 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6021 const int p0 = pix[-1];
6022 const int p1 = pix[-2];
6023 const int q0 = pix[0];
6024 const int q1 = pix[1];
6026 if( FFABS( p0 - q0 ) < alpha &&
6027 FFABS( p1 - p0 ) < beta &&
6028 FFABS( q1 - q0 ) < beta ) {
6029 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6031 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6032 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6033 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (bS==4) chroma filter */
6036 const int p0 = pix[-1];
6037 const int p1 = pix[-2];
6038 const int q0 = pix[0];
6039 const int q1 = pix[1];
6041 if( FFABS( p0 - q0 ) < alpha &&
6042 FFABS( p1 - p0 ) < beta &&
6043 FFABS( q1 - q0 ) < beta ) {
6045 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6046 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6047 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; mirror of filter_mb_edgev with
 * pixel offsets expressed in strides instead of columns. */
6053 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6055 const int index_a = qp + h->slice_alpha_c0_offset;
6056 const int alpha = (alpha_table+52)[index_a];
6057 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6058 const int pix_next = stride;
6063 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6064 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6066 /* 16px edge length, see filter_mb_edgev */
6067 for( d = 0; d < 16; d++ ) {
// p0..p2 above the edge, q0..q2 below it
6068 const int p0 = pix[-1*pix_next];
6069 const int p1 = pix[-2*pix_next];
6070 const int p2 = pix[-3*pix_next];
6071 const int q0 = pix[0];
6072 const int q1 = pix[1*pix_next];
6073 const int q2 = pix[2*pix_next];
6075 if( FFABS( p0 - q0 ) < alpha &&
6076 FFABS( p1 - p0 ) < beta &&
6077 FFABS( q1 - q0 ) < beta ) {
6079 const int p3 = pix[-4*pix_next];
6080 const int q3 = pix[ 3*pix_next];
6082 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6083 if( FFABS( p2 - p0 ) < beta) {
6084 /* strong filter: modify p0..p2 */
6085 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6086 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6087 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6090 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6092 if( FFABS( q2 - q0 ) < beta) {
6093 /* strong filter: modify q0..q2 */
6094 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6095 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6096 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6099 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant: only p0/q0 are touched */
6103 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6104 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6106 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblocks one horizontal chroma edge; mirror of filter_mb_edgecv using
 * the vertical-direction DSP routines. Chroma tc is tc0 + 1. */
6113 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6115 const int index_a = qp + h->slice_alpha_c0_offset;
6116 const int alpha = (alpha_table+52)[index_a];
6117 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6122 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6123 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6125 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock. Handles only the common
 * non-MBAFF cases; anything unusual (picture border, missing DSP strength
 * helper, per-plane chroma qp offsets, cross-slice edges with
 * deblocking_filter==2) is delegated to the general filter_mb(). */
6129 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6130 MpegEncContext * const s = &h->s;
6131 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6133 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6137 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6139 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6140 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6141 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6144 assert(!FRAME_MBAFF);
// average this MB's qp with each neighbour's for the shared edges
6146 mb_type = s->current_picture.mb_type[mb_xy];
6147 qp = s->current_picture.qscale_table[mb_xy];
6148 qp0 = s->current_picture.qscale_table[mb_xy-1];
6149 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6150 qpc = get_chroma_qp( h, 0, qp );
6151 qpc0 = get_chroma_qp( h, 0, qp0 );
6152 qpc1 = get_chroma_qp( h, 0, qp1 );
6153 qp0 = (qp + qp0 + 1) >> 1;
6154 qp1 = (qp + qp1 + 1) >> 1;
6155 qpc0 = (qpc + qpc0 + 1) >> 1;
6156 qpc1 = (qpc + qpc1 + 1) >> 1;
// below this threshold alpha/beta are zero, i.e. filtering is a no-op
6157 qp_thresh = 15 - h->slice_alpha_c0_offset;
6158 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6159 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: boundary strengths are constant (4 on MB edges, 3 for
 * horizontal edges in field pictures, 3 internally) */
6162 if( IS_INTRA(mb_type) ) {
6163 int16_t bS4[4] = {4,4,4,4};
6164 int16_t bS3[4] = {3,3,3,3};
6165 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6166 if( IS_8x8DCT(mb_type) ) {
6167 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6168 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6169 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6170 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6172 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6173 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6174 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6175 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6176 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6177 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6178 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6179 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6181 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6182 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6183 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6184 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6185 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6186 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6187 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6188 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge via the DSP helper, then override the
 * outer edges to 4 (or 3 in field pictures) where a neighbour is intra */
6191 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6192 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6194 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6196 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6198 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6199 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6200 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6201 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6203 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6204 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6205 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6206 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6208 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6209 bSv[0][0] = 0x0004000400040004ULL;
6210 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6211 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* dir 0 = vertical edges, dir 1 = horizontal; edge 0 uses the averaged
 * neighbour qp (qp0/qp1), internal edges use this MB's qp */
6213 #define FILTER(hv,dir,edge)\
6214 if(bSv[dir][edge]) {\
6215 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6217 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6218 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6224 } else if( IS_8x8DCT(mb_type) ) {
6243 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6244 MpegEncContext * const s = &h->s;
6245 const int mb_xy= mb_x + mb_y*s->mb_stride;
6246 const int mb_type = s->current_picture.mb_type[mb_xy];
6247 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6248 int first_vertical_edge_done = 0;
6251 //for sufficiently low qp, filtering wouldn't do anything
6252 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6254 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6255 int qp = s->current_picture.qscale_table[mb_xy];
6257 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6258 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6263 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6264 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6265 int top_type, left_type[2];
6266 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6267 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6268 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6270 if(IS_8x8DCT(top_type)){
6271 h->non_zero_count_cache[4+8*0]=
6272 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6273 h->non_zero_count_cache[6+8*0]=
6274 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6276 if(IS_8x8DCT(left_type[0])){
6277 h->non_zero_count_cache[3+8*1]=
6278 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6280 if(IS_8x8DCT(left_type[1])){
6281 h->non_zero_count_cache[3+8*3]=
6282 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6285 if(IS_8x8DCT(mb_type)){
6286 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6287 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6289 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6290 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6292 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6293 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6295 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6296 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6301 // left mb is in picture
6302 && h->slice_table[mb_xy-1] != 255
6303 // and current and left pair do not have the same interlaced type
6304 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6305 // and left mb is in the same slice if deblocking_filter == 2
6306 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6307 /* First vertical edge is different in MBAFF frames
6308 * There are 8 different bS to compute and 2 different Qp
6310 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6311 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6316 int mb_qp, mbn0_qp, mbn1_qp;
6318 first_vertical_edge_done = 1;
6320 if( IS_INTRA(mb_type) )
6321 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6323 for( i = 0; i < 8; i++ ) {
6324 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6326 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6328 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6329 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6330 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6337 mb_qp = s->current_picture.qscale_table[mb_xy];
6338 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6339 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6340 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6341 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6342 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6343 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6344 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6345 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6346 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6347 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6348 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6349 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6352 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6353 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6354 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6355 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6356 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6358 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6359 for( dir = 0; dir < 2; dir++ )
6362 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6363 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6364 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &15 ][0] + (MB_MBAFF ? 20 : 2);
6365 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&15 ][0] + (MB_MBAFF ? 20 : 2);
6366 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6368 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6369 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6370 // how often to recheck mv-based bS when iterating between edges
6371 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6372 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6373 // how often to recheck mv-based bS when iterating along each edge
6374 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6376 if (first_vertical_edge_done) {
6378 first_vertical_edge_done = 0;
6381 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6384 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6385 && !IS_INTERLACED(mb_type)
6386 && IS_INTERLACED(mbm_type)
6388 // This is a special case in the norm where the filtering must
6389 // be done twice (one each of the field) even if we are in a
6390 // frame macroblock.
6392 static const int nnz_idx[4] = {4,5,6,3};
6393 unsigned int tmp_linesize = 2 * linesize;
6394 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6395 int mbn_xy = mb_xy - 2 * s->mb_stride;
6400 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6401 if( IS_INTRA(mb_type) ||
6402 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6403 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6405 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6406 for( i = 0; i < 4; i++ ) {
6407 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6408 mbn_nnz[nnz_idx[i]] != 0 )
6414 // Do not use s->qscale as luma quantizer because it has not the same
6415 // value in IPCM macroblocks.
6416 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6417 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6418 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6419 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6420 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6421 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6422 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6423 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6430 for( edge = start; edge < edges; edge++ ) {
6431 /* mbn_xy: neighbor macroblock */
6432 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6433 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6434 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6438 if( (edge&1) && IS_8x8DCT(mb_type) )
6441 if( IS_INTRA(mb_type) ||
6442 IS_INTRA(mbn_type) ) {
6445 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6446 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6455 bS[0] = bS[1] = bS[2] = bS[3] = value;
6460 if( edge & mask_edge ) {
6461 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6464 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6465 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6468 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6469 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6470 int bn_idx= b_idx - (dir ? 8:1);
6473 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6474 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6475 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6476 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6479 if(h->slice_type_nos == FF_B_TYPE && v){
6481 for( l = 0; !v && l < 2; l++ ) {
6483 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6484 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6485 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6489 bS[0] = bS[1] = bS[2] = bS[3] = v;
6495 for( i = 0; i < 4; i++ ) {
6496 int x = dir == 0 ? edge : i;
6497 int y = dir == 0 ? i : edge;
6498 int b_idx= 8 + 4 + x + 8*y;
6499 int bn_idx= b_idx - (dir ? 8:1);
6501 if( h->non_zero_count_cache[b_idx] != 0 ||
6502 h->non_zero_count_cache[bn_idx] != 0 ) {
6508 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6509 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6510 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6511 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6517 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6519 for( l = 0; l < 2; l++ ) {
6521 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6522 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6523 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6532 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6537 // Do not use s->qscale as luma quantizer because it has not the same
6538 // value in IPCM macroblocks.
6539 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6540 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6541 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6542 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6544 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6545 if( (edge&1) == 0 ) {
6546 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6547 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6548 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6549 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6552 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6553 if( (edge&1) == 0 ) {
6554 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6555 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6556 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6557 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of the current slice.
 * Dispatches to the CABAC or CAVLC macroblock decoder depending on
 * pps->cabac and loops until the slice ends or an error occurs.
 * Returns 0 when the slice ended cleanly, -1 on error.
 * NOTE(review): several original source lines between the statements
 * below are not visible in this extract. */
6564 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6565 MpegEncContext * const s = &h->s;
/* For data-partitioned frames only AC end/error status is reported here. */
6566 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6570 if( h->pps.cabac ) {
/* CABAC slice data is byte-aligned; skip the alignment bits. */
6574 align_get_bits( &s->gb );
6577 ff_init_cabac_states( &h->cabac);
6578 ff_init_cabac_decoder( &h->cabac,
6579 s->gb.buffer + get_bits_count(&s->gb)/8,
6580 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
/* Initialize the 460 CABAC context states from the QP-dependent init
 * tables: I-slice table vs. P/B table selected by cabac_init_idc. */
6581 /* calculate pre-state */
6582 for( i= 0; i < 460; i++ ) {
6584 if( h->slice_type_nos == FF_I_TYPE )
6585 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6587 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte: 2*state plus most-probable-symbol bit. */
6590 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6592 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* Main CABAC macroblock loop. */
6597 int ret = decode_mb_cabac(h);
6599 //STOP_TIMER("decode_mb_cabac")
6601 if(ret>=0) hl_decode_mb(h);
/* In MBAFF mode the bottom macroblock of the pair is decoded here too. */
6603 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6606 if(ret>=0) ret = decode_mb_cabac(h);
6608 if(ret>=0) hl_decode_mb(h);
6611 eos = get_cabac_terminate( &h->cabac );
/* Error: MB decode failed or CABAC read past the end of the buffer. */
6613 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6614 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6615 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Advance to the next macroblock, wrapping to the next row. */
6619 if( ++s->mb_x >= s->mb_width ) {
6621 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6623 if(FIELD_OR_MBAFF_PICTURE) {
/* End of slice: CABAC terminator seen or bottom of picture reached. */
6628 if( eos || s->mb_y >= s->mb_height ) {
6629 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6630 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop (pps->cabac == 0). */
6637 int ret = decode_mb_cavlc(h);
6639 if(ret>=0) hl_decode_mb(h);
6641 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6643 ret = decode_mb_cavlc(h);
6645 if(ret>=0) hl_decode_mb(h);
6650 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6651 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6656 if(++s->mb_x >= s->mb_width){
6658 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6660 if(FIELD_OR_MBAFF_PICTURE) {
6663 if(s->mb_y >= s->mb_height){
6664 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* All bits consumed exactly -> clean slice end; otherwise report error. */
6666 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6667 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6671 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Bitstream exhausted mid-row; a pending mb_skip_run may still cover
 * the remaining macroblocks, hence the mb_skip_run<=0 condition. */
6678 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6679 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6680 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6681 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6685 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below looks like disabled legacy code
 * (historically kept under "#if 0" in this file); the "s->?gb" tokens
 * further down are not valid C and would not compile if re-enabled —
 * confirm this region is still compiled out before touching it. */
6694 for(;s->mb_y < s->mb_height; s->mb_y++){
6695 for(;s->mb_x < s->mb_width; s->mb_x++){
6696 int ret= decode_mb(h);
6701 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6702 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6707 if(++s->mb_x >= s->mb_width){
6709 if(++s->mb_y >= s->mb_height){
6710 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6711 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6715 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6722 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6723 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6724 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6728 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6735 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6738 return -1; //not reached
/* Parse an SEI "user data unregistered" payload of 'size' bytes.
 * Recognizes the x264 version banner and stores the build number in
 * h->x264_build so later code can work around known encoder bugs. */
6741 static int decode_unregistered_user_data(H264Context *h, int size){
6742 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 bytes of payload text. */
6743 uint8_t user_data[16+256];
/* Copy at most sizeof(user_data)-1 bytes, leaving room for termination. */
6749 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6750 user_data[i]= get_bits(&s->gb, 8);
/* The human-readable text starts after the 16-byte UUID. */
6754 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6755 if(e==1 && build>=0)
6756 h->x264_build= build;
6758 if(s->avctx->debug & FF_DEBUG_BUGS)
6759 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes that did not fit into the local buffer. */
6762 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: iterate over (payload_type, payload_size)
 * pairs until the bitstream is nearly exhausted. Both fields use the
 * H.264 byte-escape coding: each 0xFF byte adds 255, the first
 * non-0xFF byte terminates the value. */
6767 static int decode_sei(H264Context *h){
6768 MpegEncContext * const s = &h->s;
6770 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6775 type+= show_bits(&s->gb, 8);
6776 }while(get_bits(&s->gb, 8) == 255);
6780 size+= show_bits(&s->gb, 8);
6781 }while(get_bits(&s->gb, 8) == 255);
/* Only unregistered user data is interpreted; other types are skipped. */
6785 if(decode_unregistered_user_data(h, size) < 0)
6789 skip_bits(&s->gb, 8*size);
6792 //FIXME check bits here
/* SEI payloads are byte-aligned; realign before the next message. */
6793 align_get_bits(&s->gb);
/* Parse hrd_parameters() (H.264 Annex E). All fields are read purely
 * to advance the bitstream position; nothing is stored in 'sps'. */
6799 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6800 MpegEncContext * const s = &h->s;
6802 cpb_count = get_ue_golomb(&s->gb) + 1;
6803 get_bits(&s->gb, 4); /* bit_rate_scale */
6804 get_bits(&s->gb, 4); /* cpb_size_scale */
/* One rate/size/cbr triple per coded picture buffer. */
6805 for(i=0; i<cpb_count; i++){
6806 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6807 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6808 get_bits1(&s->gb); /* cbr_flag */
6810 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6811 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6812 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6813 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse vui_parameters() (H.264 Annex E). Stores the sample aspect
 * ratio, timing info and the bitstream-restriction reorder depth in
 * 'sps'; most other fields are read and discarded.
 * NOTE(review): the return statements fall on lines not visible in
 * this extract. */
6816 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6817 MpegEncContext * const s = &h->s;
6818 int aspect_ratio_info_present_flag;
6819 unsigned int aspect_ratio_idc;
6820 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6822 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6824 if( aspect_ratio_info_present_flag ) {
6825 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16+16 bit ratio; smaller idc
 * values index the predefined pixel_aspect[] table. */
6826 if( aspect_ratio_idc == EXTENDED_SAR ) {
6827 sps->sar.num= get_bits(&s->gb, 16);
6828 sps->sar.den= get_bits(&s->gb, 16);
6829 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6830 sps->sar= pixel_aspect[aspect_ratio_idc];
6832 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6839 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6841 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6842 get_bits1(&s->gb); /* overscan_appropriate_flag */
6845 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6846 get_bits(&s->gb, 3); /* video_format */
6847 get_bits1(&s->gb); /* video_full_range_flag */
6848 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6849 get_bits(&s->gb, 8); /* colour_primaries */
6850 get_bits(&s->gb, 8); /* transfer_characteristics */
6851 get_bits(&s->gb, 8); /* matrix_coefficients */
6855 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6856 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6857 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6860 sps->timing_info_present_flag = get_bits1(&s->gb);
6861 if(sps->timing_info_present_flag){
/* 32-bit fields need get_bits_long(); plain get_bits() is narrower. */
6862 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6863 sps->time_scale = get_bits_long(&s->gb, 32);
6864 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6867 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6868 if(nal_hrd_parameters_present_flag)
6869 decode_hrd_parameters(h, sps);
6870 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6871 if(vcl_hrd_parameters_present_flag)
6872 decode_hrd_parameters(h, sps);
6873 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6874 get_bits1(&s->gb); /* low_delay_hrd_flag */
6875 get_bits1(&s->gb); /* pic_struct_present_flag */
6877 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6878 if(sps->bitstream_restriction_flag){
6879 unsigned int num_reorder_frames;
6880 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6881 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6882 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6883 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6884 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6885 num_reorder_frames= get_ue_golomb(&s->gb);
6886 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Sanity limit: a DPB can hold at most 16 frames. */
6888 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6889 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6893 sps->num_reorder_frames= num_reorder_frames;
/* Read one quantization scaling list of 'size' (16 or 64) entries.
 * List absent in the bitstream -> copy 'fallback_list'.
 * First decoded value is 0     -> use the JVT default 'jvt_list'.
 * Otherwise the values are delta-coded in zigzag order; a decoded 0
 * repeats the previous value ('last') for the remaining entries. */
6899 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6900 const uint8_t *jvt_list, const uint8_t *fallback_list){
6901 MpegEncContext * const s = &h->s;
6902 int i, last = 8, next = 8;
/* Deltas are stored in zigzag order; pick the 4x4 or 8x8 scan. */
6903 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6904 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6905 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6907 for(i=0;i<size;i++){
6909 next = (last + get_se_golomb(&s->gb)) & 0xff;
6910 if(!i && !next){ /* matrix not written, we use the preset one */
6911 memcpy(factors, jvt_list, size*sizeof(uint8_t));
6914 last = factors[scan[i]] = next ? next : last;
/* Read all scaling matrices of an SPS or PPS.
 * Fallback rules: when decoding a PPS whose SPS carried matrices,
 * absent lists inherit from the SPS; otherwise from the JVT defaults.
 * Each chroma list additionally falls back to the previously decoded
 * list of the same intra/inter class. */
6918 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6919 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6920 MpegEncContext * const s = &h->s;
/* True only for a PPS following an SPS that transmitted matrices. */
6921 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6922 const uint8_t *fallback[4] = {
6923 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6924 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6925 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6926 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* Matrices present: six 4x4 lists, plus two 8x8 luma lists when the
 * 8x8 transform can be in use. */
6928 if(get_bits1(&s->gb)){
6929 sps->scaling_matrix_present |= is_sps;
6930 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6931 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6932 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6933 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6934 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6935 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6936 if(is_sps || pps->transform_8x8_mode){
6937 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6938 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
6940 } else if(fallback_sps) {
/* No matrices in this PPS: inherit the SPS matrices wholesale. */
6941 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
6942 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
6947 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Bounds-checks 'id' against 'max', lazily allocates a zeroed entry on
 * first use, and returns vec[id] (NULL on error). 'name' ("sps"/"pps")
 * is used only for error messages. */
6950 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
6951 const size_t size, const char *name)
6954 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
6959 vec[id] = av_mallocz(size);
6961 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL and store it in h->sps_buffers.
 * Returns 0 on success, -1 on error.
 * NOTE(review): the return statements and some closing braces fall on
 * lines not visible in this extract. */
6966 static inline int decode_seq_parameter_set(H264Context *h){
6967 MpegEncContext * const s = &h->s;
6968 int profile_idc, level_idc;
6969 unsigned int sps_id, tmp, mb_width, mb_height;
6973 profile_idc= get_bits(&s->gb, 8);
6974 get_bits1(&s->gb); //constraint_set0_flag
6975 get_bits1(&s->gb); //constraint_set1_flag
6976 get_bits1(&s->gb); //constraint_set2_flag
6977 get_bits1(&s->gb); //constraint_set3_flag
6978 get_bits(&s->gb, 4); // reserved
6979 level_idc= get_bits(&s->gb, 8);
6980 sps_id= get_ue_golomb(&s->gb);
6982 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
6986 sps->profile_idc= profile_idc;
6987 sps->level_idc= level_idc;
/* High profile (idc >= 100) adds chroma format, bit depth and optional
 * scaling matrices; lower profiles imply 4:2:0 with no matrices. */
6989 if(sps->profile_idc >= 100){ //high profile
6990 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6991 if(sps->chroma_format_idc == 3)
6992 get_bits1(&s->gb); //residual_color_transform_flag
6993 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6994 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6995 sps->transform_bypass = get_bits1(&s->gb);
6996 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
6998 sps->scaling_matrix_present = 0;
6999 sps->chroma_format_idc= 1;
7002 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7003 sps->poc_type= get_ue_golomb(&s->gb);
/* Picture order count: type 0 codes POC lsbs per picture, type 1 uses
 * explicit per-cycle offsets, type 2 derives POC from frame_num. */
7005 if(sps->poc_type == 0){ //FIXME #define
7006 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7007 } else if(sps->poc_type == 1){//FIXME #define
7008 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7009 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7010 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7011 tmp= get_ue_golomb(&s->gb);
/* Reject cycle lengths larger than the offset_for_ref_frame array. */
7013 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7014 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7017 sps->poc_cycle_length= tmp;
7019 for(i=0; i<sps->poc_cycle_length; i++)
7020 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7021 }else if(sps->poc_type != 2){
7022 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7026 tmp= get_ue_golomb(&s->gb);
7027 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7028 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7031 sps->ref_frame_count= tmp;
7032 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7033 mb_width= get_ue_golomb(&s->gb) + 1;
7034 mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb_{width,height} against integer overflow before the
 * generic dimension check. */
7035 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7036 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7037 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7040 sps->mb_width = mb_width;
7041 sps->mb_height= mb_height;
7043 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7044 if(!sps->frame_mbs_only_flag)
7045 sps->mb_aff= get_bits1(&s->gb);
7049 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7051 #ifndef ALLOW_INTERLACE
7053 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7055 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7056 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7058 sps->crop= get_bits1(&s->gb);
7060 sps->crop_left = get_ue_golomb(&s->gb);
7061 sps->crop_right = get_ue_golomb(&s->gb);
7062 sps->crop_top = get_ue_golomb(&s->gb);
7063 sps->crop_bottom= get_ue_golomb(&s->gb);
/* Only sub-macroblock right/bottom cropping is fully supported. */
7064 if(sps->crop_left || sps->crop_top){
7065 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7067 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7068 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7074 sps->crop_bottom= 0;
7077 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7078 if( sps->vui_parameters_present_flag )
7079 decode_vui_parameters(h, sps);
7081 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7082 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7083 sps_id, sps->profile_idc, sps->level_idc,
7085 sps->ref_frame_count,
7086 sps->mb_width, sps->mb_height,
7087 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7088 sps->direct_8x8_inference_flag ? "8B8" : "",
7089 sps->crop_left, sps->crop_right,
7090 sps->crop_top, sps->crop_bottom,
7091 sps->vui_parameters_present_flag ? "VUI" : "",
7092 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Precompute the chroma QP lookup table for chroma_qp_index_offset
 * 'index': chroma_qp_table[t][qp] = chroma_qp[clip(qp + index, 0, 51)],
 * where t selects the Cb (0) or Cr (1) table. */
7099 build_qp_table(PPS *pps, int t, int index)
7102 for(i = 0; i < 52; i++)
7103 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL of 'bit_length' bits and store it
 * in h->pps_buffers. Returns 0 on success, -1 on error.
 * NOTE(review): the return statements fall on lines not visible in
 * this extract. */
7106 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7107 MpegEncContext * const s = &h->s;
7108 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7111 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* The referenced SPS must already have been decoded. */
7115 tmp= get_ue_golomb(&s->gb);
7116 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7117 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7122 pps->cabac= get_bits1(&s->gb);
7123 pps->pic_order_present= get_bits1(&s->gb);
7124 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* Flexible macroblock ordering (slice groups) is not implemented; the
 * map type is read but the per-type parameters are not parsed. */
7125 if(pps->slice_group_count > 1 ){
7126 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7127 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7128 switch(pps->mb_slice_group_map_type){
/* The table rows below are excerpts of the spec's slice-group syntax
 * tables, kept (inside the original comment block) as a reminder of
 * the unimplemented FMO parsing. */
7131 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7132 | run_length[ i ] |1 |ue(v) |
7137 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7139 | top_left_mb[ i ] |1 |ue(v) |
7140 | bottom_right_mb[ i ] |1 |ue(v) |
7148 | slice_group_change_direction_flag |1 |u(1) |
7149 | slice_group_change_rate_minus1 |1 |ue(v) |
7154 | slice_group_id_cnt_minus1 |1 |ue(v) |
7155 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7157 | slice_group_id[ i ] |1 |u(v) |
7162 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7163 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Clamp corrupt reference counts to 1 to keep later code safe. */
7164 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7165 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7166 pps->ref_count[0]= pps->ref_count[1]= 1;
7170 pps->weighted_pred= get_bits1(&s->gb);
7171 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7172 pps->init_qp= get_se_golomb(&s->gb) + 26;
7173 pps->init_qs= get_se_golomb(&s->gb) + 26;
7174 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7175 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7176 pps->constrained_intra_pred= get_bits1(&s->gb);
7177 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* Defaults for the optional extension fields (flat "16" matrices). */
7179 pps->transform_8x8_mode= 0;
7180 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7181 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7182 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Bits remaining -> the PPS carries the High-profile extension. */
7184 if(get_bits_count(&s->gb) < bit_length){
7185 pps->transform_8x8_mode= get_bits1(&s->gb);
7186 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7187 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7189 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7192 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7193 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7194 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7195 h->pps.chroma_qp_diff= 1;
7197 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7198 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7199 pps_id, pps->sps_id,
7200 pps->cabac ? "CABAC" : "CAVLC",
7201 pps->slice_group_count,
7202 pps->ref_count[0], pps->ref_count[1],
7203 pps->weighted_pred ? "weighted" : "",
7204 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7205 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7206 pps->constrained_intra_pred ? "CONSTR" : "",
7207 pps->redundant_pic_cnt_present ? "REDU" : "",
7208 pps->transform_8x8_mode ? "8x8DCT" : ""
7216 * Call decode_slice() for each context.
7218 * @param h h264 master context
7219 * @param context_count number of contexts to execute
7221 static void execute_decode_slices(H264Context *h, int context_count){
7222 MpegEncContext * const s = &h->s;
7223 AVCodecContext * const avctx= s->avctx;
/* Single context: decode directly on the calling thread. */
7227 if(context_count == 1) {
7228 decode_slice(avctx, h);
/* Multiple contexts: seed per-thread error state, then run all slices
 * through the avctx->execute() callback. */
7230 for(i = 1; i < context_count; i++) {
7231 hx = h->thread_context[i];
7232 hx->s.error_resilience = avctx->error_resilience;
7233 hx->s.error_count = 0;
7236 avctx->execute(avctx, (void *)decode_slice,
7237 (void **)h->thread_context, NULL, context_count);
7239 /* pull back stuff from slices to master context */
7240 hx = h->thread_context[context_count - 1];
7241 s->mb_x = hx->s.mb_x;
7242 s->mb_y = hx->s.mb_y;
7243 s->dropable = hx->s.dropable;
7244 s->picture_structure = hx->s.picture_structure;
/* Accumulate error counts from all worker contexts into the master. */
7245 for(i = 1; i < context_count; i++)
7246 h->s.error_count += h->thread_context[i]->s.error_count;
7251 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7252 MpegEncContext * const s = &h->s;
7253 AVCodecContext * const avctx= s->avctx;
7255 H264Context *hx; ///< thread context
7256 int context_count = 0;
7258 h->max_contexts = avctx->thread_count;
7261 for(i=0; i<50; i++){
7262 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7265 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7266 h->current_slice = 0;
7267 if (!s->first_field)
7268 s->current_picture_ptr= NULL;
7280 if(buf_index >= buf_size) break;
7282 for(i = 0; i < h->nal_length_size; i++)
7283 nalsize = (nalsize << 8) | buf[buf_index++];
7284 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7289 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7294 // start code prefix search
7295 for(; buf_index + 3 < buf_size; buf_index++){
7296 // This should always succeed in the first iteration.
7297 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7301 if(buf_index+3 >= buf_size) break;
7306 hx = h->thread_context[context_count];
7308 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7309 if (ptr==NULL || dst_length < 0){
7312 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7314 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7316 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7317 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7320 if (h->is_avc && (nalsize != consumed)){
7321 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7325 buf_index += consumed;
7327 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7328 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7333 switch(hx->nal_unit_type){
7335 if (h->nal_unit_type != NAL_IDR_SLICE) {
7336 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7339 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7341 init_get_bits(&hx->s.gb, ptr, bit_length);
7343 hx->inter_gb_ptr= &hx->s.gb;
7344 hx->s.data_partitioning = 0;
7346 if((err = decode_slice_header(hx, h)))
7349 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7350 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7351 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7352 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7353 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7354 && avctx->skip_frame < AVDISCARD_ALL)
7358 init_get_bits(&hx->s.gb, ptr, bit_length);
7360 hx->inter_gb_ptr= NULL;
7361 hx->s.data_partitioning = 1;
7363 err = decode_slice_header(hx, h);
7366 init_get_bits(&hx->intra_gb, ptr, bit_length);
7367 hx->intra_gb_ptr= &hx->intra_gb;
7370 init_get_bits(&hx->inter_gb, ptr, bit_length);
7371 hx->inter_gb_ptr= &hx->inter_gb;
7373 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7374 && s->context_initialized
7376 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7377 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7378 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7379 && avctx->skip_frame < AVDISCARD_ALL)
7383 init_get_bits(&s->gb, ptr, bit_length);
7387 init_get_bits(&s->gb, ptr, bit_length);
7388 decode_seq_parameter_set(h);
7390 if(s->flags& CODEC_FLAG_LOW_DELAY)
7393 if(avctx->has_b_frames < 2)
7394 avctx->has_b_frames= !s->low_delay;
7397 init_get_bits(&s->gb, ptr, bit_length);
7399 decode_picture_parameter_set(h, bit_length);
7403 case NAL_END_SEQUENCE:
7404 case NAL_END_STREAM:
7405 case NAL_FILLER_DATA:
7407 case NAL_AUXILIARY_SLICE:
7410 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7413 if(context_count == h->max_contexts) {
7414 execute_decode_slices(h, context_count);
7419 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7421 /* Slice could not be decoded in parallel mode, copy down
7422 * NAL unit stuff to context 0 and restart. Note that
7423 * rbsp_buffer is not transferred, but since we no longer
7424 * run in parallel mode this should not be an issue. */
7425 h->nal_unit_type = hx->nal_unit_type;
7426 h->nal_ref_idc = hx->nal_ref_idc;
7432 execute_decode_slices(h, context_count);
7437 * returns the number of bytes consumed for building the current frame
7439 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7440 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7441 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decode one access unit (frame) from the input buffer.
 *
 * NOTE(review): this chunk is a lossy excerpt — several original lines
 * (the opening brace, `int buf_index;`, early returns, `else` branches,
 * closing braces) were elided by the extraction, so the fragment below is
 * not expected to compile stand-alone. Only comments were changed here.
 */
static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        const uint8_t *buf, int buf_size)
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

    /* end of stream, output what is still in the buffers */
    if (buf_size == 0) {
        //FIXME factorize this with the output code below
        // pick the delayed picture with the smallest POC for output
        out = h->delayed_pic[0];
        for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];

        // shift the remaining delayed pictures down over the output slot
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;

    // first call on an "avcC"-style (MP4/AVC1) stream: parse the
    // AVCDecoderConfigurationRecord from extradata (SPS then PPS sets)
    if(h->is_avc && !h->got_avcC) {
        int i, cnt, nalsize;
        unsigned char *p = avctx->extradata;
        if(avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
        /* sps and pps in the avcC always have length coded with 2 bytes,
           so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p+5) & 0x1f; // Number of sps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
        // Decode pps from avcC
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize) != nalsize) {
                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
        // Now store right nal length size, that will be use to parse all other nals
        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
        // Do not reparse avcC

    // raw Annex-B stream: feed global extradata (SPS/PPS) once, before frame 0
    if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)

    buf_index=decode_nal_units(h, buf, buf_size);

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");

    // picture complete (or whole-frame mode): finish it and run the
    // B-frame reorder buffer to decide which picture to output
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
        Picture *out = s->current_picture_ptr;
        Picture *cur = s->current_picture_ptr;
        int i, pics, cross_idr, out_of_order, out_idx;

        s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
        s->current_picture_ptr->pict_type= s->pict_type;

        // apply memory-management control ops, then roll POC/frame_num state
        execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb= h->poc_msb;
        h->prev_poc_lsb= h->poc_lsb;
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;

        /*
         * FIXME: Error handling code does not seem to support interlaced
         * when slices span multiple rows
         * The ff_er_add_slice calls don't work right for bottom
         * fields; they cause massive erroneous error concealing
         * Error marking covers both fields (top and bottom).
         * This causes a mismatched s->error_count
         * and a bad error table. Further, the error count goes to
         * INT_MAX when called for bottom field, because mb_y is
         * past end by one (callers fault) and resync_mb_y != 0
         * causes problems for the first MB line, too.
         */
        if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
            /* Wait for second field. */
            cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
            /* Derive top_field_first from field pocs. */
            cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];

        //FIXME do something with unavailable reference frames

        /* Sort B-frames into display order */

        if(h->sps.bitstream_restriction_flag
           && s->avctx->has_b_frames < h->sps.num_reorder_frames){
            s->avctx->has_b_frames = h->sps.num_reorder_frames;

        // without bitstream restrictions we must assume the worst-case delay
        if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
           && !h->sps.bitstream_restriction_flag){
            s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;

        while(h->delayed_pic[pics]) pics++;

        assert(pics <= MAX_DELAYED_PIC_COUNT);

        h->delayed_pic[pics++] = cur;
        if(cur->reference == 0)
            cur->reference = DELAYED_PIC_REF;

        // pick the delayed picture with the smallest POC as output candidate
        out = h->delayed_pic[0];
        for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
        cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];

        out_of_order = !cross_idr && out->poc < h->outputed_poc;

        if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
                ((!cross_idr && out->poc > h->outputed_poc + 2)
                 || cur->pict_type == FF_B_TYPE)))
            s->avctx->has_b_frames++;

        if(out_of_order || pics > s->avctx->has_b_frames){
            out->reference &= ~DELAYED_PIC_REF;
            for(i=out_idx; h->delayed_pic[i]; i++)
                h->delayed_pic[i] = h->delayed_pic[i+1];
        if(!out_of_order && pics > s->avctx->has_b_frames){
            *data_size = sizeof(AVFrame);
            h->outputed_poc = out->poc;
            *pict= *(AVFrame*)out;
            av_log(avctx, AV_LOG_DEBUG, "no picture\n");

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    /* Return the Picture timestamp as the frame number */
    /* we subtract 1 because it is added on utils.c */
    avctx->frame_number = s->picture_number - 1;

    return get_consumed_bytes(s, buf_index, buf_size);
7658 static inline void fill_mb_avail(H264Context *h){
7659 MpegEncContext * const s = &h->s;
7660 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7663 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7664 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7665 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7671 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7672 h->mb_avail[4]= 1; //FIXME move out
7673 h->mb_avail[5]= 0; //FIXME move out
#define SIZE (COUNT*40)
// NOTE(review): the lines below are a fragment of the "#ifdef TEST"
// self-test main(); its opening line, local declarations, braces and
// several statements were elided in this excerpt, so it cannot compile
// stand-alone. Only comments were changed here.
//    int int_temp[10000];
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    // --- unsigned Exp-Golomb write/read round-trip ---
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");

    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        s= show_bits(&gb, 24);
        j= get_ue_golomb(&gb);
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        STOP_TIMER("get_ue_golomb");

    // --- signed Exp-Golomb write/read round-trip ---
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");

    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        s= show_bits(&gb, 24);
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        STOP_TIMER("get_se_golomb");

    // --- 4x4 (I)DCT round-trip error measurement ---
    printf("testing 4x4 (I)DCT\n");
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;

        h264_diff_dct_c(block, src, ref, 4);

        // crude quantize/dequantize approximation before the inverse transform
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;

        s->dsp.h264_idct_add(ref, block, 4);
//        for(j=0; j<16; j++){
//            printf("%d ", ref[j]);

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            max_error= FFMAX(max_error, diff);

    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
            src1_block[i]= src2_block[i]= random()%255;

    // --- NAL escaping (encode_nal/decode_nal) round-trip ---
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];

    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
            printf("encoding failed\n");
        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");

    printf("Testing RBSP\n");
7853 static av_cold int decode_end(AVCodecContext *avctx)
7855 H264Context *h = avctx->priv_data;
7856 MpegEncContext *s = &h->s;
7858 av_freep(&h->rbsp_buffer[0]);
7859 av_freep(&h->rbsp_buffer[1]);
7860 free_tables(h); //FIXME cleanup init stuff perhaps
7863 // memset(h, 0, sizeof(H264Context));
7869 AVCodec h264_decoder = {
7873 sizeof(H264Context),
7878 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7880 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),