2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
/* Sentinel for Picture.reference: the picture is no longer referenced for
 * prediction but is still held back for (reordered) delayed output. */
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC lookup tables: coeff_token, total_zeros and run_before codes,
 * plus the chroma-DC variants (built once at init time). */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations: SVQ3 IDCT helpers, the two in-loop deblocking
 * filters, and long-term reference removal. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
64 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into one 32-bit word so that, when stored, the
 * pair appears in memory order on both endiannesses.
 * NOTE(review): the #else/#endif and closing brace are missing from this
 * extract; only the two return expressions are visible. */
66 static av_always_inline uint32_t pack16to32(int a, int b){
67 #ifdef WORDS_BIGENDIAN
68 return (b&0xFFFF) + (a<<16);
70 return (a&0xFFFF) + (b<<16);
/* qp % 6 lookup for 0 <= qp < 52: selects the dequant-table row without a
 * runtime modulo. NOTE(review): closing "};" missing from this extract. */
74 const uint8_t ff_rem6[52]={
75 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* qp / 6 lookup for 0 <= qp < 52: the dequant shift amount. */
78 const uint8_t ff_div6[52]={
79 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/**
 * Fills the per-macroblock neighbour caches — intra4x4 prediction modes,
 * non-zero-coefficient counts, CBP, motion vectors, reference indices,
 * mvd and direct-mode flags — from the frame-wide tables, applying the
 * MBAFF field/frame neighbour remapping rules.
 *
 * NOTE(review): this extract is missing interior lines (the embedded
 * original line numbers are non-contiguous); comments below describe only
 * the code that is visible here — confirm against the full file.
 */
83 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
84 MpegEncContext * const s = &h->s;
85 const int mb_xy= h->mb_xy;
86 int topleft_xy, top_xy, topright_xy, left_xy[2];
87 int topleft_type, top_type, topright_type, left_type[2];
89 int topleft_partition= -1;
92 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
94 //FIXME deblocking could skip the intra and nnz parts.
95 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
98 /* Wow, what a mess, why didn't they simplify the interlacing & intra
99 * stuff, I can't imagine that these complex rules are worth it. */
101 topleft_xy = top_xy - 1;
102 topright_xy= top_xy + 1;
103 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: a neighbour may be coded in the opposite field/frame mode, so the
 * neighbour MB addresses must be remapped within each top/bottom MB pair. */
113 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
114 const int top_pair_xy = pair_xy - s->mb_stride;
115 const int topleft_pair_xy = top_pair_xy - 1;
116 const int topright_pair_xy = top_pair_xy + 1;
117 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
118 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
119 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
120 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
121 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
122 const int bottom = (s->mb_y & 1);
123 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
125 ? !curr_mb_frame_flag // bottom macroblock
126 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
128 top_xy -= s->mb_stride;
131 ? !curr_mb_frame_flag // bottom macroblock
132 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
134 topleft_xy -= s->mb_stride;
135 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
136 topleft_xy += s->mb_stride;
137 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
138 topleft_partition = 0;
141 ? !curr_mb_frame_flag // bottom macroblock
142 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
144 topright_xy -= s->mb_stride;
146 if (left_mb_frame_flag != curr_mb_frame_flag) {
147 left_xy[1] = left_xy[0] = pair_xy - 1;
148 if (curr_mb_frame_flag) {
169 left_xy[1] += s->mb_stride;
/* Publish the resolved neighbour addresses for later use (deblocking etc.). */
182 h->top_mb_xy = top_xy;
183 h->left_mb_xy[0] = left_xy[0];
184 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbour counts as available if it was decoded at all
 * (slice_table < 255), regardless of slice boundaries. */
188 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
189 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
190 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* NOTE(review): this appears to be a deblocking fast path that reloads the
 * current MB's nnz bits and MVs straight from the frame tables — interior
 * lines are missing here, verify against the full file. */
192 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
194 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
196 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
197 for(list=0; list<h->list_count; list++){
198 if(USES_LIST(mb_type,list)){
199 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
200 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
201 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
202 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
208 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
209 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
211 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
212 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
214 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
215 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decoding path: only neighbours belonging to the same slice are available. */
220 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
221 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
222 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
223 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
224 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra sample-availability bitmasks; each unavailable (or, with
 * constrained_intra_pred, inter-coded) neighbour clears the corresponding
 * bits so the prediction code only reads valid samples. */
227 if(IS_INTRA(mb_type)){
228 h->topleft_samples_available=
229 h->top_samples_available=
230 h->left_samples_available= 0xFFFF;
231 h->topright_samples_available= 0xEEEA;
233 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
234 h->topleft_samples_available= 0xB3FF;
235 h->top_samples_available= 0x33FF;
236 h->topright_samples_available= 0x26EA;
239 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
240 h->topleft_samples_available&= 0xDF5F;
241 h->left_samples_available&= 0x5F5F;
245 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
246 h->topleft_samples_available&= 0x7FFF;
248 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
249 h->topright_samples_available&= 0xFBFF;
/* Cache the neighbouring intra4x4 prediction modes (row above, column left);
 * unavailable neighbours fall back to a default "pred" value. */
251 if(IS_INTRA4x4(mb_type)){
252 if(IS_INTRA4x4(top_type)){
253 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
254 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
255 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
256 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
259 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
264 h->intra4x4_pred_mode_cache[4+8*0]=
265 h->intra4x4_pred_mode_cache[5+8*0]=
266 h->intra4x4_pred_mode_cache[6+8*0]=
267 h->intra4x4_pred_mode_cache[7+8*0]= pred;
270 if(IS_INTRA4x4(left_type[i])){
271 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
272 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
275 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
280 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
281 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Cache the neighbouring non-zero coefficient counts (CAVLC nC / CABAC
 * context derivation); missing neighbours default to 0 or 64. */
296 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
298 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
299 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
300 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
301 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
303 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
304 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
306 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
307 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
310 h->non_zero_count_cache[4+8*0]=
311 h->non_zero_count_cache[5+8*0]=
312 h->non_zero_count_cache[6+8*0]=
313 h->non_zero_count_cache[7+8*0]=
315 h->non_zero_count_cache[1+8*0]=
316 h->non_zero_count_cache[2+8*0]=
318 h->non_zero_count_cache[1+8*3]=
319 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
323 for (i=0; i<2; i++) {
325 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
326 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
327 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
328 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
330 h->non_zero_count_cache[3+8*1 + 2*8*i]=
331 h->non_zero_count_cache[3+8*2 + 2*8*i]=
332 h->non_zero_count_cache[0+8*1 + 8*i]=
333 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Cache neighbouring CBPs for the CABAC coded_block_pattern contexts. */
340 h->top_cbp = h->cbp_table[top_xy];
341 } else if(IS_INTRA(mb_type)) {
348 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
349 } else if(IS_INTRA(mb_type)) {
355 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
358 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector / reference index caches for inter prediction:
 * top row, left column, top-left and top-right corners per list. */
363 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
365 for(list=0; list<h->list_count; list++){
366 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
367 /*if(!h->mv_cache_clean[list]){
368 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
369 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
370 h->mv_cache_clean[list]= 1;
374 h->mv_cache_clean[list]= 0;
376 if(USES_LIST(top_type, list)){
377 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
378 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
383 h->ref_cache[list][scan8[0] + 0 - 1*8]=
384 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
385 h->ref_cache[list][scan8[0] + 2 - 1*8]=
386 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
392 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
396 int cache_idx = scan8[0] - 1 + i*2*8;
397 if(USES_LIST(left_type[i], list)){
398 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
399 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
400 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
401 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
402 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
403 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
405 *(uint32_t*)h->mv_cache [list][cache_idx ]=
406 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
407 h->ref_cache[list][cache_idx ]=
408 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
412 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
415 if(USES_LIST(topleft_type, list)){
416 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
417 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
418 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
419 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
421 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
422 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
425 if(USES_LIST(topright_type, list)){
426 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
427 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
428 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
429 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
431 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
432 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
435 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
438 h->ref_cache[list][scan8[5 ]+1] =
439 h->ref_cache[list][scan8[7 ]+1] =
440 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
441 h->ref_cache[list][scan8[4 ]] =
442 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
443 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
445 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
446 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
447 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* XXX ugly: load the mvd (motion vector difference) cache for CABAC. */
451 if(USES_LIST(top_type, list)){
452 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
463 if(USES_LIST(left_type[0], list)){
464 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
471 if(USES_LIST(left_type[1], list)){
472 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
474 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
477 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
479 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
481 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
482 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
483 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* Direct-mode flag cache for B slices (per 8x8 partition). */
485 if(h->slice_type_nos == FF_B_TYPE){
486 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
488 if(IS_DIRECT(top_type)){
489 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
490 }else if(IS_8X8(top_type)){
491 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
492 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
493 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
495 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
498 if(IS_DIRECT(left_type[0]))
499 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
500 else if(IS_8X8(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
503 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
505 if(IS_DIRECT(left_type[1]))
506 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
507 else if(IS_8X8(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
510 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MAP_F2F: MBAFF helper applied to every cached neighbour position, rescaling
 * refs and vertical MV components between field and frame coding. */
516 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
517 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
522 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
524 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
525 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* Frame neighbour seen from a field MB: double the ref index, halve the
 * vertical motion. */
527 #define MAP_F2F(idx, mb_type)\
528 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
529 h->ref_cache[list][idx] <<= 1;\
530 h->mv_cache[list][idx][1] /= 2;\
531 h->mvd_cache[list][idx][1] /= 2;\
/* Field neighbour seen from a frame MB: the inverse mapping. */
536 #define MAP_F2F(idx, mb_type)\
537 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
538 h->ref_cache[list][idx] >>= 1;\
539 h->mv_cache[list][idx][1] <<= 1;\
540 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for the transform_size_8x8 CABAC ctx. */
550 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Writes the bottom-row and right-column intra4x4 modes from the cache back
 * to the frame-wide table, so following MBs can read them as top/left
 * neighbours. NOTE(review): the closing brace is missing from this extract. */
553 static inline void write_back_intra_pred_mode(H264Context *h){
554 const int mb_xy= h->mb_xy;
556 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
557 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
558 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
559 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
560 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
561 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
562 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/* The top[]/left[] tables map each intra4x4 mode to its replacement when the
 * corresponding neighbour is unavailable; -1 marks modes that cannot be
 * salvaged (bitstream error). Returns the (corrected) status; a negative
 * status signals an error to the caller.
 * NOTE(review): loop headers and return statements are missing from this
 * extract; only the checking logic is visible. */
566 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
568 static inline int check_intra4x4_pred_mode(H264Context *h){
569 MpegEncContext * const s = &h->s;
570 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
571 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 if(!(h->top_samples_available&0x8000)){
576 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
578 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
586 if(!(h->left_samples_available&0x8000)){
588 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
590 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
593 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
599 } //FIXME cleanup like next
/* Same idea as check_intra4x4_pred_mode but for 16x16/chroma modes:
 * remaps the requested mode when a required neighbour is unavailable,
 * logging and failing (negative return) for unrecoverable modes.
 * NOTE(review): the range check, remap assignments and returns are
 * missing from this extract. */
602 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
604 static inline int check_intra_pred_mode(H264Context *h, int mode){
605 MpegEncContext * const s = &h->s;
606 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
607 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
610 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
614 if(!(h->top_samples_available&0x8000)){
617 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
622 if(!(h->left_samples_available&0x8000)){
625 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/* Predicted intra4x4 mode for block n: the minimum of the left and top
 * neighbour modes; a negative neighbour (unavailable) forces DC_PRED.
 * NOTE(review): the final "return min" path is missing from this extract. */
634 * gets the predicted intra4x4 prediction mode.
636 static inline int pred_intra_mode(H264Context *h, int n){
637 const int index8= scan8[n];
638 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640 const int min= FFMIN(left, top);
642 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
644 if(min<0) return DC_PRED;
/* Writes the edge non-zero-coefficient counts from the cache back to the
 * frame-wide table (indices 0..12 are the luma/chroma edge entries), and
 * packs all 16 luma nnz flags into entry [14] for the deblocking filter. */
648 static inline void write_back_non_zero_count(H264Context *h){
649 const int mb_xy= h->mb_xy;
651 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
652 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
653 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
654 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
655 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
656 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
657 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
659 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
660 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
661 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
663 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
664 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
665 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
668 // store all luma nnzs, for deblocking
671 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
672 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/* Predicted non-zero coefficient count for block n, derived from the cached
 * left and top neighbour counts (CAVLC nC derivation).
 * NOTE(review): the lines combining left/top into i and the return are
 * missing from this extract; only the rounding and trace remain visible. */
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" (top-right diagonal) motion vector and reference for MV
 * prediction; falls back to the top-left neighbour (index -8-1) when the
 * top-right partition is unavailable. The MBAFF branches re-derive the
 * neighbour from the frame tables, rescaling refs and vertical MVs across
 * the field/frame boundary via SET_DIAG_MV.
 * NOTE(review): several interior lines (FRAME_MBAFF guard, #undef, braces)
 * are missing from this extract. */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF / fallthrough path: use C if available, else the top-left B'. */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
/* Median motion vector prediction (H.264 8.4.1.3): the MV is the component-
 * wise median of neighbours A (left), B (top) and C (diagonal), unless
 * exactly one neighbour shares the target reference index, in which case
 * that neighbour's MV is used directly.
 * NOTE(review): the single-match branches and final assignments are partly
 * missing from this extract. */
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
/* No match: if only the left neighbour exists, use A; otherwise median. */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/* Directional 16x8 MV prediction: the top partition prefers the B (top)
 * neighbour, the bottom partition prefers A (left); otherwise falls back to
 * the generic median predictor.
 * NOTE(review): the partition dispatch and early returns are missing from
 * this extract. */
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Fallback: generic median prediction over the whole 16-wide partition. */
838 pred_motion(h, n, 4, list, ref, mx, my);
/* Directional 8x16 MV prediction: the left partition prefers A (left),
 * the right partition prefers C (diagonal via fetch_diagonal_mv); otherwise
 * falls back to the generic median predictor.
 * NOTE(review): the partition dispatch and early returns are missing from
 * this extract. */
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
/* Fallback: generic median prediction over the 2-wide partition. */
875 pred_motion(h, n, 2, list, ref, mx, my);
/* P-skip MV prediction: the skip MV is zero when a left/top neighbour is
 * unavailable or is a zero-MV ref-0 block; otherwise it is the regular
 * median prediction with ref 0 (H.264 8.4.1.1).
 * NOTE(review): the zero-MV assignments and return are missing from this
 * extract. */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct distance scale factors (H.264 8.4.1.2.3):
 * for each list-0 reference, dist_scale_factor = clip((tb*tx + 32) >> 6)
 * with tx = (16384 + |td|/2) / td, where td/tb are clipped POC distances.
 * td == 0 yields the neutral factor 256.
 * NOTE(review): a FRAME_MBAFF guard around the field-pair loop appears to
 * be missing from this extract — confirm against the full file. */
897 static inline void direct_dist_scale_factor(H264Context * const h){
898 const int poc = h->s.current_picture_ptr->poc;
899 const int poc1 = h->ref_list[1][0].poc;
901 for(i=0; i<h->ref_count[0]; i++){
902 int poc0 = h->ref_list[0][i].poc;
903 int td = av_clip(poc1 - poc0, -128, 127);
904 if(td == 0 /* FIXME || pic0 is a long-term ref */){
905 h->dist_scale_factor[i] = 256;
907 int tb = av_clip(poc - poc0, -128, 127);
908 int tx = (16384 + (FFABS(td) >> 1)) / td;
909 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Field pairs reuse the frame factor for both field parities. */
913 for(i=0; i<h->ref_count[0]; i++){
914 h->dist_scale_factor_field[2*i] =
915 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the current picture's reference counts/POCs and, for temporal
 * direct mode in B slices, builds map_col_to_list0[]: for each reference of
 * the colocated picture (ref_list[1][0]), the index of the list-0 reference
 * with the same POC (0 when no match is found, as a bogus filler).
 * NOTE(review): early return, inner-loop break and field-map guard are
 * missing from this extract. */
919 static inline void direct_ref_list_init(H264Context * const h){
920 MpegEncContext * const s = &h->s;
921 Picture * const ref1 = &h->ref_list[1][0];
922 Picture * const cur = s->current_picture_ptr;
924 if(cur->pict_type == FF_I_TYPE)
925 cur->ref_count[0] = 0;
926 if(cur->pict_type != FF_B_TYPE)
927 cur->ref_count[1] = 0;
928 for(list=0; list<2; list++){
929 cur->ref_count[list] = h->ref_count[list];
930 for(j=0; j<h->ref_count[list]; j++)
931 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
933 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
935 for(list=0; list<2; list++){
936 for(i=0; i<ref1->ref_count[list]; i++){
937 const int poc = ref1->ref_poc[list][i];
938 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
939 for(j=0; j<h->ref_count[list]; j++)
940 if(h->ref_list[list][j].poc == poc){
941 h->map_col_to_list0[list][i] = j;
/* Field variant: expand each frame mapping to the two field parities. */
947 for(list=0; list<2; list++){
948 for(i=0; i<ref1->ref_count[list]; i++){
949 j = h->map_col_to_list0[list][i];
950 h->map_col_to_list0_field[list][2*i] = 2*j;
951 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Fills the motion-vector/reference caches for a B-direct macroblock.
 * Handles both spatial direct prediction (h->direct_spatial_mv_pred branch)
 * and temporal direct prediction (the trailing else branch), including the
 * frame/field-mismatch cases where the co-located data must be rescaled.
 * NOTE(review): this listing appears to have lines elided (original line
 * numbers jump); code is left byte-identical.
 */
957 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
958 MpegEncContext * const s = &h->s;
959 const int mb_xy = h->mb_xy;
960 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
961 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// mb_type_col and the l1* pointers all index into the first list-1 reference
// picture, i.e. the co-located macroblock's data.
962 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
963 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
964 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
965 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
966 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
967 const int is_b8x8 = IS_8X8(*mb_type);
968 unsigned int sub_mb_type;
971 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// Pick the sub-macroblock partitioning based on the co-located mb type and
// whether 8x8 direct inference is allowed.
972 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
973 /* FIXME save sub mb types from previous frames (or derive from MVs)
974 * so we know exactly what block size to use */
975 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
976 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
977 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
978 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
979 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
981 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
982 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
985 *mb_type |= MB_TYPE_DIRECT2;
987 *mb_type |= MB_TYPE_INTERLACED;
989 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---- Spatial direct prediction ----
991 if(h->direct_spatial_mv_pred){
996 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
998 /* ref = min(neighbors) */
// Per-list reference index = minimum of left/top/top-right neighbours
// (top-left used as fallback for top-right).
999 for(list=0; list<2; list++){
1000 int refa = h->ref_cache[list][scan8[0] - 1];
1001 int refb = h->ref_cache[list][scan8[0] - 8];
1002 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1004 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned compare maps negative (unavailable) refs to huge values,
// so FFMIN3 ignores them unless all are unavailable.
1005 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1010 if(ref[0] < 0 && ref[1] < 0){
// No neighbour has a usable reference: direct-mode defaults to ref 0, MV (0,0).
1011 ref[0] = ref[1] = 0;
1012 mv[0][0] = mv[0][1] =
1013 mv[1][0] = mv[1][1] = 0;
1015 for(list=0; list<2; list++){
1017 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1019 mv[list][0] = mv[list][1] = 0;
// Drop the unused prediction direction when one list has no valid ref.
1025 *mb_type &= ~MB_TYPE_L1;
1026 sub_mb_type &= ~MB_TYPE_L1;
1027 }else if(ref[0] < 0){
1029 *mb_type &= ~MB_TYPE_L0;
1030 sub_mb_type &= ~MB_TYPE_L0;
// Current MB and co-located MB have different frame/field coding:
// re-derive the co-located pointers for the matching parity.
1033 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1034 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1035 int mb_types_col[2];
1036 int b8_stride = h->b8_stride;
1037 int b4_stride = h->b_stride;
1039 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1041 if(IS_INTERLACED(*mb_type)){
1042 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1043 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1045 l1ref0 -= 2*b8_stride;
1046 l1ref1 -= 2*b8_stride;
1047 l1mv0 -= 4*b4_stride;
1048 l1mv1 -= 4*b4_stride;
// Field MB referencing a frame-coded co-located pair: choose the field
// whose POC is closest to the current picture.
1053 int cur_poc = s->current_picture_ptr->poc;
1054 int *col_poc = h->ref_list[1]->field_poc;
1055 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1056 int dy = 2*col_parity - (s->mb_y&1);
1058 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1059 l1ref0 += dy*b8_stride;
1060 l1ref1 += dy*b8_stride;
1061 l1mv0 += 2*dy*b4_stride;
1062 l1mv1 += 2*dy*b4_stride;
// 8x8-partition path: each partition inherits the predicted refs/MVs, but
// collapses to (0,0)-ish MVs when the co-located block is a "moving-free"
// ref-0 block (|mv| <= 1), per the spatial direct rules.
1066 for(i8=0; i8<4; i8++){
1069 int xy8 = x8+y8*b8_stride;
1070 int xy4 = 3*x8+y8*b4_stride;
1073 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1075 h->sub_mb_type[i8] = sub_mb_type;
1077 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1078 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1079 if(!IS_INTRA(mb_types_col[y8])
1080 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1081 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1083 a= pack16to32(mv[0][0],mv[0][1]);
1085 b= pack16to32(mv[1][0],mv[1][1]);
1087 a= pack16to32(mv[0][0],mv[0][1]);
1088 b= pack16to32(mv[1][0],mv[1][1]);
1090 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1091 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// 16x16 path: same rule applied once for the whole macroblock.
1093 }else if(IS_16X16(*mb_type)){
1096 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1097 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1098 if(!IS_INTRA(mb_type_col)
1099 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1100 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <= 33 had a bug in this case; the check keeps bit-exactness
// with streams produced by those encoders.
1101 && (h->x264_build>33 || !h->x264_build)))){
1103 a= pack16to32(mv[0][0],mv[0][1]);
1105 b= pack16to32(mv[1][0],mv[1][1]);
1107 a= pack16to32(mv[0][0],mv[0][1]);
1108 b= pack16to32(mv[1][0],mv[1][1]);
1110 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1111 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// Remaining (per-8x8, possibly per-4x4) spatial path.
1113 for(i8=0; i8<4; i8++){
1114 const int x8 = i8&1;
1115 const int y8 = i8>>1;
1117 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1119 h->sub_mb_type[i8] = sub_mb_type;
1121 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1122 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1123 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1124 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1127 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1128 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1129 && (h->x264_build>33 || !h->x264_build)))){
// Use list-0 colocated MVs when its ref is 0, else list-1's.
1130 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1131 if(IS_SUB_8X8(sub_mb_type)){
1132 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1133 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1135 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1137 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1140 for(i4=0; i4<4; i4++){
1141 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1142 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1144 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1146 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ---- Temporal direct prediction ----
// List-0 MV is the co-located MV scaled by POC distance; list-1 MV is the
// residual (mv_l0 - mv_col).
1152 }else{ /* direct temporal mv pred */
1153 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1154 const int *dist_scale_factor = h->dist_scale_factor;
1157 if(IS_INTERLACED(*mb_type)){
1158 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1159 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1160 dist_scale_factor = h->dist_scale_factor_field;
1162 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1163 /* FIXME assumes direct_8x8_inference == 1 */
1164 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1165 int mb_types_col[2];
1168 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1169 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1170 | (*mb_type & MB_TYPE_INTERLACED);
1171 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1173 if(IS_INTERLACED(*mb_type)){
1174 /* frame to field scaling */
1175 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1176 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1178 l1ref0 -= 2*h->b8_stride;
1179 l1ref1 -= 2*h->b8_stride;
1180 l1mv0 -= 4*h->b_stride;
1181 l1mv1 -= 4*h->b_stride;
1185 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1186 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1188 *mb_type |= MB_TYPE_16x8;
1190 *mb_type |= MB_TYPE_8x8;
1192 /* field to frame scaling */
1193 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1194 * but in MBAFF, top and bottom POC are equal */
1195 int dy = (s->mb_y&1) ? 1 : 2;
1197 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1198 l1ref0 += dy*h->b8_stride;
1199 l1ref1 += dy*h->b8_stride;
1200 l1mv0 += 2*dy*h->b_stride;
1201 l1mv1 += 2*dy*h->b_stride;
1204 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1206 *mb_type |= MB_TYPE_16x16;
1208 *mb_type |= MB_TYPE_8x8;
// Temporal direct with frame/field mismatch: per-8x8 scaling with y_shift.
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// Intra co-located block: direct MVs and refs are all zero.
1222 if(IS_INTRA(mb_types_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1231 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
// (scale*mv + 128) >> 8 is the spec's temporal MV scaling.
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1252 /* one-to-one mv scaling */
// Temporal direct, matching frame/field coding: 16x16 fast path …
1254 if(IS_16X16(*mb_type)){
1257 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1258 if(IS_INTRA(mb_type_col)){
1261 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1262 : map_col_to_list0[1][l1ref1[0]];
1263 const int scale = dist_scale_factor[ref0];
1264 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1266 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1267 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1269 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1270 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1272 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1273 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1274 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// … otherwise per-8x8 (and per-4x4 for 4x4 sub-partitions).
1276 for(i8=0; i8<4; i8++){
1277 const int x8 = i8&1;
1278 const int y8 = i8>>1;
1280 const int16_t (*l1mv)[2]= l1mv0;
1282 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1284 h->sub_mb_type[i8] = sub_mb_type;
1285 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1286 if(IS_INTRA(mb_type_col)){
1287 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1288 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1289 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1293 ref0 = l1ref0[x8 + y8*h->b8_stride];
1295 ref0 = map_col_to_list0[0][ref0];
1297 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1300 scale = dist_scale_factor[ref0];
1302 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1303 if(IS_SUB_8X8(sub_mb_type)){
1304 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1305 int mx = (scale * mv_col[0] + 128) >> 8;
1306 int my = (scale * mv_col[1] + 128) >> 8;
1307 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1308 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1310 for(i4=0; i4<4; i4++){
1311 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1312 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1313 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1314 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1315 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1316 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data from the decode caches
 * (mv_cache / ref_cache / mvd_cache) back into the current Picture's
 * persistent arrays, and records the direct-mode flags for CABAC B slices.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1323 static inline void write_back_motion(H264Context *h, int mb_type){
1324 MpegEncContext * const s = &h->s;
1325 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1326 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// Mark list 0 unused so deblocking/direct prediction of later MBs sees it.
1329 if(!USES_LIST(mb_type, 0))
1330 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1332 for(list=0; list<h->list_count; list++){
1334 if(!USES_LIST(mb_type, list))
// 64-bit copies move two 16-bit MV pairs (8 bytes) per statement.
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1341 if( h->pps.cabac ) {
// Skipped MBs have no residual MVDs; zero the table instead of copying.
1342 if(IS_SKIP(mb_type))
1343 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1346 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1347 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// One reference index per 8x8 block (four per macroblock).
1352 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1353 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1354 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1355 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1356 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// CABAC B slices need the per-8x8 direct flags as context for later MBs.
1360 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1361 if(IS_8X8(mb_type)){
1362 uint8_t *direct_table = &h->direct_table[b8_xy];
1363 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1364 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1365 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1371 * Decodes a network abstraction layer unit.
1372 * @param consumed is the number of bytes used as input
1373 * @param length is the length of the array
1374 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1375 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Parses the NAL header byte, then removes 00 00 03 emulation-prevention
 * sequences from the payload. Returns src+1 directly when no escape bytes
 * are present; otherwise unescapes into h->rbsp_buffer and returns that.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1379 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1382 // src[0]&0x80; //forbidden bit
1383 h->nal_ref_idc= src[0]>>5;
1384 h->nal_unit_type= src[0]&0x1F;
1388 for(i=0; i<length; i++)
1389 printf("%2X ", src[i]);
// Fast scan: find the first 00 00 0[0-3] pattern (escape or start code).
1391 for(i=0; i+1<length; i+=2){
1392 if(src[i]) continue;
1393 if(i>0 && src[i-1]==0) i--;
1394 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1396 /* startcode, so we must be past the end */
// No escape sequence found: the payload can be used in place.
1403 if(i>=length-1){ //no escaped 0
1404 *dst_length= length;
1405 *consumed= length+1; //+1 for the header
// Data partition C payloads get their own buffer so partitions A/B stay valid.
1409 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1410 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1411 dst= h->rbsp_buffer[bufidx];
1417 //printf("decoding esc\n");
1420 //remove escapes (very rare 1:2^22)
1421 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1422 if(src[si+2]==3){ //escape
1427 }else //next start code
1431 dst[di++]= src[si++];
1435 *consumed= si + 1;//+1 for the header
1436 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1441 * identifies the exact end of the bitstream
1442 * @return the length of the trailing, or 0 if damaged
// Locates the rbsp_stop_one_bit to find the exact bitstream end.
// NOTE(review): most of this function's body is elided in this listing.
1444 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1448 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1458 * IDCT transforms the 16 dc values and dequantizes them.
1459 * @param qp quantization parameter
/*
 * 4x4 Hadamard inverse transform + dequantization of the 16 luma DC
 * coefficients (Intra16x16). Operates in place on `block`, whose DC values
 * sit at the strided x_offset/y_offset positions within the 16x16 layout.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1461 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1464 int temp[16]; //FIXME check if this is a good idea
1465 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1466 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1468 //memset(block, 64, 2*256);
// Horizontal butterfly pass into temp[].
1471 const int offset= y_offset[i];
1472 const int z0= block[offset+stride*0] + block[offset+stride*4];
1473 const int z1= block[offset+stride*0] - block[offset+stride*4];
1474 const int z2= block[offset+stride*1] - block[offset+stride*5];
1475 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Vertical butterfly pass; dequantize with rounding (+128 >> 8).
1484 const int offset= x_offset[i];
1485 const int z0= temp[4*0+i] + temp[4*2+i];
1486 const int z1= temp[4*0+i] - temp[4*2+i];
1487 const int z2= temp[4*1+i] - temp[4*3+i];
1488 const int z3= temp[4*1+i] + temp[4*3+i];
1490 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1491 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1492 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1493 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1499 * DCT transforms the 16 dc values.
1500 * @param qp quantization parameter ??? FIXME
/*
 * Forward 4x4 Hadamard transform of the 16 luma DC coefficients (encoder
 * side counterpart of h264_luma_dc_dequant_idct_c). In-place, result is
 * halved (>>1) per the spec's forward DC transform scaling.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1502 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1503 // const int qmul= dequant_coeff[qp][0];
1505 int temp[16]; //FIXME check if this is a good idea
1506 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1507 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// First butterfly pass into temp[].
1510 const int offset= y_offset[i];
1511 const int z0= block[offset+stride*0] + block[offset+stride*4];
1512 const int z1= block[offset+stride*0] - block[offset+stride*4];
1513 const int z2= block[offset+stride*1] - block[offset+stride*5];
1514 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Second butterfly pass, writing back with a >>1 normalization.
1523 const int offset= x_offset[i];
1524 const int z0= temp[4*0+i] + temp[4*2+i];
1525 const int z1= temp[4*0+i] - temp[4*2+i];
1526 const int z2= temp[4*1+i] - temp[4*3+i];
1527 const int z3= temp[4*1+i] + temp[4*3+i];
1529 block[stride*0 +offset]= (z0 + z3)>>1;
1530 block[stride*2 +offset]= (z1 + z2)>>1;
1531 block[stride*8 +offset]= (z1 - z2)>>1;
1532 block[stride*10+offset]= (z0 - z3)>>1;
/*
 * 2x2 inverse transform + dequantization of the 4 chroma DC coefficients,
 * in place. The 2x2 DC values live at stride/xStride spacings inside the
 * chroma block layout.
 * NOTE(review): declarations of a..e appear elided from this listing.
 */
1540 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1541 const int stride= 16*2;
1542 const int xStride= 16;
1545 a= block[stride*0 + xStride*0];
1546 b= block[stride*0 + xStride*1];
1547 c= block[stride*1 + xStride*0];
1548 d= block[stride*1 + xStride*1];
// 2x2 butterfly combined with dequant; >>7 per the chroma DC scaling.
1555 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1556 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1557 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1558 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/*
 * Forward 2x2 transform of the 4 chroma DC coefficients (encoder side),
 * in place, without quantization.
 * NOTE(review): declarations of a..e appear elided from this listing.
 */
1562 static void chroma_dc_dct_c(DCTELEM *block){
1563 const int stride= 16*2;
1564 const int xStride= 16;
1567 a= block[stride*0 + xStride*0];
1568 b= block[stride*0 + xStride*1];
1569 c= block[stride*1 + xStride*0];
1570 d= block[stride*1 + xStride*1];
// Plain 2x2 butterfly; no scaling on the forward path here.
1577 block[stride*0 + xStride*0]= (a+c);
1578 block[stride*0 + xStride*1]= (e+b);
1579 block[stride*1 + xStride*0]= (a-c);
1580 block[stride*1 + xStride*1]= (e-b);
1585 * gets the chroma qp.
// Maps a luma qscale to the chroma QP for chroma plane t (Cb/Cr can have
// separate offset tables via pps.chroma_qp_table).
1587 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1588 return h->pps.chroma_qp_table[t][qscale];
1591 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1592 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/*
 * Quantizes a block of coefficients (encoder path). Thresholding skips the
 * costly division-free quant for coefficients that round to zero; the DC
 * coefficient gets special shift handling when separate_dc is set.
 * Returns the index of the last nonzero coefficient.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1593 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1595 const int * const quant_table= quant_coeff[qscale];
// Intra rounding bias is 1/3, inter 1/6 — standard H.264 encoder choice.
1596 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1597 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1598 const unsigned int threshold2= (threshold1<<1);
// DC path variant with a 2-bit smaller shift (luma DC after Hadamard).
1604 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1605 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1606 const unsigned int dc_threshold2= (dc_threshold1<<1);
1608 int level= block[0]*quant_coeff[qscale+18][0];
// Unsigned-range trick: true iff |level| exceeds the zero threshold.
1609 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1611 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1614 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1617 // last_non_zero = i;
// DC path variant with a 1-bit larger shift.
1622 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1623 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1624 const unsigned int dc_threshold2= (dc_threshold1<<1);
1626 int level= block[0]*quant_table[0];
1627 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1629 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1632 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1635 // last_non_zero = i;
// AC coefficients in scan order.
1648 const int j= scantable[i];
1649 int level= block[j]*quant_table[j];
1651 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1652 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1653 if(((unsigned)(level+threshold1))>threshold2){
1655 level= (bias + level)>>QUANT_SHIFT;
1658 level= (bias - level)>>QUANT_SHIFT;
1667 return last_non_zero;
/*
 * Motion compensation for one partition in one prediction direction (list).
 * Computes quarter-pel luma / eighth-pel chroma source positions from the
 * cached MV, falls back to ff_emulated_edge_mc when the reference area
 * crosses the picture border, then applies qpix_op / chroma_op.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1670 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1671 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1672 int src_x_offset, int src_y_offset,
1673 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1674 MpegEncContext * const s = &h->s;
// MVs are in quarter-pel units; offsets arrive in full-pel*8 (see callers).
1675 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1676 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1677 const int luma_xy= (mx&3) + ((my&3)<<2);
1678 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1679 uint8_t * src_cb, * src_cr;
1680 int extra_width= h->emu_edge_width;
1681 int extra_height= h->emu_edge_height;
1683 const int full_mx= mx>>2;
1684 const int full_my= my>>2;
1685 const int pic_width = 16*s->mb_width;
1686 const int pic_height = 16*s->mb_height >> MB_FIELD;
1688 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
// Sub-pel interpolation reads up to 3 extra pixels on each side.
1691 if(mx&7) extra_width -= 3;
1692 if(my&7) extra_height -= 3;
1694 if( full_mx < 0-extra_width
1695 || full_my < 0-extra_height
1696 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1697 || full_my + 16/*FIXME*/ > pic_height + extra_height){
// Reference block crosses the edge: interpolate from a padded copy.
1698 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1699 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1703 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1705 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1708 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1711 // chroma offset when predicting from a field of opposite parity
1712 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1713 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1715 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1716 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1719 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1720 src_cb= s->edge_emu_buffer;
1722 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1725 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1726 src_cr= s->edge_emu_buffer;
1728 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/*
 * Unweighted motion compensation for one partition: list-0 prediction with
 * the "put" functions, then (for bipred) list-1 blended on top with the
 * "avg" functions (chroma_op/qpix_op are switched between the two calls).
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1731 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1732 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1733 int x_offset, int y_offset,
1734 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1735 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1736 int list0, int list1){
1737 MpegEncContext * const s = &h->s;
1738 qpel_mc_func *qpix_op= qpix_put;
1739 h264_chroma_mc_func chroma_op= chroma_put;
// Offsets are in chroma-sample units; luma uses twice the step.
1741 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1742 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1743 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1744 x_offset += 8*s->mb_x;
1745 y_offset += 8*(s->mb_y >> MB_FIELD);
1748 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1749 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1750 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1751 qpix_op, chroma_op);
// After list 0, switch to averaging ops so list 1 blends rather than overwrites.
1754 chroma_op= chroma_avg;
1758 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1759 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1760 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1761 qpix_op, chroma_op);
/*
 * Weighted-prediction motion compensation for one partition. Bidirectional:
 * both references are predicted into separate buffers (dest + scratchpad)
 * and combined with biweight functions — implicit (use_weight==2) or
 * explicit per-list weights/offsets. Unidirectional: single prediction
 * followed by an in-place weight op.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1765 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1766 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1767 int x_offset, int y_offset,
1768 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1769 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1770 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1771 int list0, int list1){
1772 MpegEncContext * const s = &h->s;
1774 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1775 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1776 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1777 x_offset += 8*s->mb_x;
1778 y_offset += 8*(s->mb_y >> MB_FIELD);
1781 /* don't optimize for luma-only case, since B-frames usually
1782 * use implicit weights => chroma too. */
// Scratchpad layout: cb at 0, cr at +8, y after the chroma rows.
1783 uint8_t *tmp_cb = s->obmc_scratchpad;
1784 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1785 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1786 int refn0 = h->ref_cache[0][ scan8[n] ];
1787 int refn1 = h->ref_cache[1][ scan8[n] ];
1789 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1790 dest_y, dest_cb, dest_cr,
1791 x_offset, y_offset, qpix_put, chroma_put);
1792 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1793 tmp_y, tmp_cb, tmp_cr,
1794 x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: weights sum to 64, denom is 5 (log2 64 - 1).
1796 if(h->use_weight == 2){
1797 int weight0 = h->implicit_weight[refn0][refn1];
1798 int weight1 = 64 - weight0;
1799 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1801 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// Explicit weighting: per-list, per-reference weights and offsets.
1803 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1804 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1805 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1806 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1807 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1808 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1809 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1810 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1811 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Unidirectional weighted prediction.
1814 int list = list1 ? 1 : 0;
1815 int refn = h->ref_cache[list][ scan8[n] ];
1816 Picture *ref= &h->ref_list[list][refn];
1817 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1818 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1819 qpix_put, chroma_put);
1821 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1822 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1823 if(h->use_weight_chroma){
1824 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1825 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1826 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/*
 * Dispatch for one partition's motion compensation: weighted path when
 * explicit weights are in use, or when implicit bipred weights differ from
 * the trivial 32/32 split; plain put/avg path otherwise.
 */
1832 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1833 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1834 int x_offset, int y_offset,
1835 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1836 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1837 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1838 int list0, int list1){
// implicit weight 32 means an equal-weight average — the std path is identical
// and cheaper in that case.
1839 if((h->use_weight==2 && list0 && list1
1840 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1841 || h->use_weight==1)
1842 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1843 x_offset, y_offset, qpix_put, chroma_put,
1844 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1846 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1847 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/*
 * Issues cache prefetches into the reference picture for the MV of the
 * current MB's first block, offset several macroblocks ahead.
 * NOTE(review): listing appears to have lines elided (e.g. the refn
 * validity check); code left untouched.
 */
1850 static inline void prefetch_motion(H264Context *h, int list){
1851 /* fetch pixels for estimated mv 4 macroblocks ahead
1852 * optimized for 64byte cache lines */
1853 MpegEncContext * const s = &h->s;
1854 const int refn = h->ref_cache[list][scan8[0]];
1856 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1857 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1858 uint8_t **src= h->ref_list[list][refn].data;
1859 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1860 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Cb and Cr planes are assumed contiguous: one prefetch covers both via the
// src[2]-src[1] stride — TODO confirm this layout assumption holds for all
// pixel formats used here.
1861 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1862 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Top-level inter prediction for one macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with 8x4, 4x8, 4x4 sub-partitions) and calls
 * mc_part with the function-pointer tables sized for each partition.
 * NOTE(review): listing appears to have lines elided; code left untouched.
 */
1866 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1867 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1868 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1869 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1870 MpegEncContext * const s = &h->s;
1871 const int mb_xy= h->mb_xy;
1872 const int mb_type= s->current_picture.mb_type[mb_xy];
1874 assert(IS_INTER(mb_type));
// Prefetch list 0 before the work, list 1 after (see end of function).
1876 prefetch_motion(h, 0);
1878 if(IS_16X16(mb_type)){
1879 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1880 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1881 &weight_op[0], &weight_avg[0],
1882 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1883 }else if(IS_16X8(mb_type)){
// Two 16x8 halves; delta=8 advances to the lower half inside mc_dir_part.
1884 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1885 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1886 &weight_op[1], &weight_avg[1],
1887 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1888 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1889 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1890 &weight_op[1], &weight_avg[1],
1891 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1892 }else if(IS_8X16(mb_type)){
// Two 8x16 halves; delta is a row stride so the second op lands below.
1893 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1894 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1895 &weight_op[2], &weight_avg[2],
1896 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1897 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1898 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1899 &weight_op[2], &weight_avg[2],
1900 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1904 assert(IS_8X8(mb_type));
// 8x8 partitions, each with its own sub-partitioning.
1907 const int sub_mb_type= h->sub_mb_type[i];
1909 int x_offset= (i&1)<<2;
1910 int y_offset= (i&2)<<1;
1912 if(IS_SUB_8X8(sub_mb_type)){
1913 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1914 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1915 &weight_op[3], &weight_avg[3],
1916 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1917 }else if(IS_SUB_8X4(sub_mb_type)){
1918 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1919 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1920 &weight_op[4], &weight_avg[4],
1921 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1922 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1923 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1924 &weight_op[4], &weight_avg[4],
1925 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1926 }else if(IS_SUB_4X8(sub_mb_type)){
1927 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1928 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1929 &weight_op[5], &weight_avg[5],
1930 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1931 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1932 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1933 &weight_op[5], &weight_avg[5],
1934 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1937 assert(IS_SUB_4X4(sub_mb_type));
1939 int sub_x_offset= x_offset + 2*(j&1);
1940 int sub_y_offset= y_offset + (j&2);
1941 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1942 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1943 &weight_op[6], &weight_avg[6],
1944 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1950 prefetch_motion(h, 1);
/*
 * One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants), guarded by a static flag.
 * NOTE(review): listing appears to have lines elided (the done-flag check
 * and loop headers); code left untouched.
 */
1953 static av_cold void decode_init_vlc(void){
1954 static int done = 0;
1960 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1961 &chroma_dc_coeff_token_len [0], 1, 1,
1962 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1965 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1966 &coeff_token_len [i][0], 1, 1,
1967 &coeff_token_bits[i][0], 1, 1, 1);
1971 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1972 &chroma_dc_total_zeros_len [i][0], 1, 1,
1973 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1975 for(i=0; i<15; i++){
1976 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1977 &total_zeros_len [i][0], 1, 1,
1978 &total_zeros_bits[i][0], 1, 1, 1);
1982 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1983 &run_len [i][0], 1, 1,
1984 &run_bits[i][0], 1, 1, 1);
// run_before for zeros_left > 6 uses a single separate table.
1986 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1987 &run_len [6][0], 1, 1,
1988 &run_bits[6][0], 1, 1, 1);
/**
 * Free every table allocated by alloc_tables()/context_init(), plus the
 * stored SPS/PPS buffers and the per-thread border/scratch buffers.
 */
1992 static void free_tables(H264Context *h){
1995 av_freep(&h->intra4x4_pred_mode);
1996 av_freep(&h->chroma_pred_mode_table);
1997 av_freep(&h->cbp_table);
1998 av_freep(&h->mvd_table[0]);
1999 av_freep(&h->mvd_table[1]);
2000 av_freep(&h->direct_table);
2001 av_freep(&h->non_zero_count);
2002 av_freep(&h->slice_table_base);
/* slice_table is an offset alias into slice_table_base; clear it too */
2003 h->slice_table= NULL;
2005 av_freep(&h->mb2b_xy);
2006 av_freep(&h->mb2b8_xy);
2008 for(i = 0; i < MAX_SPS_COUNT; i++)
2009 av_freep(h->sps_buffers + i);
2011 for(i = 0; i < MAX_PPS_COUNT; i++)
2012 av_freep(h->pps_buffers + i);
/* per-thread buffers are owned by each slice-thread context */
2014 for(i = 0; i < h->s.avctx->thread_count; i++) {
2015 hx = h->thread_context[i];
2017 av_freep(&hx->top_borders[1]);
2018 av_freep(&hx->top_borders[0]);
2019 av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization coefficients for all 52 QP values,
 * applying the PPS 8x8 scaling matrices.  If the intra and inter matrices
 * are identical, table [1] aliases table [0] to avoid duplicate work.
 */
2023 static void init_dequant8_coeff_table(H264Context *h){
/* store transposed when a SIMD idct (non-C) expects transposed input */
2025 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2026 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2027 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2029 for(i=0; i<2; i++ ){
/* identical scaling matrices -> share one buffer */
2030 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2031 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2035 for(q=0; q<52; q++){
/* split qp into qp/6 (shift) and qp%6 (base-table index) */
2036 int shift = ff_div6[q];
2037 int idx = ff_rem6[q];
2039 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2040 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2041 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization coefficients for all 52 QP values and
 * all 6 scaling-matrix slots, sharing buffers between slots whose PPS
 * scaling matrices compare equal.
 */
2046 static void init_dequant4_coeff_table(H264Context *h){
/* store transposed when a SIMD idct (non-C) expects transposed input */
2048 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2049 for(i=0; i<6; i++ ){
2050 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier slot j with an identical scaling matrix */
2052 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2060 for(q=0; q<52; q++){
/* split qp into qp/6 (shift, +2 for the 4x4 norm) and qp%6 (index) */
2061 int shift = ff_div6[q] + 2;
2062 int idx = ff_rem6[q];
2064 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2065 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2066 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Build the 4x4 (and, if 8x8 transform is enabled, 8x8) dequant tables.
 * With transform bypass, QP 0 entries are forced to the flat value 1<<6.
 */
2071 static void init_dequant_tables(H264Context *h){
2073 init_dequant4_coeff_table(h);
2074 if(h->pps.transform_8x8_mode)
2075 init_dequant8_coeff_table(h);
2076 if(h->sps.transform_bypass){
2079 h->dequant4_coeff[i][0][x] = 1<<6;
2080 if(h->pps.transform_8x8_mode)
2083 h->dequant8_coeff[i][0][x] = 1<<6;
2090 * needs width/height
2092 static int alloc_tables(H264Context *h){
2093 MpegEncContext * const s = &h->s;
/* one extra MB row is allocated as guard/edge area */
2094 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2097 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2099 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2103 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2105 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2106 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 == "no slice"; slice_table is offset so neighbors above/left of
 * the first MB are still addressable */
2108 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2109 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2111 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2112 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map MB index -> first 4x4-block index and first 8x8-block index */
2113 for(y=0; y<s->mb_height; y++){
2114 for(x=0; x<s->mb_width; x++){
2115 const int mb_xy= x + y*s->mb_stride;
2116 const int b_xy = 4*x + 4*y*h->b_stride;
2117 const int b8_xy= 2*x + 2*y*h->b8_stride;
2119 h->mb2b_xy [mb_xy]= b_xy;
2120 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2124 s->obmc_scratchpad = NULL;
2126 if(!h->dequant4_coeff[0])
2127 init_dequant_tables(h);
2136 * Mimic alloc_tables(), but for every context thread.
2138 static void clone_tables(H264Context *dst, H264Context *src){
/* share pointers to the big tables owned by the master context */
2139 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2140 dst->non_zero_count = src->non_zero_count;
2141 dst->slice_table = src->slice_table;
2142 dst->cbp_table = src->cbp_table;
2143 dst->mb2b_xy = src->mb2b_xy;
2144 dst->mb2b8_xy = src->mb2b8_xy;
2145 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2146 dst->mvd_table[0] = src->mvd_table[0];
2147 dst->mvd_table[1] = src->mvd_table[1];
2148 dst->direct_table = src->direct_table;
/* scratchpad is per-thread; allocated lazily in frame_start() */
2150 dst->s.obmc_scratchpad = NULL;
2151 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2156 * Allocate buffers which are not shared amongst multiple threads.
2158 static int context_init(H264Context *h){
/* saved border pixels: 16 luma + 8 cb + 8 cr bytes per MB column */
2159 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2160 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
/* reached via CHECKED_ALLOCZ's failure path */
2164 return -1; // free_tables will clean up for us
/**
 * One-time initialization shared by the H.264 (and SVQ3) init paths:
 * copies dimensions from the AVCodecContext and sets default flat (16)
 * scaling matrices until a PPS overrides them.
 */
2167 static av_cold void common_init(H264Context *h){
2168 MpegEncContext * const s = &h->s;
2170 s->width = s->avctx->width;
2171 s->height = s->avctx->height;
2172 s->codec_id= s->avctx->codec->id;
2174 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 == dequant tables not yet built for any PPS */
2176 h->dequant_coeff_pps= -1;
2177 s->unrestricted_mv=1;
2178 s->decode=1; //FIXME
/* default scaling matrices: all 16 (flat) until the PPS supplies real ones */
2180 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2181 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up the MpegEncContext defaults, pick the
 * output pixel format and detect avcC-style ("AVC1") extradata.
 */
2184 static av_cold int decode_init(AVCodecContext *avctx){
2185 H264Context *h= avctx->priv_data;
2186 MpegEncContext * const s = &h->s;
2188 MPV_decode_defaults(s);
2193 s->out_format = FMT_H264;
2194 s->workaround_bugs= avctx->workaround_bugs;
2197 // s->decode_mb= ff_h263_decode_mb;
2198 s->quarter_sample = 1;
/* SVQ3 carries full-range (JPEG) chroma; plain H.264 uses MPEG range */
2201 if(avctx->codec_id == CODEC_ID_SVQ3)
2202 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2204 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata starting with a 1 byte indicates avcC (length-prefixed NAL)
 * packaging — presumably handled by the elided code; confirm */
2208 if(avctx->extradata_size > 0 && avctx->extradata &&
2209 *(char *)avctx->extradata == 1){
2216 h->thread_context[0] = h;
/**
 * Per-frame setup: starts the MPV frame, precomputes block offsets for
 * frame and field (MBAFF) layouts, and lazily allocates the per-thread
 * scratchpad now that linesize is known.  Returns <0 on failure.
 */
2220 static int frame_start(H264Context *h){
2221 MpegEncContext * const s = &h->s;
2224 if(MPV_frame_start(s, s->avctx) < 0)
2226 ff_er_frame_start(s);
2228 * MPV_frame_start uses pict_type to derive key_frame.
2229 * This is incorrect for H.264; IDR markings must be used.
2230 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2231 * See decode_nal_units().
2233 s->current_picture_ptr->key_frame= 0;
2235 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-coded offsets; [24..47]: field (doubled
 * stride) offsets for MBAFF macroblocks */
2237 for(i=0; i<16; i++){
2238 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2239 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2242 h->block_offset[16+i]=
2243 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2244 h->block_offset[24+16+i]=
2245 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2248 /* can't be in alloc_tables because linesize isn't known there.
2249 * FIXME: redo bipred weight to not require extra buffer? */
2250 for(i = 0; i < s->avctx->thread_count; i++)
2251 if(!h->thread_context[i]->s.obmc_scratchpad)
2252 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2254 /* some macroblocks will be accessed before they're available */
2255 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2256 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2258 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2260 // We mark the current picture as non-reference after allocating it, so
2261 // that if we break out due to an error it can be released automatically
2262 // in the next MPV_frame_start().
2263 // SVQ3 as well as most other codecs have only last/next/current and thus
2264 // get released even with set reference, besides SVQ3 and others do not
2265 // mark frames as reference later "naturally".
2266 if(s->codec_id != CODEC_ID_SVQ3)
2267 s->current_picture_ptr->reference= 0;
/* POC is filled in later per slice; INT_MAX marks it as unset */
2269 s->current_picture_ptr->field_poc[0]=
2270 s->current_picture_ptr->field_poc[1]= INT_MAX;
2271 assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the right column and bottom row of the just-decoded macroblock so
 * the deblocking filter / intra prediction of neighbors can use the
 * unfiltered pixels (left_border / top_borders).
 */
2276 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2277 MpegEncContext * const s = &h->s;
2281 src_cb -= uvlinesize;
2282 src_cr -= uvlinesize;
2284 // There are two lines saved, the line above the top macroblock of a pair,
2285 // and the line above the bottom macroblock
2286 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2287 for(i=1; i<17; i++){
2288 h->left_border[i]= src_y[15+i* linesize];
2291 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2292 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma borders are skipped in gray-only (luma) decoding mode */
2294 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2295 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2296 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2298 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2299 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2301 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchange (xchg=1) or restore (xchg=0) the saved unfiltered border
 * pixels with the picture edges around the current macroblock, so intra
 * prediction sees pre-deblocking neighbor samples.
 */
2306 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2307 MpegEncContext * const s = &h->s;
/* deblocking_filter==2: filter only within the current slice */
2314 if(h->deblocking_filter == 2) {
2316 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2317 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2319 deblock_left = (s->mb_x > 0);
2320 deblock_top = (s->mb_y > 0);
2323 src_y -= linesize + 1;
2324 src_cb -= uvlinesize + 1;
2325 src_cr -= uvlinesize + 1;
2327 #define XCHG(a,b,t,xchg)\
2334 for(i = !deblock_top; i<17; i++){
2335 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2340 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2341 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* also swap the top-right neighbor's first 8 luma pixels if it exists */
2342 if(s->mb_x+1 < s->mb_width){
2343 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2347 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2349 for(i = !deblock_top; i<9; i++){
2350 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2351 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2355 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2356 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves borders for a macroblock
 * pair (two stacked MBs), hence two saved top lines and a 34-entry left
 * border column.
 */
2361 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2362 MpegEncContext * const s = &h->s;
2365 src_y -= 2 * linesize;
2366 src_cb -= 2 * uvlinesize;
2367 src_cr -= 2 * uvlinesize;
2369 // There are two lines saved, the line above the top macroblock of a pair,
2370 // and the line above the bottom macroblock
2371 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2372 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2373 for(i=2; i<34; i++){
2374 h->left_border[i]= src_y[15+i* linesize];
2377 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2378 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2379 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2380 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
/* chroma borders are skipped in gray-only (luma) decoding mode */
2382 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2383 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2384 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2385 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2386 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2387 for(i=2; i<18; i++){
2388 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2389 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2391 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2392 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2393 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2394 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): exchange/restore the saved borders
 * around a macroblock pair (two top lines, 34-entry left column).
 */
2398 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2399 MpegEncContext * const s = &h->s;
2402 int deblock_left = (s->mb_x > 0);
2403 int deblock_top = (s->mb_y > 1);
2405 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2407 src_y -= 2 * linesize + 1;
2408 src_cb -= 2 * uvlinesize + 1;
2409 src_cr -= 2 * uvlinesize + 1;
2411 #define XCHG(a,b,t,xchg)\
2418 for(i = (!deblock_top)<<1; i<34; i++){
2419 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2424 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2425 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2426 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2427 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
/* also swap the top-right neighbor's first 8 luma pixels if it exists */
2428 if(s->mb_x+1 < s->mb_width){
2429 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2430 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2434 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2436 for(i = (!deblock_top) << 1; i<18; i++){
2437 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2438 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2442 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2443 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2444 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2445 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * residual idct+add for luma and chroma, then deblocking.  'simple'
 * selects the fast path compiled for plain progressive H.264 (no MBAFF,
 * no gray, no SVQ3, no PCM); the complex path handles everything else.
 * Force-inlined so the constant 'simple' folds away dead branches.
 */
2450 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2451 MpegEncContext * const s = &h->s;
2452 const int mb_x= s->mb_x;
2453 const int mb_y= s->mb_y;
2454 const int mb_xy= h->mb_xy;
2455 const int mb_type= s->current_picture.mb_type[mb_xy];
2456 uint8_t *dest_y, *dest_cb, *dest_cr;
2457 int linesize, uvlinesize /*dct_offset*/;
2459 int *block_offset = &h->block_offset[0];
2460 const unsigned int bottom = mb_y & 1;
2461 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2462 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2463 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2465 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2466 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2467 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2469 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2470 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock (MBAFF): double strides, use the field block offsets,
 * and for the bottom MB of a pair step back up into the field's rows */
2472 if (!simple && MB_FIELD) {
2473 linesize = h->mb_linesize = s->linesize * 2;
2474 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2475 block_offset = &h->block_offset[24];
2476 if(mb_y&1){ //FIXME move out of this function?
2477 dest_y -= s->linesize*15;
2478 dest_cb-= s->uvlinesize*7;
2479 dest_cr-= s->uvlinesize*7;
/* rewrite ref_cache so field references distinguish top/bottom parity */
2483 for(list=0; list<h->list_count; list++){
2484 if(!USES_LIST(mb_type, list))
2486 if(IS_16X16(mb_type)){
2487 int8_t *ref = &h->ref_cache[list][scan8[0]];
2488 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2490 for(i=0; i<16; i+=4){
2491 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2492 int ref = h->ref_cache[list][scan8[i]];
2494 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2500 linesize = h->mb_linesize = s->linesize;
2501 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2502 // dct_offset = s->linesize * 16;
/* choose the idct flavor: bypass (plain add), 8x8 or 4x4 */
2505 if(transform_bypass){
2507 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2508 }else if(IS_8x8DCT(mb_type)){
2509 idct_dc_add = s->dsp.h264_idct8_dc_add;
2510 idct_add = s->dsp.h264_idct8_add;
2512 idct_dc_add = s->dsp.h264_idct_dc_add;
2513 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: swap in the unfiltered borders of the MB pair above */
2516 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2517 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2518 int mbt_y = mb_y&~1;
2519 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2520 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2521 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2522 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were stored in h->mb; copy them straight out */
2525 if (!simple && IS_INTRA_PCM(mb_type)) {
2528 // The pixels are stored in h->mb array in the same order as levels,
2529 // copy them in output in the correct order.
2530 for(i=0; i<16; i++) {
2531 for (y=0; y<4; y++) {
2532 for (x=0; x<4; x++) {
2533 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2537 for(i=16; i<16+4; i++) {
2538 for (y=0; y<4; y++) {
2539 for (x=0; x<4; x++) {
2540 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2544 for(i=20; i<20+4; i++) {
2545 for (y=0; y<4; y++) {
2546 for (x=0; x<4; x++) {
2547 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra macroblock: predict, then add the residual */
2552 if(IS_INTRA(mb_type)){
2553 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2554 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2556 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2557 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2558 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2561 if(IS_INTRA4x4(mb_type)){
2562 if(simple || !s->encoding){
2563 if(IS_8x8DCT(mb_type)){
2564 for(i=0; i<16; i+=4){
2565 uint8_t * const ptr= dest_y + block_offset[i];
2566 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2567 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2568 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2569 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set: use the cheap DC-only idct */
2571 if(nnz == 1 && h->mb[i*16])
2572 idct_dc_add(ptr, h->mb + i*16, linesize);
2574 idct_add(ptr, h->mb + i*16, linesize);
2578 for(i=0; i<16; i++){
2579 uint8_t * const ptr= dest_y + block_offset[i];
2581 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* when the top-right block is unavailable, replicate the last
 * available top pixel as required by the spec */
2584 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2585 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2586 assert(mb_y || linesize <= block_offset[i]);
2587 if(!topright_avail){
2588 tr= ptr[3 - linesize]*0x01010101;
2589 topright= (uint8_t*) &tr;
2591 topright= ptr + 4 - linesize;
2595 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2596 nnz = h->non_zero_count_cache[ scan8[i] ];
2599 if(nnz == 1 && h->mb[i*16])
2600 idct_dc_add(ptr, h->mb + i*16, linesize);
2602 idct_add(ptr, h->mb + i*16, linesize);
2604 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: one full-MB prediction plus a separate DC transform */
2609 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2611 if(!transform_bypass)
2612 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2614 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2616 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2617 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation */
2619 hl_motion(h, dest_y, dest_cb, dest_cr,
2620 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2621 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2622 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual (intra16x16 residual was not added above) */
2626 if(!IS_INTRA4x4(mb_type)){
2628 if(IS_INTRA16x16(mb_type)){
2629 for(i=0; i<16; i++){
2630 if(h->non_zero_count_cache[ scan8[i] ])
2631 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2632 else if(h->mb[i*16])
2633 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2636 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2637 for(i=0; i<16; i+=di){
2638 int nnz = h->non_zero_count_cache[ scan8[i] ];
2640 if(nnz==1 && h->mb[i*16])
2641 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2643 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2648 for(i=0; i<16; i++){
2649 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2650 uint8_t * const ptr= dest_y + block_offset[i];
2651 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: dequant the 2x2 DC blocks first, then add AC */
2657 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2658 uint8_t *dest[2] = {dest_cb, dest_cr};
2659 if(transform_bypass){
2660 idct_add = idct_dc_add = s->dsp.add_pixels4;
2662 idct_add = s->dsp.h264_idct_add;
2663 idct_dc_add = s->dsp.h264_idct_dc_add;
2664 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2665 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2668 for(i=16; i<16+8; i++){
2669 if(h->non_zero_count_cache[ scan8[i] ])
2670 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2671 else if(h->mb[i*16])
2672 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2675 for(i=16; i<16+8; i++){
2676 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2677 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2678 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: MBAFF filters a whole MB pair once the bottom MB is done */
2684 if(h->deblocking_filter) {
2685 if (!simple && FRAME_MBAFF) {
2686 //FIXME try deblocking one mb at a time?
2687 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2688 const int mb_y = s->mb_y - 1;
2689 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2690 const int mb_xy= mb_x + mb_y*s->mb_stride;
2691 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2692 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2693 if (!bottom) return;
2694 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2695 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2696 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2698 if(IS_INTRA(mb_type_top | mb_type_bottom))
2699 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2701 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
/* temporarily step to the top MB of the pair to filter it */
2704 s->mb_y--; h->mb_xy -= s->mb_stride;
2705 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2706 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2707 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2708 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2709 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2711 s->mb_y++; h->mb_xy += s->mb_stride;
2712 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2713 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2714 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2715 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2716 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2718 tprintf(h->s.avctx, "call filter_mb\n");
2719 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2720 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2721 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2722 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2723 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2729 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple=1 compiles a fast path with all uncommon branches folded away */
2731 static void hl_decode_mb_simple(H264Context *h){
2732 hl_decode_mb_internal(h, 1);
2736 * Process a macroblock; this handles edge cases, such as interlacing.
/* av_noinline keeps the rarely-taken complex path out of the hot code */
2738 static void av_noinline hl_decode_mb_complex(H264Context *h){
2739 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple (fast) or complex
 * path depending on the stream and build-time features in use.
 */
2742 static void hl_decode_mb(H264Context *h){
2743 MpegEncContext * const s = &h->s;
2744 const int mb_xy= h->mb_xy;
2745 const int mb_type= s->current_picture.mb_type[mb_xy];
/* anything that the simple path cannot handle forces the complex path */
2746 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2747 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2749 if(ENABLE_H264_ENCODER && !s->decode)
2753 hl_decode_mb_complex(h);
2754 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in place into one of its fields: the bottom
 * field starts one line down, and both fields use doubled linesizes.
 */
2757 static void pic_as_field(Picture *pic, const int parity){
2759 for (i = 0; i < 4; ++i) {
2760 if (parity == PICT_BOTTOM_FIELD)
2761 pic->data[i] += pic->linesize[i];
2762 pic->reference = parity;
2763 pic->linesize[i] *= 2;
/**
 * Copy src into dest when src is a reference of the requested parity,
 * converting it to a field picture if needed; pic_id gets id_add added.
 * @return whether src matched the requested parity
 */
2767 static int split_field_copy(Picture *dest, Picture *src,
2768 int parity, int id_add){
2769 int match = !!(src->reference & parity);
2773 if(parity != PICT_FRAME){
2774 pic_as_field(dest, parity);
2776 dest->pic_id += id_add;
/**
 * Append pictures from 'in' to the default reference list 'def',
 * alternating between the two field parities starting with 'sel'
 * (the same-parity field is placed first for each source picture).
 * pic_id is set to the long-term index or frame_num as appropriate.
 * @return number of entries written
 */
2783 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2787 while(i[0]<len || i[1]<len){
/* advance each cursor to the next picture referencing that parity */
2788 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2790 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2793 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2794 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2797 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2798 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append pictures from src to sorted, ordered by POC relative to 'limit':
 * dir==0 selects POC > limit in ascending order, dir!=0 selects POC < limit
 * in descending order (selection sort over the short list).
 * @return number of entries appended
 */
2805 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2810 best_poc= dir ? INT_MIN : INT_MAX;
2812 for(i=0; i<len; i++){
2813 const int poc= src[i]->poc;
2814 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2816 sorted[out_i]= src[i];
/* sentinel unchanged -> no candidate found, done */
2819 if(best_poc == (dir ? INT_MIN : INT_MAX))
2821 limit= sorted[out_i++]->poc - dir;
2827 * fills the default_ref_list.
2829 static int fill_default_ref_list(H264Context *h){
2830 MpegEncContext * const s = &h->s;
/* B slices: short-term refs are sorted by POC around the current POC
 * (list0 = past then future, list1 = future then past), then long-term */
2833 if(h->slice_type_nos==FF_B_TYPE){
2834 Picture *sorted[32];
2839 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2841 cur_poc= s->current_picture_ptr->poc;
2843 for(list= 0; list<2; list++){
2844 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2845 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2847 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2848 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2851 if(len < h->ref_count[list])
2852 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* the spec requires list1 != list0; swap its first two entries if equal */
2856 if(lens[0] == lens[1] && lens[1] > 1){
2857 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2859 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* P slices: short-term refs in decoding order, then long-term */
2862 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2863 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2865 if(len < h->ref_count[0])
2866 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2869 for (i=0; i<h->ref_count[0]; i++) {
2870 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2872 if(h->slice_type_nos==FF_B_TYPE){
2873 for (i=0; i<h->ref_count[1]; i++) {
2874 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2881 static void print_short_term(H264Context *h);
2882 static void print_long_term(H264Context *h);
2885 * Extract structure information about the picture described by pic_num in
2886 * the current decoding context (frame or field). Note that pic_num is
2887 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2888 * @param pic_num picture number for which to extract structure information
2889 * @param structure one of PICT_XXX describing structure of picture
2891 * @return frame number (short term) or long term index of picture
2892 * described by pic_num
2894 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2895 MpegEncContext * const s = &h->s;
2897 *structure = s->picture_structure;
2900 /* opposite field */
2901 *structure ^= PICT_FRAME;
/**
 * Parse the ref_pic_list_reordering syntax (H.264 spec 7.3.3.1) and apply
 * it to h->ref_list, starting from the default lists.  Also triggers the
 * direct-mode scale factor / ref list initialization at the end.
 * @return 0 on success, -1 on a syntax or range error
 */
2908 static int decode_ref_pic_list_reordering(H264Context *h){
2909 MpegEncContext * const s = &h->s;
2910 int list, index, pic_structure;
2912 print_short_term(h);
2914 if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before function
2916 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering modifies it in place */
2917 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2919 if(get_bits1(&s->gb)){
2920 int pred= h->curr_pic_num;
2922 for(index=0; ; index++){
2923 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2924 unsigned int pic_id;
2926 Picture *ref = NULL;
/* idc 3 terminates the reordering loop for this list */
2928 if(reordering_of_pic_nums_idc==3)
2931 if(index >= h->ref_count[list]){
2932 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2936 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term ref addressed by a +/- pic_num delta */
2937 if(reordering_of_pic_nums_idc<2){
2938 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2941 if(abs_diff_pic_num > h->max_pic_num){
2942 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2946 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2947 else pred+= abs_diff_pic_num;
2948 pred &= h->max_pic_num - 1;
2950 frame_num = pic_num_extract(h, pred, &pic_structure);
2952 for(i= h->short_ref_count-1; i>=0; i--){
2953 ref = h->short_ref[i];
2954 assert(ref->reference);
2955 assert(!ref->long_ref);
2957 ref->frame_num == frame_num &&
2958 (ref->reference & pic_structure)
/* idc 2: long-term ref addressed by long_term_pic_idx */
2966 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2968 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2971 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2974 ref = h->long_ref[long_idx];
2975 assert(!(ref && !ref->reference));
2976 if(ref && (ref->reference & pic_structure)){
2977 ref->pic_id= pic_id;
2978 assert(ref->long_ref);
2986 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2987 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift the list down and insert the selected ref at 'index' */
2989 for(i=index; i+1<h->ref_count[list]; i++){
2990 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2993 for(; i > index; i--){
2994 h->ref_list[list][i]= h->ref_list[list][i-1];
2996 h->ref_list[list][index]= *ref;
2998 pic_as_field(&h->ref_list[list][index], pic_structure);
3002 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* ensure no list entry is left without a picture */
3008 for(list=0; list<h->list_count; list++){
3009 for(index= 0; index < h->ref_count[list]; index++){
3010 if(!h->ref_list[list][index].data[0]){
3011 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
3012 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
3017 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3018 direct_dist_scale_factor(h);
3019 direct_ref_list_init(h);
/**
 * For MBAFF: derive per-field reference entries (slots 16..) from each
 * frame reference, duplicating the luma/chroma weights and offsets so
 * field macroblocks can index them directly.
 */
3023 static void fill_mbaff_ref_list(H264Context *h){
3025 for(list=0; list<2; list++){ //FIXME try list_count
3026 for(i=0; i<h->ref_count[list]; i++){
3027 Picture *frame = &h->ref_list[list][i];
/* slots [16+2*i] and [16+2*i+1] hold the top and bottom fields */
3028 Picture *field = &h->ref_list[list][16+2*i];
3031 field[0].linesize[j] <<= 1;
3032 field[0].reference = PICT_TOP_FIELD;
3033 field[1] = field[0];
3035 field[1].data[j] += frame->linesize[j];
3036 field[1].reference = PICT_BOTTOM_FIELD;
3038 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3039 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3041 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3042 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3046 for(j=0; j<h->ref_count[1]; j++){
3047 for(i=0; i<h->ref_count[0]; i++)
3048 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3049 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3050 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the explicit prediction weight table (pred_weight_table()) from
 * the slice header bitstream: log2 weight denominators, then per-reference
 * luma/chroma weight+offset pairs gated by per-entry flags.
 * Sets h->use_weight / h->use_weight_chroma when any entry differs from the
 * default (weight == 1<<denom, offset == 0).
 * NOTE(review): gapped listing — some declarations, else-branches and braces
 * are not visible here; comments cover visible lines only.
 */
3054 static int pred_weight_table(H264Context *h){
3055 MpegEncContext * const s = &h->s;
3057 int luma_def, chroma_def;
3060 h->use_weight_chroma= 0;
3061 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3062 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is the identity under the given denominator shift.
3063 luma_def = 1<<h->luma_log2_weight_denom;
3064 chroma_def = 1<<h->chroma_log2_weight_denom;
3066 for(list=0; list<2; list++){
3067 for(i=0; i<h->ref_count[list]; i++){
3068 int luma_weight_flag, chroma_weight_flag;
// luma_weight_l0/l1_flag: explicit values follow only when set.
3070 luma_weight_flag= get_bits1(&s->gb);
3071 if(luma_weight_flag){
3072 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3073 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3074 if( h->luma_weight[list][i] != luma_def
3075 || h->luma_offset[list][i] != 0)
// (else branch) fall back to identity weighting for this entry.
3078 h->luma_weight[list][i]= luma_def;
3079 h->luma_offset[list][i]= 0;
3082 chroma_weight_flag= get_bits1(&s->gb);
3083 if(chroma_weight_flag){
// Parsed per chroma plane (loop header not visible in this gapped listing).
3086 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3087 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3088 if( h->chroma_weight[list][i][j] != chroma_def
3089 || h->chroma_offset[list][i][j] != 0)
3090 h->use_weight_chroma= 1;
3095 h->chroma_weight[list][i][j]= chroma_def;
3096 h->chroma_offset[list][i][j]= 0;
// Only B slices carry a second (list 1) weight table.
3100 if(h->slice_type_nos != FF_B_TYPE) break;
3102 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derives the implicit bi-prediction weight table from POC distances
 * (weighted_bipred_idc == 2). The 1:1 equal-distance case disables
 * weighting entirely; otherwise each (ref0, ref1) pair gets a weight from
 * the clipped distance scale factor, defaulting to 32 (i.e. 0.5) when the
 * scale factor is degenerate or out of range.
 * NOTE(review): gapped listing — some lines (e.g. the td==0 / poc0==poc1
 * guards) are not visible; comments cover visible lines only.
 */
3106 static void implicit_weight_table(H264Context *h){
3107 MpegEncContext * const s = &h->s;
3109 int cur_poc = s->current_picture_ptr->poc;
// Single refs equidistant from the current picture: implicit == average,
// so explicit weighting machinery can be bypassed.
3111 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3112 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3114 h->use_weight_chroma= 0;
// use_weight_chroma == 2 marks "implicit" mode; denominators fixed at 5
// (weights sum to 64).
3119 h->use_weight_chroma= 2;
3120 h->luma_log2_weight_denom= 5;
3121 h->chroma_log2_weight_denom= 5;
3123 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3124 int poc0 = h->ref_list[0][ref0].poc;
3125 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3126 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/dist_scale_factor per H.264 8.4.2.3.1 temporal distances.
3127 int td = av_clip(poc1 - poc0, -128, 127);
3129 int tb = av_clip(cur_poc - poc0, -128, 127);
3130 int tx = (16384 + (FFABS(td) >> 1)) / td;
3131 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Out-of-range factors fall back to the neutral weight 32 (= 64/2).
3132 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3133 h->implicit_weight[ref0][ref1] = 32;
3135 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3137 h->implicit_weight[ref0][ref1] = 32;
3143 * Mark a picture as no longer needed for reference. The refmask
3144 * argument allows unreferencing of individual fields or the whole frame.
3145 * If the picture becomes entirely unreferenced, but is being held for
3146 * display purposes, it is marked as such.
3147 * @param refmask mask of fields to unreference; the mask is bitwise
3148 * ANDed with the reference marking of pic
3149 * @return non-zero if pic becomes entirely unreferenced (except possibly
3150 * for display purposes); zero if one of the fields remains in
// Clears the refmask bits from pic->reference; if any reference bits remain
// the picture stays referenced. If it hits zero but the picture is still in
// the delayed-output queue, it is re-marked DELAYED_PIC_REF so the buffer is
// kept alive for display. Return statements are outside this gapped view —
// see the doc comment above for the documented contract.
3153 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3155 if (pic->reference &= refmask) {
// Scan the delayed-picture list for this picture (list is NULL-terminated).
3158 for(i = 0; h->delayed_pic[i]; i++)
3159 if(pic == h->delayed_pic[i]){
3160 pic->reference=DELAYED_PIC_REF;
3168 * instantaneous decoder refresh.
/**
 * Handles an IDR (instantaneous decoder refresh): drops every long-term and
 * short-term reference and resets the frame_num prediction state, so decoding
 * can restart cleanly from the IDR picture.
 */
3170 static void idr(H264Context *h){
// Release all 16 possible long-term slots (ref_mask 0 = unreference fully).
3173 for(i=0; i<16; i++){
3174 remove_long(h, i, 0);
3176 assert(h->long_ref_count==0);
// Unreference and clear every short-term entry.
3178 for(i=0; i<h->short_ref_count; i++){
3179 unreference_pic(h, h->short_ref[i], 0);
3180 h->short_ref[i]= NULL;
3182 h->short_ref_count=0;
// Reset frame_num bookkeeping used by POC/gap detection.
3183 h->prev_frame_num= 0;
3184 h->prev_frame_num_offset= 0;
3189 /* forget old pics after a seek */
/* forget old pics after a seek */
// Codec flush callback: drops every delayed-output picture, resets the
// output POC tracker, unreferences the in-progress picture, clears field
// pairing state, and finally flushes the underlying MpegEncContext.
3190 static void flush_dpb(AVCodecContext *avctx){
3191 H264Context *h= avctx->priv_data;
3193 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3194 if(h->delayed_pic[i])
// Clearing reference releases pictures held only for delayed display.
3195 h->delayed_pic[i]->reference= 0;
3196 h->delayed_pic[i]= NULL;
// INT_MIN guarantees the next decoded POC is accepted as "newer".
3198 h->outputed_poc= INT_MIN;
3200 if(h->s.current_picture_ptr)
3201 h->s.current_picture_ptr->reference= 0;
3202 h->s.first_field= 0;
3203 ff_mpeg_flush(avctx);
3207 * Find a Picture in the short term reference list by frame number.
3208 * @param frame_num frame number to search for
3209 * @param idx the index into h->short_ref where returned picture is found
3210 * undefined if no picture found.
3211 * @return pointer to the found picture, or NULL if no pic with the provided
3212 * frame number is found
// Linear search of the short-term reference list for a given frame_num.
// *idx receives the matching position (undefined if nothing matches); the
// return statements lie outside this gapped view — contract is documented
// in the comment block above this function.
3214 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3215 MpegEncContext * const s = &h->s;
3218 for(i=0; i<h->short_ref_count; i++){
3219 Picture *pic= h->short_ref[i];
// Optional MMCO debug trace of each candidate examined.
3220 if(s->avctx->debug&FF_DEBUG_MMCO)
3221 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3222 if(pic->frame_num == frame_num) {
3231 * Remove a picture from the short term reference list by its index in
3232 * that list. This does no checking on the provided index; it is assumed
3233 * to be valid. Other list entries are shifted down.
3234 * @param i index into h->short_ref of picture to remove.
// Removes short_ref[i] without any unreferencing: entry is NULLed, the
// count decremented, and the remaining tail shifted down by one slot.
// Index validity is the caller's responsibility (assert only).
3236 static void remove_short_at_index(H264Context *h, int i){
3237 assert(i >= 0 && i < h->short_ref_count);
3238 h->short_ref[i]= NULL;
// Only memmove when entries remain; count is already decremented, so
// (short_ref_count - i) is exactly the number of trailing pointers.
3239 if (--h->short_ref_count)
3240 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3245 * @return the removed picture or NULL if an error occurs
// Looks up a short-term reference by frame_num, unreferences the requested
// field(s) via ref_mask, and drops the list entry only when the picture
// becomes entirely unreferenced. Returns the picture found (return path is
// outside this gapped view; see the doc comment above).
3247 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3248 MpegEncContext * const s = &h->s;
3252 if(s->avctx->debug&FF_DEBUG_MMCO)
3253 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3255 pic = find_short(h, frame_num, &i);
// Keep the entry if a field of the pair is still referenced.
3257 if(unreference_pic(h, pic, ref_mask))
3258 remove_short_at_index(h, i);
3265 * Remove a picture from the long term reference list by its index in
3267 * @return the removed picture or NULL if an error occurs
// Unreferences field(s) of long_ref[i] per ref_mask; when the picture
// becomes entirely unreferenced the slot is vacated, its long_ref flag
// cleared, and the long-term count decremented. Returns the removed
// picture (return path outside this gapped view).
3269 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3272 pic= h->long_ref[i];
3274 if(unreference_pic(h, pic, ref_mask)){
3275 assert(h->long_ref[i]->long_ref == 1);
3276 h->long_ref[i]->long_ref= 0;
3277 h->long_ref[i]= NULL;
3278 h->long_ref_count--;
3286 * print short term list
// Debug helper: dumps the short-term reference list (index, frame_num, POC,
// data pointer) when FF_DEBUG_MMCO is enabled; otherwise a no-op.
3288 static void print_short_term(H264Context *h) {
3290 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3291 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3292 for(i=0; i<h->short_ref_count; i++){
3293 Picture *pic= h->short_ref[i];
3294 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3300 * print long term list
// Debug helper: dumps all 16 long-term slots under FF_DEBUG_MMCO; the
// per-slot NULL check sits in a gap of this listing.
3302 static void print_long_term(H264Context *h) {
3304 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3305 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3306 for(i = 0; i < 16; i++){
3307 Picture *pic= h->long_ref[i];
3309 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3316 * Executes the reference picture marking (memory management control operations).
/**
 * Executes the decoded-reference-picture marking process (H.264 8.2.5):
 * applies each MMCO opcode in order, then handles the implicit sliding-window
 * insertion of the current picture and enforces the SPS reference count
 * limit against corrupted streams.
 * NOTE(review): this is a gapped listing — several break statements, braces
 * and else-branches are not visible; comments annotate visible lines only.
 */
3318 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3319 MpegEncContext * const s = &h->s;
// Set once an MMCO has already placed the current picture in a ref list.
3321 int current_ref_assigned=0;
3324 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3325 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3327 for(i=0; i<mmco_count; i++){
3328 int structure, frame_num;
3329 if(s->avctx->debug&FF_DEBUG_MMCO)
3330 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// Opcodes that name a short-term picture: resolve pic_num to frame_num
// + field structure, then find it in the short list.
3332 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3333 || mmco[i].opcode == MMCO_SHORT2LONG){
3334 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3335 pic = find_short(h, frame_num, &j);
// Not found in short list: only an error unless it was already moved to
// the targeted long-term slot by a prior MMCO.
3337 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3338 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3339 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3344 switch(mmco[i].opcode){
3345 case MMCO_SHORT2UNUSED:
3346 if(s->avctx->debug&FF_DEBUG_MMCO)
3347 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// structure ^ PICT_FRAME: unreference the named field (or both for frames).
3348 remove_short(h, frame_num, structure ^ PICT_FRAME);
3350 case MMCO_SHORT2LONG:
// Evict any different picture occupying the target long-term slot.
3351 if (h->long_ref[mmco[i].long_arg] != pic)
3352 remove_long(h, mmco[i].long_arg, 0);
3354 remove_short_at_index(h, j);
3355 h->long_ref[ mmco[i].long_arg ]= pic;
3356 if (h->long_ref[ mmco[i].long_arg ]){
3357 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3358 h->long_ref_count++;
3361 case MMCO_LONG2UNUSED:
3362 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3363 pic = h->long_ref[j];
3365 remove_long(h, j, structure ^ PICT_FRAME);
3366 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3367 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
// (MMCO_LONG: assign current picture a long-term index; case label sits in
// a gap of this listing.)
3370 // Comment below left from previous code as it is an interesting note.
3371 /* First field in pair is in short term list or
3372 * at a different long term index.
3373 * This is not allowed; see 7.4.3, notes 2 and 3.
3374 * Report the problem and keep the pair where it is,
3375 * and mark this field valid.
3378 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3379 remove_long(h, mmco[i].long_arg, 0);
3381 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3382 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3383 h->long_ref_count++;
// The current field/frame is now referenced; skip sliding-window below.
3386 s->current_picture_ptr->reference |= s->picture_structure;
3387 current_ref_assigned=1;
3389 case MMCO_SET_MAX_LONG:
3390 assert(mmco[i].long_arg <= 16);
// just remove the long term which index is greater than new max
3391 for(j = mmco[i].long_arg; j<16; j++){
3392 remove_long(h, j, 0);
// (MMCO_RESET: full DPB reset; case label sits in a gap of this listing.)
3397 while(h->short_ref_count){
3398 remove_short(h, h->short_ref[0]->frame_num, 0);
3400 for(j = 0; j < 16; j++) {
3401 remove_long(h, j, 0);
// After a reset the current picture restarts POC/frame_num numbering at 0.
3403 s->current_picture_ptr->poc=
3404 s->current_picture_ptr->field_poc[0]=
3405 s->current_picture_ptr->field_poc[1]=
3409 s->current_picture_ptr->frame_num= 0;
3415 if (!current_ref_assigned) {
3416 /* Second field of complementary field pair; the first field of
3417 * which is already referenced. If short referenced, it
3418 * should be first entry in short_ref. If not, it must exist
3419 * in long_ref; trying to put it on the short list here is an
3420 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3422 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3423 /* Just mark the second field valid */
3424 s->current_picture_ptr->reference = PICT_FRAME;
3425 } else if (s->current_picture_ptr->long_ref) {
3426 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3427 "assignment for second field "
3428 "in complementary field pair "
3429 "(first field is long term)\n");
// Sliding-window insertion: the same frame_num must not already be present.
3431 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3433 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// Newest picture goes to the front of the short-term list.
3436 if(h->short_ref_count)
3437 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3439 h->short_ref[0]= s->current_picture_ptr;
3440 h->short_ref_count++;
3441 s->current_picture_ptr->reference |= s->picture_structure;
3445 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3447 /* We have too many reference frames, probably due to corrupted
3448 * stream. Need to discard one frame. Prevents overrun of the
3449 * short_ref and long_ref buffers.
3451 av_log(h->s.avctx, AV_LOG_ERROR,
3452 "number of reference frames exceeds max (probably "
3453 "corrupt input), discarding one\n");
// Prefer dropping a long-term ref only when no short-term refs exist;
// otherwise drop the oldest short-term (last list entry).
3455 if (h->long_ref_count && !h->short_ref_count) {
3456 for (i = 0; i < 16; ++i)
3461 remove_long(h, i, 0);
3463 pic = h->short_ref[h->short_ref_count - 1];
3464 remove_short(h, pic->frame_num, 0);
3468 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get the implicit no_output_of_prior_pics/long_term handling;
 * otherwise either explicit MMCO opcodes are read (adaptive mode) or a
 * sliding-window MMCO_SHORT2UNUSED is synthesized when the DPB is full.
 * NOTE(review): gapped listing — mmco_index updates, error returns and
 * several braces are not visible; comments cover visible lines only.
 */
3473 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3474 MpegEncContext * const s = &h->s;
3478 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// get_bits1 reads no_output_of_prior_pics_flag; the -1 maps {0,1}→{-1,0}
// for the broken_link convention used elsewhere — TODO confirm intent.
3479 s->broken_link= get_bits1(gb) -1;
// long_term_reference_flag set: keep the IDR picture as long-term index 0.
3481 h->mmco[0].opcode= MMCO_LONG;
3482 h->mmco[0].long_arg= 0;
3486 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3487 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3488 MMCOOpcode opcode= get_ue_golomb(gb);
3490 h->mmco[i].opcode= opcode;
3491 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 → absolute pic num, wrapped to max_pic_num.
3492 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3493 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3494 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3498 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3499 unsigned int long_arg= get_ue_golomb(gb);
// Long indices: 0..15 for frames, 0..31 only for LONG2UNUSED in fields.
3500 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3501 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3504 h->mmco[i].long_arg= long_arg;
3507 if(opcode > (unsigned)MMCO_LONG){
3508 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3511 if(opcode == MMCO_END)
// Sliding-window mode: if the DPB is full, emit an implicit removal of the
// oldest short-term picture (skipped for the second field of a ref pair).
3516 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3518 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3519 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3520 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3521 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3523 if (FIELD_PICTURE) {
// Field mode: pic nums are doubled and both fields must be unreferenced.
3524 h->mmco[0].short_pic_num *= 2;
3525 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3526 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) for the current picture per
 * H.264 8.2.1, handling all three poc_type modes, and stores the per-field
 * and frame-level POC into the current Picture.
 * NOTE(review): gapped listing — field_poc declaration, some else-branches
 * and the poc_type==2 details are not fully visible; comments cover visible
 * lines only.
 */
3536 static int init_poc(H264Context *h){
3537 MpegEncContext * const s = &h->s;
3538 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3540 Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture → bump the offset.
3542 h->frame_num_offset= h->prev_frame_num_offset;
3543 if(h->frame_num < h->prev_frame_num)
3544 h->frame_num_offset += max_frame_num;
3546 if(h->sps.poc_type==0){
3547 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// Standard poc_msb wrap detection around poc_lsb (8.2.1.1).
3549 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3550 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3551 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3552 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3554 h->poc_msb = h->prev_poc_msb;
3555 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3557 field_poc[1] = h->poc_msb + h->poc_lsb;
3558 if(s->picture_structure == PICT_FRAME)
3559 field_poc[1] += h->delta_poc_bottom;
3560 }else if(h->sps.poc_type==1){
3561 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3564 if(h->sps.poc_cycle_length != 0)
3565 abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures count one less into the cycle (8.2.1.2).
3569 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3572 expected_delta_per_poc_cycle = 0;
3573 for(i=0; i < h->sps.poc_cycle_length; i++)
3574 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3576 if(abs_frame_num > 0){
3577 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3578 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3580 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3581 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3582 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3586 if(h->nal_ref_idc == 0)
3587 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3589 field_poc[0] = expectedpoc + h->delta_poc[0];
3590 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3592 if(s->picture_structure == PICT_FRAME)
3593 field_poc[1] += h->delta_poc[1];
// poc_type==2: POC derived directly from frame numbering.
3595 int poc= 2*(h->frame_num_offset + h->frame_num);
// Store only the field(s) actually coded by this picture structure.
3604 if(s->picture_structure != PICT_BOTTOM_FIELD)
3605 s->current_picture_ptr->field_poc[0]= field_poc[0];
3606 if(s->picture_structure != PICT_TOP_FIELD)
3607 s->current_picture_ptr->field_poc[1]= field_poc[1];
3608 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3615 * initialize scan tables
/**
 * Initializes the zigzag/field scan tables. When the DSP context uses the
 * plain C IDCT the canonical tables are copied as-is; otherwise the entries
 * are permuted (T macros) to match the optimized IDCT's coefficient layout.
 * Also selects the _q0 table pointers used for lossless transform bypass.
 */
3617 static void init_scan_tables(H264Context *h){
3618 MpegEncContext * const s = &h->s;
3620 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3621 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3622 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3624 for(i=0; i<16; i++){
// Swap row/column nibbles of the 4x4 scan position for the SIMD IDCT order.
3625 #define T(x) (x>>2) | ((x<<2) & 0xF)
3626 h->zigzag_scan[i] = T(zigzag_scan[i]);
3627 h-> field_scan[i] = T( field_scan[i]);
3631 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3632 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3633 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3634 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3635 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3637 for(i=0; i<64; i++){
// Same idea for 8x8: transpose row/column within the 64-entry scan.
3638 #define T(x) (x>>3) | ((x&7)<<3)
3639 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3640 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3641 h->field_scan8x8[i] = T(field_scan8x8[i]);
3642 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// Bypass (QP 0 lossless) always uses the unpermuted canonical tables.
3646 if(h->sps.transform_bypass){ //FIXME same ugly
3647 h->zigzag_scan_q0 = zigzag_scan;
3648 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3649 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3650 h->field_scan_q0 = field_scan;
3651 h->field_scan8x8_q0 = field_scan8x8;
3652 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3654 h->zigzag_scan_q0 = h->zigzag_scan;
3655 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3656 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3657 h->field_scan_q0 = h->field_scan;
3658 h->field_scan8x8_q0 = h->field_scan8x8;
3659 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3664 * Replicates H264 "master" context to thread contexts.
/**
 * Replicates H264 "master" context state into a per-thread slice context:
 * current-picture pointers, strides, POC/frame_num prediction state,
 * reference lists and dequant tables. Shallow copies only — the Picture
 * and list arrays are memcpy'd, so both contexts share the same underlying
 * frame buffers.
 */
3666 static void clone_slice(H264Context *dst, H264Context *src)
3668 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3669 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3670 dst->s.current_picture = src->s.current_picture;
3671 dst->s.linesize = src->s.linesize;
3672 dst->s.uvlinesize = src->s.uvlinesize;
3673 dst->s.first_field = src->s.first_field;
// POC/frame_num prediction state must match for identical slice parsing.
3675 dst->prev_poc_msb = src->prev_poc_msb;
3676 dst->prev_poc_lsb = src->prev_poc_lsb;
3677 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3678 dst->prev_frame_num = src->prev_frame_num;
3679 dst->short_ref_count = src->short_ref_count;
3681 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3682 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3683 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3684 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3686 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3687 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3691 * decodes a slice header.
3692 * This will also call MPV_common_init() and frame_start() as needed.
3694 * @param h h264context
3695 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3697 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/**
 * Decodes a slice header (see also the doc comment above this function):
 * slice type, PPS/SPS selection, geometry (re)initialization, field/frame
 * structure and field pairing, POC inputs, reference counts, list
 * reordering, weight tables, ref-pic marking, QP and deblocking parameters.
 * NOTE(review): gapped listing — error returns, else-branches, several
 * loop headers and braces are not visible; comments annotate visible lines
 * only and the function must not be edited structurally from this view.
 */
3699 static int decode_slice_header(H264Context *h, H264Context *h0){
3700 MpegEncContext * const s = &h->s;
3701 MpegEncContext * const s0 = &h0->s;
3702 unsigned int first_mb_in_slice;
3703 unsigned int pps_id;
3704 int num_ref_idx_active_override_flag;
3705 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3706 unsigned int slice_type, tmp, i, j;
3707 int default_ref_list_done = 0;
3708 int last_pic_structure;
// Non-reference slices can be dropped and may use cheaper 2-tap qpel.
3710 s->dropable= h->nal_ref_idc == 0;
3712 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3713 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3714 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3716 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3717 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3720 first_mb_in_slice= get_ue_golomb(&s->gb);
// first_mb == 0 starts a new picture when feeding whole NALs (CHUNKS mode).
3722 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3723 h0->current_slice = 0;
3724 if (!s0->first_field)
3725 s->current_picture_ptr= NULL;
3728 slice_type= get_ue_golomb(&s->gb);
3730 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "fixed for the whole picture" (value - 5 applies).
3735 h->slice_type_fixed=1;
3737 h->slice_type_fixed=0;
3739 slice_type= slice_type_map[ slice_type ];
// Default ref list can be reused for I slices or a repeated slice type.
3740 if (slice_type == FF_I_TYPE
3741 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3742 default_ref_list_done = 1;
3744 h->slice_type= slice_type;
3745 h->slice_type_nos= slice_type & 3;
3747 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3748 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3749 av_log(h->s.avctx, AV_LOG_ERROR,
3750 "B picture before any references, skipping\n");
// --- PPS/SPS activation ---
3754 pps_id= get_ue_golomb(&s->gb);
3755 if(pps_id>=MAX_PPS_COUNT){
3756 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3759 if(!h0->pps_buffers[pps_id]) {
3760 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3763 h->pps= *h0->pps_buffers[pps_id];
3765 if(!h0->sps_buffers[h->pps.sps_id]) {
3766 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3769 h->sps = *h0->sps_buffers[h->pps.sps_id];
// Dequant tables depend on the PPS; only the master context rebuilds them.
3771 if(h == h0 && h->dequant_coeff_pps != pps_id){
3772 h->dequant_coeff_pps = pps_id;
3773 init_dequant_tables(h);
// --- geometry from the SPS ---
3776 s->mb_width= h->sps.mb_width;
3777 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3779 h->b_stride= s->mb_width*4;
3780 h->b8_stride= s->mb_width*2;
3782 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3783 if(h->sps.frame_mbs_only_flag)
3784 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3786 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3788 if (s->context_initialized
3789 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3791 return -1; // width / height changed during parallelized decoding
3795 if (!s->context_initialized) {
3797 return -1; // we cant (re-)initialize context during parallel decoding
3798 if (MPV_common_init(s) < 0)
3802 init_scan_tables(h);
// Per-thread slice contexts: H264Context head copied fresh, MpegEncContext
// part taken from the already-initialized thread contexts.
3805 for(i = 1; i < s->avctx->thread_count; i++) {
3807 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3808 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3809 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3812 init_scan_tables(c);
3816 for(i = 0; i < s->avctx->thread_count; i++)
3817 if(context_init(h->thread_context[i]) < 0)
3820 s->avctx->width = s->width;
3821 s->avctx->height = s->height;
3822 s->avctx->sample_aspect_ratio= h->sps.sar;
3823 if(!s->avctx->sample_aspect_ratio.den)
3824 s->avctx->sample_aspect_ratio.den = 1;
3826 if(h->sps.timing_info_present_flag){
3827 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Workaround for old x264 builds that wrote half the timescale.
3828 if(h->x264_build > 0 && h->x264_build < 44)
3829 s->avctx->time_base.den *= 2;
3830 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3831 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3835 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// --- picture structure (frame / field / MBAFF) ---
3838 h->mb_aff_frame = 0;
3839 last_pic_structure = s0->picture_structure;
3840 if(h->sps.frame_mbs_only_flag){
3841 s->picture_structure= PICT_FRAME;
3843 if(get_bits1(&s->gb)) { //field_pic_flag
3844 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3846 s->picture_structure= PICT_FRAME;
3847 h->mb_aff_frame = h->sps.mb_aff;
3851 if(h0->current_slice == 0){
// Conceal frame_num gaps by synthesizing skipped reference frames.
3852 while(h->frame_num != h->prev_frame_num &&
3853 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3854 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3856 h->prev_frame_num++;
3857 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3858 s->current_picture_ptr->frame_num= h->prev_frame_num;
3859 execute_ref_pic_marking(h, NULL, 0);
3862 /* See if we have a decoded first field looking for a pair... */
3863 if (s0->first_field) {
3864 assert(s0->current_picture_ptr);
3865 assert(s0->current_picture_ptr->data[0]);
3866 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3868 /* figure out if we have a complementary field pair */
3869 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3871 * Previous field is unmatched. Don't display it, but let it
3872 * remain for reference if marked as such.
3874 s0->current_picture_ptr = NULL;
3875 s0->first_field = FIELD_PICTURE;
3878 if (h->nal_ref_idc &&
3879 s0->current_picture_ptr->reference &&
3880 s0->current_picture_ptr->frame_num != h->frame_num) {
3882 * This and previous field were reference, but had
3883 * different frame_nums. Consider this field first in
3884 * pair. Throw away previous field except for reference
3887 s0->first_field = 1;
3888 s0->current_picture_ptr = NULL;
3891 /* Second field in complementary pair */
3892 s0->first_field = 0;
3897 /* Frame or first field in a potentially complementary pair */
3898 assert(!s0->current_picture_ptr);
3899 s0->first_field = FIELD_PICTURE;
// Allocate a new frame unless we are completing a field pair.
3902 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3903 s0->first_field = 0;
3910 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3912 assert(s->mb_num == s->mb_width * s->mb_height);
3913 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3914 first_mb_in_slice >= s->mb_num){
3915 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3918 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3919 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3920 if (s->picture_structure == PICT_BOTTOM_FIELD)
3921 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3922 assert(s->mb_y < s->mb_height);
// Field pic nums use doubled numbering (2n+1) and one extra bit of range.
3924 if(s->picture_structure==PICT_FRAME){
3925 h->curr_pic_num= h->frame_num;
3926 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3928 h->curr_pic_num= 2*h->frame_num + 1;
3929 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3932 if(h->nal_unit_type == NAL_IDR_SLICE){
3933 get_ue_golomb(&s->gb); /* idr_pic_id */
// --- POC syntax elements (consumed later by init_poc) ---
3936 if(h->sps.poc_type==0){
3937 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3939 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3940 h->delta_poc_bottom= get_se_golomb(&s->gb);
3944 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3945 h->delta_poc[0]= get_se_golomb(&s->gb);
3947 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3948 h->delta_poc[1]= get_se_golomb(&s->gb);
3953 if(h->pps.redundant_pic_cnt_present){
3954 h->redundant_pic_count= get_ue_golomb(&s->gb);
3957 //set defaults, might be overridden a few lines later
3958 h->ref_count[0]= h->pps.ref_count[0];
3959 h->ref_count[1]= h->pps.ref_count[1];
3961 if(h->slice_type_nos != FF_I_TYPE){
3962 if(h->slice_type_nos == FF_B_TYPE){
3963 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3965 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3967 if(num_ref_idx_active_override_flag){
3968 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3969 if(h->slice_type_nos==FF_B_TYPE)
3970 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Unsigned-trick range check: catches both 0 (after -1 wraps) and >32.
3972 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3973 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3974 h->ref_count[0]= h->ref_count[1]= 1;
3978 if(h->slice_type_nos == FF_B_TYPE)
3985 if(!default_ref_list_done){
3986 fill_default_ref_list(h);
3989 if(decode_ref_pic_list_reordering(h) < 0)
// --- weighted prediction setup ---
3992 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3993 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3994 pred_weight_table(h);
3995 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3996 implicit_weight_table(h);
// Marking is parsed on the master context; lists are then split per field.
4001 decode_ref_pic_marking(h0, &s->gb);
4004 fill_mbaff_ref_list(h);
4006 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4007 tmp = get_ue_golomb(&s->gb);
4009 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4012 h->cabac_init_idc= tmp;
4015 h->last_qscale_diff = 0;
4016 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4018 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4022 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4023 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4024 //FIXME qscale / qp ... stuff
// SP/SI extra syntax is parsed but not otherwise used here.
4025 if(h->slice_type == FF_SP_TYPE){
4026 get_bits1(&s->gb); /* sp_for_switch_flag */
4028 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4029 get_se_golomb(&s->gb); /* slice_qs_delta */
// --- deblocking filter control ---
4032 h->deblocking_filter = 1;
4033 h->slice_alpha_c0_offset = 0;
4034 h->slice_beta_offset = 0;
4035 if( h->pps.deblocking_filter_parameters_present ) {
4036 tmp= get_ue_golomb(&s->gb);
4038 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4041 h->deblocking_filter= tmp;
// Bitstream uses 0=on, 1=off; internal convention is the opposite.
4042 if(h->deblocking_filter < 2)
4043 h->deblocking_filter^= 1; // 1<->0
4045 if( h->deblocking_filter ) {
4046 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4047 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4051 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4052 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4053 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4054 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4055 h->deblocking_filter= 0;
// Cross-slice deblocking cannot run in parallel slice contexts.
4057 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4058 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4059 /* Cheat slightly for speed:
4060 Do not bother to deblock across slices. */
4061 h->deblocking_filter = 2;
4063 h0->max_contexts = 1;
4064 if(!h0->single_decode_warning) {
4065 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4066 h0->single_decode_warning = 1;
4069 return 1; // deblocking switched inside frame
4074 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
// NOTE(review): the literal '?' below is not valid C — this looks like a
// placeholder for the slice_group_change_cycle bit width (FMO, unsupported);
// in the upstream file this line presumably sits inside an #if 0 region that
// is not visible in this gapped listing. Do not "fix" without the original.
4075 slice_group_change_cycle= get_bits(&s->gb, ?);
4078 h0->last_slice_type = slice_type;
4079 h->slice_num = ++h0->current_slice;
// Cache ref_index -> frame_num mapping for this slice (4*fn + field bits).
4082 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4086 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4087 +(h->ref_list[j][i].reference&3);
4090 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4091 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4093 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4094 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4096 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4098 av_get_pict_type_char(h->slice_type),
4099 pps_id, h->frame_num,
4100 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4101 h->ref_count[0], h->ref_count[1],
4103 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4105 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4106 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: the count of leading zero bits before the
 * first '1' bit in the bitstream reader cache.
 * NOTE(review): the numeric prefixes on each line are listing artifacts and
 * the source line numbers are non-contiguous - the declarations of buf/log
 * and the return statement appear to be elided from this excerpt.
 */
4116 static inline int get_level_prefix(GetBitContext *gb){
4120     OPEN_READER(re, gb);
4121     UPDATE_CACHE(re, gb);
4122     buf=GET_CACHE(re, gb);
/* position of the first set bit; log-1 is the number of leading zeros */
4124     log= 32 - av_log2(buf);
4126     print_bin(buf>>(32-log), log);
4127     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating '1' bit */
4130     LAST_SKIP_BITS(re, gb, log);
4131     CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * every sub-partition must be 8x8 (and direct sub-partitions additionally
 * require direct_8x8_inference_flag).
 * NOTE(review): the surrounding loop over i and the return statement are
 * elided from this excerpt (source line numbers jump 4136 -> 4139 -> 4147).
 */
4136 static inline int get_dct8x8_allowed(H264Context *h){
4139 if(!IS_SUB_8X8(h->sub_mb_type[i])
4140 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4147 * decodes a residual block.
4148 * @param n block index
4149 * @param scantable scantable
4150 * @param max_coeff number of coefficients in the block
4151 * @return <0 if an error occurred
/* NOTE(review): this excerpt has elided lines throughout (non-contiguous
 * source numbers); closing braces and some branches are not visible.
 * This is the CAVLC residual decoder: coeff_token, trailing-one signs,
 * level codes, total_zeros and run_before, per H.264 spec section 9.2. */
4153 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4154 MpegEncContext * const s = &h->s;
/* selects one of the 4 coeff_token VLC tables from the predicted nnz (nC) */
4155 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4157 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4159 //FIXME put trailing_onex into the context
/* --- coeff_token: chroma DC uses its own table, luma uses a table chosen
 * by the predicted non-zero count of the neighbours --- */
4161 if(n == CHROMA_DC_BLOCK_INDEX){
4162 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4163 total_coeff= coeff_token>>2;
4165 if(n == LUMA_DC_BLOCK_INDEX){
4166 total_coeff= pred_non_zero_count(h, 0);
4167 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4168 total_coeff= coeff_token>>2;
4170 total_coeff= pred_non_zero_count(h, n);
4171 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4172 total_coeff= coeff_token>>2;
4173 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4177 //FIXME set last_non_zero?
/* corrupted-stream guard: more coefficients than the block can hold */
4181 if(total_coeff > (unsigned)max_coeff) {
4182 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* --- trailing ones: coded as sign bits only (+1 / -1) --- */
4186 trailing_ones= coeff_token&3;
4187 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4188 assert(total_coeff<=16);
4190 for(i=0; i<trailing_ones; i++){
4191 level[i]= 1 - 2*get_bits1(gb);
/* --- first non-trailing-one level (suffix_length is 0 or 1) --- */
4195 int level_code, mask;
4196 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4197 int prefix= get_level_prefix(gb);
4199 //first coefficient has suffix_length equal to 0 or 1
4200 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4202 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4204 level_code= (prefix<<suffix_length); //part
4205 }else if(prefix==14){
4207 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4209 level_code= prefix + get_bits(gb, 4); //part
/* prefix >= 15: escape coding with a (prefix-3)-bit suffix */
4211 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4212 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4214 level_code += (1<<(prefix-3))-4096;
4217 if(trailing_ones < 3) level_code += 2;
/* map the unsigned level_code to a signed level (zig-zag mapping) */
4222 mask= -(level_code&1);
4223 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4226 //remaining coefficients have suffix_length > 0
4227 for(;i<total_coeff;i++) {
/* threshold at which suffix_length is incremented after each level */
4228 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4229 prefix = get_level_prefix(gb);
4231 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4233 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4235 level_code += (1<<(prefix-3))-4096;
4237 mask= -(level_code&1);
4238 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4239 if(level_code > suffix_limit[suffix_length])
/* --- total_zeros: only present when the block is not already full --- */
4244 if(total_coeff == max_coeff)
4247 if(n == CHROMA_DC_BLOCK_INDEX)
4248 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4250 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* --- scatter levels into the block via run_before codes; first loop
 * writes raw levels, second writes dequantized ((level*qmul+32)>>6) -
 * presumably selected on qmul==NULL (DC path), branch header elided --- */
4253 coeff_num = zeros_left + total_coeff - 1;
4254 j = scantable[coeff_num];
4256 block[j] = level[0];
4257 for(i=1;i<total_coeff;i++) {
4260 else if(zeros_left < 7){
4261 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4263 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4265 zeros_left -= run_before;
4266 coeff_num -= 1 + run_before;
4267 j= scantable[ coeff_num ];
4272 block[j] = (level[0] * qmul[j] + 32)>>6;
4273 for(i=1;i<total_coeff;i++) {
4276 else if(zeros_left < 7){
4277 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4279 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4281 zeros_left -= run_before;
4282 coeff_num -= 1 + run_before;
4283 j= scantable[ coeff_num ];
4285 block[j]= (level[i] * qmul[j] + 32)>>6;
/* corrupted stream: runs consumed more zeros than total_zeros announced */
4290 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a macroblock whose flag is not coded
 * (e.g. inside a skip run in MBAFF): inherit the interlaced flag from the
 * left neighbour if it is in the same slice, else from the above neighbour,
 * else default (fallthrough value elided in this excerpt).
 */
4297 static void predict_field_decoding_flag(H264Context *h){
4298 MpegEncContext * const s = &h->s;
4299 const int mb_xy= h->mb_xy;
4300 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4301 ? s->current_picture.mb_type[mb_xy-1]
4302 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4303 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4305 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4309 * decodes a P_SKIP or B_SKIP macroblock
/* Fills motion/reference caches for a skipped macroblock (no residual, no
 * coded mv), writes the result back, and records the MB as skipped.
 * NOTE(review): several lines are elided in this excerpt (non-contiguous
 * source numbers), including the mb_type initialization. */
4311 static void decode_mb_skip(H264Context *h){
4312 MpegEncContext * const s = &h->s;
4313 const int mb_xy= h->mb_xy;
/* a skipped MB has no coded coefficients anywhere */
4316 memset(h->non_zero_count[mb_xy], 0, 16);
4317 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4320 mb_type|= MB_TYPE_INTERLACED;
4322 if( h->slice_type_nos == FF_B_TYPE )
4324 // just for fill_caches. pred_direct_motion will set the real mb_type
4325 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4327 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4328 pred_direct_motion(h, &mb_type);
4329 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: 16x16, list-0 only, mv from the P-skip motion predictor */
4334 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4336 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4337 pred_pskip_motion(h, &mx, &my);
4338 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4339 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit per-MB state */
4342 write_back_motion(h, mb_type);
4343 s->current_picture.mb_type[mb_xy]= mb_type;
4344 s->current_picture.qscale_table[mb_xy]= s->qscale;
4345 h->slice_table[ mb_xy ]= h->slice_num;
4346 h->prev_mb_skipped= 1;
4350 * decodes a macroblock
4351 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* CAVLC macroblock-layer decoder: skip run, mb_type, intra prediction modes
 * or inter refs/motion vectors, CBP, delta-QP and residual blocks.
 * NOTE(review): this excerpt has many lines elided (non-contiguous source
 * numbers) - else-branches, closing braces and some declarations are not
 * visible; comments below describe only the visible code. */
4353 static int decode_mb_cavlc(H264Context *h){
4354 MpegEncContext * const s = &h->s;
4356 int partition_count;
4357 unsigned int mb_type, cbp;
4358 int dct8x8_allowed= h->pps.transform_8x8_mode;
4360 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4362 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4364 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4365 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run handling (P/B slices only) --- */
4367 if(h->slice_type_nos != FF_I_TYPE){
4368 if(s->mb_skip_run==-1)
4369 s->mb_skip_run= get_ue_golomb(&s->gb);
4371 if (s->mb_skip_run--) {
/* MBAFF: the field flag of a skipped top MB must be read or predicted */
4372 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4373 if(s->mb_skip_run==0)
4374 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4376 predict_field_decoding_flag(h);
4383 if( (s->mb_y&1) == 0 )
4384 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4386 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4388 h->prev_mb_skipped= 0;
/* --- mb_type: per-slice-type lookup tables; out-of-range values in P/B
 * fall through to the intra tables via decode_intra_mb --- */
4390 mb_type= get_ue_golomb(&s->gb);
4391 if(h->slice_type_nos == FF_B_TYPE){
4393 partition_count= b_mb_type_info[mb_type].partition_count;
4394 mb_type= b_mb_type_info[mb_type].type;
4397 goto decode_intra_mb;
4399 }else if(h->slice_type_nos == FF_P_TYPE){
4401 partition_count= p_mb_type_info[mb_type].partition_count;
4402 mb_type= p_mb_type_info[mb_type].type;
4405 goto decode_intra_mb;
4408 assert(h->slice_type_nos == FF_I_TYPE);
4409 if(h->slice_type == FF_SI_TYPE && mb_type)
4413 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4417 cbp= i_mb_type_info[mb_type].cbp;
4418 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4419 mb_type= i_mb_type_info[mb_type].type;
4423 mb_type |= MB_TYPE_INTERLACED;
4425 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: byte-aligned raw samples copied straight into h->mb --- */
4427 if(IS_INTRA_PCM(mb_type)){
4430 // We assume these blocks are very rare so we do not optimize it.
4431 align_get_bits(&s->gb);
4433 // The pixels are stored in the same order as levels in h->mb array.
4434 for(y=0; y<16; y++){
4435 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4436 for(x=0; x<16; x++){
4437 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4438 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4442 const int index= 256 + 4*(y&3) + 32*(y>>2);
4444 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4445 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4449 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4451 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4452 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4456 // In deblocking, the quantizer is 0
4457 s->current_picture.qscale_table[mb_xy]= 0;
4458 // All coeffs are present
4459 memset(h->non_zero_count[mb_xy], 16, 16);
4461 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MB: refs are doubled while decoding (restored at the end) */
4466 h->ref_count[0] <<= 1;
4467 h->ref_count[1] <<= 1;
4470 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
4473 if(IS_INTRA(mb_type)){
4475 // init_top_left_availability(h);
4476 if(IS_INTRA4x4(mb_type)){
4479 if(dct8x8_allowed && get_bits1(&s->gb)){
4480 mb_type |= MB_TYPE_8x8DCT;
4484 // fill_intra4x4_pred_table(h);
4485 for(i=0; i<16; i+=di){
4486 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag==0: read rem_intra4x4_pred_mode */
4488 if(!get_bits1(&s->gb)){
4489 const int rem_mode= get_bits(&s->gb, 3);
4490 mode = rem_mode + (rem_mode >= mode);
4494 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4496 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4498 write_back_intra_pred_mode(h);
4499 if( check_intra4x4_pred_mode(h) < 0)
4502 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4503 if(h->intra16x16_pred_mode < 0)
4507 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4510 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-macroblock partitions --- */
4511 }else if(partition_count==4){
4512 int i, j, sub_partition_count[4], list, ref[2][4];
4514 if(h->slice_type_nos == FF_B_TYPE){
4516 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4517 if(h->sub_mb_type[i] >=13){
4518 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4521 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4522 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4524 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4525 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4526 pred_direct_motion(h, &mb_type);
4527 h->ref_cache[0][scan8[4]] =
4528 h->ref_cache[1][scan8[4]] =
4529 h->ref_cache[0][scan8[12]] =
4530 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4533 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4535 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4536 if(h->sub_mb_type[i] >=4){
4537 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4540 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4541 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each 8x8 partition */
4545 for(list=0; list<h->list_count; list++){
4546 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4548 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4549 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4550 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4552 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4564 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences for each sub-partition */
4566 for(list=0; list<h->list_count; list++){
4568 if(IS_DIRECT(h->sub_mb_type[i])) {
4569 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4572 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4573 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4575 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4576 const int sub_mb_type= h->sub_mb_type[i];
4577 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4578 for(j=0; j<sub_partition_count[i]; j++){
4580 const int index= 4*i + block_width*j;
4581 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4582 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4583 mx += get_se_golomb(&s->gb);
4584 my += get_se_golomb(&s->gb);
4585 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the mv into all 4x4 cells the sub-partition covers */
4587 if(IS_SUB_8X8(sub_mb_type)){
4589 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4591 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4592 }else if(IS_SUB_8X4(sub_mb_type)){
4593 mv_cache[ 1 ][0]= mx;
4594 mv_cache[ 1 ][1]= my;
4595 }else if(IS_SUB_4X8(sub_mb_type)){
4596 mv_cache[ 8 ][0]= mx;
4597 mv_cache[ 8 ][1]= my;
4599 mv_cache[ 0 ][0]= mx;
4600 mv_cache[ 0 ][1]= my;
4603 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4609 }else if(IS_DIRECT(mb_type)){
4610 pred_direct_motion(h, &mb_type);
4611 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions: refs then mvd per list --- */
4613 int list, mx, my, i;
4614 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4615 if(IS_16X16(mb_type)){
4616 for(list=0; list<h->list_count; list++){
4618 if(IS_DIR(mb_type, 0, list)){
4619 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4620 if(val >= h->ref_count[list]){
4621 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4625 val= LIST_NOT_USED&0xFF;
4626 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4628 for(list=0; list<h->list_count; list++){
4630 if(IS_DIR(mb_type, 0, list)){
4631 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4632 mx += get_se_golomb(&s->gb);
4633 my += get_se_golomb(&s->gb);
4634 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4636 val= pack16to32(mx,my);
4639 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4642 else if(IS_16X8(mb_type)){
4643 for(list=0; list<h->list_count; list++){
4646 if(IS_DIR(mb_type, i, list)){
4647 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4648 if(val >= h->ref_count[list]){
4649 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4653 val= LIST_NOT_USED&0xFF;
4654 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4657 for(list=0; list<h->list_count; list++){
4660 if(IS_DIR(mb_type, i, list)){
4661 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4662 mx += get_se_golomb(&s->gb);
4663 my += get_se_golomb(&s->gb);
4664 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4666 val= pack16to32(mx,my);
4669 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4673 assert(IS_8X16(mb_type));
4674 for(list=0; list<h->list_count; list++){
4677 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4678 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4679 if(val >= h->ref_count[list]){
4680 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4684 val= LIST_NOT_USED&0xFF;
4685 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4688 for(list=0; list<h->list_count; list++){
4691 if(IS_DIR(mb_type, i, list)){
4692 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4693 mx += get_se_golomb(&s->gb);
4694 my += get_se_golomb(&s->gb);
4695 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4697 val= pack16to32(mx,my);
4700 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4706 if(IS_INTER(mb_type))
4707 write_back_motion(h, mb_type);
/* --- coded_block_pattern (not coded for Intra_16x16, implied by mb_type) --- */
4709 if(!IS_INTRA16x16(mb_type)){
4710 cbp= get_ue_golomb(&s->gb);
4712 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4716 if(IS_INTRA4x4(mb_type))
4717 cbp= golomb_to_intra4x4_cbp[cbp];
4719 cbp= golomb_to_inter_cbp[cbp];
/* transform_size_8x8_flag for inter MBs with luma coefficients */
4723 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4724 if(get_bits1(&s->gb)){
4725 mb_type |= MB_TYPE_8x8DCT;
4726 h->cbp_table[mb_xy]= cbp;
4729 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding: delta-QP then luma and chroma blocks --- */
4731 if(cbp || IS_INTRA16x16(mb_type)){
4732 int i8x8, i4x4, chroma_idx;
4734 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4735 const uint8_t *scan, *scan8x8, *dc_scan;
4737 // fill_non_zero_count_cache(h);
4739 if(IS_INTERLACED(mb_type)){
4740 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4741 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4742 dc_scan= luma_dc_field_scan;
4744 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4745 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4746 dc_scan= luma_dc_zigzag_scan;
4749 dquant= get_se_golomb(&s->gb);
4751 if( dquant > 25 || dquant < -26 ){
4752 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 per the spec */
4756 s->qscale += dquant;
4757 if(((unsigned)s->qscale) > 51){
4758 if(s->qscale<0) s->qscale+= 52;
4759 else s->qscale-= 52;
4762 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4763 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4764 if(IS_INTRA16x16(mb_type)){
4765 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4766 return -1; //FIXME continue if partitioned and other return -1 too
4769 assert((cbp&15) == 0 || (cbp&15) == 15);
4772 for(i8x8=0; i8x8<4; i8x8++){
4773 for(i4x4=0; i4x4<4; i4x4++){
4774 const int index= i4x4 + 4*i8x8;
4775 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4781 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4784 for(i8x8=0; i8x8<4; i8x8++){
4785 if(cbp & (1<<i8x8)){
4786 if(IS_8x8DCT(mb_type)){
4787 DCTELEM *buf = &h->mb[64*i8x8];
4789 for(i4x4=0; i4x4<4; i4x4++){
4790 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4791 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4794 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4795 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4797 for(i4x4=0; i4x4<4; i4x4++){
4798 const int index= i4x4 + 4*i8x8;
4800 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4806 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4807 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (2x2 per plane) then chroma AC */
4813 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4814 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4820 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4821 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4822 for(i4x4=0; i4x4<4; i4x4++){
4823 const int index= 16 + 4*chroma_idx + i4x4;
4824 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4830 uint8_t * const nnz= &h->non_zero_count_cache[0];
4831 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4832 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4835 uint8_t * const nnz= &h->non_zero_count_cache[0];
4836 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4837 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4838 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4840 s->current_picture.qscale_table[mb_xy]= s->qscale;
4841 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling done above */
4844 h->ref_count[0] >>= 1;
4845 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag (CABAC) for the current MB pair.
 * Context (0..2) counts how many of the left / above MB-pair neighbours in
 * the same slice are already field-coded.
 */
4851 static int decode_cabac_field_decoding_flag(H264Context *h) {
4852 MpegEncContext * const s = &h->s;
4853 const int mb_x = s->mb_x;
/* address the top MB of the current pair */
4854 const int mb_y = s->mb_y & ~1;
4855 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4856 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4858 unsigned int ctx = 0;
4860 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4863 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4867 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base    base index into cabac_state for this slice type
 * @param intra_slice nonzero in I slices (uses neighbour-derived context)
 * @return 0 for I_4x4, 25 for I_PCM, otherwise 1..24 encoding the
 *         Intra_16x16 pred mode / cbp combination.
 */
4870 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4871 uint8_t *state= &h->cabac_state[ctx_base];
4875 MpegEncContext * const s = &h->s;
4876 const int mba_xy = h->left_mb_xy[0];
4877 const int mbb_xy = h->top_mb_xy;
/* ctx counts non-I4x4 neighbours in the same slice (increments elided) */
4879 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4881 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4883 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4884 return 0; /* I4x4 */
4887 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4888 return 0; /* I4x4 */
4891 if( get_cabac_terminate( &h->cabac ) )
4892 return 25; /* PCM */
/* Intra_16x16: 12*cbp_luma + 4*cbp_chroma + pred_mode bits */
4894 mb_type = 1; /* I16x16 */
4895 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4896 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4897 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4898 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4899 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * Return values index the same mb_type tables as the CAVLC path
 * (i/p/b_mb_type_info).
 */
4903 static int decode_cabac_mb_type( H264Context *h ) {
4904 MpegEncContext * const s = &h->s;
4906 if( h->slice_type_nos == FF_I_TYPE ) {
4907 return decode_cabac_intra_mb_type(h, 3, 1);
4908 } else if( h->slice_type_nos == FF_P_TYPE ) {
4909 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4911 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4912 /* P_L0_D16x16, P_8x8 */
4913 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4915 /* P_L0_D8x16, P_L0_D16x8 */
4916 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra MB inside a P slice: offset 5 past the P types */
4919 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4921 } else if( h->slice_type_nos == FF_B_TYPE ) {
4922 const int mba_xy = h->left_mb_xy[0];
4923 const int mbb_xy = h->top_mb_xy;
/* ctx counts non-direct neighbours in the same slice (increments elided) */
4927 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4929 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4932 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4933 return 0; /* B_Direct_16x16 */
4935 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4936 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit fixed-length part of the B mb_type code */
4939 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4940 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4941 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4942 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4944 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4945 else if( bits == 13 ) {
4946 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4947 } else if( bits == 14 )
4948 return 11; /* B_L1_L0_8x16 */
4949 else if( bits == 15 )
4950 return 22; /* B_8x8 */
4952 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4953 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4955 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC.
 * Context (0..2) counts left/above neighbours in the same slice that are
 * NOT skipped; B slices use a separate context set (offset elided here).
 * In MBAFF the neighbour addresses are adjusted for field/frame pairing.
 */
4960 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4961 MpegEncContext * const s = &h->s;
4965 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4966 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4969 && h->slice_table[mba_xy] == h->slice_num
4970 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4971 mba_xy += s->mb_stride;
4973 mbb_xy = mb_xy - s->mb_stride;
4975 && h->slice_table[mbb_xy] == h->slice_num
4976 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4977 mbb_xy -= s->mb_stride;
4979 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4981 int mb_xy = h->mb_xy;
4983 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4986 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4988 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4991 if( h->slice_type_nos == FF_B_TYPE )
4993 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes rem_intra4x4_pred_mode with CABAC: one "use predicted mode" bit,
 * else a 3-bit mode that is bumped past the predicted mode.
 */
4996 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4999 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5002 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5003 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5004 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so all 9 modes remain reachable */
5006 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC (truncated unary, max 3).
 * Context (0..2) counts neighbours in the same slice with a non-DC mode.
 */
5012 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5013 const int mba_xy = h->left_mb_xy[0];
5014 const int mbb_xy = h->top_mb_xy;
5018 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5019 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5022 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5025 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5028 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5030 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC.
 * Each 8x8 block's context depends on whether the block to its left and
 * above (within this MB or the neighbour MB's cached cbp) had coefficients.
 */
5036 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5037 int cbp_b, cbp_a, ctx, cbp = 0;
/* -1 (all bits set) when the neighbour is outside this slice */
5039 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5040 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5042 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5043 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5044 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5045 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5046 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5047 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5048 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5049 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern with CABAC.
 * @return 0 (no chroma coeffs), 1 (DC only) or 2 (DC+AC); contexts are
 *         derived from the neighbours' chroma cbp (bits 4-5 of their cbp).
 */
5052 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5056 cbp_a = (h->left_cbp>>4)&0x03;
5057 cbp_b = (h-> top_cbp>>4)&0x03;
5060 if( cbp_a > 0 ) ctx++;
5061 if( cbp_b > 0 ) ctx += 2;
5062 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5066 if( cbp_a == 2 ) ctx++;
5067 if( cbp_b == 2 ) ctx += 2;
5068 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC (unary code, context switches after the
 * first bins); maps the unsigned value to a signed delta.
 * NOTE(review): lines are elided here - the ctx initialization, the loop
 * body and the positive-return path are not visible in this excerpt.
 */
5070 static int decode_cabac_mb_dqp( H264Context *h) {
5074 if( h->last_qscale_diff != 0 )
5077 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5083 if(val > 102) //prevent infinite loop
/* odd val -> positive delta (path elided), even val -> negative */
5090 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type (0..3) with CABAC using contexts 21-23.
 * Return paths for the non-taken branches are elided in this excerpt.
 */
5092 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5093 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5095 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5097 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type (0..12) with CABAC using contexts 36-39.
 * 0 = B_Direct_8x8; larger values select list usage and partition shape.
 */
5101 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5103 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5104 return 0; /* B_Direct_8x8 */
5105 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5106 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5108 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5109 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5110 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5113 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5114 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; the context is the number of
 * neighbouring MBs already using the 8x8 transform.
 */
5118 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5119 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC (unary code).
 * Context is derived from the left/above cached ref indices; in B slices
 * direct-predicted neighbours do not contribute.
 */
5122 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5123 int refa = h->ref_cache[list][scan8[n] - 1];
5124 int refb = h->ref_cache[list][scan8[n] - 8];
5128 if( h->slice_type_nos == FF_B_TYPE) {
5129 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5131 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5140 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity bound; real streams never need 32 references per list */
5146 if(ref >= 32 /*h->ref_list[list]*/){
5147 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5148 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component with CABAC:
 * context from the neighbours' |mvd|, then unary prefix (contexted),
 * exp-golomb bypass suffix, and a bypass sign bit.
 * @param l 0 for the x component, 1 for y (selects context base 40/47)
 */
5154 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5155 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5156 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5157 int ctxbase = (l == 0) ? 40 : 47;
5162 else if( amvd > 32 )
5167 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, capped at 9 contexted bins */
5172 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-golomb style bypass-coded suffix for large magnitudes */
5180 while( get_cabac_bypass( &h->cabac ) ) {
5184 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5189 if( get_cabac_bypass( &h->cabac ) )
5193 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context: nza/nzb are the coded flags of the
 * left/above neighbour blocks of the same category; the result selects one
 * of 4 contexts within the category's group (ctx + 4*cat).
 * NOTE(review): the cat-dispatch if/else skeleton is partially elided here.
 */
5196 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* DC categories read the neighbour MB's cached cbp bit 8 */
5202 nza = h->left_cbp&0x100;
5203 nzb = h-> top_cbp&0x100;
5205 nza = (h->left_cbp>>(6+idx))&0x01;
5206 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC: neighbour flags come from the non-zero-count cache */
5210 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5211 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5213 assert(cat == 1 || cat == 2);
5214 nza = h->non_zero_count_cache[scan8[idx] - 1];
5215 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5225 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset used for the
 * CABAC last_significant_coeff_flag in 8x8 blocks (H.264 spec table for
 * last_coeff ctxIdxInc). Closing brace of the initializer is elided from
 * this excerpt. */
5228 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5229 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5230 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5231 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5232 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5235 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5236 static const int significant_coeff_flag_offset[2][6] = {
5237 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5238 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5240 static const int last_coeff_flag_offset[2][6] = {
5241 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5242 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5244 static const int coeff_abs_level_m1_offset[6] = {
5245 227+0, 227+10, 227+20, 227+30, 227+39, 426
5247 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5248 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5249 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5250 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5251 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5252 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5253 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5254 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5255 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5257 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5258 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5259 * map node ctx => cabac ctx for level=1 */
5260 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5261 /* map node ctx => cabac ctx for level>1 */
5262 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5263 static const uint8_t coeff_abs_level_transition[2][8] = {
5264 /* update node ctx after decoding a level=1 */
5265 { 1, 2, 3, 3, 4, 5, 6, 7 },
5266 /* update node ctx after decoding a level>1 */
5267 { 4, 4, 4, 4, 5, 6, 7, 7 }
5273 int coeff_count = 0;
5276 uint8_t *significant_coeff_ctx_base;
5277 uint8_t *last_coeff_ctx_base;
5278 uint8_t *abs_level_m1_ctx_base;
5281 #define CABAC_ON_STACK
5283 #ifdef CABAC_ON_STACK
5286 cc.range = h->cabac.range;
5287 cc.low = h->cabac.low;
5288 cc.bytestream= h->cabac.bytestream;
5290 #define CC &h->cabac
5294 /* cat: 0-> DC 16x16 n = 0
5295 * 1-> AC 16x16 n = luma4x4idx
5296 * 2-> Luma4x4 n = luma4x4idx
5297 * 3-> DC Chroma n = iCbCr
5298 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5299 * 5-> Luma8x8 n = 4 * luma8x8idx
5302 /* read coded block flag */
5303 if( is_dc || cat != 5 ) {
5304 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5307 h->non_zero_count_cache[scan8[16+n]] = 0;
5309 h->non_zero_count_cache[scan8[n]] = 0;
5312 #ifdef CABAC_ON_STACK
5313 h->cabac.range = cc.range ;
5314 h->cabac.low = cc.low ;
5315 h->cabac.bytestream= cc.bytestream;
5321 significant_coeff_ctx_base = h->cabac_state
5322 + significant_coeff_flag_offset[MB_FIELD][cat];
5323 last_coeff_ctx_base = h->cabac_state
5324 + last_coeff_flag_offset[MB_FIELD][cat];
5325 abs_level_m1_ctx_base = h->cabac_state
5326 + coeff_abs_level_m1_offset[cat];
5328 if( !is_dc && cat == 5 ) {
5329 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5330 for(last= 0; last < coefs; last++) { \
5331 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5332 if( get_cabac( CC, sig_ctx )) { \
5333 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5334 index[coeff_count++] = last; \
5335 if( get_cabac( CC, last_ctx ) ) { \
5341 if( last == max_coeff -1 ) {\
5342 index[coeff_count++] = last;\
5344 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5345 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5346 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5348 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5350 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5352 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5355 assert(coeff_count > 0);
5359 h->cbp_table[h->mb_xy] |= 0x100;
5361 h->cbp_table[h->mb_xy] |= 0x40 << n;
5364 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5366 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5368 assert( cat == 1 || cat == 2 );
5369 h->non_zero_count_cache[scan8[n]] = coeff_count;
5373 while( coeff_count-- ) {
5374 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5376 int j= scantable[index[coeff_count]];
5378 if( get_cabac( CC, ctx ) == 0 ) {
5379 node_ctx = coeff_abs_level_transition[0][node_ctx];
5381 block[j] = get_cabac_bypass_sign( CC, -1);
5383 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5387 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5388 node_ctx = coeff_abs_level_transition[1][node_ctx];
5390 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5394 if( coeff_abs >= 15 ) {
5396 while( get_cabac_bypass( CC ) ) {
5402 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5408 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5410 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5414 #ifdef CABAC_ON_STACK
5415 h->cabac.range = cc.range ;
5416 h->cabac.low = cc.low ;
5417 h->cabac.bytestream= cc.bytestream;
5422 #ifndef CONFIG_SMALL
5423 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5424 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5427 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5428 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5432 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5434 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5436 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5437 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5441 static inline void compute_mb_neighbors(H264Context *h)
5443 MpegEncContext * const s = &h->s;
5444 const int mb_xy = h->mb_xy;
5445 h->top_mb_xy = mb_xy - s->mb_stride;
5446 h->left_mb_xy[0] = mb_xy - 1;
5448 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5449 const int top_pair_xy = pair_xy - s->mb_stride;
5450 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5451 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5452 const int curr_mb_frame_flag = !MB_FIELD;
5453 const int bottom = (s->mb_y & 1);
5455 ? !curr_mb_frame_flag // bottom macroblock
5456 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5458 h->top_mb_xy -= s->mb_stride;
5460 if (left_mb_frame_flag != curr_mb_frame_flag) {
5461 h->left_mb_xy[0] = pair_xy - 1;
5463 } else if (FIELD_PICTURE) {
5464 h->top_mb_xy -= s->mb_stride;
5470 * decodes a macroblock
5471 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5473 static int decode_mb_cabac(H264Context *h) {
5474 MpegEncContext * const s = &h->s;
5476 int mb_type, partition_count, cbp = 0;
5477 int dct8x8_allowed= h->pps.transform_8x8_mode;
5479 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5481 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5483 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5484 if( h->slice_type_nos != FF_I_TYPE ) {
5486 /* a skipped mb needs the aff flag from the following mb */
5487 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5488 predict_field_decoding_flag(h);
5489 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5490 skip = h->next_mb_skipped;
5492 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5493 /* read skip flags */
5495 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5496 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5497 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5498 if(h->next_mb_skipped)
5499 predict_field_decoding_flag(h);
5501 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5506 h->cbp_table[mb_xy] = 0;
5507 h->chroma_pred_mode_table[mb_xy] = 0;
5508 h->last_qscale_diff = 0;
5515 if( (s->mb_y&1) == 0 )
5517 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5519 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5521 h->prev_mb_skipped = 0;
5523 compute_mb_neighbors(h);
5524 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5525 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5529 if( h->slice_type_nos == FF_B_TYPE ) {
5531 partition_count= b_mb_type_info[mb_type].partition_count;
5532 mb_type= b_mb_type_info[mb_type].type;
5535 goto decode_intra_mb;
5537 } else if( h->slice_type_nos == FF_P_TYPE ) {
5539 partition_count= p_mb_type_info[mb_type].partition_count;
5540 mb_type= p_mb_type_info[mb_type].type;
5543 goto decode_intra_mb;
5546 if(h->slice_type == FF_SI_TYPE && mb_type)
5548 assert(h->slice_type_nos == FF_I_TYPE);
5550 partition_count = 0;
5551 cbp= i_mb_type_info[mb_type].cbp;
5552 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5553 mb_type= i_mb_type_info[mb_type].type;
5556 mb_type |= MB_TYPE_INTERLACED;
5558 h->slice_table[ mb_xy ]= h->slice_num;
5560 if(IS_INTRA_PCM(mb_type)) {
5564 // We assume these blocks are very rare so we do not optimize it.
5565 // FIXME The two following lines get the bitstream position in the cabac
5566 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5567 ptr= h->cabac.bytestream;
5568 if(h->cabac.low&0x1) ptr--;
5570 if(h->cabac.low&0x1FF) ptr--;
5573 // The pixels are stored in the same order as levels in h->mb array.
5574 for(y=0; y<16; y++){
5575 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5576 for(x=0; x<16; x++){
5577 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5578 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5582 const int index= 256 + 4*(y&3) + 32*(y>>2);
5584 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5585 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5589 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5591 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5592 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5596 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5598 // All blocks are present
5599 h->cbp_table[mb_xy] = 0x1ef;
5600 h->chroma_pred_mode_table[mb_xy] = 0;
5601 // In deblocking, the quantizer is 0
5602 s->current_picture.qscale_table[mb_xy]= 0;
5603 // All coeffs are present
5604 memset(h->non_zero_count[mb_xy], 16, 16);
5605 s->current_picture.mb_type[mb_xy]= mb_type;
5606 h->last_qscale_diff = 0;
5611 h->ref_count[0] <<= 1;
5612 h->ref_count[1] <<= 1;
5615 fill_caches(h, mb_type, 0);
5617 if( IS_INTRA( mb_type ) ) {
5619 if( IS_INTRA4x4( mb_type ) ) {
5620 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5621 mb_type |= MB_TYPE_8x8DCT;
5622 for( i = 0; i < 16; i+=4 ) {
5623 int pred = pred_intra_mode( h, i );
5624 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5625 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5628 for( i = 0; i < 16; i++ ) {
5629 int pred = pred_intra_mode( h, i );
5630 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5632 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5635 write_back_intra_pred_mode(h);
5636 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5638 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5639 if( h->intra16x16_pred_mode < 0 ) return -1;
5641 h->chroma_pred_mode_table[mb_xy] =
5642 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5644 pred_mode= check_intra_pred_mode( h, pred_mode );
5645 if( pred_mode < 0 ) return -1;
5646 h->chroma_pred_mode= pred_mode;
5647 } else if( partition_count == 4 ) {
5648 int i, j, sub_partition_count[4], list, ref[2][4];
5650 if( h->slice_type_nos == FF_B_TYPE ) {
5651 for( i = 0; i < 4; i++ ) {
5652 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5653 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5654 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5656 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5657 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5658 pred_direct_motion(h, &mb_type);
5659 h->ref_cache[0][scan8[4]] =
5660 h->ref_cache[1][scan8[4]] =
5661 h->ref_cache[0][scan8[12]] =
5662 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5663 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5664 for( i = 0; i < 4; i++ )
5665 if( IS_DIRECT(h->sub_mb_type[i]) )
5666 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5670 for( i = 0; i < 4; i++ ) {
5671 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5672 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5673 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5677 for( list = 0; list < h->list_count; list++ ) {
5678 for( i = 0; i < 4; i++ ) {
5679 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5680 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5681 if( h->ref_count[list] > 1 )
5682 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5688 h->ref_cache[list][ scan8[4*i]+1 ]=
5689 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5694 dct8x8_allowed = get_dct8x8_allowed(h);
5696 for(list=0; list<h->list_count; list++){
5698 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5699 if(IS_DIRECT(h->sub_mb_type[i])){
5700 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5704 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5705 const int sub_mb_type= h->sub_mb_type[i];
5706 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5707 for(j=0; j<sub_partition_count[i]; j++){
5710 const int index= 4*i + block_width*j;
5711 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5712 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5713 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5715 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5716 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5717 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5719 if(IS_SUB_8X8(sub_mb_type)){
5721 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5723 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5726 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5728 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5729 }else if(IS_SUB_8X4(sub_mb_type)){
5730 mv_cache[ 1 ][0]= mx;
5731 mv_cache[ 1 ][1]= my;
5733 mvd_cache[ 1 ][0]= mx - mpx;
5734 mvd_cache[ 1 ][1]= my - mpy;
5735 }else if(IS_SUB_4X8(sub_mb_type)){
5736 mv_cache[ 8 ][0]= mx;
5737 mv_cache[ 8 ][1]= my;
5739 mvd_cache[ 8 ][0]= mx - mpx;
5740 mvd_cache[ 8 ][1]= my - mpy;
5742 mv_cache[ 0 ][0]= mx;
5743 mv_cache[ 0 ][1]= my;
5745 mvd_cache[ 0 ][0]= mx - mpx;
5746 mvd_cache[ 0 ][1]= my - mpy;
5749 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5750 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5751 p[0] = p[1] = p[8] = p[9] = 0;
5752 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5756 } else if( IS_DIRECT(mb_type) ) {
5757 pred_direct_motion(h, &mb_type);
5758 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5759 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5760 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5762 int list, mx, my, i, mpx, mpy;
5763 if(IS_16X16(mb_type)){
5764 for(list=0; list<h->list_count; list++){
5765 if(IS_DIR(mb_type, 0, list)){
5766 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5767 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5769 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5771 for(list=0; list<h->list_count; list++){
5772 if(IS_DIR(mb_type, 0, list)){
5773 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5775 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5776 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5777 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5779 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5780 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5782 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5785 else if(IS_16X8(mb_type)){
5786 for(list=0; list<h->list_count; list++){
5788 if(IS_DIR(mb_type, i, list)){
5789 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5790 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5792 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5795 for(list=0; list<h->list_count; list++){
5797 if(IS_DIR(mb_type, i, list)){
5798 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5799 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5800 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5801 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5803 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5804 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5806 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5807 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5812 assert(IS_8X16(mb_type));
5813 for(list=0; list<h->list_count; list++){
5815 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5816 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5817 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5819 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5822 for(list=0; list<h->list_count; list++){
5824 if(IS_DIR(mb_type, i, list)){
5825 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5826 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5827 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5829 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5830 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5831 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5833 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5834 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5841 if( IS_INTER( mb_type ) ) {
5842 h->chroma_pred_mode_table[mb_xy] = 0;
5843 write_back_motion( h, mb_type );
5846 if( !IS_INTRA16x16( mb_type ) ) {
5847 cbp = decode_cabac_mb_cbp_luma( h );
5848 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5851 h->cbp_table[mb_xy] = h->cbp = cbp;
5853 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5854 if( decode_cabac_mb_transform_size( h ) )
5855 mb_type |= MB_TYPE_8x8DCT;
5857 s->current_picture.mb_type[mb_xy]= mb_type;
5859 if( cbp || IS_INTRA16x16( mb_type ) ) {
5860 const uint8_t *scan, *scan8x8, *dc_scan;
5861 const uint32_t *qmul;
5864 if(IS_INTERLACED(mb_type)){
5865 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5866 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5867 dc_scan= luma_dc_field_scan;
5869 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5870 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5871 dc_scan= luma_dc_zigzag_scan;
5874 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5875 if( dqp == INT_MIN ){
5876 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5880 if(((unsigned)s->qscale) > 51){
5881 if(s->qscale<0) s->qscale+= 52;
5882 else s->qscale-= 52;
5884 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5885 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5887 if( IS_INTRA16x16( mb_type ) ) {
5889 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5890 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5893 qmul = h->dequant4_coeff[0][s->qscale];
5894 for( i = 0; i < 16; i++ ) {
5895 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5896 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5899 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5903 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5904 if( cbp & (1<<i8x8) ) {
5905 if( IS_8x8DCT(mb_type) ) {
5906 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5907 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5909 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5910 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5911 const int index = 4*i8x8 + i4x4;
5912 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5914 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5915 //STOP_TIMER("decode_residual")
5919 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5920 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5927 for( c = 0; c < 2; c++ ) {
5928 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5929 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5935 for( c = 0; c < 2; c++ ) {
5936 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5937 for( i = 0; i < 4; i++ ) {
5938 const int index = 16 + 4 * c + i;
5939 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5940 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5944 uint8_t * const nnz= &h->non_zero_count_cache[0];
5945 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5946 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5949 uint8_t * const nnz= &h->non_zero_count_cache[0];
5950 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5951 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5952 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5953 h->last_qscale_diff = 0;
5956 s->current_picture.qscale_table[mb_xy]= s->qscale;
5957 write_back_non_zero_count(h);
5960 h->ref_count[0] >>= 1;
5961 h->ref_count[1] >>= 1;
5968 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5970 const int index_a = qp + h->slice_alpha_c0_offset;
5971 const int alpha = (alpha_table+52)[index_a];
5972 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5977 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5978 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5980 /* 16px edge length, because bS=4 is triggered by being at
5981 * the edge of an intra MB, so all 4 bS are the same */
5982 for( d = 0; d < 16; d++ ) {
5983 const int p0 = pix[-1];
5984 const int p1 = pix[-2];
5985 const int p2 = pix[-3];
5987 const int q0 = pix[0];
5988 const int q1 = pix[1];
5989 const int q2 = pix[2];
5991 if( FFABS( p0 - q0 ) < alpha &&
5992 FFABS( p1 - p0 ) < beta &&
5993 FFABS( q1 - q0 ) < beta ) {
5995 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5996 if( FFABS( p2 - p0 ) < beta)
5998 const int p3 = pix[-4];
6000 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6001 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6002 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6005 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6007 if( FFABS( q2 - q0 ) < beta)
6009 const int q3 = pix[3];
6011 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6012 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6013 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6016 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6020 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6021 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6023 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6029 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6031 const int index_a = qp + h->slice_alpha_c0_offset;
6032 const int alpha = (alpha_table+52)[index_a];
6033 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6038 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6039 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6041 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6045 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6047 for( i = 0; i < 16; i++, pix += stride) {
6053 int bS_index = (i >> 1);
6056 bS_index |= (i & 1);
6059 if( bS[bS_index] == 0 ) {
6063 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6064 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6065 alpha = (alpha_table+52)[index_a];
6066 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6068 if( bS[bS_index] < 4 ) {
6069 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6070 const int p0 = pix[-1];
6071 const int p1 = pix[-2];
6072 const int p2 = pix[-3];
6073 const int q0 = pix[0];
6074 const int q1 = pix[1];
6075 const int q2 = pix[2];
6077 if( FFABS( p0 - q0 ) < alpha &&
6078 FFABS( p1 - p0 ) < beta &&
6079 FFABS( q1 - q0 ) < beta ) {
6083 if( FFABS( p2 - p0 ) < beta ) {
6084 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6087 if( FFABS( q2 - q0 ) < beta ) {
6088 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6092 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6093 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6094 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6095 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6098 const int p0 = pix[-1];
6099 const int p1 = pix[-2];
6100 const int p2 = pix[-3];
6102 const int q0 = pix[0];
6103 const int q1 = pix[1];
6104 const int q2 = pix[2];
6106 if( FFABS( p0 - q0 ) < alpha &&
6107 FFABS( p1 - p0 ) < beta &&
6108 FFABS( q1 - q0 ) < beta ) {
6110 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6111 if( FFABS( p2 - p0 ) < beta)
6113 const int p3 = pix[-4];
6115 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6116 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6117 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6120 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6122 if( FFABS( q2 - q0 ) < beta)
6124 const int q3 = pix[3];
6126 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6127 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6128 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6131 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6135 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6136 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6138 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6143 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6145 for( i = 0; i < 8; i++, pix += stride) {
6153 if( bS[bS_index] == 0 ) {
6157 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6158 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6159 alpha = (alpha_table+52)[index_a];
6160 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6162 if( bS[bS_index] < 4 ) {
6163 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6164 const int p0 = pix[-1];
6165 const int p1 = pix[-2];
6166 const int q0 = pix[0];
6167 const int q1 = pix[1];
6169 if( FFABS( p0 - q0 ) < alpha &&
6170 FFABS( p1 - p0 ) < beta &&
6171 FFABS( q1 - q0 ) < beta ) {
6172 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6174 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6175 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6176 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6179 const int p0 = pix[-1];
6180 const int p1 = pix[-2];
6181 const int q0 = pix[0];
6182 const int q1 = pix[1];
6184 if( FFABS( p0 - q0 ) < alpha &&
6185 FFABS( p1 - p0 ) < beta &&
6186 FFABS( q1 - q0 ) < beta ) {
6188 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6189 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6190 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6196 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6198 const int index_a = qp + h->slice_alpha_c0_offset;
6199 const int alpha = (alpha_table+52)[index_a];
6200 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6201 const int pix_next = stride;
6206 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6207 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6209 /* 16px edge length, see filter_mb_edgev */
6210 for( d = 0; d < 16; d++ ) {
6211 const int p0 = pix[-1*pix_next];
6212 const int p1 = pix[-2*pix_next];
6213 const int p2 = pix[-3*pix_next];
6214 const int q0 = pix[0];
6215 const int q1 = pix[1*pix_next];
6216 const int q2 = pix[2*pix_next];
6218 if( FFABS( p0 - q0 ) < alpha &&
6219 FFABS( p1 - p0 ) < beta &&
6220 FFABS( q1 - q0 ) < beta ) {
6222 const int p3 = pix[-4*pix_next];
6223 const int q3 = pix[ 3*pix_next];
6225 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6226 if( FFABS( p2 - p0 ) < beta) {
6228 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6229 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6230 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6233 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6235 if( FFABS( q2 - q0 ) < beta) {
6237 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6238 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6239 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6242 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6246 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6247 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6249 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6256 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6258 const int index_a = qp + h->slice_alpha_c0_offset;
6259 const int alpha = (alpha_table+52)[index_a];
6260 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6265 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6266 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6268 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6272 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6273 MpegEncContext * const s = &h->s;
6274 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6276 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6280 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6282 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6283 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6284 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6287 assert(!FRAME_MBAFF);
6289 mb_type = s->current_picture.mb_type[mb_xy];
6290 qp = s->current_picture.qscale_table[mb_xy];
6291 qp0 = s->current_picture.qscale_table[mb_xy-1];
6292 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6293 qpc = get_chroma_qp( h, 0, qp );
6294 qpc0 = get_chroma_qp( h, 0, qp0 );
6295 qpc1 = get_chroma_qp( h, 0, qp1 );
6296 qp0 = (qp + qp0 + 1) >> 1;
6297 qp1 = (qp + qp1 + 1) >> 1;
6298 qpc0 = (qpc + qpc0 + 1) >> 1;
6299 qpc1 = (qpc + qpc1 + 1) >> 1;
6300 qp_thresh = 15 - h->slice_alpha_c0_offset;
6301 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6302 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6305 if( IS_INTRA(mb_type) ) {
6306 int16_t bS4[4] = {4,4,4,4};
6307 int16_t bS3[4] = {3,3,3,3};
6308 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6309 if( IS_8x8DCT(mb_type) ) {
6310 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6311 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6312 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6313 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6315 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6316 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6317 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6318 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6319 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6320 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6321 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6322 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6324 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6325 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6326 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6327 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6328 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6329 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6330 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6331 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6334 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6335 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6337 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6339 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6341 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6342 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6343 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6344 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6346 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6347 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6348 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6349 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6351 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6352 bSv[0][0] = 0x0004000400040004ULL;
6353 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6354 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6356 #define FILTER(hv,dir,edge)\
6357 if(bSv[dir][edge]) {\
6358 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6360 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6361 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6367 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Apply the in-loop deblocking filter to one macroblock (general/slow path).
 * Filters the luma plane and both chroma planes, vertical edges first
 * (dir==0) then horizontal edges (dir==1), computing boundary strengths (bS)
 * per 4-pixel edge segment as in H.264 spec section 8.7.
 * NOTE(review): this chunk is an extracted listing; source lines are missing
 * between the embedded line numbers, so only comments are added here and all
 * original text is kept byte-identical.
 */
6386 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6387 MpegEncContext * const s = &h->s;
6388 const int mb_xy= mb_x + mb_y*s->mb_stride;
6389 const int mb_type = s->current_picture.mb_type[mb_xy];
6390 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6391 int first_vertical_edge_done = 0;
6394 //for sufficiently low qp, filtering wouldn't do anything
6395 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6397 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6398 int qp = s->current_picture.qscale_table[mb_xy];
6400 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6401 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6406 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6407 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6408 int top_type, left_type[2];
6409 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6410 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6411 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6413 if(IS_8x8DCT(top_type)){
6414 h->non_zero_count_cache[4+8*0]=
6415 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6416 h->non_zero_count_cache[6+8*0]=
6417 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6419 if(IS_8x8DCT(left_type[0])){
6420 h->non_zero_count_cache[3+8*1]=
6421 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6423 if(IS_8x8DCT(left_type[1])){
6424 h->non_zero_count_cache[3+8*3]=
6425 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6428 if(IS_8x8DCT(mb_type)){
6429 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6430 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6432 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6433 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6435 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6436 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6438 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6439 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
// MBAFF special case: the left edge of a frame MB next to a field MB pair
// (or vice versa) needs 8 bS values and per-field QPs (spec 8.7.2).
6444 // left mb is in picture
6445 && h->slice_table[mb_xy-1] != 255
6446 // and current and left pair do not have the same interlaced type
6447 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6448 // and left mb is in the same slice if deblocking_filter == 2
6449 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6450 /* First vertical edge is different in MBAFF frames
6451 * There are 8 different bS to compute and 2 different Qp
6453 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6454 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6459 int mb_qp, mbn0_qp, mbn1_qp;
6461 first_vertical_edge_done = 1;
6463 if( IS_INTRA(mb_type) )
6464 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6466 for( i = 0; i < 8; i++ ) {
6467 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6469 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6471 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6472 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6473 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
// Average the QP of the current MB with each left-pair neighbour, for
// luma (qp), Cb (bqp) and Cr (rqp) separately.
6480 mb_qp = s->current_picture.qscale_table[mb_xy];
6481 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6482 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6483 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6484 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6485 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6486 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6487 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6488 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6489 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6490 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6491 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6492 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6495 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6496 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6497 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6498 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6499 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6501 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6502 for( dir = 0; dir < 2; dir++ )
6505 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6506 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6507 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6508 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
6509 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6511 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6512 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6513 // how often to recheck mv-based bS when iterating between edges
6514 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6515 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6516 // how often to recheck mv-based bS when iterating along each edge
6517 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6519 if (first_vertical_edge_done) {
6521 first_vertical_edge_done = 0;
6524 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6527 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6528 && !IS_INTERLACED(mb_type)
6529 && IS_INTERLACED(mbm_type)
6531 // This is a special case in the norm where the filtering must
6532 // be done twice (one each of the field) even if we are in a
6533 // frame macroblock.
6535 static const int nnz_idx[4] = {4,5,6,3};
6536 unsigned int tmp_linesize = 2 * linesize;
6537 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6538 int mbn_xy = mb_xy - 2 * s->mb_stride;
6543 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6544 if( IS_INTRA(mb_type) ||
6545 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6546 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6548 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6549 for( i = 0; i < 4; i++ ) {
6550 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6551 mbn_nnz[nnz_idx[i]] != 0 )
6557 // Do not use s->qscale as luma quantizer because it has not the same
6558 // value in IPCM macroblocks.
6559 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6560 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6561 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6562 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6563 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6564 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6565 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6566 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Main per-edge loop: edge 0 is the MB boundary (neighbour = mbm_xy),
// edges 1..3 are internal (neighbour = the MB itself).
6573 for( edge = start; edge < edges; edge++ ) {
6574 /* mbn_xy: neighbor macroblock */
6575 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6576 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6577 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
6581 if( (edge&1) && IS_8x8DCT(mb_type) )
6584 if( IS_INTRA(mb_type) ||
6585 IS_INTRA(mbn_type) ) {
6588 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6589 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6598 bS[0] = bS[1] = bS[2] = bS[3] = value;
6603 if( edge & mask_edge ) {
6604 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6607 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6608 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6611 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6612 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6613 int bn_idx= b_idx - (dir ? 8:1);
6616 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6617 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6618 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6619 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6622 if(h->slice_type_nos == FF_B_TYPE && v){
6624 for( l = 0; !v && l < 2; l++ ) {
6626 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6627 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6628 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6632 bS[0] = bS[1] = bS[2] = bS[3] = v;
// Per-4x4 bS computation: nnz first, then reference/motion-vector
// differences (>= 1 luma sample horizontally, mvy_limit vertically).
6638 for( i = 0; i < 4; i++ ) {
6639 int x = dir == 0 ? edge : i;
6640 int y = dir == 0 ? i : edge;
6641 int b_idx= 8 + 4 + x + 8*y;
6642 int bn_idx= b_idx - (dir ? 8:1);
6644 if( h->non_zero_count_cache[b_idx] != 0 ||
6645 h->non_zero_count_cache[bn_idx] != 0 ) {
6651 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6652 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6653 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6654 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6660 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6662 for( l = 0; l < 2; l++ ) {
6664 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6665 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6666 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6675 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6680 // Do not use s->qscale as luma quantizer because it has not the same
6681 // value in IPCM macroblocks.
6682 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6683 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6684 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6685 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// Apply the filter: chroma edges exist only at even luma edge positions
// because chroma is subsampled 2:1 (4:2:0).
6687 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6688 if( (edge&1) == 0 ) {
6689 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6690 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6691 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6692 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6695 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6696 if( (edge&1) == 0 ) {
6697 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6698 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6699 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6700 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decode all macroblocks of one slice.
 * Dispatches to the CABAC or CAVLC entropy decoder depending on the PPS,
 * advancing (mb_x, mb_y) and reporting progress/errors to the error
 * resilience layer via ff_er_add_slice().
 *
 * Fix: line 6865 contained mis-encoded characters ("s->?gb" and
 * "s->gb?.size_in_bits"); restored to match the sibling checks on
 * lines 6853/6866.
 *
 * NOTE(review): this chunk is an extracted listing with missing source
 * lines; apart from the one-line fix the text is kept byte-identical.
 */
6707 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6708 MpegEncContext * const s = &h->s;
6709 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6713 if( h->pps.cabac ) {
6717 align_get_bits( &s->gb );
6720 ff_init_cabac_states( &h->cabac);
6721 ff_init_cabac_decoder( &h->cabac,
6722 s->gb.buffer + get_bits_count(&s->gb)/8,
6723 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6724 /* calculate pre-state */
6725 for( i= 0; i < 460; i++ ) {
6727 if( h->slice_type_nos == FF_I_TYPE )
6728 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6730 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6733 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6735 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6740 int ret = decode_mb_cabac(h);
6742 //STOP_TIMER("decode_mb_cabac")
6744 if(ret>=0) hl_decode_mb(h);
6746 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6749 if(ret>=0) ret = decode_mb_cabac(h);
6751 if(ret>=0) hl_decode_mb(h);
6754 eos = get_cabac_terminate( &h->cabac );
6756 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6757 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6758 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6762 if( ++s->mb_x >= s->mb_width ) {
6764 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6766 if(FIELD_OR_MBAFF_PICTURE) {
6771 if( eos || s->mb_y >= s->mb_height ) {
6772 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6773 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6780 int ret = decode_mb_cavlc(h);
6782 if(ret>=0) hl_decode_mb(h);
6784 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6786 ret = decode_mb_cavlc(h);
6788 if(ret>=0) hl_decode_mb(h);
6793 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6794 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6799 if(++s->mb_x >= s->mb_width){
6801 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6803 if(FIELD_OR_MBAFF_PICTURE) {
6806 if(s->mb_y >= s->mb_height){
6807 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6809 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6810 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6814 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6821 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6822 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6823 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6824 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6828 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6837 for(;s->mb_y < s->mb_height; s->mb_y++){
6838 for(;s->mb_x < s->mb_width; s->mb_x++){
6839 int ret= decode_mb(h);
6844 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6845 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6850 if(++s->mb_x >= s->mb_width){
6852 if(++s->mb_y >= s->mb_height){
6853 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6854 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6858 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6865 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6866 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6867 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6871 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6878 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6881 return -1; //not reached
/**
 * Parse an SEI "user data unregistered" payload.
 * Reads up to size bytes (capped to the local buffer), scans it for an
 * x264 version banner and stores the build number in h->x264_build
 * (used elsewhere for bug workarounds); skips any remaining bytes.
 * NOTE(review): extracted listing with missing lines; only comments added.
 */
6884 static int decode_unregistered_user_data(H264Context *h, int size){
6885 MpegEncContext * const s = &h->s;
6886 uint8_t user_data[16+256];
6892 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6893 user_data[i]= get_bits(&s->gb, 8);
// The 16-byte UUID precedes the payload text, hence the +16 offsets below.
6897 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6898 if(e==1 && build>=0)
6899 h->x264_build= build;
6901 if(s->avctx->debug & FF_DEBUG_BUGS)
6902 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6905 skip_bits(&s->gb, 8);
/**
 * Parse an SEI NAL unit.
 * Each SEI message carries a type and a size, both coded as a sequence of
 * 0xFF bytes plus a final byte (spec 7.3.2.3.1); unregistered user data is
 * decoded, everything else is skipped.
 * NOTE(review): extracted listing with missing lines; only comments added.
 */
6910 static int decode_sei(H264Context *h){
6911 MpegEncContext * const s = &h->s;
6913 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// ff_* byte-escape coding: accumulate 255 per 0xFF byte, stop on the first
// byte below 255 (same scheme for both the type and the size fields).
6918 type+= show_bits(&s->gb, 8);
6919 }while(get_bits(&s->gb, 8) == 255);
6923 size+= show_bits(&s->gb, 8);
6924 }while(get_bits(&s->gb, 8) == 255);
6928 if(decode_unregistered_user_data(h, size) < 0)
6932 skip_bits(&s->gb, 8*size);
6935 //FIXME check bits here
6936 align_get_bits(&s->gb);
/**
 * Parse (and discard) the HRD parameters of the VUI (spec E.1.2).
 * All fields are read to keep the bitstream position correct; none of the
 * values are stored.
 * NOTE(review): extracted listing with missing lines; only comments added.
 */
6942 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6943 MpegEncContext * const s = &h->s;
6945 cpb_count = get_ue_golomb(&s->gb) + 1;
6946 get_bits(&s->gb, 4); /* bit_rate_scale */
6947 get_bits(&s->gb, 4); /* cpb_size_scale */
6948 for(i=0; i<cpb_count; i++){
6949 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6950 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6951 get_bits1(&s->gb); /* cbr_flag */
6953 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6954 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6955 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6956 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parse the VUI (Video Usability Information) of an SPS (spec E.1.1).
 * Stores sample aspect ratio, timing info and the bitstream restriction
 * data (num_reorder_frames) in *sps; other fields are read and discarded
 * to keep the bitstream position correct.
 * NOTE(review): extracted listing with missing lines; only comments added.
 */
6959 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6960 MpegEncContext * const s = &h->s;
6961 int aspect_ratio_info_present_flag;
6962 unsigned int aspect_ratio_idc;
6963 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6965 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6967 if( aspect_ratio_info_present_flag ) {
6968 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries an explicit numerator/denominator; other idc values
// index the fixed pixel_aspect[] table.
6969 if( aspect_ratio_idc == EXTENDED_SAR ) {
6970 sps->sar.num= get_bits(&s->gb, 16);
6971 sps->sar.den= get_bits(&s->gb, 16);
6972 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6973 sps->sar= pixel_aspect[aspect_ratio_idc];
6975 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6982 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6984 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6985 get_bits1(&s->gb); /* overscan_appropriate_flag */
6988 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6989 get_bits(&s->gb, 3); /* video_format */
6990 get_bits1(&s->gb); /* video_full_range_flag */
6991 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6992 get_bits(&s->gb, 8); /* colour_primaries */
6993 get_bits(&s->gb, 8); /* transfer_characteristics */
6994 get_bits(&s->gb, 8); /* matrix_coefficients */
6998 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6999 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7000 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7003 sps->timing_info_present_flag = get_bits1(&s->gb);
7004 if(sps->timing_info_present_flag){
7005 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7006 sps->time_scale = get_bits_long(&s->gb, 32);
7007 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7010 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7011 if(nal_hrd_parameters_present_flag)
7012 decode_hrd_parameters(h, sps);
7013 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7014 if(vcl_hrd_parameters_present_flag)
7015 decode_hrd_parameters(h, sps);
7016 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7017 get_bits1(&s->gb); /* low_delay_hrd_flag */
7018 get_bits1(&s->gb); /* pic_struct_present_flag */
7020 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7021 if(sps->bitstream_restriction_flag){
7022 unsigned int num_reorder_frames;
7023 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7024 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7025 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7026 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7027 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7028 num_reorder_frames= get_ue_golomb(&s->gb);
7029 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// Reject absurd values before they size the delayed-picture buffer.
7031 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7032 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7036 sps->num_reorder_frames= num_reorder_frames;
/**
 * Parse one quantization scaling list (4x4 or 8x8, spec 7.3.2.1.1).
 * If the list is absent in the bitstream, fallback_list is copied; if the
 * first delta yields 0, the JVT default list is used; otherwise the deltas
 * are accumulated in zigzag order (a zero delta repeats the last value).
 * NOTE(review): extracted listing with missing lines; only comments added.
 */
7042 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7043 const uint8_t *jvt_list, const uint8_t *fallback_list){
7044 MpegEncContext * const s = &h->s;
7045 int i, last = 8, next = 8;
7046 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7047 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7048 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7050 for(i=0;i<size;i++){
7052 next = (last + get_se_golomb(&s->gb)) & 0xff;
7053 if(!i && !next){ /* matrix not written, we use the preset one */
7054 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7057 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices of an SPS or PPS (spec 7.3.2.1/7.3.2.2).
 * Fallback rules: a PPS falls back to the SPS matrices when the SPS carried
 * any; otherwise the JVT defaults apply. Within the set, each list falls
 * back to the previously decoded list of the same intra/inter class.
 * NOTE(review): extracted listing with missing lines; only comments added.
 */
7061 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7062 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7063 MpegEncContext * const s = &h->s;
7064 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7065 const uint8_t *fallback[4] = {
7066 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7067 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7068 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7069 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7071 if(get_bits1(&s->gb)){
7072 sps->scaling_matrix_present |= is_sps;
7073 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7074 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7075 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7076 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7077 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7078 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only in the SPS or when the PPS enables the 8x8 transform.
7079 if(is_sps || pps->transform_8x8_mode){
7080 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7081 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7083 } else if(fallback_sps) {
7084 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7085 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7090 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Validates id against max, allocates a zeroed entry on first use and
 * returns it; logs and fails on out-of-range id or allocation failure.
 * NOTE(review): extracted listing with missing lines (return type and
 * control flow are on removed lines); only comments added. */
7093 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7094 const size_t size, const char *name)
7097 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7102 vec[id] = av_mallocz(size);
7104 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parse a Sequence Parameter Set NAL unit (spec 7.3.2.1) into
 * h->sps_buffers[sps_id], allocating the entry via alloc_parameter_set().
 *
 * Fix: the cropping sanity check on line 7207 read
 * h->sps.frame_mbs_only_flag — the currently *active* SPS, which may be a
 * different (or not yet initialized) parameter set — instead of the SPS
 * being parsed. Changed to sps->frame_mbs_only_flag (matches the later
 * upstream FFmpeg fix).
 *
 * NOTE(review): this chunk is an extracted listing with missing source
 * lines; apart from the one-token fix the text is kept byte-identical.
 */
7109 static inline int decode_seq_parameter_set(H264Context *h){
7110 MpegEncContext * const s = &h->s;
7111 int profile_idc, level_idc;
7112 unsigned int sps_id, tmp, mb_width, mb_height;
7116 profile_idc= get_bits(&s->gb, 8);
7117 get_bits1(&s->gb); //constraint_set0_flag
7118 get_bits1(&s->gb); //constraint_set1_flag
7119 get_bits1(&s->gb); //constraint_set2_flag
7120 get_bits1(&s->gb); //constraint_set3_flag
7121 get_bits(&s->gb, 4); // reserved
7122 level_idc= get_bits(&s->gb, 8);
7123 sps_id= get_ue_golomb(&s->gb);
7125 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7129 sps->profile_idc= profile_idc;
7130 sps->level_idc= level_idc;
7132 if(sps->profile_idc >= 100){ //high profile
7133 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7134 get_bits1(&s->gb); //residual_color_transform_flag
7135 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7136 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7137 sps->transform_bypass = get_bits1(&s->gb);
7138 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7140 sps->scaling_matrix_present = 0;
7142 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7143 sps->poc_type= get_ue_golomb(&s->gb);
7145 if(sps->poc_type == 0){ //FIXME #define
7146 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7147 } else if(sps->poc_type == 1){//FIXME #define
7148 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7149 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7150 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7151 tmp= get_ue_golomb(&s->gb);
7153 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7154 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7157 sps->poc_cycle_length= tmp;
7159 for(i=0; i<sps->poc_cycle_length; i++)
7160 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7161 }else if(sps->poc_type != 2){
7162 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7166 tmp= get_ue_golomb(&s->gb);
7167 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7168 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7171 sps->ref_frame_count= tmp;
7172 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7173 mb_width= get_ue_golomb(&s->gb) + 1;
7174 mb_height= get_ue_golomb(&s->gb) + 1;
7175 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7176 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7177 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7180 sps->mb_width = mb_width;
7181 sps->mb_height= mb_height;
7183 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7184 if(!sps->frame_mbs_only_flag)
7185 sps->mb_aff= get_bits1(&s->gb);
7189 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7191 #ifndef ALLOW_INTERLACE
7193 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7195 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7196 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7198 sps->crop= get_bits1(&s->gb);
7200 sps->crop_left = get_ue_golomb(&s->gb);
7201 sps->crop_right = get_ue_golomb(&s->gb);
7202 sps->crop_top = get_ue_golomb(&s->gb);
7203 sps->crop_bottom= get_ue_golomb(&s->gb);
7204 if(sps->crop_left || sps->crop_top){
7205 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7207 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7208 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7214 sps->crop_bottom= 0;
7217 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7218 if( sps->vui_parameters_present_flag )
7219 decode_vui_parameters(h, sps);
7221 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7222 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7223 sps_id, sps->profile_idc, sps->level_idc,
7225 sps->ref_frame_count,
7226 sps->mb_width, sps->mb_height,
7227 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7228 sps->direct_8x8_inference_flag ? "8B8" : "",
7229 sps->crop_left, sps->crop_right,
7230 sps->crop_top, sps->crop_bottom,
7231 sps->vui_parameters_present_flag ? "VUI" : ""
/* Precompute the chroma QP lookup table for one chroma component:
 * chroma_qp_table[t][luma_qp] = chroma_qp[clip(luma_qp + index, 0..51)],
 * where index is the PPS chroma_qp_index_offset for component t.
 * NOTE(review): extracted listing; the return type line is missing. */
7238 build_qp_table(PPS *pps, int t, int index)
7241 for(i = 0; i < 52; i++)
7242 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a Picture Parameter Set NAL unit (spec 7.3.2.2) into
 * h->pps_buffers[pps_id], allocating the entry via alloc_parameter_set().
 *
 * Fix: line 7334 set h->pps.chroma_qp_diff — the currently *active* PPS —
 * while parsing a possibly different PPS. Changed to pps->chroma_qp_diff
 * so the flag is stored on the PPS actually being decoded (matches the
 * later upstream FFmpeg code).
 *
 * NOTE(review): this chunk is an extracted listing with missing source
 * lines; apart from the one-token fix the text is kept byte-identical.
 */
7245 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7246 MpegEncContext * const s = &h->s;
7247 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7250 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7254 tmp= get_ue_golomb(&s->gb);
7255 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7256 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7261 pps->cabac= get_bits1(&s->gb);
7262 pps->pic_order_present= get_bits1(&s->gb);
7263 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7264 if(pps->slice_group_count > 1 ){
7265 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7266 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7267 switch(pps->mb_slice_group_map_type){
7270 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
7271 |    run_length[ i ]                               |1  |ue(v) |
7276 |   for( i = 0; i < num_slice_groups_minus1; i++ ) |   |      |
7278 |    top_left_mb[ i ]                              |1  |ue(v) |
7279 |    bottom_right_mb[ i ]                          |1  |ue(v) |
7287 |   slice_group_change_direction_flag              |1  |u(1)  |
7288 |   slice_group_change_rate_minus1                 |1  |ue(v) |
7293 |   slice_group_id_cnt_minus1                      |1  |ue(v) |
7294 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |     |
7296 |    slice_group_id[ i ]                           |1  |u(v)  |
7301 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7302 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7303 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7304 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7305 pps->ref_count[0]= pps->ref_count[1]= 1;
7309 pps->weighted_pred= get_bits1(&s->gb);
7310 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7311 pps->init_qp= get_se_golomb(&s->gb) + 26;
7312 pps->init_qs= get_se_golomb(&s->gb) + 26;
7313 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7314 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7315 pps->constrained_intra_pred= get_bits1(&s->gb);
7316 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7318 pps->transform_8x8_mode= 0;
7319 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7320 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7321 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// Optional trailing fields (present only if bits remain in the RBSP).
7323 if(get_bits_count(&s->gb) < bit_length){
7324 pps->transform_8x8_mode= get_bits1(&s->gb);
7325 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7326 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7328 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7331 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7332 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7333 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7334 pps->chroma_qp_diff= 1;
7336 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7337 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7338 pps_id, pps->sps_id,
7339 pps->cabac ? "CABAC" : "CAVLC",
7340 pps->slice_group_count,
7341 pps->ref_count[0], pps->ref_count[1],
7342 pps->weighted_pred ? "weighted" : "",
7343 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7344 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7345 pps->constrained_intra_pred ? "CONSTR" : "",
7346 pps->redundant_pic_cnt_present ? "REDU" : "",
7347 pps->transform_8x8_mode ? "8x8DCT" : ""
7355 * Call decode_slice() for each context.
7357 * @param h h264 master context
7358 * @param context_count number of contexts to execute
7360 static void execute_decode_slices(H264Context *h, int context_count){
7361 MpegEncContext * const s = &h->s;
7362 AVCodecContext * const avctx= s->avctx;
// Single-context (non-threaded) case: decode directly on the master context.
7366 if(context_count == 1) {
7367 decode_slice(avctx, h);
// Multi-context case: propagate the current error-resilience setting to each
// slave context and clear its error counter before the parallel run.
7369 for(i = 1; i < context_count; i++) {
7370 hx = h->thread_context[i];
7371 hx->s.error_resilience = avctx->error_resilience;
7372 hx->s.error_count = 0;
// Run decode_slice() over all thread contexts via the user-supplied
// avctx->execute() callback.
7375 avctx->execute(avctx, (void *)decode_slice,
7376 (void **)h->thread_context, NULL, context_count);
7378 /* pull back stuff from slices to master context */
// The last context holds the furthest-advanced decode position.
7379 hx = h->thread_context[context_count - 1];
7380 s->mb_x = hx->s.mb_x;
7381 s->mb_y = hx->s.mb_y;
7382 s->dropable = hx->s.dropable;
7383 s->picture_structure = hx->s.picture_structure;
// Accumulate the per-thread error counts into the master context.
7384 for(i = 1; i < context_count; i++)
7385 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each one to the
 * appropriate parser/decoder (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS).
 * Handles both AVC (length-prefixed, h->is_avc) and Annex-B (start-code
 * delimited) framing.
 * @param h        master H264 context
 * @param buf      input bitstream
 * @param buf_size size of buf in bytes
 */
7390 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7391 MpegEncContext * const s = &h->s;
7392 AVCodecContext * const avctx= s->avctx;
7394 H264Context *hx; ///< thread context
7395 int context_count = 0;
7397 h->max_contexts = avctx->thread_count;
// Debug dump of the first input bytes.
7400 for(i=0; i<50; i++){
7401 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Unless the caller feeds arbitrary chunks (CODEC_FLAG2_CHUNKS), each call
// starts a fresh access unit.
7404 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7405 h->current_slice = 0;
7406 if (!s->first_field)
7407 s->current_picture_ptr= NULL;
// AVC framing: read the big-endian NAL size prefix (nal_length_size bytes)
// and validate it against the remaining buffer.
7419 if(buf_index >= buf_size) break;
7421 for(i = 0; i < h->nal_length_size; i++)
7422 nalsize = (nalsize << 8) | buf[buf_index++];
7423 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7428 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7433 // start code prefix search
7434 for(; buf_index + 3 < buf_size; buf_index++){
7435 // This should always succeed in the first iteration.
7436 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7440 if(buf_index+3 >= buf_size) break;
7445 hx = h->thread_context[context_count];
// Unescape the NAL (remove emulation-prevention bytes) into ptr.
7447 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7448 if (ptr==NULL || dst_length < 0){
// Strip trailing zero bytes. BUGFIX: test dst_length > 0 BEFORE reading
// ptr[dst_length - 1]; the original order read one byte before the buffer
// when dst_length was 0 (&& short-circuits left to right).
7451 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7453 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7455 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7456 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7459 if (h->is_avc && (nalsize != consumed)){
7460 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7464 buf_index += consumed;
// Skip non-reference NALs when the caller asked to drop them.
7466 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7467 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7472 switch(hx->nal_unit_type){
7474 if (h->nal_unit_type != NAL_IDR_SLICE) {
// BUGFIX: append the missing '\n' so the log message matches the
// terminated style of every other av_log() call in this function.
7475 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
7478 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Regular (non-partitioned) slice: single bitstream reader.
7480 init_get_bits(&hx->s.gb, ptr, bit_length);
7482 hx->inter_gb_ptr= &hx->s.gb;
7483 hx->s.data_partitioning = 0;
7485 if((err = decode_slice_header(hx, h)))
7488 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7489 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7490 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7491 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7492 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7493 && avctx->skip_frame < AVDISCARD_ALL)
// Data partition A: slice header; intra/inter readers follow in DPB/DPC.
7497 init_get_bits(&hx->s.gb, ptr, bit_length);
7499 hx->inter_gb_ptr= NULL;
7500 hx->s.data_partitioning = 1;
7502 err = decode_slice_header(hx, h);
// Data partition B: intra coefficients.
7505 init_get_bits(&hx->intra_gb, ptr, bit_length);
7506 hx->intra_gb_ptr= &hx->intra_gb;
// Data partition C: inter coefficients.
7509 init_get_bits(&hx->inter_gb, ptr, bit_length);
7510 hx->inter_gb_ptr= &hx->inter_gb;
// Only decode once all partitions are in and the context is ready.
7512 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7513 && s->context_initialized
7515 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7516 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7517 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7518 && avctx->skip_frame < AVDISCARD_ALL)
7522 init_get_bits(&s->gb, ptr, bit_length);
// Sequence parameter set.
7526 init_get_bits(&s->gb, ptr, bit_length);
7527 decode_seq_parameter_set(h);
7529 if(s->flags& CODEC_FLAG_LOW_DELAY)
7532 if(avctx->has_b_frames < 2)
7533 avctx->has_b_frames= !s->low_delay;
// Picture parameter set.
7536 init_get_bits(&s->gb, ptr, bit_length);
7538 decode_picture_parameter_set(h, bit_length);
7542 case NAL_END_SEQUENCE:
7543 case NAL_END_STREAM:
7544 case NAL_FILLER_DATA:
7546 case NAL_AUXILIARY_SLICE:
7549 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush once we have gathered one slice per available context.
7552 if(context_count == h->max_contexts) {
7553 execute_decode_slices(h, context_count);
7558 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7560 /* Slice could not be decoded in parallel mode, copy down
7561 * NAL unit stuff to context 0 and restart. Note that
7562 * rbsp_buffer is not transferred, but since we no longer
7563 * run in parallel mode this should not be an issue. */
7564 h->nal_unit_type = hx->nal_unit_type;
7565 h->nal_ref_idc = hx->nal_ref_idc;
// Decode any slices still pending at end of input.
7571 execute_decode_slices(h, context_count);
7576 * returns the number of bytes consumed for building the current frame
// Clamps the reported position: never 0 (caller could loop forever on zero
// progress) and snapped to buf_size when within 10 bytes of the end.
7578 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7579 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7580 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level frame decoding entry point (AVCodec.decode callback).
 * Parses extradata (avcC) on first use, decodes the NAL units of the input
 * buffer, then reorders decoded pictures into display order via the
 * delayed_pic[] buffer.
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @return number of input bytes consumed, or negative on error
 */
7585 static int decode_frame(AVCodecContext *avctx,
7586 void *data, int *data_size,
7587 const uint8_t *buf, int buf_size)
7589 H264Context *h = avctx->priv_data;
7590 MpegEncContext *s = &h->s;
7591 AVFrame *pict = data;
7594 s->flags= avctx->flags;
7595 s->flags2= avctx->flags2;
7597 /* end of stream, output what is still in the buffers */
7598 if (buf_size == 0) {
7602 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC for output.
7603 out = h->delayed_pic[0];
7605 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7606 if(h->delayed_pic[i]->poc < out->poc){
7607 out = h->delayed_pic[i];
// Compact the delayed-picture list after removal.
7611 for(i=out_idx; h->delayed_pic[i]; i++)
7612 h->delayed_pic[i] = h->delayed_pic[i+1];
7615 *data_size = sizeof(AVFrame);
7616 *pict= *(AVFrame*)out;
// One-time parsing of the avcC extradata (AVCDecoderConfigurationRecord):
// version byte, then SPS and PPS sets with 2-byte length prefixes.
7622 if(h->is_avc && !h->got_avcC) {
7623 int i, cnt, nalsize;
7624 unsigned char *p = avctx->extradata;
7625 if(avctx->extradata_size < 7) {
7626 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7630 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7633 /* sps and pps in the avcC always have length coded with 2 bytes,
7634 so put a fake nal_length_size = 2 while parsing them */
7635 h->nal_length_size = 2;
7636 // Decode sps from avcC
7637 cnt = *(p+5) & 0x1f; // Number of sps
7639 for (i = 0; i < cnt; i++) {
7640 nalsize = AV_RB16(p) + 2;
7641 if(decode_nal_units(h, p, nalsize) < 0) {
7642 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7647 // Decode pps from avcC
7648 cnt = *(p++); // Number of pps
7649 for (i = 0; i < cnt; i++) {
7650 nalsize = AV_RB16(p) + 2;
7651 if(decode_nal_units(h, p, nalsize) != nalsize) {
7652 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7657 // Now store right nal length size, that will be use to parse all other nals
7658 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7659 // Do not reparse avcC
// Annex-B streams: extradata (if any) holds raw NALs, parse it once.
7663 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7664 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7668 buf_index=decode_nal_units(h, buf, buf_size);
7672 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7673 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7674 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// A full picture has been decoded (or the chunked caller finished one):
// run reference-picture marking and update POC prediction state.
7678 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7679 Picture *out = s->current_picture_ptr;
7680 Picture *cur = s->current_picture_ptr;
7681 int i, pics, cross_idr, out_of_order, out_idx;
7685 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7686 s->current_picture_ptr->pict_type= s->pict_type;
7689 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7690 h->prev_poc_msb= h->poc_msb;
7691 h->prev_poc_lsb= h->poc_lsb;
7693 h->prev_frame_num_offset= h->frame_num_offset;
7694 h->prev_frame_num= h->frame_num;
7697 * FIXME: Error handling code does not seem to support interlaced
7698 * when slices span multiple rows
7699 * The ff_er_add_slice calls don't work right for bottom
7700 * fields; they cause massive erroneous error concealing
7701 * Error marking covers both fields (top and bottom).
7702 * This causes a mismatched s->error_count
7703 * and a bad error table. Further, the error count goes to
7704 * INT_MAX when called for bottom field, because mb_y is
7705 * past end by one (callers fault) and resync_mb_y != 0
7706 * causes problems for the first MB line, too.
// An INT_MAX field POC means only one field of a pair has arrived yet.
7713 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7714 /* Wait for second field. */
7718 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7719 /* Derive top_field_first from field pocs. */
7720 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7722 //FIXME do something with unavailable reference frames
7724 /* Sort B-frames into display order */
7726 if(h->sps.bitstream_restriction_flag
7727 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7728 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Without bitstream_restriction_flag, strict compliance forces the
// maximum reorder delay.
7732 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7733 && !h->sps.bitstream_restriction_flag){
7734 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7739 while(h->delayed_pic[pics]) pics++;
7741 assert(pics <= MAX_DELAYED_PIC_COUNT);
// Append the current picture and keep it referenced while delayed.
7743 h->delayed_pic[pics++] = cur;
7744 if(cur->reference == 0)
7745 cur->reference = DELAYED_PIC_REF;
// Select the delayed picture with the smallest POC as output candidate.
7747 out = h->delayed_pic[0];
7749 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7750 if(h->delayed_pic[i]->poc < out->poc){
7751 out = h->delayed_pic[i];
// cross_idr: the delayed list spans an IDR boundary (POC restarted).
7754 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7756 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames adaptively when out-of-order output is detected.
7758 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7760 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7762 ((!cross_idr && out->poc > h->outputed_poc + 2)
7763 || cur->pict_type == FF_B_TYPE)))
7766 s->avctx->has_b_frames++;
// Emit (or drop, if out of order) the candidate once enough pictures
// are buffered.
7769 if(out_of_order || pics > s->avctx->has_b_frames){
7770 out->reference &= ~DELAYED_PIC_REF;
7771 for(i=out_idx; h->delayed_pic[i]; i++)
7772 h->delayed_pic[i] = h->delayed_pic[i+1];
7774 if(!out_of_order && pics > s->avctx->has_b_frames){
7775 *data_size = sizeof(AVFrame);
7777 h->outputed_poc = out->poc;
7778 *pict= *(AVFrame*)out;
7780 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7785 assert(pict->data[0] || !*data_size);
7786 ff_print_debug_info(s, pict);
7787 //printf("out %d\n", (int)pict->data[0]);
7790 /* Return the Picture timestamp as the frame number */
7791 /* we subtract 1 because it is added on utils.c */
7792 avctx->frame_number = s->picture_number - 1;
7794 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: a neighbour counts as available only when it exists inside the
 * picture AND belongs to the same slice (slice_table match).
 * Index layout per the assignments below: 0=top-left, 1=top, 2=top-right,
 * 3=left; 4 and 5 are constants (see FIXMEs).
 */
7797 static inline void fill_mb_avail(H264Context *h){
7798 MpegEncContext * const s = &h->s;
7799 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7802 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7803 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7804 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7810 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7811 h->mb_avail[4]= 1; //FIXME move out
7812 h->mb_avail[5]= 0; //FIXME move out
7820 #define SIZE (COUNT*40)
// NOTE(review): self-test routine fragment; the enclosing function's
// definition line is not visible in this extraction. It round-trips
// exp-Golomb coding, the 4x4 (I)DCT, the quantizer, and the NAL
// escaping/unescaping layer against random data.
7826 // int int_temp[10000];
7828 AVCodecContext avctx;
7830 dsputil_init(&dsp, &avctx);
// --- unsigned exp-Golomb: write i for i in [0,COUNT) then read back ---
7832 init_put_bits(&pb, temp, SIZE);
7833 printf("testing unsigned exp golomb\n");
7834 for(i=0; i<COUNT; i++){
7836 set_ue_golomb(&pb, i);
7837 STOP_TIMER("set_ue_golomb");
7839 flush_put_bits(&pb);
7841 init_get_bits(&gb, temp, 8*SIZE);
7842 for(i=0; i<COUNT; i++){
7845 s= show_bits(&gb, 24);
7848 j= get_ue_golomb(&gb);
7850 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7853 STOP_TIMER("get_ue_golomb");
// --- signed exp-Golomb: same round-trip, centred around zero ---
7857 init_put_bits(&pb, temp, SIZE);
7858 printf("testing signed exp golomb\n");
7859 for(i=0; i<COUNT; i++){
7861 set_se_golomb(&pb, i - COUNT/2);
7862 STOP_TIMER("set_se_golomb");
7864 flush_put_bits(&pb);
7866 init_get_bits(&gb, temp, 8*SIZE);
7867 for(i=0; i<COUNT; i++){
7870 s= show_bits(&gb, 24);
7873 j= get_se_golomb(&gb);
7874 if(j != i - COUNT/2){
7875 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7878 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT round-trip on random blocks, accumulating error ---
7882 printf("testing 4x4 (I)DCT\n");
7885 uint8_t src[16], ref[16];
7886 uint64_t error= 0, max_error=0;
7888 for(i=0; i<COUNT; i++){
7890 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7891 for(j=0; j<16; j++){
7892 ref[j]= random()%255;
7893 src[j]= random()%255;
7896 h264_diff_dct_c(block, src, ref, 4);
// Scale coefficients (the *4 then /5 steps mimic the H.264 norm factors
// on odd rows/columns — presumably; confirm against the dequant tables).
7899 for(j=0; j<16; j++){
7900 // printf("%d ", block[j]);
7901 block[j]= block[j]*4;
7902 if(j&1) block[j]= (block[j]*4 + 2)/5;
7903 if(j&4) block[j]= (block[j]*4 + 2)/5;
7907 s->dsp.h264_idct_add(ref, block, 4);
7908 /* for(j=0; j<16; j++){
7909 printf("%d ", ref[j]);
7913 for(j=0; j<16; j++){
7914 int diff= FFABS(src[j] - ref[j]);
7917 max_error= FFMAX(max_error, diff);
7920 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7921 printf("testing quantizer\n");
7922 for(qp=0; qp<52; qp++){
7924 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: escape a random bitstream, unescape, compare ---
7927 printf("Testing NAL layer\n");
7929 uint8_t bitstream[COUNT];
7930 uint8_t nal[COUNT*2];
7932 memset(&h, 0, sizeof(H264Context));
7934 for(i=0; i<COUNT; i++){
// Fill with non-zero bytes, then poke zeros at random positions so the
// emulation-prevention escaping path actually triggers.
7942 for(j=0; j<COUNT; j++){
7943 bitstream[j]= (random() % 255) + 1;
7946 for(j=0; j<zeros; j++){
7947 int pos= random() % COUNT;
7948 while(bitstream[pos] == 0){
7957 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7959 printf("encoding failed\n");
7963 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7967 if(out_length != COUNT){
7968 printf("incorrect length %d %d\n", out_length, COUNT);
7972 if(consumed != nal_length){
7973 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7977 if(memcmp(bitstream, out, COUNT)){
7978 printf("mismatch\n");
7984 printf("Testing RBSP\n");
/**
 * Codec close callback: release the per-context RBSP unescape buffers and
 * the dynamically allocated decoder tables.
 */
7992 static av_cold int decode_end(AVCodecContext *avctx)
7994 H264Context *h = avctx->priv_data;
7995 MpegEncContext *s = &h->s;
7997 av_freep(&h->rbsp_buffer[0]);
7998 av_freep(&h->rbsp_buffer[1]);
7999 free_tables(h); //FIXME cleanup init stuff perhaps
8002 // memset(h, 0, sizeof(H264Context));
8008 AVCodec h264_decoder = {
8012 sizeof(H264Context),
8017 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8019 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),