2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
43  * Value of Picture.reference when Picture is not a reference picture, but
44  * is held for delayed output.
46 #define DELAYED_PIC_REF 4
/* Static VLC tables for CAVLC residual decoding (coeff_token has one table
 * per nC range plus a dedicated chroma-DC variant; sizes follow the H.264
 * spec's table counts). */
48 static VLC coeff_token_vlc[4];
49 static VLC chroma_dc_coeff_token_vlc;
51 static VLC total_zeros_vlc[15];
52 static VLC chroma_dc_total_zeros_vlc[3];
54 static VLC run_vlc[6];
/* Forward declarations: SVQ3 dequant/IDCT helpers and the per-macroblock
 * deblocking filter entry points (full and fast variants). */
57 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
58 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
59 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
60 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values into one uint32_t such that the in-memory layout
 * matches two consecutive int16_t with 'a' first, on either endianness.
 */
62 static av_always_inline uint32_t pack16to32(int a, int b){
63 #ifdef WORDS_BIGENDIAN
/* Big endian: 'a' goes in the high half so it is stored first in memory. */
64 return (b&0xFFFF) + (a<<16);
/* Little endian: 'a' goes in the low half (stored first in memory). */
66 return (a&0xFFFF) + (b<<16);
/* Lookup table for qp % 6, valid for qp in 0..51 — avoids a runtime
 * modulo in the dequantization scale derivation. */
70 const uint8_t ff_rem6[52]={
71 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table for qp / 6, valid for qp in 0..51 — companion to ff_rem6. */
74 const uint8_t ff_div6[52]={
75 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/**
 * Fills the per-macroblock neighbour caches from the surrounding macroblocks:
 * intra 4x4 prediction modes, non-zero coefficient counts, motion vectors,
 * reference indices, CABAC mvd values and B-direct flags.  Neighbour
 * derivation accounts for MBAFF frame/field macroblock pairing.
 *
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when called from the deblocking filter, which
 *                    uses relaxed slice-boundary rules and can skip parts
 */
79 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
80 MpegEncContext * const s = &h->s;
81 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
82 int topleft_xy, top_xy, topright_xy, left_xy[2];
83 int topleft_type, top_type, topright_type, left_type[2];
/* In field pictures the macroblock above is a full extra stride away. */
87 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
89 //FIXME deblocking could skip the intra and nnz parts.
90 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
/* Non-MBAFF (or deblock fast path): neighbours are simple grid offsets. */
93 // The neighbour derivation below mirrors the spec's intricate interlaced/intra adjacency rules.
95 topleft_xy = top_xy - 1;
96 topright_xy= top_xy + 1;
97 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbours are derived from the frame/field flags of the
 * surrounding macroblock PAIRS, not individual macroblocks. */
107 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
108 const int top_pair_xy = pair_xy - s->mb_stride;
109 const int topleft_pair_xy = top_pair_xy - 1;
110 const int topright_pair_xy = top_pair_xy + 1;
111 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
112 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
113 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
114 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
115 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
116 const int bottom = (s->mb_y & 1);
117 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
119 ? !curr_mb_frame_flag // bottom macroblock
120 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
122 top_xy -= s->mb_stride;
125 ? !curr_mb_frame_flag // bottom macroblock
126 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
128 topleft_xy -= s->mb_stride;
131 ? !curr_mb_frame_flag // bottom macroblock
132 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
134 topright_xy -= s->mb_stride;
/* Left neighbour: when the pair coding mode differs, point at the pair. */
136 if (left_mb_frame_flag != curr_mb_frame_flag) {
137 left_xy[1] = left_xy[0] = pair_xy - 1;
138 if (curr_mb_frame_flag) {
159 left_xy[1] += s->mb_stride;
/* Publish the resolved neighbour indices for later use (e.g. deblocking). */
172 h->top_mb_xy = top_xy;
173 h->left_mb_xy[0] = left_xy[0];
174 h->left_mb_xy[1] = left_xy[1];
/* Deblock path: a slice_table value < 255 marks a decoded macroblock. */
178 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
179 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
180 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
182 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
/* Luma nnz flags for this MB were packed into two bytes at index 14/15
 * by write_back_non_zero_count(); unpack them bit by bit. */
184 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
186 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
187 for(list=0; list<h->list_count; list++){
188 if(USES_LIST(mb_type,list)){
189 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
190 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
191 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
192 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
/* Replicate each 8x8 block's ref index across its 2x2 cache cells. */
198 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
199 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
201 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
202 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
204 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
205 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decode path: neighbours only count when they belong to this slice. */
210 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
211 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
212 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
213 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
214 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction sample availability: start from "all available" bitmasks
 * and clear bits for each missing/disallowed neighbour. */
217 if(IS_INTRA(mb_type)){
218 h->topleft_samples_available=
219 h->top_samples_available=
220 h->left_samples_available= 0xFFFF;
221 h->topright_samples_available= 0xEEEA;
223 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
224 h->topleft_samples_available= 0xB3FF;
225 h->top_samples_available= 0x33FF;
226 h->topright_samples_available= 0x26EA;
229 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
230 h->topleft_samples_available&= 0xDF5F;
231 h->left_samples_available&= 0x5F5F;
235 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
236 h->topleft_samples_available&= 0x7FFF;
238 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
239 h->topright_samples_available&= 0xFBFF;
/* Intra 4x4 mode cache: import the bottom row of the top neighbour and the
 * right column of the left neighbours; fall back to a default otherwise. */
241 if(IS_INTRA4x4(mb_type)){
242 if(IS_INTRA4x4(top_type)){
243 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
244 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
245 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
246 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
249 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
254 h->intra4x4_pred_mode_cache[4+8*0]=
255 h->intra4x4_pred_mode_cache[5+8*0]=
256 h->intra4x4_pred_mode_cache[6+8*0]=
257 h->intra4x4_pred_mode_cache[7+8*0]= pred;
260 if(IS_INTRA4x4(left_type[i])){
261 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
262 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
265 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: pull neighbour nnz values; unavailable neighbours
 * get 0 (CABAC inter) or 64 (marker meaning "unavailable") instead. */
286 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
288 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
289 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
290 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
291 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
293 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
294 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
296 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
297 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
300 h->non_zero_count_cache[4+8*0]=
301 h->non_zero_count_cache[5+8*0]=
302 h->non_zero_count_cache[6+8*0]=
303 h->non_zero_count_cache[7+8*0]=
305 h->non_zero_count_cache[1+8*0]=
306 h->non_zero_count_cache[2+8*0]=
308 h->non_zero_count_cache[1+8*3]=
309 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
313 for (i=0; i<2; i++) {
315 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
316 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
317 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
318 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
320 h->non_zero_count_cache[3+8*1 + 2*8*i]=
321 h->non_zero_count_cache[3+8*2 + 2*8*i]=
322 h->non_zero_count_cache[0+8*1 + 8*i]=
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern values used for CABAC context selection. */
330 h->top_cbp = h->cbp_table[top_xy];
331 } else if(IS_INTRA(mb_type)) {
338 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
339 } else if(IS_INTRA(mb_type)) {
345 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
348 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector / reference index caches for inter or direct macroblocks. */
353 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
355 for(list=0; list<h->list_count; list++){
356 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
357 /*if(!h->mv_cache_clean[list]){
358 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
359 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
360 h->mv_cache_clean[list]= 1;
364 h->mv_cache_clean[list]= 0;
/* Top neighbour: copy its bottom row of MVs and ref indices, or mark
 * the row as not-used / not-available. */
366 if(USES_LIST(top_type, list)){
367 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
368 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
369 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
370 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
371 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
373 h->ref_cache[list][scan8[0] + 0 - 1*8]=
374 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
375 h->ref_cache[list][scan8[0] + 2 - 1*8]=
376 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
378 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
379 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
380 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
382 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left neighbours: copy the rightmost column of MVs/ref indices. */
386 int cache_idx = scan8[0] - 1 + i*2*8;
387 if(USES_LIST(left_type[i], list)){
388 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
389 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
390 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
391 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
392 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
393 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
395 *(uint32_t*)h->mv_cache [list][cache_idx ]=
396 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
397 h->ref_cache[list][cache_idx ]=
398 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
402 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* Corner neighbours (topleft/topright): a single MV/ref each. */
405 if(USES_LIST(topleft_type, list)){
406 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
407 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
408 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
409 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
415 if(USES_LIST(topright_type, list)){
416 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
417 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
418 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
419 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
421 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
425 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Mark the cache cells that have no real neighbour as unavailable. */
428 h->ref_cache[list][scan8[5 ]+1] =
429 h->ref_cache[list][scan8[7 ]+1] =
430 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
431 h->ref_cache[list][scan8[4 ]] =
432 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
433 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
434 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
435 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
436 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
437 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
440 /* Load neighbour motion vector differences (mvd), used for CABAC context modelling. */
441 if(USES_LIST(top_type, list)){
442 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
445 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
448 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
449 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
450 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
453 if(USES_LIST(left_type[0], list)){
454 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
458 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
461 if(USES_LIST(left_type[1], list)){
462 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
463 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
466 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
469 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
470 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
471 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
472 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
473 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: build the direct-mode flag cache from the neighbours. */
475 if(h->slice_type == B_TYPE){
476 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
478 if(IS_DIRECT(top_type)){
479 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
480 }else if(IS_8X8(top_type)){
481 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
482 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
483 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
488 if(IS_DIRECT(left_type[0]))
489 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
490 else if(IS_8X8(left_type[0]))
491 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
493 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
495 if(IS_DIRECT(left_type[1]))
496 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
497 else if(IS_8X8(left_type[1]))
498 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
500 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame/field MV conversion: MAP_F2F is applied over every neighbour
 * cache cell; the two #define variants below convert frame->field and
 * field->frame MVs/refs respectively (vertical component halved/doubled). */
506 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
507 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
508 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
509 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
510 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
512 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
513 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
514 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
515 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
517 #define MAP_F2F(idx, mb_type)\
518 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
519 h->ref_cache[list][idx] <<= 1;\
520 h->mv_cache[list][idx][1] /= 2;\
521 h->mvd_cache[list][idx][1] /= 2;\
526 #define MAP_F2F(idx, mb_type)\
527 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] >>= 1;\
529 h->mv_cache[list][idx][1] <<= 1;\
530 h->mvd_cache[list][idx][1] <<= 1;\
/* 8x8 DCT context for CABAC: count of neighbours using the 8x8 transform. */
540 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the right column and bottom row of the intra4x4 prediction mode
 * cache back into the per-macroblock table, so that subsequent macroblocks
 * can read this MB's modes as their left/top neighbours.
 */
543 static inline void write_back_intra_pred_mode(H264Context *h){
544 MpegEncContext * const s = &h->s;
545 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Indices 0..3: right column (rows 1..4); 4..6: bottom row. */
547 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
548 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
549 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
550 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
551 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
552 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
553 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
557 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Returns the (possibly remapped) status, or logs an error and fails when a
 * requested intra4x4 mode needs samples that are not available. */
559 static inline int check_intra4x4_pred_mode(H264Context *h){
560 MpegEncContext * const s = &h->s;
/* Remap tables: for each mode, the replacement when the top (resp. left)
 * neighbour is missing; -1 marks a mode that cannot be fixed up. */
561 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
562 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
565 if(!(h->top_samples_available&0x8000)){
567 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
569 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
572 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
577 if(!(h->left_samples_available&0x8000)){
579 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
581 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
584 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
590 } //FIXME cleanup like next
593 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* 16x16 / chroma variant: validates 'mode' and remaps DC prediction to the
 * variant that only uses available neighbour samples. */
595 static inline int check_intra_pred_mode(H264Context *h, int mode){
596 MpegEncContext * const s = &h->s;
/* Replacement modes when the top (resp. left) neighbour is unavailable;
 * -1 entries are unfixable and trigger the error paths below. */
597 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
598 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
601 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
605 if(!(h->top_samples_available&0x8000)){
608 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->left_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
625 * gets the predicted intra4x4 prediction mode.
/* Per the spec, the prediction is min(left, top); if either neighbour is
 * unavailable (negative), DC_PRED is used instead. */
627 static inline int pred_intra_mode(H264Context *h, int n){
628 const int index8= scan8[n];
629 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
630 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
631 const int min= FFMIN(left, top);
633 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
635 if(min<0) return DC_PRED;
/**
 * Writes the edge non-zero-count cache entries back into the per-macroblock
 * table so neighbouring macroblocks can read them, and (below) packs all 16
 * luma nnz flags into two bytes for the deblocking filter.
 */
639 static inline void write_back_non_zero_count(H264Context *h){
640 MpegEncContext * const s = &h->s;
641 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Luma: right column and bottom row of the 4x4 block grid. */
643 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
644 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
645 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
646 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
647 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
648 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
649 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma Cb edge entries. */
651 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
652 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
653 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* Chroma Cr edge entries. */
655 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
656 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
657 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
660 // store all luma nnzs, for deblocking
663 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
664 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
669 * gets the predicted number of non zero coefficients.
670 * @param n block index
/* Prediction is based on the left and top neighbour counts; when both are
 * present their rounded average is used (visible below as (i+1)>>1). */
672 static inline int pred_non_zero_count(H264Context *h, int n){
673 const int index8= scan8[n];
674 const int left= h->non_zero_count_cache[index8 - 1];
675 const int top = h->non_zero_count_cache[index8 - 8];
/* i < 64 means at least one neighbour was available (64 = "unavailable"). */
678 if(i<64) i= (i+1)>>1;
680 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the top-right ("diagonal") neighbour MV used for motion vector
 * prediction, falling back to the top-left neighbour when the top-right is
 * unavailable, as per the spec.  Returns the corresponding reference index
 * and stores the MV pointer through *C.  Contains special-case handling for
 * MBAFF, where frame/field MVs must be rescaled on the fly.
 */
685 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
686 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
687 MpegEncContext *s = &h->s;
689 /* there is no consistent mapping of mvs to neighboring locations that will
690 * make mbaff happy, so we can't move all this logic to fill_caches */
692 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch cache slot scan8[0]-2 holds the rescaled MV for the MBAFF cases. */
694 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
695 *C = h->mv_cache[list][scan8[0]-2];
698 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
699 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
700 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV reads an MV/ref straight from the colocated picture data at
 * (x4,y4) in 4x4-block units, applying MV_OP to the vertical MV component
 * and REF_OP to the ref index for frame<->field scaling. */
701 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
702 const int x4 = X4, y4 = Y4;\
703 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
704 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
705 return LIST_NOT_USED;\
706 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
707 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
708 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
709 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
711 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
714 if(topright_ref == PART_NOT_AVAILABLE
715 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
716 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
718 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
719 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
722 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
724 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
725 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use the cached top-right, else fall back to top-left. */
731 if(topright_ref != PART_NOT_AVAILABLE){
732 *C= h->mv_cache[list][ i - 8 + part_width ];
735 tprintf(s->avctx, "topright MV not available\n");
737 *C= h->mv_cache[list][ i - 8 - 1 ];
738 return h->ref_cache[list][ i - 8 - 1 ];
743 * gets the predicted MV.
744 * @param n the block index
745 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
746 * @param mx the x component of the predicted motion vector
747 * @param my the y component of the predicted motion vector
/* Implements the spec's median MV prediction: A=left, B=top, C=diagonal
 * neighbour.  Median of the three when >1 neighbour shares 'ref'; the single
 * matching neighbour's MV when exactly one does; median otherwise. */
749 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
750 const int index8= scan8[n];
751 const int top_ref= h->ref_cache[list][ index8 - 8 ];
752 const int left_ref= h->ref_cache[list][ index8 - 1 ];
753 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
754 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
756 int diagonal_ref, match_count;
758 assert(part_width==1 || part_width==2 || part_width==4);
768 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
769 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
770 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
771 if(match_count > 1){ //most common
772 *mx= mid_pred(A[0], B[0], C[0]);
773 *my= mid_pred(A[1], B[1], C[1]);
774 }else if(match_count==1){
778 }else if(top_ref==ref){
/* No neighbour matched: if only the left neighbour exists, use it; else
 * take the component-wise median of all three candidates. */
786 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
790 *mx= mid_pred(A[0], B[0], C[0]);
791 *my= mid_pred(A[1], B[1], C[1]);
795 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
799 * gets the directionally predicted 16x8 MV.
800 * @param n the block index
801 * @param mx the x component of the predicted motion vector
802 * @param my the y component of the predicted motion vector
/* 16x8 partitions use a single directional neighbour (top for the upper
 * partition, left for the lower) when its ref matches; otherwise this falls
 * through to the generic median prediction at the bottom. */
804 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Upper 16x8 partition: prefer the top neighbour. */
806 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
807 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
809 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Lower 16x8 partition: prefer the left neighbour. */
817 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
818 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
820 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
//RARE: fall back to the generic median MV prediction.
830 pred_motion(h, n, 4, list, ref, mx, my);
834 * gets the directionally predicted 8x16 MV.
835 * @param n the block index
836 * @param mx the x component of the predicted motion vector
837 * @param my the y component of the predicted motion vector
/* 8x16 partitions use a single directional neighbour (left for the left
 * partition, diagonal for the right) when its ref matches; otherwise this
 * falls through to the generic median prediction at the bottom. */
839 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Left 8x16 partition: prefer the left neighbour. */
841 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
842 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
844 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Right 8x16 partition: prefer the diagonal (top-right) neighbour. */
855 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
857 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
859 if(diagonal_ref == ref){
//RARE: fall back to the generic median MV prediction.
867 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV for a P-skip macroblock: zero MV when either neighbour is
 * unavailable or has a zero MV with ref 0 (spec's P_Skip condition),
 * otherwise the regular median prediction with ref 0.
 */
870 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
871 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
872 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
874 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
876 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
877 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
878 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
884 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct distance scale factors for every list-0
 * reference, from the POC distances between the current picture, each list-0
 * reference and the first list-1 reference (spec clause 8.4.1.2.3).
 */
889 static inline void direct_dist_scale_factor(H264Context * const h){
890 const int poc = h->s.current_picture_ptr->poc;
891 const int poc1 = h->ref_list[1][0].poc;
893 for(i=0; i<h->ref_count[0]; i++){
894 int poc0 = h->ref_list[0][i].poc;
895 int td = av_clip(poc1 - poc0, -128, 127);
896 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* td == 0 (same POC): spec mandates the neutral scale factor 256. */
897 h->dist_scale_factor[i] = 256;
899 int tb = av_clip(poc - poc0, -128, 127);
900 int tx = (16384 + (FFABS(td) >> 1)) / td;
901 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Field variant: each frame factor is duplicated for both fields. */
905 for(i=0; i<h->ref_count[0]; i++){
906 h->dist_scale_factor_field[2*i] =
907 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Stores the current picture's reference lists (counts and POCs) for later
 * use as a colocated picture, and, for temporal direct prediction, builds
 * the map from each colocated (list-1[0]) reference to the current list-0
 * index with the same POC.
 */
911 static inline void direct_ref_list_init(H264Context * const h){
912 MpegEncContext * const s = &h->s;
913 Picture * const ref1 = &h->ref_list[1][0];
914 Picture * const cur = s->current_picture_ptr;
916 if(cur->pict_type == I_TYPE)
917 cur->ref_count[0] = 0;
918 if(cur->pict_type != B_TYPE)
919 cur->ref_count[1] = 0;
920 for(list=0; list<2; list++){
921 cur->ref_count[list] = h->ref_count[list];
922 for(j=0; j<h->ref_count[list]; j++)
923 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* The map is only needed for temporal direct mode in B slices. */
925 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
927 for(list=0; list<2; list++){
928 for(i=0; i<ref1->ref_count[list]; i++){
929 const int poc = ref1->ref_poc[list][i];
930 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
931 for(j=0; j<h->ref_count[list]; j++)
932 if(h->ref_list[list][j].poc == poc){
933 h->map_col_to_list0[list][i] = j;
/* Field variant of the map: frame index j expands to field indices 2j, 2j+1. */
939 for(list=0; list<2; list++){
940 for(i=0; i<ref1->ref_count[list]; i++){
941 j = h->map_col_to_list0[list][i];
942 h->map_col_to_list0_field[list][2*i] = 2*j;
943 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives motion vectors and reference indices for B-direct coded
 * macroblocks (or their 8x8 sub-blocks) and writes them into the
 * mv_cache / ref_cache arrays. Uses spatial direct prediction when
 * h->direct_spatial_mv_pred is set, otherwise temporal direct
 * prediction scaled by dist_scale_factor.
 * @param mb_type in/out macroblock type; partition/direct flags are
 *        updated to reflect the inferred block sizes.
 */
949 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
950 MpegEncContext * const s = &h->s;
951 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
952 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
953 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// co-located data taken from the first picture in reference list 1
954 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
955 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
956 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
957 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
958 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
959 const int is_b8x8 = IS_8X8(*mb_type);
960 unsigned int sub_mb_type;
963 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// choose the direct sub-partition size from the co-located mb type
964 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
965 /* FIXME save sub mb types from previous frames (or derive from MVs)
966 * so we know exactly what block size to use */
967 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
968 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
969 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
970 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
971 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
973 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
974 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
977 *mb_type |= MB_TYPE_DIRECT2;
979 *mb_type |= MB_TYPE_INTERLACED;
981 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---------------- spatial direct prediction ---------------- */
983 if(h->direct_spatial_mv_pred){
988 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
990 /* ref = min(neighbors) */
991 for(list=0; list<2; list++){
// neighbours A (left), B (top) and C (top-right; falls back to top-left)
992 int refa = h->ref_cache[list][scan8[0] - 1];
993 int refb = h->ref_cache[list][scan8[0] - 8];
994 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
996 refc = h->ref_cache[list][scan8[0] - 8 - 1];
998 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1000 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
// if no neighbour supplies a reference in either list, predict zero MVs with ref 0
1006 if(ref[0] < 0 && ref[1] < 0){
1007 ref[0] = ref[1] = 0;
1008 mv[0][0] = mv[0][1] =
1009 mv[1][0] = mv[1][1] = 0;
1011 for(list=0; list<2; list++){
1013 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1015 mv[list][0] = mv[list][1] = 0;
// drop the unused prediction list from the (sub-)mb type
1020 *mb_type &= ~MB_TYPE_P0L1;
1021 sub_mb_type &= ~MB_TYPE_P0L1;
1022 }else if(ref[0] < 0){
1023 *mb_type &= ~MB_TYPE_P0L0;
1024 sub_mb_type &= ~MB_TYPE_P0L0;
1027 if(IS_16X16(*mb_type)){
1030 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1031 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
// co-located block is (near-)stationary => MVs are zeroed per the spec;
// the x264_build check works around a known encoder bug
1032 if(!IS_INTRA(mb_type_col)
1033 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1034 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1035 && (h->x264_build>33 || !h->x264_build)))){
1037 a= pack16to32(mv[0][0],mv[0][1]);
1039 b= pack16to32(mv[1][0],mv[1][1]);
1041 a= pack16to32(mv[0][0],mv[0][1]);
1042 b= pack16to32(mv[1][0],mv[1][1]);
1044 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1045 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// per-8x8-partition spatial direct
1047 for(i8=0; i8<4; i8++){
1048 const int x8 = i8&1;
1049 const int y8 = i8>>1;
1051 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1053 h->sub_mb_type[i8] = sub_mb_type;
1055 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1056 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1057 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1058 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1061 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1062 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1063 && (h->x264_build>33 || !h->x264_build)))){
1064 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1065 if(IS_SUB_8X8(sub_mb_type)){
1066 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1067 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1069 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1071 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1074 for(i4=0; i4<4; i4++){
1075 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1076 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1078 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1080 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---------------- temporal direct prediction ---------------- */
1086 }else{ /* direct temporal mv pred */
1087 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1088 const int *dist_scale_factor = h->dist_scale_factor;
// field macroblocks use the field-specific mapping/scaling tables
1091 if(IS_INTERLACED(*mb_type)){
1092 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1093 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1094 dist_scale_factor = h->dist_scale_factor_field;
// mixed frame/field between current mb and co-located mb needs rescaling
1096 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1097 /* FIXME assumes direct_8x8_inference == 1 */
1098 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1099 int mb_types_col[2];
1102 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1103 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1104 | (*mb_type & MB_TYPE_INTERLACED);
1105 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1107 if(IS_INTERLACED(*mb_type)){
1108 /* frame to field scaling */
1109 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1110 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
// rewind the co-located pointers to the top mb of the pair
1112 l1ref0 -= 2*h->b8_stride;
1113 l1ref1 -= 2*h->b8_stride;
1114 l1mv0 -= 4*h->b_stride;
1115 l1mv1 -= 4*h->b_stride;
1119 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1120 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1122 *mb_type |= MB_TYPE_16x8;
1124 *mb_type |= MB_TYPE_8x8;
1126 /* field to frame scaling */
1127 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1128 * but in MBAFF, top and bottom POC are equal */
1129 int dy = (s->mb_y&1) ? 1 : 2;
1131 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1132 l1ref0 += dy*h->b8_stride;
1133 l1ref1 += dy*h->b8_stride;
1134 l1mv0 += 2*dy*h->b_stride;
1135 l1mv1 += 2*dy*h->b_stride;
1138 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1140 *mb_type |= MB_TYPE_16x16;
1142 *mb_type |= MB_TYPE_8x8;
// per-8x8-partition temporal direct with frame/field vertical rescaling
1145 for(i8=0; i8<4; i8++){
1146 const int x8 = i8&1;
1147 const int y8 = i8>>1;
1149 const int16_t (*l1mv)[2]= l1mv0;
1151 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1153 h->sub_mb_type[i8] = sub_mb_type;
1155 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// intra co-located block: direct mode uses ref 0 and zero MVs
1156 if(IS_INTRA(mb_types_col[y8])){
1157 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1158 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1159 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1163 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1165 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1167 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1170 scale = dist_scale_factor[ref0];
1171 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1174 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1175 int my_col = (mv_col[1]<<y_shift)/2;
// list0 MV = scaled co-located MV; list1 MV = list0 MV - co-located MV
1176 int mx = (scale * mv_col[0] + 128) >> 8;
1177 int my = (scale * my_col + 128) >> 8;
1178 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1186 /* one-to-one mv scaling */
1188 if(IS_16X16(*mb_type)){
1191 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1192 if(IS_INTRA(mb_type_col)){
// map the co-located reference into the current list0 and scale its MV
1195 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1196 : map_col_to_list0[1][l1ref1[0]];
1197 const int scale = dist_scale_factor[ref0];
1198 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1200 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1201 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1203 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1204 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1206 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1207 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1208 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1210 for(i8=0; i8<4; i8++){
1211 const int x8 = i8&1;
1212 const int y8 = i8>>1;
1214 const int16_t (*l1mv)[2]= l1mv0;
1216 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 h->sub_mb_type[i8] = sub_mb_type;
1219 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 if(IS_INTRA(mb_type_col)){
1221 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1223 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1227 ref0 = l1ref0[x8 + y8*h->b8_stride];
1229 ref0 = map_col_to_list0[0][ref0];
1231 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1234 scale = dist_scale_factor[ref0];
1236 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1237 if(IS_SUB_8X8(sub_mb_type)){
1238 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1239 int mx = (scale * mv_col[0] + 128) >> 8;
1240 int my = (scale * mv_col[1] + 128) >> 8;
1241 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1242 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1244 for(i4=0; i4<4; i4++){
1245 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1246 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1247 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1248 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1249 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1250 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data from the decode caches
 * (mv_cache / mvd_cache / ref_cache) back into the frame-wide arrays of
 * the current picture, plus the CABAC mvd and direct-mode tables.
 * @param mb_type current macroblock type (selects which lists are used)
 */
1257 static inline void write_back_motion(H264Context *h, int mb_type){
1258 MpegEncContext * const s = &h->s;
1259 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1260 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// mark list0 unused so later MV prediction sees LIST_NOT_USED here
1263 if(!USES_LIST(mb_type, 0))
1264 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1266 for(list=0; list<h->list_count; list++){
1268 if(!USES_LIST(mb_type, list))
// copy the 4x4 grid of MVs, two 32-bit MVs (64 bits) per store
1272 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1273 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC needs the motion vector differences of neighbouring MBs
1275 if( h->pps.cabac ) {
1276 if(IS_SKIP(mb_type))
1277 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1280 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1281 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// one reference index per 8x8 block (corners of the scan8 cache)
1286 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1287 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1288 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1289 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1290 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// remember which 8x8 partitions were direct-coded (CABAC context)
1294 if(h->slice_type == B_TYPE && h->pps.cabac){
1295 if(IS_8X8(mb_type)){
1296 uint8_t *direct_table = &h->direct_table[b8_xy];
1297 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1298 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1299 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1305 * Decodes a network abstraction layer unit.
1306 * @param consumed is the number of bytes used as input
1307 * @param length is the length of the array
1308 * @param dst_length is the number of decoded bytes FIXME here or in decode_rbsp_trailing()?
1309 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Strips the NAL header and removes emulation-prevention bytes
 * (0x000003 escapes) from a NAL unit, returning a pointer to the RBSP.
 * Fills h->nal_ref_idc and h->nal_unit_type from the header byte.
 * Returns src+1 directly (no copy) when the payload contains no escapes.
 */
1311 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
// first byte: 1 bit forbidden_zero, 2 bits nal_ref_idc, 5 bits nal_unit_type
1316 // src[0]&0x80; //forbidden bit
1317 h->nal_ref_idc= src[0]>>5;
1318 h->nal_unit_type= src[0]&0x1F;
1322 for(i=0; i<length; i++)
1323 printf("%2X ", src[i]);
// scan in steps of 2: an escape/startcode always contains a zero byte
// at an even or odd position, so checking every other byte suffices
1325 for(i=0; i+1<length; i+=2){
1326 if(src[i]) continue;
1327 if(i>0 && src[i-1]==0) i--;
1328 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1330 /* startcode, so we must be past the end */
// fast path: no 0x000003 sequence found, return the input buffer as-is
1337 if(i>=length-1){ //no escaped 0
1338 *dst_length= length;
1339 *consumed= length+1; //+1 for the header
1343 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1344 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1345 dst= h->rbsp_buffer[bufidx];
1351 //printf("decoding esc\n");
// copy payload into dst, dropping every emulation_prevention_three_byte
1354 //remove escapes (very rare 1:2^22)
1355 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1356 if(src[si+2]==3){ //escape
1361 }else //next start code
1365 dst[di++]= src[si++];
1369 *consumed= si + 1;//+1 for the header
1370 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1375 * identifies the exact end of the bitstream
1376 * @return the length of the trailing, or 0 if damaged
// Locates the rbsp_stop_one_bit at the end of the RBSP so the exact
// bitstream end is known; returns the trailing length, 0 if damaged.
1378 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1382 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1392 * idct transforms the 16 dc values and dequantizes them.
1393 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard transform + dequantization of the 16 luma DC
 * coefficients, writing the results back in place into the DC positions
 * of the 16 luma blocks.
 * @param qp   quantization parameter (unused here; qmul carries the scale)
 * @param qmul dequantization multiplier applied after the transform
 */
1395 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1398 int temp[16]; //FIXME check if this is a good idea
// offsets of the DC positions of the 16 4x4 blocks inside the mb layout
1399 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1400 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1402 //memset(block, 64, 2*256);
// horizontal pass: butterfly on each row of DC values into temp[]
1405 const int offset= y_offset[i];
1406 const int z0= block[offset+stride*0] + block[offset+stride*4];
1407 const int z1= block[offset+stride*0] - block[offset+stride*4];
1408 const int z2= block[offset+stride*1] - block[offset+stride*5];
1409 const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical pass: second butterfly, then dequantize with rounding
1418 const int offset= x_offset[i];
1419 const int z0= temp[4*0+i] + temp[4*2+i];
1420 const int z1= temp[4*0+i] - temp[4*2+i];
1421 const int z2= temp[4*1+i] - temp[4*3+i];
1422 const int z3= temp[4*1+i] + temp[4*3+i];
1424 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1425 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1426 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1427 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1433 * dct transforms the 16 dc values.
1434 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder
 * side counterpart of h264_luma_dc_dequant_idct_c); results are written
 * back in place, halved on output.
 */
1436 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1437 // const int qmul= dequant_coeff[qp][0];
1439 int temp[16]; //FIXME check if this is a good idea
// DC positions of the 16 4x4 blocks inside the macroblock layout
1440 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1441 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// first butterfly pass into temp[]
1444 const int offset= y_offset[i];
1445 const int z0= block[offset+stride*0] + block[offset+stride*4];
1446 const int z1= block[offset+stride*0] - block[offset+stride*4];
1447 const int z2= block[offset+stride*1] - block[offset+stride*5];
1448 const int z3= block[offset+stride*1] + block[offset+stride*5];
// second butterfly pass; >>1 normalizes the transform gain
1457 const int offset= x_offset[i];
1458 const int z0= temp[4*0+i] + temp[4*2+i];
1459 const int z1= temp[4*0+i] - temp[4*2+i];
1460 const int z2= temp[4*1+i] - temp[4*3+i];
1461 const int z3= temp[4*1+i] + temp[4*3+i];
1463 block[stride*0 +offset]= (z0 + z3)>>1;
1464 block[stride*2 +offset]= (z1 + z2)>>1;
1465 block[stride*8 +offset]= (z1 - z2)>>1;
1466 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * Inverse 2x2 transform + dequantization of the 4 chroma DC
 * coefficients, in place at the DC positions of the 4 chroma blocks.
 * @param qp   quantization parameter (unused here; qmul carries the scale)
 * @param qmul dequantization multiplier
 */
1474 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
// DC values sit one per 4x4 block: 16 samples apart horizontally,
// 32 apart vertically in the coefficient layout
1475 const int stride= 16*2;
1476 const int xStride= 16;
1479 a= block[stride*0 + xStride*0];
1480 b= block[stride*0 + xStride*1];
1481 c= block[stride*1 + xStride*0];
1482 d= block[stride*1 + xStride*1];
// 2x2 butterfly, then scale; >>7 compensates the transform gain
1489 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1490 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1491 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1492 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the 4 chroma DC values (encoder side
 * counterpart of chroma_dc_dequant_idct_c); results written in place.
 */
1496 static void chroma_dc_dct_c(DCTELEM *block){
// same DC-position layout as chroma_dc_dequant_idct_c
1497 const int stride= 16*2;
1498 const int xStride= 16;
1501 a= block[stride*0 + xStride*0];
1502 b= block[stride*0 + xStride*1];
1503 c= block[stride*1 + xStride*0];
1504 d= block[stride*1 + xStride*1];
// plain 2x2 butterfly, no scaling on the forward path
1511 block[stride*0 + xStride*0]= (a+c);
1512 block[stride*0 + xStride*1]= (e+b);
1513 block[stride*1 + xStride*0]= (a-c);
1514 block[stride*1 + xStride*1]= (e-b);
1519 * gets the chroma qp.
// Looks up the chroma QP for a luma qscale from the PPS-derived table;
// t selects the Cb/Cr table index.
1521 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1522 return h->pps.chroma_qp_table[t][qscale & 0xff];
1525 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1526 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block of transform coefficients (encoder path).
 * @param scantable  zigzag scan order used to walk the coefficients
 * @param intra      selects the intra (1/3) vs inter (1/6) rounding bias
 * @param separate_dc when set, block[0] is quantized with a DC-specific
 *        shift/bias instead of the AC table
 * @return index of the last non-zero coefficient
 */
1527 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1529 const int * const quant_table= quant_coeff[qscale];
1530 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
// threshold1/2 let one unsigned compare detect |level| small enough to
// quantize to zero, skipping the sign handling for most coefficients
1531 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1532 const unsigned int threshold2= (threshold1<<1);
// luma DC path: coarser shift (QUANT_SHIFT-2)
1538 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1539 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1540 const unsigned int dc_threshold2= (dc_threshold1<<1);
1542 int level= block[0]*quant_coeff[qscale+18][0];
1543 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1545 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1548 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1551 // last_non_zero = i;
// chroma DC path: finer shift (QUANT_SHIFT+1)
1556 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1557 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1558 const unsigned int dc_threshold2= (dc_threshold1<<1);
1560 int level= block[0]*quant_table[0];
1561 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1563 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1566 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1569 // last_non_zero = i;
// AC coefficients, walked in scan order
1582 const int j= scantable[i];
1583 int level= block[j]*quant_table[j];
1585 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1586 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1587 if(((unsigned)(level+threshold1))>threshold2){
1589 level= (bias + level)>>QUANT_SHIFT;
1592 level= (bias - level)>>QUANT_SHIFT;
1601 return last_non_zero;
/**
 * Motion compensation for one partition in one prediction direction:
 * fetches luma with quarter-pel and chroma with eighth-pel accuracy from
 * the given reference picture, going through the edge-emulation buffer
 * when the motion vector points (partly) outside the picture.
 * @param square  non-zero if the luma partition is square (single qpix op)
 * @param delta   offset to the second half of a non-square partition
 * @param list    prediction list (0 or 1), used to index mv_cache
 */
1604 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1605 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1606 int src_x_offset, int src_y_offset,
1607 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1608 MpegEncContext * const s = &h->s;
// mv in quarter-pel units, offset to absolute picture coordinates
1609 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1610 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1611 const int luma_xy= (mx&3) + ((my&3)<<2);
1612 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1613 uint8_t * src_cb, * src_cr;
1614 int extra_width= h->emu_edge_width;
1615 int extra_height= h->emu_edge_height;
1617 const int full_mx= mx>>2;
1618 const int full_my= my>>2;
1619 const int pic_width = 16*s->mb_width;
1620 const int pic_height = 16*s->mb_height >> MB_FIELD;
1622 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
// sub-pel interpolation reads 3 extra samples; shrink the safe margin
1625 if(mx&7) extra_width -= 3;
1626 if(my&7) extra_height -= 3;
// out-of-picture reads go through the edge emulation buffer
1628 if( full_mx < 0-extra_width
1629 || full_my < 0-extra_height
1630 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1631 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1632 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1633 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1637 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1639 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1642 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1645 // chroma offset when predicting from a field of opposite parity
1646 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1647 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
// chroma uses eighth-pel vectors (mx>>3, my>>3 integer part)
1649 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1650 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1654 src_cb= s->edge_emu_buffer;
1656 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1659 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1660 src_cr= s->edge_emu_buffer;
1662 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: predicts from list0
 * and/or list1 via mc_dir_part. For bidirectional partitions the first
 * prediction uses the "put" ops and the second the "avg" ops, so the two
 * references are averaged into the destination.
 */
1665 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1666 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1667 int x_offset, int y_offset,
1668 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1669 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1670 int list0, int list1){
1671 MpegEncContext * const s = &h->s;
1672 qpel_mc_func *qpix_op= qpix_put;
1673 h264_chroma_mc_func chroma_op= chroma_put;
// move destination to the partition and convert offsets to mb-absolute
1675 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1676 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1677 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1678 x_offset += 8*s->mb_x;
1679 y_offset += 8*(s->mb_y >> MB_FIELD);
1682 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1683 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1684 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1685 qpix_op, chroma_op);
// if list0 was done, switch to the averaging ops for list1
1688 chroma_op= chroma_avg;
1692 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1693 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1694 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1695 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bidirectional
 * partitions predict list1 into a scratch buffer and blend it with the
 * list0 prediction using either implicit (use_weight==2) or explicit
 * bi-weights; unidirectional partitions apply explicit weight/offset
 * in place after a normal prediction.
 */
1699 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1700 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1701 int x_offset, int y_offset,
1702 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1703 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1704 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1705 int list0, int list1){
1706 MpegEncContext * const s = &h->s;
// move destination to the partition and convert offsets to mb-absolute
1708 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1709 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1710 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1711 x_offset += 8*s->mb_x;
1712 y_offset += 8*(s->mb_y >> MB_FIELD);
1715 /* don't optimize for luma-only case, since B-frames usually
1716 * use implicit weights => chroma too. */
// list1 prediction goes into the scratchpad, blended afterwards
1717 uint8_t *tmp_cb = s->obmc_scratchpad;
1718 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1719 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1720 int refn0 = h->ref_cache[0][ scan8[n] ];
1721 int refn1 = h->ref_cache[1][ scan8[n] ];
1723 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1724 dest_y, dest_cb, dest_cr,
1725 x_offset, y_offset, qpix_put, chroma_put);
1726 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1727 tmp_y, tmp_cb, tmp_cr,
1728 x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: weights per reference pair summing to 64
1730 if(h->use_weight == 2){
1731 int weight0 = h->implicit_weight[refn0][refn1];
1732 int weight1 = 64 - weight0;
1733 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1734 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1735 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit bi-weighting from the slice header tables
1737 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1738 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1739 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1740 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1741 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1742 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1743 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1745 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// unidirectional: predict, then apply explicit weight/offset in place
1748 int list = list1 ? 1 : 0;
1749 int refn = h->ref_cache[list][ scan8[n] ];
1750 Picture *ref= &h->ref_list[list][refn];
1751 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1752 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1753 qpix_put, chroma_put);
1755 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1756 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1757 if(h->use_weight_chroma){
1758 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1759 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1760 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1761 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation.
 * Weighted MC is taken for explicit weighting (use_weight==1) or for
 * implicit weighting when the weight pair is not the trivial 32/32
 * average (which plain bidirectional averaging already produces).
 */
1766 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1767 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1768 int x_offset, int y_offset,
1769 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1770 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1771 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1772 int list0, int list1){
1773 if((h->use_weight==2 && list0 && list1
1774 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1775 || h->use_weight==1)
1776 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1777 x_offset, y_offset, qpix_put, chroma_put,
1778 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1780 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1781 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * current MV suggests will be read a few macroblocks from now.
 * @param list prediction list whose cached MV/ref drive the prefetch
 */
1784 static inline void prefetch_motion(H264Context *h, int list){
1785 /* fetch pixels for estimated mv 4 macroblocks ahead
1786 * optimized for 64byte cache lines */
1787 MpegEncContext * const s = &h->s;
1788 const int refn = h->ref_cache[list][scan8[0]];
// full-pel position of the current MB's MV, shifted ahead horizontally
1790 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1791 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1792 uint8_t **src= h->ref_list[list][refn].data;
1793 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1794 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// cb and cr are assumed adjacent; one prefetch covers both planes
1795 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1796 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs inter prediction for a whole macroblock: walks the partition
 * tree (16x16, 16x8, 8x16, or 8x8 with sub-partitions down to 4x4) and
 * calls mc_part for each partition with the matching qpel/chroma ops and
 * weight function slots.
 */
1800 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1801 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1802 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1803 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1804 MpegEncContext * const s = &h->s;
1805 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1806 const int mb_type= s->current_picture.mb_type[mb_xy];
1808 assert(IS_INTER(mb_type));
// prefetch list0 reference before compute, list1 afterwards
1810 prefetch_motion(h, 0);
1812 if(IS_16X16(mb_type)){
1813 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1815 &weight_op[0], &weight_avg[0],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 }else if(IS_16X8(mb_type)){
1818 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1819 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1820 &weight_op[1], &weight_avg[1],
1821 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1822 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1823 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1824 &weight_op[1], &weight_avg[1],
1825 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1826 }else if(IS_8X16(mb_type)){
1827 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1828 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1829 &weight_op[2], &weight_avg[2],
1830 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1831 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1832 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1833 &weight_op[2], &weight_avg[2],
1834 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x8 mode: each quadrant carries its own sub-partitioning
1838 assert(IS_8X8(mb_type));
1841 const int sub_mb_type= h->sub_mb_type[i];
1843 int x_offset= (i&1)<<2;
1844 int y_offset= (i&2)<<1;
1846 if(IS_SUB_8X8(sub_mb_type)){
1847 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1849 &weight_op[3], &weight_avg[3],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 }else if(IS_SUB_8X4(sub_mb_type)){
1852 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1853 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1854 &weight_op[4], &weight_avg[4],
1855 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1856 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1857 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1858 &weight_op[4], &weight_avg[4],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 }else if(IS_SUB_4X8(sub_mb_type)){
1861 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1862 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1863 &weight_op[5], &weight_avg[5],
1864 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1865 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1866 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1867 &weight_op[5], &weight_avg[5],
1868 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1871 assert(IS_SUB_4X4(sub_mb_type));
1873 int sub_x_offset= x_offset + 2*(j&1);
1874 int sub_y_offset= y_offset + (j&2);
1875 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1876 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1877 &weight_op[6], &weight_avg[6],
1878 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1884 prefetch_motion(h, 1);
/**
 * Builds the static CAVLC tables (coeff_token, total_zeros, run VLCs)
 * once; the `done` flag guards against repeated initialization.
 * NOTE(review): the guard is a plain static int — presumably callers
 * serialize initialization; confirm against the init path.
 */
1887 static void decode_init_vlc(void){
1888 static int done = 0;
1894 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1895 &chroma_dc_coeff_token_len [0], 1, 1,
1896 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// one coeff_token table per nC context group
1899 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1900 &coeff_token_len [i][0], 1, 1,
1901 &coeff_token_bits[i][0], 1, 1, 1);
1905 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1906 &chroma_dc_total_zeros_len [i][0], 1, 1,
1907 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
// total_zeros: one table per possible total_coeff value (1..15)
1909 for(i=0; i<15; i++){
1910 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1911 &total_zeros_len [i][0], 1, 1,
1912 &total_zeros_bits[i][0], 1, 1, 1);
// run_before: 6 small tables plus a separate table for zeros_left > 6
1916 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1917 &run_len [i][0], 1, 1,
1918 &run_bits[i][0], 1, 1, 1);
1920 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1921 &run_len [6][0], 1, 1,
1922 &run_bits[6][0], 1, 1, 1);
/**
 * Releases all per-context tables allocated by alloc_tables(), the
 * SPS/PPS buffers, and the per-thread scratch buffers. Safe to call on a
 * partially initialized context since av_freep() tolerates NULL.
 */
1926 static void free_tables(H264Context *h){
1929 av_freep(&h->intra4x4_pred_mode);
1930 av_freep(&h->chroma_pred_mode_table);
1931 av_freep(&h->cbp_table);
1932 av_freep(&h->mvd_table[0]);
1933 av_freep(&h->mvd_table[1]);
1934 av_freep(&h->direct_table);
1935 av_freep(&h->non_zero_count);
1936 av_freep(&h->slice_table_base);
// slice_table points into slice_table_base, so just clear the alias
1937 h->slice_table= NULL;
1939 av_freep(&h->mb2b_xy);
1940 av_freep(&h->mb2b8_xy);
1942 for(i = 0; i < MAX_SPS_COUNT; i++)
1943 av_freep(h->sps_buffers + i);
1945 for(i = 0; i < MAX_PPS_COUNT; i++)
1946 av_freep(h->pps_buffers + i);
// per-thread contexts own their own border/scratch buffers
1948 for(i = 0; i < h->s.avctx->thread_count; i++) {
1949 hx = h->thread_context[i];
1951 av_freep(&hx->top_borders[1]);
1952 av_freep(&hx->top_borders[0]);
1953 av_freep(&hx->s.obmc_scratchpad);
/**
 * Precomputes the 8x8 dequantization tables for all 52 QP values from
 * the PPS scaling matrices. When both intra and inter matrices are
 * identical, the second table aliases the first to save work/space.
 */
1957 static void init_dequant8_coeff_table(H264Context *h){
// transposed layout is needed when a non-C IDCT is in use
1959 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1960 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1961 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1963 for(i=0; i<2; i++ ){
1964 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1965 h->dequant8_coeff[1] = h->dequant8_buffer[0];
// qp = 6*div6 + rem6; the shift grows by one every 6 QP steps
1969 for(q=0; q<52; q++){
1970 int shift = ff_div6[q];
1971 int idx = ff_rem6[q];
1973 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1974 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1975 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precomputes the 4x4 dequantization tables for all 52 QP values from
 * the 6 PPS scaling matrices; identical matrices share one buffer.
 */
1980 static void init_dequant4_coeff_table(H264Context *h){
// transposed layout is needed when a non-C IDCT is in use
1982 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1983 for(i=0; i<6; i++ ){
1984 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier buffer when this scaling matrix duplicates it
1986 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1987 h->dequant4_coeff[i] = h->dequant4_buffer[j];
// qp = 6*div6 + rem6; the shift grows by one every 6 QP steps
1994 for(q=0; q<52; q++){
1995 int shift = ff_div6[q] + 2;
1996 int idx = ff_rem6[q];
1998 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
1999 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2000 h->pps.scaling_matrix4[i][x]) << shift;
2005 static void init_dequant_tables(H264Context *h){
// (Re)build all dequant tables: always the 4x4 set, and the 8x8 set only
// when the PPS enables 8x8 transforms.
2007 init_dequant4_coeff_table(h);
2008 if(h->pps.transform_8x8_mode)
2009 init_dequant8_coeff_table(h);
// Lossless (transform-bypass) mode: force the QP 0 entries to the identity
// scale (1 in Q6 fixed point) so dequantization becomes a no-op.
// NOTE(review): the surrounding i/x loop headers are elided in this excerpt.
2010 if(h->sps.transform_bypass){
2013 h->dequant4_coeff[i][0][x] = 1<<6;
2014 if(h->pps.transform_8x8_mode)
2017 h->dequant8_coeff[i][0][x] = 1<<6;
2024 * needs width/height
2026 static int alloc_tables(H264Context *h){
// Allocate the per-picture decoding tables shared by all slice threads.
// Requires s->mb_stride / mb_width / mb_height to be set already.
// NOTE(review): the 'fail:' path and return statements are elided in this
// excerpt; CHECKED_ALLOCZ presumably jumps to it on allocation failure.
2027 MpegEncContext * const s = &h->s;
// One extra macroblock row as guard space for neighbor accesses.
2028 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2031 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2033 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2034 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2035 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2037 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2038 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2039 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2040 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 (0xFF per byte) marks "no slice"; slice_table points past the guard
// rows so negative neighbor offsets stay within the allocation.
2042 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2043 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// Lookup tables mapping a macroblock index to its 4x4 (b) and 8x8 (b8)
// block coordinates.
2045 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2046 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2047 for(y=0; y<s->mb_height; y++){
2048 for(x=0; x<s->mb_width; x++){
2049 const int mb_xy= x + y*s->mb_stride;
2050 const int b_xy = 4*x + 4*y*h->b_stride;
2051 const int b8_xy= 2*x + 2*y*h->b8_stride;
2053 h->mb2b_xy [mb_xy]= b_xy;
2054 h->mb2b8_xy[mb_xy]= b8_xy;
// Scratchpad is allocated lazily in frame_start() once linesize is known.
2058 s->obmc_scratchpad = NULL;
2060 if(!h->dequant4_coeff[0])
2061 init_dequant_tables(h);
2070 * Mimic alloc_tables(), but for every context thread.
2072 static void clone_tables(H264Context *dst, H264Context *src){
// Share src's per-picture tables with the slice-thread context dst.
// These are pointer copies only — dst does not own the memory, so
// free_tables() must free them exactly once via the master context.
2073 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2074 dst->non_zero_count = src->non_zero_count;
2075 dst->slice_table = src->slice_table;
2076 dst->cbp_table = src->cbp_table;
2077 dst->mb2b_xy = src->mb2b_xy;
2078 dst->mb2b8_xy = src->mb2b8_xy;
2079 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2080 dst->mvd_table[0] = src->mvd_table[0];
2081 dst->mvd_table[1] = src->mvd_table[1];
2082 dst->direct_table = src->direct_table;
// Not shared: each thread gets its own scratchpad, allocated lazily in
// frame_start().
2084 dst->s.obmc_scratchpad = NULL;
2085 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2090 * Allocate buffers which are not shared amongst multiple threads.
2092 static int context_init(H264Context *h){
// Allocate per-thread buffers: two rows of saved top-border samples,
// 16 luma + 8 Cb + 8 Cr bytes per macroblock column.
// NOTE(review): the success return and 'fail:' label are elided in this
// excerpt; CHECKED_ALLOCZ presumably jumps to the failure path below.
2093 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2094 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2098 return -1; // free_tables will clean up for us
2101 static void common_init(H264Context *h){
// Initialization shared by decoder (and, historically, encoder) paths.
2102 MpegEncContext * const s = &h->s;
// Mirror codec-context geometry and codec id into the MpegEncContext.
2104 s->width = s->avctx->width;
2105 s->height = s->avctx->height;
2106 s->codec_id= s->avctx->codec->id;
2108 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 marks the dequant tables as not matching any PPS yet, forcing a
// rebuild on the first slice.
2110 h->dequant_coeff_pps= -1;
2111 s->unrestricted_mv=1;
2112 s->decode=1; //FIXME
// Default both scaling matrix sets to flat 16 (no scaling) until the
// SPS/PPS provide real matrices.
2114 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2115 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2118 static int decode_init(AVCodecContext *avctx){
// AVCodec init callback: set up MpegEncContext defaults plus H.264 state.
// NOTE(review): several lines (common_init call, avcC parsing branch,
// return statement) are elided in this excerpt.
2119 H264Context *h= avctx->priv_data;
2120 MpegEncContext * const s = &h->s;
2122 MPV_decode_defaults(s);
2127 s->out_format = FMT_H264;
2128 s->workaround_bugs= avctx->workaround_bugs;
2131 // s->decode_mb= ff_h263_decode_mb;
2132 s->quarter_sample = 1;
2134 avctx->pix_fmt= PIX_FMT_YUV420P;
// First extradata byte == 1 indicates an AVC configuration record (avcC)
// rather than Annex-B start codes — handling is in the elided branch body.
2138 if(avctx->extradata_size > 0 && avctx->extradata &&
2139 *(char *)avctx->extradata == 1){
// The master context serves as slice-thread context 0.
2146 h->thread_context[0] = h;
2150 static int frame_start(H264Context *h){
// Per-frame setup: start the MPV frame, reset key_frame, precompute block
// offsets, and lazily allocate per-thread scratch buffers.
// NOTE(review): the error-return after MPV_frame_start and the final
// 'return 0' are elided in this excerpt.
2151 MpegEncContext * const s = &h->s;
2154 if(MPV_frame_start(s, s->avctx) < 0)
2156 ff_er_frame_start(s);
2158 * MPV_frame_start uses pict_type to derive key_frame.
2159 * This is incorrect for H.264; IDR markings must be used.
2160 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2161 * See decode_nal_units().
2163 s->current_picture_ptr->key_frame= 0;
2165 assert(s->linesize && s->uvlinesize);
// Offsets of each 4x4 block from the macroblock origin; entries 0-15 are
// luma for frame MBs, 24+ are the field-MB variants (doubled row stride).
2167 for(i=0; i<16; i++){
2168 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2169 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// Chroma block offsets (Cb at 16+, Cr at 20+, plus field variants at 24+).
2172 h->block_offset[16+i]=
2173 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2174 h->block_offset[24+16+i]=
2175 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2178 /* can't be in alloc_tables because linesize isn't known there.
2179 * FIXME: redo bipred weight to not require extra buffer? */
2180 for(i = 0; i < s->avctx->thread_count; i++)
2181 if(!h->thread_context[i]->s.obmc_scratchpad)
2182 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2184 /* some macroblocks will be accessed before they're available */
2185 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2186 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2188 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2192 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
// Save the right/bottom edge samples of the just-decoded macroblock into
// left_border/top_borders so the deblocking filter of neighboring MBs can
// still read the unfiltered values.
// NOTE(review): some lines (e.g. the src_y adjustment, loop braces) are
// elided in this excerpt.
2193 MpegEncContext * const s = &h->s;
2197 src_cb -= uvlinesize;
2198 src_cr -= uvlinesize;
2200 // There are two lines saved, the line above the top macroblock of a pair,
2201 // and the line above the bottom macroblock
2202 h->left_border[0]= h->top_borders[0][s->mb_x][15];
// Rightmost luma column of this MB becomes the next MB's left border.
2203 for(i=1; i<17; i++){
2204 h->left_border[i]= src_y[15+i* linesize];
// Bottom luma row (16 bytes, copied as two 64-bit words).
2207 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2208 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// Chroma borders are skipped in grayscale-only decoding.
2210 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2211 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2212 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2214 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2215 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2217 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2218 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2222 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
// Swap (xchg=1) or restore (xchg=0) the saved unfiltered border samples
// with the picture, so intra prediction sees unfiltered neighbors while
// deblocking is enabled.  Called before/after intra decoding of an MB.
// NOTE(review): the XCHG macro body and several braces are elided in this
// excerpt.
2223 MpegEncContext * const s = &h->s;
// deblocking_filter==2 filters only within a slice, so only exchange
// borders that belong to the same slice.
2230 if(h->deblocking_filter == 2) {
2231 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2232 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2233 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2235 deblock_left = (s->mb_x > 0);
2236 deblock_top = (s->mb_y > 0);
// Step back one row and one column to address the border samples.
2239 src_y -= linesize + 1;
2240 src_cb -= uvlinesize + 1;
2241 src_cr -= uvlinesize + 1;
2243 #define XCHG(a,b,t,xchg)\
2250 for(i = !deblock_top; i<17; i++){
2251 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2256 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2257 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// Top-right neighbor samples, only when a macroblock exists to the right.
2258 if(s->mb_x+1 < s->mb_width){
2259 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2263 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2265 for(i = !deblock_top; i<9; i++){
2266 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2267 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2271 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2272 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2277 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
// MBAFF variant of backup_mb_border(): saves the borders of a whole
// macroblock pair (two MBs, hence 32/34-row loops and two top_borders rows).
// NOTE(review): some loop braces are elided in this excerpt.
2278 MpegEncContext * const s = &h->s;
2281 src_y -= 2 * linesize;
2282 src_cb -= 2 * uvlinesize;
2283 src_cr -= 2 * uvlinesize;
2285 // There are two lines saved, the line above the top macroblock of a pair,
2286 // and the line above the bottom macroblock
2287 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2288 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// Rightmost luma column of the pair (32 rows plus the two saved lines).
2289 for(i=2; i<34; i++){
2290 h->left_border[i]= src_y[15+i* linesize];
// Bottom two luma rows of the pair go into top_borders[0] and [1].
2293 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2294 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2295 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2296 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
// Chroma borders, skipped for grayscale-only decoding.
2298 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2299 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2300 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2301 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2302 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2303 for(i=2; i<18; i++){
2304 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2305 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2307 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2308 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2309 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2310 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
2314 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
// MBAFF variant of xchg_mb_border(): swap/restore the saved unfiltered
// border samples for a whole macroblock pair.
// NOTE(review): the XCHG macro body and some braces are elided in this
// excerpt.
2315 MpegEncContext * const s = &h->s;
2318 int deblock_left = (s->mb_x > 0);
// mb_y > 1 because the top neighbor of a pair is the previous pair.
2319 int deblock_top = (s->mb_y > 1);
2321 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2323 src_y -= 2 * linesize + 1;
2324 src_cb -= 2 * uvlinesize + 1;
2325 src_cr -= 2 * uvlinesize + 1;
2327 #define XCHG(a,b,t,xchg)\
2334 for(i = (!deblock_top)<<1; i<34; i++){
2335 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2340 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2341 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2342 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2343 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// Top-right neighbor samples, only when a pair exists to the right.
2344 if(s->mb_x+1 < s->mb_width){
2345 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2346 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2350 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2352 for(i = (!deblock_top) << 1; i<18; i++){
2353 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2354 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2358 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2359 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2360 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2361 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
2366 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
// Decode (reconstruct) one macroblock: prediction (intra or motion
// compensation), inverse transform + residual add, then deblocking.
// 'simple' is a compile-time flag: 1 compiles the fast path (progressive
// H.264, no PCM/gray), 0 the full path (MBAFF, SVQ3, PCM, gray).
// NOTE(review): numerous lines (braces, else branches, loop headers) are
// elided in this excerpt; comments cover only the visible lines.
2367 MpegEncContext * const s = &h->s;
2368 const int mb_x= s->mb_x;
2369 const int mb_y= s->mb_y;
2370 const int mb_xy= mb_x + mb_y*s->mb_stride;
2371 const int mb_type= s->current_picture.mb_type[mb_xy];
2372 uint8_t *dest_y, *dest_cb, *dest_cr;
2373 int linesize, uvlinesize /*dct_offset*/;
2375 int *block_offset = &h->block_offset[0];
2376 const unsigned int bottom = mb_y & 1;
2377 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2378 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2379 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Destination pointers for this macroblock in the current picture.
2381 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2382 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2383 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2385 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2386 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblock: double the strides and use the field block offsets;
// for the bottom field, move the base pointers up to the field start.
2388 if (!simple && MB_FIELD) {
2389 linesize = h->mb_linesize = s->linesize * 2;
2390 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2391 block_offset = &h->block_offset[24];
2392 if(mb_y&1){ //FIXME move out of this func?
2393 dest_y -= s->linesize*15;
2394 dest_cb-= s->uvlinesize*7;
2395 dest_cr-= s->uvlinesize*7;
// Re-map the reference cache so field parity is encoded in the index.
2399 for(list=0; list<h->list_count; list++){
2400 if(!USES_LIST(mb_type, list))
2402 if(IS_16X16(mb_type)){
2403 int8_t *ref = &h->ref_cache[list][scan8[0]];
2404 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2406 for(i=0; i<16; i+=4){
2407 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2408 int ref = h->ref_cache[list][scan8[i]];
2410 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2416 linesize = h->mb_linesize = s->linesize;
2417 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2418 // dct_offset = s->linesize * 16;
// Select the inverse-transform functions once per MB: bypass (plain adds),
// 8x8, or 4x4, each with a DC-only fast variant.
2421 if(transform_bypass){
2423 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2424 }else if(IS_8x8DCT(mb_type)){
2425 idct_dc_add = s->dsp.h264_idct8_dc_add;
2426 idct_add = s->dsp.h264_idct8_add;
2428 idct_dc_add = s->dsp.h264_idct_dc_add;
2429 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblocking + intra: exchange the pair borders so prediction
// reads unfiltered neighbor samples.
2432 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2433 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2434 int mbt_y = mb_y&~1;
2435 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2436 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2437 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2438 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// PCM macroblock: raw samples were parsed into h->mb; copy them out.
2441 if (!simple && IS_INTRA_PCM(mb_type)) {
2444 // The pixels are stored in h->mb array in the same order as levels,
2445 // copy them in output in the correct order.
2446 for(i=0; i<16; i++) {
2447 for (y=0; y<4; y++) {
2448 for (x=0; x<4; x++) {
2449 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2453 for(i=16; i<16+4; i++) {
2454 for (y=0; y<4; y++) {
2455 for (x=0; x<4; x++) {
2456 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2460 for(i=20; i<20+4; i++) {
2461 for (y=0; y<4; y++) {
2462 for (x=0; x<4; x++) {
2463 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblock: spatial prediction.
2468 if(IS_INTRA(mb_type)){
2469 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2470 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2472 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2473 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2474 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
// Intra 4x4 / 8x8 luma: predict each sub-block, then add its residual.
2477 if(IS_INTRA4x4(mb_type)){
2478 if(simple || !s->encoding){
2479 if(IS_8x8DCT(mb_type)){
2480 for(i=0; i<16; i+=4){
2481 uint8_t * const ptr= dest_y + block_offset[i];
2482 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2483 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2484 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2485 (h->topright_samples_available<<i)&0x4000, linesize);
2487 if(nnz == 1 && h->mb[i*16])
2488 idct_dc_add(ptr, h->mb + i*16, linesize);
2490 idct_add(ptr, h->mb + i*16, linesize);
2494 for(i=0; i<16; i++){
2495 uint8_t * const ptr= dest_y + block_offset[i];
2497 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// Down-left prediction modes need the top-right samples; when those are
// unavailable, replicate the last available top sample.
2500 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2501 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2502 assert(mb_y || linesize <= block_offset[i]);
2503 if(!topright_avail){
2504 tr= ptr[3 - linesize]*0x01010101;
2505 topright= (uint8_t*) &tr;
2507 topright= ptr + 4 - linesize;
2511 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2512 nnz = h->non_zero_count_cache[ scan8[i] ];
2515 if(nnz == 1 && h->mb[i*16])
2516 idct_dc_add(ptr, h->mb + i*16, linesize);
2518 idct_add(ptr, h->mb + i*16, linesize);
2520 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16 luma: one full-MB prediction plus a separate DC transform.
2525 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2527 if(!transform_bypass)
2528 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2530 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2532 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2533 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// Inter macroblock: motion compensation (elided else branch).
2535 hl_motion(h, dest_y, dest_cb, dest_cr,
2536 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2537 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2538 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add the luma residual (intra 4x4 already did it above).
2542 if(!IS_INTRA4x4(mb_type)){
2544 if(IS_INTRA16x16(mb_type)){
2545 for(i=0; i<16; i++){
2546 if(h->non_zero_count_cache[ scan8[i] ])
2547 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2548 else if(h->mb[i*16])
2549 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2552 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2553 for(i=0; i<16; i+=di){
2554 int nnz = h->non_zero_count_cache[ scan8[i] ];
2556 if(nnz==1 && h->mb[i*16])
2557 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2559 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2564 for(i=0; i<16; i++){
2565 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2566 uint8_t * const ptr= dest_y + block_offset[i];
2567 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual (skipped for grayscale-only decoding).
2573 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2574 uint8_t *dest[2] = {dest_cb, dest_cr};
2575 if(transform_bypass){
2576 idct_add = idct_dc_add = s->dsp.add_pixels4;
2578 idct_add = s->dsp.h264_idct_add;
2579 idct_dc_add = s->dsp.h264_idct_dc_add;
2580 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2581 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2584 for(i=16; i<16+8; i++){
2585 if(h->non_zero_count_cache[ scan8[i] ])
2586 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2587 else if(h->mb[i*16])
2588 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2591 for(i=16; i<16+8; i++){
2592 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2593 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2594 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking.  For MBAFF, filter the whole pair after its bottom MB.
2600 if(h->deblocking_filter) {
2601 if (!simple && FRAME_MBAFF) {
2602 //FIXME try deblocking one mb at a time?
2603 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2604 const int mb_y = s->mb_y - 1;
2605 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2606 const int mb_xy= mb_x + mb_y*s->mb_stride;
2607 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2608 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2609 if (!bottom) return;
2610 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2611 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2612 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2614 if(IS_INTRA(mb_type_top | mb_type_bottom))
2615 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2617 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2621 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2622 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2623 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2624 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2625 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2628 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2629 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2630 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2631 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2632 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2634 tprintf(h->s.avctx, "call filter_mb\n");
2635 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2636 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2637 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2643 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2645 static void hl_decode_mb_simple(H264Context *h){
// Fast-path wrapper: the always-inlined internal with simple=1 lets the
// compiler dead-strip the MBAFF/PCM/SVQ3/gray branches.
2646 hl_decode_mb_internal(h, 1);
2650 * Process a macroblock; this handles edge cases, such as interlacing.
2652 static void av_noinline hl_decode_mb_complex(H264Context *h){
// Full-path wrapper (simple=0); av_noinline keeps the rarely-taken large
// body out of the caller.
2653 hl_decode_mb_internal(h, 0);
2656 static void hl_decode_mb(H264Context *h){
// Dispatch macroblock reconstruction to the simple or complex path based
// on features the current macroblock/stream actually uses.
2657 MpegEncContext * const s = &h->s;
2658 const int mb_x= s->mb_x;
2659 const int mb_y= s->mb_y;
2660 const int mb_xy= mb_x + mb_y*s->mb_stride;
2661 const int mb_type= s->current_picture.mb_type[mb_xy];
2662 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2668 hl_decode_mb_complex(h);
2669 else hl_decode_mb_simple(h);
2672 static void pic_as_field(Picture *pic, const int parity){
// Turn a frame Picture (in place) into a single-field view: for the bottom
// field, advance each plane pointer by one line, then double the strides
// so consecutive rows skip the opposite field's lines.
2674 for (i = 0; i < 4; ++i) {
2675 if (parity == PICT_BOTTOM_FIELD)
2676 pic->data[i] += pic->linesize[i];
// Setting 'reference' inside the loop is redundant but harmless.
2677 pic->reference = parity;
2678 pic->linesize[i] *= 2;
2682 static int split_field_copy(Picture *dest, Picture *src,
2683 int parity, int id_add){
// Copy src into dest as a field of the given parity, if src has that field
// as a reference.  Returns 1 when a field was emitted, 0 otherwise.
// NOTE(review): the '*dest = *src' copy and return are elided here.
2684 int match = !!(src->reference & parity);
2688 pic_as_field(dest, parity);
// id_add distinguishes same-parity vs opposite-parity pic_id numbering.
2690 dest->pic_id += id_add;
2697 * Split one reference list into field parts, interleaving by parity
2698 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2699 * set to look at the actual start of data for that field.
2701 * @param dest output list
2702 * @param dest_len maximum number of fields to put in dest
2703 * @param src the source reference list containing fields and/or field pairs
2704 * (aka short_ref/long_ref, or
2705 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2706 * @param src_len number of Picture's in source (pairs and unmatched fields)
2707 * @param parity the parity of the picture being decoded/needing
2708 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2709 * @return number of fields placed in dest
2711 static int split_field_half_ref_list(Picture *dest, int dest_len,
2712 Picture *src, int src_len, int parity){
// Interleave one half (short-term or long-term) of a frame reference list
// into fields, alternating between same-parity and opposite-parity fields
// as H.264 8.2.4.2.5 requires.  Returns the number of fields written.
// NOTE(review): same_i/opp_i declarations, their increments and the return
// are elided in this excerpt.
2713 int same_parity = 1;
2719 for (out_i = 0; out_i < dest_len; out_i += field_output) {
// Prefer the next same-parity field; fall back to opposite parity, and
// toggle preference each time a field is actually emitted.
2720 if (same_parity && same_i < src_len) {
2721 field_output = split_field_copy(dest + out_i, src + same_i,
2723 same_parity = !field_output;
2726 } else if (opp_i < src_len) {
2727 field_output = split_field_copy(dest + out_i, src + opp_i,
2728 PICT_FRAME - parity, 0);
2729 same_parity = field_output;
2741 * Split the reference frame list into a reference field list.
2742 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2743 * The input list contains both reference field pairs and
2744 * unmatched reference fields; it is ordered as spec describes
2745 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2746 * unmatched field pairs are also present. Conceptually this is equivalent
2747 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2749 * @param dest output reference list where ordered fields are to be placed
2750 * @param dest_len max number of fields to place at dest
2751 * @param src source reference list, as described above
2752 * @param src_len number of pictures (pairs and unmatched fields) in src
2753 * @param parity parity of field being currently decoded
2754 * (one of PICT_{TOP,BOTTOM}_FIELD)
2755 * @param long_i index into src array that holds first long reference picture,
2756 * or src_len if no long refs present.
2758 static int split_field_ref_list(Picture *dest, int dest_len,
2759 Picture *src, int src_len,
2760 int parity, int long_i){
// Split a combined (short-term then long-term) frame list into a field
// list: process the short-term half [0, long_i) first, then the long-term
// half, each interleaved by parity.  Returns total fields written.
2762 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2766 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2767 src_len - long_i, parity);
2772 * fills the default_ref_list.
2774 static int fill_default_ref_list(H264Context *h){
// Build the default reference lists L0 (and L1 for B slices) per H.264
// 8.2.4.2: B slices order short-term refs by POC distance, P slices by
// frame_num order; long-term refs follow.  For field pictures, the frame
// lists are then split into field lists.
// NOTE(review): many lines (declarations, loop braces, else branches, the
// final return) are elided in this excerpt.
2775 MpegEncContext * const s = &h->s;
2777 int smallest_poc_greater_than_current = -1;
2779 Picture sorted_short_ref[32];
2780 Picture field_entry_list[2][32];
2781 Picture *frame_list[2];
// Field decoding builds frame lists into temporaries and splits them at
// the end; frame decoding writes straight into default_ref_list.
2783 if (FIELD_PICTURE) {
2784 structure_sel = PICT_FRAME;
2785 frame_list[0] = field_entry_list[0];
2786 frame_list[1] = field_entry_list[1];
2789 frame_list[0] = h->default_ref_list[0];
2790 frame_list[1] = h->default_ref_list[1];
2793 if(h->slice_type==B_TYPE){
2800 /* sort frame according to poc in B slice */
// Selection sort of short-term refs by ascending POC; also remember the
// first entry whose POC is >= the current picture's (the L0/L1 pivot).
2801 for(out_i=0; out_i<h->short_ref_count; out_i++){
2803 int best_poc=INT_MAX;
2805 for(i=0; i<h->short_ref_count; i++){
2806 const int poc= h->short_ref[i]->poc;
2807 if(poc > limit && poc < best_poc){
2813 assert(best_i != INT_MIN);
2816 sorted_short_ref[out_i]= *h->short_ref[best_i];
2817 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
2818 if (-1 == smallest_poc_greater_than_current) {
2819 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2820 smallest_poc_greater_than_current = out_i;
2825 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2827 // find the largest poc
// Walk outward from the pivot: L0 goes backward in POC first, L1 forward.
2828 for(list=0; list<2; list++){
2831 int step= list ? -1 : 1;
2833 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2835 while(j<0 || j>= h->short_ref_count){
2836 if(j != -99 && step == (list ? -1 : 1))
// Reverse direction once one side of the pivot is exhausted.
2839 j= smallest_poc_greater_than_current + (step>>1);
2841 sel = sorted_short_ref[j].reference | structure_sel;
2842 if(sel != PICT_FRAME) continue;
2843 frame_list[list][index ]= sorted_short_ref[j];
2844 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2846 short_len[list] = index;
// Append long-term references, identified by their long-term index.
2848 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2850 if(h->long_ref[i] == NULL) continue;
2851 sel = h->long_ref[i]->reference | structure_sel;
2852 if(sel != PICT_FRAME) continue;
2854 frame_list[ list ][index ]= *h->long_ref[i];
2855 frame_list[ list ][index++].pic_id= i;;
2859 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2860 // swap the two first elements of L1 when
2861 // L0 and L1 are identical
2862 Picture temp= frame_list[1][0];
2863 frame_list[1][0] = frame_list[1][1];
2864 frame_list[1][1] = temp;
// For field pictures, convert the frame lists into field lists and
// zero-fill any unused tail entries.
2869 for(list=0; list<2; list++){
2871 len[list] = split_field_ref_list(h->default_ref_list[list],
2875 s->picture_structure,
2878 if(len[list] < h->ref_count[ list ])
2879 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// Non-B (P/SP) slices: single list L0 in short-term then long-term order.
2886 for(i=0; i<h->short_ref_count; i++){
2888 sel = h->short_ref[i]->reference | structure_sel;
2889 if(sel != PICT_FRAME) continue;
2890 frame_list[0][index ]= *h->short_ref[i];
2891 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2894 for(i = 0; i < 16; i++){
2896 if(h->long_ref[i] == NULL) continue;
2897 sel = h->long_ref[i]->reference | structure_sel;
2898 if(sel != PICT_FRAME) continue;
2899 frame_list[0][index ]= *h->long_ref[i];
2900 frame_list[0][index++].pic_id= i;;
2904 index = split_field_ref_list(h->default_ref_list[0],
2905 h->ref_count[0], frame_list[0],
2906 index, s->picture_structure,
2909 if(index < h->ref_count[0])
2910 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
// Trace output of the finished lists (no-op unless TRACE is enabled).
2913 for (i=0; i<h->ref_count[0]; i++) {
2914 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2916 if(h->slice_type==B_TYPE){
2917 for (i=0; i<h->ref_count[1]; i++) {
2918 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
2925 static void print_short_term(H264Context *h);
2926 static void print_long_term(H264Context *h);
2929 * Extract structure information about the picture described by pic_num in
2930 * the current decoding context (frame or field). Note that pic_num is
2931 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2932 * @param pic_num picture number for which to extract structure information
2933 * @param structure one of PICT_XXX describing structure of picture
2935 * @return frame number (short term) or long term index of picture
2936 * described by pic_num
2938 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
// For field decoding, the low bit of pic_num selects same/opposite field
// parity; strip it and report the resulting structure via *structure.
// NOTE(review): the condition guarding the parity flip and the return
// statement are elided in this excerpt.
2939 MpegEncContext * const s = &h->s;
2941 *structure = s->picture_structure;
2944 /* opposite field */
2945 *structure ^= PICT_FRAME;
2952 static int decode_ref_pic_list_reordering(H264Context *h){
// Parse the ref_pic_list_reordering() syntax and apply the reorderings to
// h->ref_list, starting from the default lists.  Returns 0 on success,
// negative on bitstream errors.
// NOTE(review): several lines (declarations, braces, 'break's, returns)
// are elided in this excerpt.
2953 MpegEncContext * const s = &h->s;
2954 int list, index, pic_structure;
2956 print_short_term(h);
2958 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
2960 for(list=0; list<h->list_count; list++){
2961 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_lX
2963 if(get_bits1(&s->gb)){
2964 int pred= h->curr_pic_num;
2966 for(index=0; ; index++){
2967 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2968 unsigned int pic_id;
2970 Picture *ref = NULL;
// idc 3 terminates the reordering commands for this list.
2972 if(reordering_of_pic_nums_idc==3)
2975 if(index >= h->ref_count[list]){
2976 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2980 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term reorder via abs_diff_pic_num (subtract/add).
2981 if(reordering_of_pic_nums_idc<2){
2982 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2985 if(abs_diff_pic_num > h->max_pic_num){
2986 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2990 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2991 else pred+= abs_diff_pic_num;
2992 pred &= h->max_pic_num - 1;
2994 frame_num = pic_num_extract(h, pred, &pic_structure);
// Search short-term refs newest-first for the matching frame_num.
2996 for(i= h->short_ref_count-1; i>=0; i--){
2997 ref = h->short_ref[i];
2998 assert(ref->reference);
2999 assert(!ref->long_ref);
3000 if(ref->data[0] != NULL &&
3001 ref->frame_num == frame_num &&
3002 (ref->reference & pic_structure) &&
3003 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
// idc 2: long-term reorder via long_term_pic_idx.
3010 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3012 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3015 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3018 ref = h->long_ref[long_idx];
3019 assert(!(ref && !ref->reference));
3020 if(ref && (ref->reference & pic_structure)){
3021 ref->pic_id= pic_id;
3022 assert(ref->long_ref);
3030 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3031 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Insert the found ref at 'index', shifting the displaced entries down
// (standard reordering-list insertion).
3033 for(i=index; i+1<h->ref_count[list]; i++){
3034 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3037 for(; i > index; i--){
3038 h->ref_list[list][i]= h->ref_list[list][i-1];
3040 h->ref_list[list][index]= *ref;
3042 pic_as_field(&h->ref_list[list][index], pic_structure);
3046 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Replace any still-empty entries with the current picture (error
// concealment for missing references).
3052 for(list=0; list<h->list_count; list++){
3053 for(index= 0; index < h->ref_count[list]; index++){
3054 if(!h->ref_list[list][index].data[0])
3055 h->ref_list[list][index]= s->current_picture;
3059 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3060 direct_dist_scale_factor(h);
3061 direct_ref_list_init(h);
3065 static void fill_mbaff_ref_list(H264Context *h){
3067 for(list=0; list<2; list++){ //FIXME try list_count
3068 for(i=0; i<h->ref_count[list]; i++){
3069 Picture *frame = &h->ref_list[list][i];
3070 Picture *field = &h->ref_list[list][16+2*i];
3073 field[0].linesize[j] <<= 1;
3074 field[0].reference = PICT_TOP_FIELD;
3075 field[1] = field[0];
3077 field[1].data[j] += frame->linesize[j];
3078 field[1].reference = PICT_BOTTOM_FIELD;
3080 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3081 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3083 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3084 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3088 for(j=0; j<h->ref_count[1]; j++){
3089 for(i=0; i<h->ref_count[0]; i++)
3090 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3091 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3092 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
3096 static int pred_weight_table(H264Context *h){
3097 MpegEncContext * const s = &h->s;
3099 int luma_def, chroma_def;
3102 h->use_weight_chroma= 0;
3103 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3104 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3105 luma_def = 1<<h->luma_log2_weight_denom;
3106 chroma_def = 1<<h->chroma_log2_weight_denom;
3108 for(list=0; list<2; list++){
3109 for(i=0; i<h->ref_count[list]; i++){
3110 int luma_weight_flag, chroma_weight_flag;
3112 luma_weight_flag= get_bits1(&s->gb);
3113 if(luma_weight_flag){
3114 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3115 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3116 if( h->luma_weight[list][i] != luma_def
3117 || h->luma_offset[list][i] != 0)
3120 h->luma_weight[list][i]= luma_def;
3121 h->luma_offset[list][i]= 0;
3124 chroma_weight_flag= get_bits1(&s->gb);
3125 if(chroma_weight_flag){
3128 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3129 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3130 if( h->chroma_weight[list][i][j] != chroma_def
3131 || h->chroma_offset[list][i][j] != 0)
3132 h->use_weight_chroma= 1;
3137 h->chroma_weight[list][i][j]= chroma_def;
3138 h->chroma_offset[list][i][j]= 0;
3142 if(h->slice_type != B_TYPE) break;
3144 h->use_weight= h->use_weight || h->use_weight_chroma;
3148 static void implicit_weight_table(H264Context *h){
3149 MpegEncContext * const s = &h->s;
3151 int cur_poc = s->current_picture_ptr->poc;
3153 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3154 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3156 h->use_weight_chroma= 0;
3161 h->use_weight_chroma= 2;
3162 h->luma_log2_weight_denom= 5;
3163 h->chroma_log2_weight_denom= 5;
3165 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3166 int poc0 = h->ref_list[0][ref0].poc;
3167 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3168 int poc1 = h->ref_list[1][ref1].poc;
3169 int td = av_clip(poc1 - poc0, -128, 127);
3171 int tb = av_clip(cur_poc - poc0, -128, 127);
3172 int tx = (16384 + (FFABS(td) >> 1)) / td;
3173 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3174 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3175 h->implicit_weight[ref0][ref1] = 32;
3177 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3179 h->implicit_weight[ref0][ref1] = 32;
3185 * Mark a picture as no longer needed for reference. The refmask
3186 * argument allows unreferencing of individual fields or the whole frame.
3187 * If the picture becomes entirely unreferenced, but is being held for
3188 * display purposes, it is marked as such.
3189 * @param refmask mask of fields to unreference; the mask is bitwise
3190 * anded with the reference marking of pic
3191 * @return non-zero if pic becomes entirely unreferenced (except possibly
3192 * for display purposes) zero if one of the fields remains in
3195 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3197 if (pic->reference &= refmask) {
3200 if(pic == h->delayed_output_pic)
3201 pic->reference=DELAYED_PIC_REF;
3203 for(i = 0; h->delayed_pic[i]; i++)
3204 if(pic == h->delayed_pic[i]){
3205 pic->reference=DELAYED_PIC_REF;
3214 * instantaneous decoder refresh.
3216 static void idr(H264Context *h){
3219 for(i=0; i<16; i++){
3220 if (h->long_ref[i] != NULL) {
3221 unreference_pic(h, h->long_ref[i], 0);
3222 h->long_ref[i]= NULL;
3225 h->long_ref_count=0;
3227 for(i=0; i<h->short_ref_count; i++){
3228 unreference_pic(h, h->short_ref[i], 0);
3229 h->short_ref[i]= NULL;
3231 h->short_ref_count=0;
3234 /* forget old pics after a seek */
3235 static void flush_dpb(AVCodecContext *avctx){
3236 H264Context *h= avctx->priv_data;
3238 for(i=0; i<16; i++) {
3239 if(h->delayed_pic[i])
3240 h->delayed_pic[i]->reference= 0;
3241 h->delayed_pic[i]= NULL;
3243 if(h->delayed_output_pic)
3244 h->delayed_output_pic->reference= 0;
3245 h->delayed_output_pic= NULL;
3247 if(h->s.current_picture_ptr)
3248 h->s.current_picture_ptr->reference= 0;
3249 h->s.first_field= 0;
3250 ff_mpeg_flush(avctx);
3254 * Find a Picture in the short term reference list by frame number.
3255 * @param frame_num frame number to search for
3256 * @param idx the index into h->short_ref where returned picture is found
3257 * undefined if no picture found.
3258 * @return pointer to the found picture, or NULL if no pic with the provided
3259 * frame number is found
3261 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3262 MpegEncContext * const s = &h->s;
3265 for(i=0; i<h->short_ref_count; i++){
3266 Picture *pic= h->short_ref[i];
3267 if(s->avctx->debug&FF_DEBUG_MMCO)
3268 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3269 if(pic->frame_num == frame_num) {
3278 * Remove a picture from the short term reference list by its index in
3279 * that list. This does no checking on the provided index; it is assumed
3280 * to be valid. Other list entries are shifted down.
3281 * @param i index into h->short_ref of picture to remove.
3283 static void remove_short_at_index(H264Context *h, int i){
3284 assert(i > 0 && i < h->short_ref_count);
3285 h->short_ref[i]= NULL;
3286 if (--h->short_ref_count)
3287 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3292 * @return the removed picture or NULL if an error occurs
3294 static Picture * remove_short(H264Context *h, int frame_num){
3295 MpegEncContext * const s = &h->s;
3299 if(s->avctx->debug&FF_DEBUG_MMCO)
3300 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3302 pic = find_short(h, frame_num, &i);
3304 remove_short_at_index(h, i);
3310 * Remove a picture from the long term reference list by its index in
3311 * that list. This does no checking on the provided index; it is assumed
3312 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3313 * @param i index into h->long_ref of picture to remove.
3315 static void remove_long_at_index(H264Context *h, int i){
3316 h->long_ref[i]= NULL;
3317 h->long_ref_count--;
3322 * @return the removed picture or NULL if an error occurs
3324 static Picture * remove_long(H264Context *h, int i){
3327 pic= h->long_ref[i];
3329 remove_long_at_index(h, i);
3335 * print short term list
3337 static void print_short_term(H264Context *h) {
3339 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3340 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3341 for(i=0; i<h->short_ref_count; i++){
3342 Picture *pic= h->short_ref[i];
3343 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3349 * print long term list
3351 static void print_long_term(H264Context *h) {
3353 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3354 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3355 for(i = 0; i < 16; i++){
3356 Picture *pic= h->long_ref[i];
3358 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3365 * Executes the reference picture marking (memory management control operations).
3367 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3368 MpegEncContext * const s = &h->s;
3370 int current_ref_assigned=0;
3373 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3374 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3376 for(i=0; i<mmco_count; i++){
3377 int structure, frame_num, unref_pic;
3378 if(s->avctx->debug&FF_DEBUG_MMCO)
3379 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3381 switch(mmco[i].opcode){
3382 case MMCO_SHORT2UNUSED:
3383 if(s->avctx->debug&FF_DEBUG_MMCO)
3384 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3385 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3386 pic = find_short(h, frame_num, &j);
3388 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3389 remove_short_at_index(h, j);
3390 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3391 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
3393 case MMCO_SHORT2LONG:
3394 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3395 h->long_ref[mmco[i].long_arg]->frame_num ==
3396 mmco[i].short_pic_num / 2) {
3397 /* do nothing, we've already moved this field pair. */
3399 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
3401 pic= remove_long(h, mmco[i].long_arg);
3402 if(pic) unreference_pic(h, pic, 0);
3404 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3405 if (h->long_ref[ mmco[i].long_arg ]){
3406 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3407 h->long_ref_count++;
3411 case MMCO_LONG2UNUSED:
3412 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3413 pic = h->long_ref[j];
3415 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3416 remove_long_at_index(h, j);
3417 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3418 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3422 if (FIELD_PICTURE && !s->first_field) {
3423 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3424 /* Just mark second field as referenced */
3426 } else if (s->current_picture_ptr->reference) {
3427 /* First field in pair is in short term list or
3428 * at a different long term index.
3429 * This is not allowed; see 7.4.3, notes 2 and 3.
3430 * Report the problem and keep the pair where it is,
3431 * and mark this field valid.
3433 av_log(h->s.avctx, AV_LOG_ERROR,
3434 "illegal long term reference assignment for second "
3435 "field in complementary field pair (first field is "
3436 "short term or has non-matching long index)\n");
3442 pic= remove_long(h, mmco[i].long_arg);
3443 if(pic) unreference_pic(h, pic, 0);
3445 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3446 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3447 h->long_ref_count++;
3450 s->current_picture_ptr->reference |= s->picture_structure;
3451 current_ref_assigned=1;
3453 case MMCO_SET_MAX_LONG:
3454 assert(mmco[i].long_arg <= 16);
3455 // just remove the long term which index is greater than new max
3456 for(j = mmco[i].long_arg; j<16; j++){
3457 pic = remove_long(h, j);
3458 if (pic) unreference_pic(h, pic, 0);
3462 while(h->short_ref_count){
3463 pic= remove_short(h, h->short_ref[0]->frame_num);
3464 if(pic) unreference_pic(h, pic, 0);
3466 for(j = 0; j < 16; j++) {
3467 pic= remove_long(h, j);
3468 if(pic) unreference_pic(h, pic, 0);
3475 if (!current_ref_assigned && FIELD_PICTURE &&
3476 !s->first_field && s->current_picture_ptr->reference) {
3478 /* Second field of complementary field pair; the first field of
3479 * which is already referenced. If short referenced, it
3480 * should be first entry in short_ref. If not, it must exist
3481 * in long_ref; trying to put it on the short list here is an
3482 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3484 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3485 /* Just mark the second field valid */
3486 s->current_picture_ptr->reference = PICT_FRAME;
3487 } else if (s->current_picture_ptr->long_ref) {
3488 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3489 "assignment for second field "
3490 "in complementary field pair "
3491 "(first field is long term)\n");
3494 * First field in reference, but not in any sensible place on our
3495 * reference lists. This shouldn't happen unless reference
3496 * handling somewhere else is wrong.
3500 current_ref_assigned = 1;
3503 if(!current_ref_assigned){
3504 pic= remove_short(h, s->current_picture_ptr->frame_num);
3506 unreference_pic(h, pic, 0);
3507 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3510 if(h->short_ref_count)
3511 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3513 h->short_ref[0]= s->current_picture_ptr;
3514 h->short_ref[0]->long_ref=0;
3515 h->short_ref_count++;
3516 s->current_picture_ptr->reference |= s->picture_structure;
3519 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3521 /* We have too many reference frames, probably due to corrupted
3522 * stream. Need to discard one frame. Prevents overrun of the
3523 * short_ref and long_ref buffers.
3525 av_log(h->s.avctx, AV_LOG_ERROR,
3526 "number of reference frames exceeds max (probably "
3527 "corrupt input), discarding one\n");
3529 if (h->long_ref_count) {
3530 for (i = 0; i < 16; ++i)
3535 pic = h->long_ref[i];
3536 remove_long_at_index(h, i);
3538 pic = h->short_ref[h->short_ref_count - 1];
3539 remove_short_at_index(h, h->short_ref_count - 1);
3541 unreference_pic(h, pic, 0);
3544 print_short_term(h);
3549 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3550 MpegEncContext * const s = &h->s;
3553 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3554 s->broken_link= get_bits1(gb) -1;
3555 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3556 if(h->mmco[0].long_arg == -1)
3559 h->mmco[0].opcode= MMCO_LONG;
3563 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3564 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3565 MMCOOpcode opcode= get_ue_golomb(gb);
3567 h->mmco[i].opcode= opcode;
3568 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3569 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3570 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3571 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3575 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3576 unsigned int long_arg= get_ue_golomb(gb);
3577 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3578 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3581 h->mmco[i].long_arg= long_arg;
3584 if(opcode > (unsigned)MMCO_LONG){
3585 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3588 if(opcode == MMCO_END)
3593 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3595 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3596 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3597 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3598 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3600 if (FIELD_PICTURE) {
3601 h->mmco[0].short_pic_num *= 2;
3602 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3603 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3614 static int init_poc(H264Context *h){
3615 MpegEncContext * const s = &h->s;
3616 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3619 if(h->nal_unit_type == NAL_IDR_SLICE){
3620 h->frame_num_offset= 0;
3622 if(h->frame_num < h->prev_frame_num)
3623 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3625 h->frame_num_offset= h->prev_frame_num_offset;
3628 if(h->sps.poc_type==0){
3629 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3631 if(h->nal_unit_type == NAL_IDR_SLICE){
3636 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3637 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3638 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3639 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3641 h->poc_msb = h->prev_poc_msb;
3642 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3644 field_poc[1] = h->poc_msb + h->poc_lsb;
3645 if(s->picture_structure == PICT_FRAME)
3646 field_poc[1] += h->delta_poc_bottom;
3647 }else if(h->sps.poc_type==1){
3648 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3651 if(h->sps.poc_cycle_length != 0)
3652 abs_frame_num = h->frame_num_offset + h->frame_num;
3656 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3659 expected_delta_per_poc_cycle = 0;
3660 for(i=0; i < h->sps.poc_cycle_length; i++)
3661 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3663 if(abs_frame_num > 0){
3664 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3665 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3667 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3668 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3669 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3673 if(h->nal_ref_idc == 0)
3674 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3676 field_poc[0] = expectedpoc + h->delta_poc[0];
3677 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3679 if(s->picture_structure == PICT_FRAME)
3680 field_poc[1] += h->delta_poc[1];
3683 if(h->nal_unit_type == NAL_IDR_SLICE){
3686 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3687 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
3693 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3694 s->current_picture_ptr->field_poc[0]= field_poc[0];
3695 s->current_picture_ptr->poc = field_poc[0];
3697 if(s->picture_structure != PICT_TOP_FIELD) {
3698 s->current_picture_ptr->field_poc[1]= field_poc[1];
3699 s->current_picture_ptr->poc = field_poc[1];
3701 if(!FIELD_PICTURE || !s->first_field) {
3702 Picture *cur = s->current_picture_ptr;
3703 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3711 * initialize scan tables
3713 static void init_scan_tables(H264Context *h){
3714 MpegEncContext * const s = &h->s;
3716 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3717 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3718 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3720 for(i=0; i<16; i++){
3721 #define T(x) (x>>2) | ((x<<2) & 0xF)
3722 h->zigzag_scan[i] = T(zigzag_scan[i]);
3723 h-> field_scan[i] = T( field_scan[i]);
3727 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3728 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3729 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3730 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3731 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3733 for(i=0; i<64; i++){
3734 #define T(x) (x>>3) | ((x&7)<<3)
3735 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3736 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3737 h->field_scan8x8[i] = T(field_scan8x8[i]);
3738 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3742 if(h->sps.transform_bypass){ //FIXME same ugly
3743 h->zigzag_scan_q0 = zigzag_scan;
3744 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3745 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3746 h->field_scan_q0 = field_scan;
3747 h->field_scan8x8_q0 = field_scan8x8;
3748 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3750 h->zigzag_scan_q0 = h->zigzag_scan;
3751 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3752 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3753 h->field_scan_q0 = h->field_scan;
3754 h->field_scan8x8_q0 = h->field_scan8x8;
3755 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3760 * Replicates H264 "master" context to thread contexts.
3762 static void clone_slice(H264Context *dst, H264Context *src)
3764 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3765 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3766 dst->s.current_picture = src->s.current_picture;
3767 dst->s.linesize = src->s.linesize;
3768 dst->s.uvlinesize = src->s.uvlinesize;
3769 dst->s.first_field = src->s.first_field;
3771 dst->prev_poc_msb = src->prev_poc_msb;
3772 dst->prev_poc_lsb = src->prev_poc_lsb;
3773 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3774 dst->prev_frame_num = src->prev_frame_num;
3775 dst->short_ref_count = src->short_ref_count;
3777 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3778 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3779 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3780 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3782 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3783 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3787 * decodes a slice header.
3788 * this will allso call MPV_common_init() and frame_start() as needed
3790 * @param h h264context
3791 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3793 * @return 0 if okay, <0 if an error occured, 1 if decoding must not be multithreaded
3795 static int decode_slice_header(H264Context *h, H264Context *h0){
3796 MpegEncContext * const s = &h->s;
3797 MpegEncContext * const s0 = &h0->s;
3798 unsigned int first_mb_in_slice;
3799 unsigned int pps_id;
3800 int num_ref_idx_active_override_flag;
3801 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3802 unsigned int slice_type, tmp, i;
3803 int default_ref_list_done = 0;
3804 int last_pic_structure;
3806 s->dropable= h->nal_ref_idc == 0;
3808 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3809 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3810 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3812 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3813 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3816 first_mb_in_slice= get_ue_golomb(&s->gb);
3818 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3819 h0->current_slice = 0;
3820 if (!s0->first_field)
3821 s->current_picture_ptr= NULL;
3824 slice_type= get_ue_golomb(&s->gb);
3826 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3831 h->slice_type_fixed=1;
3833 h->slice_type_fixed=0;
3835 slice_type= slice_type_map[ slice_type ];
3836 if (slice_type == I_TYPE
3837 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3838 default_ref_list_done = 1;
3840 h->slice_type= slice_type;
3842 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3843 if (s->pict_type == B_TYPE && s0->last_picture_ptr == NULL) {
3844 av_log(h->s.avctx, AV_LOG_ERROR,
3845 "B picture before any references, skipping\n");
3849 pps_id= get_ue_golomb(&s->gb);
3850 if(pps_id>=MAX_PPS_COUNT){
3851 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3854 if(!h0->pps_buffers[pps_id]) {
3855 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3858 h->pps= *h0->pps_buffers[pps_id];
3860 if(!h0->sps_buffers[h->pps.sps_id]) {
3861 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3864 h->sps = *h0->sps_buffers[h->pps.sps_id];
3866 if(h == h0 && h->dequant_coeff_pps != pps_id){
3867 h->dequant_coeff_pps = pps_id;
3868 init_dequant_tables(h);
3871 s->mb_width= h->sps.mb_width;
3872 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3874 h->b_stride= s->mb_width*4;
3875 h->b8_stride= s->mb_width*2;
3877 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3878 if(h->sps.frame_mbs_only_flag)
3879 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3881 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3883 if (s->context_initialized
3884 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3886 return -1; // width / height changed during parallelized decoding
3890 if (!s->context_initialized) {
3892 return -1; // we cant (re-)initialize context during parallel decoding
3893 if (MPV_common_init(s) < 0)
3897 init_scan_tables(h);
3900 for(i = 1; i < s->avctx->thread_count; i++) {
3902 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3903 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3904 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3907 init_scan_tables(c);
3911 for(i = 0; i < s->avctx->thread_count; i++)
3912 if(context_init(h->thread_context[i]) < 0)
3915 s->avctx->width = s->width;
3916 s->avctx->height = s->height;
3917 s->avctx->sample_aspect_ratio= h->sps.sar;
3918 if(!s->avctx->sample_aspect_ratio.den)
3919 s->avctx->sample_aspect_ratio.den = 1;
3921 if(h->sps.timing_info_present_flag){
3922 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3923 if(h->x264_build > 0 && h->x264_build < 44)
3924 s->avctx->time_base.den *= 2;
3925 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3926 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3930 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3933 h->mb_aff_frame = 0;
3934 last_pic_structure = s0->picture_structure;
3935 if(h->sps.frame_mbs_only_flag){
3936 s->picture_structure= PICT_FRAME;
3938 if(get_bits1(&s->gb)) { //field_pic_flag
3939 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3941 s->picture_structure= PICT_FRAME;
3942 h->mb_aff_frame = h->sps.mb_aff;
3946 if(h0->current_slice == 0){
3947 /* See if we have a decoded first field looking for a pair... */
3948 if (s0->first_field) {
3949 assert(s0->current_picture_ptr);
3950 assert(s0->current_picture_ptr->data[0]);
3951 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3953 /* figure out if we have a complementary field pair */
3954 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3956 * Previous field is unmatched. Don't display it, but let it
3957 * remain for reference if marked as such.
3959 s0->current_picture_ptr = NULL;
3960 s0->first_field = FIELD_PICTURE;
3963 if (h->nal_ref_idc &&
3964 s0->current_picture_ptr->reference &&
3965 s0->current_picture_ptr->frame_num != h->frame_num) {
3967 * This and previous field were reference, but had
3968 * different frame_nums. Consider this field first in
3969 * pair. Throw away previous field except for reference
3972 s0->first_field = 1;
3973 s0->current_picture_ptr = NULL;
3976 /* Second field in complementary pair */
3977 s0->first_field = 0;
3982 /* Frame or first field in a potentially complementary pair */
3983 assert(!s0->current_picture_ptr);
3984 s0->first_field = FIELD_PICTURE;
3987 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3988 s0->first_field = 0;
3995 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3997 assert(s->mb_num == s->mb_width * s->mb_height);
3998 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3999 first_mb_in_slice >= s->mb_num){
4000 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4003 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4004 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4005 if (s->picture_structure == PICT_BOTTOM_FIELD)
4006 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4007 assert(s->mb_y < s->mb_height);
4009 if(s->picture_structure==PICT_FRAME){
4010 h->curr_pic_num= h->frame_num;
4011 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4013 h->curr_pic_num= 2*h->frame_num + 1;
4014 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4017 if(h->nal_unit_type == NAL_IDR_SLICE){
4018 get_ue_golomb(&s->gb); /* idr_pic_id */
4021 if(h->sps.poc_type==0){
4022 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4024 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4025 h->delta_poc_bottom= get_se_golomb(&s->gb);
4029 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4030 h->delta_poc[0]= get_se_golomb(&s->gb);
4032 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4033 h->delta_poc[1]= get_se_golomb(&s->gb);
4038 if(h->pps.redundant_pic_cnt_present){
4039 h->redundant_pic_count= get_ue_golomb(&s->gb);
4042 //set defaults, might be overriden a few line later
4043 h->ref_count[0]= h->pps.ref_count[0];
4044 h->ref_count[1]= h->pps.ref_count[1];
4046 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4047 if(h->slice_type == B_TYPE){
4048 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4049 if(FIELD_OR_MBAFF_PICTURE && h->direct_spatial_mv_pred)
4050 av_log(h->s.avctx, AV_LOG_ERROR, "Interlaced pictures + spatial direct mode is not implemented\n");
4052 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4054 if(num_ref_idx_active_override_flag){
4055 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4056 if(h->slice_type==B_TYPE)
4057 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4059 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4060 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4061 h->ref_count[0]= h->ref_count[1]= 1;
4065 if(h->slice_type == B_TYPE)
4072 if(!default_ref_list_done){
4073 fill_default_ref_list(h);
4076 if(decode_ref_pic_list_reordering(h) < 0)
4079 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4080 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4081 pred_weight_table(h);
4082 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4083 implicit_weight_table(h);
4088 decode_ref_pic_marking(h0, &s->gb);
4091 fill_mbaff_ref_list(h);
4093 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4094 tmp = get_ue_golomb(&s->gb);
4096 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4099 h->cabac_init_idc= tmp;
4102 h->last_qscale_diff = 0;
4103 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4105 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4109 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4110 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4111 //FIXME qscale / qp ... stuff
4112 if(h->slice_type == SP_TYPE){
4113 get_bits1(&s->gb); /* sp_for_switch_flag */
4115 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4116 get_se_golomb(&s->gb); /* slice_qs_delta */
4119 h->deblocking_filter = 1;
4120 h->slice_alpha_c0_offset = 0;
4121 h->slice_beta_offset = 0;
4122 if( h->pps.deblocking_filter_parameters_present ) {
4123 tmp= get_ue_golomb(&s->gb);
4125 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4128 h->deblocking_filter= tmp;
4129 if(h->deblocking_filter < 2)
4130 h->deblocking_filter^= 1; // 1<->0
4132 if( h->deblocking_filter ) {
4133 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4134 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4138 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4139 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4140 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4141 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4142 h->deblocking_filter= 0;
4144 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4145 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4146 /* Cheat slightly for speed:
4147 Do not bother to deblock across slices. */
4148 h->deblocking_filter = 2;
4150 h0->max_contexts = 1;
4151 if(!h0->single_decode_warning) {
4152 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4153 h0->single_decode_warning = 1;
4156 return 1; // deblocking switched inside frame
4161 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4162 slice_group_change_cycle= get_bits(&s->gb, ?);
4165 h0->last_slice_type = slice_type;
4166 h->slice_num = ++h0->current_slice;
4168 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4169 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4171 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4172 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4174 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4176 av_get_pict_type_char(h->slice_type),
4177 pps_id, h->frame_num,
4178 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4179 h->ref_count[0], h->ref_count[1],
4181 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4183 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * Reads the CAVLC level_prefix: the number of leading zero bits before
 * the first set bit in the cached bitstream window.
 * NOTE(review): this chunk is a sparse listing — the declarations of
 * buf/log and the final return statement are not visible here.
 */
4193 static inline int get_level_prefix(GetBitContext *gb){
4197     OPEN_READER(re, gb);
4198     UPDATE_CACHE(re, gb);
4199     buf=GET_CACHE(re, gb); /* peek up to 32 cached bits */
4201     log= 32 - av_log2(buf); /* index (1-based) of the first set bit */
4203     print_bin(buf>>(32-log), log);
4204     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4207     LAST_SKIP_BITS(re, gb, log); /* consume the zero run plus the terminating 1 */
4208     CLOSE_READER(re, gb);
/**
 * Checks the sub-macroblock partitioning to decide whether an 8x8
 * transform may be used for this MB.
 * NOTE(review): sparse listing — the loop header over i and the return
 * expression are not visible in this chunk.
 */
4213 static inline int get_dct8x8_allowed(H264Context *h){
4216         if(!IS_SUB_8X8(h->sub_mb_type[i])
4217            || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4224 * decodes a residual block.
4225 * @param n block index
4226 * @param scantable scantable
4227 * @param max_coeff number of coefficients in the block
4228  * @return <0 if an error occurred
/* NOTE(review): sparse listing — several lines of this function (else
 * branches, the level[] declaration, suffix_length updates, loop
 * closers) are missing from this view; comments describe only what the
 * visible lines establish. */
4230 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4231     MpegEncContext * const s = &h->s;
4232     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4234     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4236     //FIXME put trailing_ones into the context
     /* --- coeff_token: picks the VLC table from the predicted
      *     non-zero count of neighbouring blocks --- */
4238     if(n == CHROMA_DC_BLOCK_INDEX){
4239         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4240         total_coeff= coeff_token>>2;
4242         if(n == LUMA_DC_BLOCK_INDEX){
4243             total_coeff= pred_non_zero_count(h, 0);
4244             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4245             total_coeff= coeff_token>>2;
4247             total_coeff= pred_non_zero_count(h, n);
4248             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4249             total_coeff= coeff_token>>2;
4250             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4254     //FIXME set last_non_zero?
4258     if(total_coeff > (unsigned)max_coeff) {
4259         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
     /* low 2 bits of coeff_token carry the trailing-ones count */
4263     trailing_ones= coeff_token&3;
4264     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4265     assert(total_coeff<=16);
     /* trailing ones: one sign bit each, magnitude is always 1 */
4267     for(i=0; i<trailing_ones; i++){
4268         level[i]= 1 - 2*get_bits1(gb);
4272         int level_code, mask;
4273         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4274         int prefix= get_level_prefix(gb);
4276         //first coefficient has suffix_length equal to 0 or 1
4277         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4279             level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4281             level_code= (prefix<<suffix_length); //part
4282         }else if(prefix==14){
4284             level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4286             level_code= prefix + get_bits(gb, 4); //part
4287         }else if(prefix==15){
4288             level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4289             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4291             av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4295         if(trailing_ones < 3) level_code += 2;
         /* map unsigned level_code to signed level (even -> +, odd -> -) */
4300         mask= -(level_code&1);
4301         level[i]= (((2+level_code)>>1) ^ mask) - mask;
4304         //remaining coefficients have suffix_length > 0
4305         for(;i<total_coeff;i++) {
4306             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4307             prefix = get_level_prefix(gb);
4309                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4310             }else if(prefix==15){
4311                 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4313                 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4316             mask= -(level_code&1);
4317             level[i]= (((2+level_code)>>1) ^ mask) - mask;
4318             if(level_code > suffix_limit[suffix_length])
     /* --- total_zeros / run_before: place levels into block[] via the
      *     scantable; qmul==NULL path stores raw levels, otherwise the
      *     levels are dequantized as (level*qmul[j]+32)>>6 --- */
4323     if(total_coeff == max_coeff)
4326         if(n == CHROMA_DC_BLOCK_INDEX)
4327             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4329             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4332     coeff_num = zeros_left + total_coeff - 1;
4333     j = scantable[coeff_num];
4335         block[j] = level[0];
4336         for(i=1;i<total_coeff;i++) {
4339             else if(zeros_left < 7){
4340                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4342                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4344             zeros_left -= run_before;
4345             coeff_num -= 1 + run_before;
4346             j= scantable[ coeff_num ];
4351         block[j] = (level[0] * qmul[j] + 32)>>6;
4352         for(i=1;i<total_coeff;i++) {
4355             else if(zeros_left < 7){
4356                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4358                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4360             zeros_left -= run_before;
4361             coeff_num -= 1 + run_before;
4362             j= scantable[ coeff_num ];
4364             block[j]= (level[i] * qmul[j] + 32)>>6;
4369         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF pair from the
 * left neighbour if it is in the same slice, otherwise from the top
 * neighbour (sparse listing: the final fallback value is not visible).
 */
4376 static void predict_field_decoding_flag(H264Context *h){
4377     MpegEncContext * const s = &h->s;
4378     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4379     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4380                 ? s->current_picture.mb_type[mb_xy-1]
4381                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4382                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4384     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4388 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: clears the coefficient caches,
 * derives the skip motion (direct prediction for B, pskip prediction
 * for P) and writes mb_type/qscale/slice info back.
 * NOTE(review): sparse listing — mb_type initialisation and several
 * closing braces are missing from this view.
 */
4390 static void decode_mb_skip(H264Context *h){
4391     MpegEncContext * const s = &h->s;
4392     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4395     memset(h->non_zero_count[mb_xy], 0, 16);
4396     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4399         mb_type|= MB_TYPE_INTERLACED;
4401     if( h->slice_type == B_TYPE )
4403         // just for fill_caches. pred_direct_motion will set the real mb_type
4404         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4406         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4407         pred_direct_motion(h, &mb_type);
4408         mb_type|= MB_TYPE_SKIP;
4413         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4415         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4416         pred_pskip_motion(h, &mx, &my);
4417         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4418         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4421     write_back_motion(h, mb_type);
4422     s->current_picture.mb_type[mb_xy]= mb_type;
4423     s->current_picture.qscale_table[mb_xy]= s->qscale;
4424     h->slice_table[ mb_xy ]= h->slice_num;
4425     h->prev_mb_skipped= 1;
4429 * decodes a macroblock
4430 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): sparse listing — many lines of this large function
 * (else branches, loop headers, declarations of x/y/i/di/pred_mode/
 * val/mx/my/dquant/nnz, error returns, closing braces) are not visible
 * in this chunk. Comments mark only the visible decoding stages. */
4432 static int decode_mb_cavlc(H264Context *h){
4433     MpegEncContext * const s = &h->s;
4434     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4435     int partition_count;
4436     unsigned int mb_type, cbp;
4437     int dct8x8_allowed= h->pps.transform_8x8_mode;
4439     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4441     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4442     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
     /* --- mb_skip_run handling for P/B slices --- */
4444     if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4445         if(s->mb_skip_run==-1)
4446             s->mb_skip_run= get_ue_golomb(&s->gb);
4448         if (s->mb_skip_run--) {
4449             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4450                 if(s->mb_skip_run==0)
4451                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4453                     predict_field_decoding_flag(h);
     /* MBAFF: field/frame flag is coded once per MB pair (top MB) */
4460         if( (s->mb_y&1) == 0 )
4461             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4463         h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4465     h->prev_mb_skipped= 0;
     /* --- mb_type: map the slice-type specific ue(v) code to internal
      *     MB_TYPE_* flags and the partition count --- */
4467     mb_type= get_ue_golomb(&s->gb);
4468     if(h->slice_type == B_TYPE){
4470             partition_count= b_mb_type_info[mb_type].partition_count;
4471             mb_type=         b_mb_type_info[mb_type].type;
4474             goto decode_intra_mb;
4476     }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4478             partition_count= p_mb_type_info[mb_type].partition_count;
4479             mb_type=         p_mb_type_info[mb_type].type;
4482             goto decode_intra_mb;
4485         assert(h->slice_type == I_TYPE);
4488             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4492         cbp=                    i_mb_type_info[mb_type].cbp;
4493         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4494         mb_type=                i_mb_type_info[mb_type].type;
4498         mb_type |= MB_TYPE_INTERLACED;
4500     h->slice_table[ mb_xy ]= h->slice_num;
     /* --- I_PCM: raw 8-bit samples, byte-aligned --- */
4502     if(IS_INTRA_PCM(mb_type)){
4505         // We assume these blocks are very rare so we do not optimize it.
4506         align_get_bits(&s->gb);
4508         // The pixels are stored in the same order as levels in h->mb array.
4509         for(y=0; y<16; y++){
4510             const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4511             for(x=0; x<16; x++){
4512                 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4513                 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4517             const int index= 256 + 4*(y&3) + 32*(y>>2);
4519                 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4520                 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4524             const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4526                 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4527                 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4531         // In deblocking, the quantizer is 0
4532         s->current_picture.qscale_table[mb_xy]= 0;
4533         h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4534         h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4535         // All coeffs are present
4536         memset(h->non_zero_count[mb_xy], 16, 16);
4538         s->current_picture.mb_type[mb_xy]= mb_type;
     /* MBAFF field MBs address twice as many reference fields */
4543         h->ref_count[0] <<= 1;
4544         h->ref_count[1] <<= 1;
4547     fill_caches(h, mb_type, 0);
     /* --- intra prediction modes --- */
4550     if(IS_INTRA(mb_type)){
4552 //            init_top_left_availability(h);
4553         if(IS_INTRA4x4(mb_type)){
4556             if(dct8x8_allowed && get_bits1(&s->gb)){
4557                 mb_type |= MB_TYPE_8x8DCT;
4561 //                fill_intra4x4_pred_table(h);
4562             for(i=0; i<16; i+=di){
4563                 int mode= pred_intra_mode(h, i);
4565                 if(!get_bits1(&s->gb)){
4566                     const int rem_mode= get_bits(&s->gb, 3);
4567                     mode = rem_mode + (rem_mode >= mode);
4571                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4573                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4575             write_back_intra_pred_mode(h);
4576             if( check_intra4x4_pred_mode(h) < 0)
4579             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4580             if(h->intra16x16_pred_mode < 0)
4584         pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4587         h->chroma_pred_mode= pred_mode;
     /* --- inter, 8x8 partitions: sub_mb_type, refs, then MVs --- */
4588     }else if(partition_count==4){
4589         int i, j, sub_partition_count[4], list, ref[2][4];
4591         if(h->slice_type == B_TYPE){
4593                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4594                 if(h->sub_mb_type[i] >=13){
4595                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4598                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4599                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4601             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4602                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4603                 pred_direct_motion(h, &mb_type);
4604                 h->ref_cache[0][scan8[4]] =
4605                 h->ref_cache[1][scan8[4]] =
4606                 h->ref_cache[0][scan8[12]] =
4607                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4610             assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4612                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4613                 if(h->sub_mb_type[i] >=4){
4614                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4617                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4618                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4622         for(list=0; list<h->list_count; list++){
4623             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4625                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4626                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4627                     unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4629                         av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4641             dct8x8_allowed = get_dct8x8_allowed(h);
4643         for(list=0; list<h->list_count; list++){
4645                 if(IS_DIRECT(h->sub_mb_type[i])) {
4646                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4649                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4650                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4652                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4653                     const int sub_mb_type= h->sub_mb_type[i];
4654                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4655                     for(j=0; j<sub_partition_count[i]; j++){
4657                         const int index= 4*i + block_width*j;
4658                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4659                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4660                         mx += get_se_golomb(&s->gb);
4661                         my += get_se_golomb(&s->gb);
4662                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4664                         if(IS_SUB_8X8(sub_mb_type)){
4666                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4668                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4669                         }else if(IS_SUB_8X4(sub_mb_type)){
4670                             mv_cache[ 1 ][0]= mx;
4671                             mv_cache[ 1 ][1]= my;
4672                         }else if(IS_SUB_4X8(sub_mb_type)){
4673                             mv_cache[ 8 ][0]= mx;
4674                             mv_cache[ 8 ][1]= my;
4676                             mv_cache[ 0 ][0]= mx;
4677                             mv_cache[ 0 ][1]= my;
4680                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4686     }else if(IS_DIRECT(mb_type)){
4687         pred_direct_motion(h, &mb_type);
4688         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     /* --- inter, 16x16 / 16x8 / 8x16 partitions --- */
4690         int list, mx, my, i;
4691         //FIXME we should set ref_idx_l? to 0 if we use that later ...
4692         if(IS_16X16(mb_type)){
4693             for(list=0; list<h->list_count; list++){
4695                     if(IS_DIR(mb_type, 0, list)){
4696                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4697                         if(val >= h->ref_count[list]){
4698                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4702                     val= LIST_NOT_USED&0xFF;
4703                 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4705             for(list=0; list<h->list_count; list++){
4707                 if(IS_DIR(mb_type, 0, list)){
4708                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4709                     mx += get_se_golomb(&s->gb);
4710                     my += get_se_golomb(&s->gb);
4711                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4713                     val= pack16to32(mx,my);
4716                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4719         else if(IS_16X8(mb_type)){
4720             for(list=0; list<h->list_count; list++){
4723                         if(IS_DIR(mb_type, i, list)){
4724                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4725                             if(val >= h->ref_count[list]){
4726                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4730                             val= LIST_NOT_USED&0xFF;
4731                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4734             for(list=0; list<h->list_count; list++){
4737                     if(IS_DIR(mb_type, i, list)){
4738                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4739                         mx += get_se_golomb(&s->gb);
4740                         my += get_se_golomb(&s->gb);
4741                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4743                         val= pack16to32(mx,my);
4746                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4750             assert(IS_8X16(mb_type));
4751             for(list=0; list<h->list_count; list++){
4754                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4755                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4756                             if(val >= h->ref_count[list]){
4757                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4761                             val= LIST_NOT_USED&0xFF;
4762                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4765             for(list=0; list<h->list_count; list++){
4768                     if(IS_DIR(mb_type, i, list)){
4769                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4770                         mx += get_se_golomb(&s->gb);
4771                         my += get_se_golomb(&s->gb);
4772                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4774                         val= pack16to32(mx,my);
4777                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4783     if(IS_INTER(mb_type))
4784         write_back_motion(h, mb_type);
     /* --- coded_block_pattern + optional transform_size_8x8 flag --- */
4786     if(!IS_INTRA16x16(mb_type)){
4787         cbp= get_ue_golomb(&s->gb);
4789             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4793         if(IS_INTRA4x4(mb_type))
4794             cbp= golomb_to_intra4x4_cbp[cbp];
4796             cbp= golomb_to_inter_cbp[cbp];
4800     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4801         if(get_bits1(&s->gb))
4802             mb_type |= MB_TYPE_8x8DCT;
4804     s->current_picture.mb_type[mb_xy]= mb_type;
     /* --- residuals: mb_qp_delta, then luma and chroma coefficients --- */
4806     if(cbp || IS_INTRA16x16(mb_type)){
4807         int i8x8, i4x4, chroma_idx;
4809         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4810         const uint8_t *scan, *scan8x8, *dc_scan;
4812 //        fill_non_zero_count_cache(h);
4814         if(IS_INTERLACED(mb_type)){
4815             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4816             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4817             dc_scan= luma_dc_field_scan;
4819             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4820             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4821             dc_scan= luma_dc_zigzag_scan;
4824         dquant= get_se_golomb(&s->gb);
4826         if( dquant > 25 || dquant < -26 ){
4827             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4831         s->qscale += dquant;
4832         if(((unsigned)s->qscale) > 51){
4833             if(s->qscale<0) s->qscale+= 52;
4834             else            s->qscale-= 52;
4837         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4838         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4839         if(IS_INTRA16x16(mb_type)){
4840             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4841                 return -1; //FIXME continue if partitioned and other return -1 too
4844             assert((cbp&15) == 0 || (cbp&15) == 15);
4847                 for(i8x8=0; i8x8<4; i8x8++){
4848                     for(i4x4=0; i4x4<4; i4x4++){
4849                         const int index= i4x4 + 4*i8x8;
4850                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4856                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4859             for(i8x8=0; i8x8<4; i8x8++){
4860                 if(cbp & (1<<i8x8)){
4861                     if(IS_8x8DCT(mb_type)){
4862                         DCTELEM *buf = &h->mb[64*i8x8];
4864                         for(i4x4=0; i4x4<4; i4x4++){
4865                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4866                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4869                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4870                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4872                         for(i4x4=0; i4x4<4; i4x4++){
4873                             const int index= i4x4 + 4*i8x8;
4875                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4881                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4882                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4888             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4889                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4895             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4896                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4897                 for(i4x4=0; i4x4<4; i4x4++){
4898                     const int index= 16 + 4*chroma_idx + i4x4;
4899                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4905         uint8_t * const nnz= &h->non_zero_count_cache[0];
4906         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4907         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4910         uint8_t * const nnz= &h->non_zero_count_cache[0];
4911         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4912         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4913         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4915     s->current_picture.qscale_table[mb_xy]= s->qscale;
4916     write_back_non_zero_count(h);
     /* undo the MBAFF ref_count doubling done above */
4919         h->ref_count[0] >>= 1;
4920         h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag with CABAC; the context (0..2) counts
 * how many of the left/top neighbouring MB pairs (same slice) are
 * field-coded. Sparse listing: the ctx++ statements inside the two ifs
 * are not visible here.
 */
4926 static int decode_cabac_field_decoding_flag(H264Context *h) {
4927     MpegEncContext * const s = &h->s;
4928     const int mb_x = s->mb_x;
4929     const int mb_y = s->mb_y & ~1; /* top MB of the MBAFF pair */
4930     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
4931     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
4933     unsigned int ctx = 0;
4935     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4938     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4942     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC: 0 = I_4x4, 25 = I_PCM,
 * 1..24 = I_16x16 variants built from cbp_luma/cbp_chroma and the
 * 2-bit prediction mode. intra_slice selects the context layout.
 */
4945 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4946     uint8_t *state= &h->cabac_state[ctx_base];
4950         MpegEncContext * const s = &h->s;
4951         const int mba_xy = h->left_mb_xy[0];
4952         const int mbb_xy = h->top_mb_xy;
         /* context from whether the left/top neighbours are non-I4x4 */
4954         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4956         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4958         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4959             return 0;   /* I4x4 */
4962         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4963             return 0;   /* I4x4 */
4966     if( get_cabac_terminate( &h->cabac ) )
4967         return 25;  /* PCM */
4969     mb_type = 1; /* I16x16 */
4970     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4971     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4972         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4973     mb_type +=  2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4974     mb_type +=  1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC for I, P and B slices, delegating intra
 * types to decode_cabac_intra_mb_type(). Sparse listing: the final
 * SI/SP fallback return is not visible here.
 */
4978 static int decode_cabac_mb_type( H264Context *h ) {
4979     MpegEncContext * const s = &h->s;
4981     if( h->slice_type == I_TYPE ) {
4982         return decode_cabac_intra_mb_type(h, 3, 1);
4983     } else if( h->slice_type == P_TYPE ) {
4984         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4986             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4987                 /* P_L0_D16x16, P_8x8 */
4988                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4990                 /* P_L0_D8x16, P_L0_D16x8 */
4991                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4994             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4996     } else if( h->slice_type == B_TYPE ) {
4997         const int mba_xy = h->left_mb_xy[0];
4998         const int mbb_xy = h->top_mb_xy;
         /* context from non-direct neighbours in the same slice */
5002         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5004         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5007         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5008             return 0; /* B_Direct_16x16 */
5010         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5011             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
         /* 4-bit suffix selects among the remaining B types */
5014         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5015         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5016         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5017         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5019             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5020         else if( bits == 13 ) {
5021             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5022         } else if( bits == 14 )
5023             return 11; /* B_L1_L0_8x16 */
5024         else if( bits == 15 )
5025             return 22; /* B_8x8 */
5027         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5028         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5030         /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC; the context counts the non-skipped
 * left/top neighbours. For MBAFF the neighbour addresses are adjusted
 * to the MB of matching field/frame parity. B slices use a separate
 * context base (visible as the ctx offset before the final read).
 */
5035 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5036     MpegEncContext * const s = &h->s;
5040     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5041         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5044                     && h->slice_table[mba_xy] == h->slice_num
5045                     && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5046             mba_xy += s->mb_stride;
5048             mbb_xy = mb_xy - s->mb_stride;
5050                         && h->slice_table[mbb_xy] == h->slice_num
5051                         && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5052                 mbb_xy -= s->mb_stride;
5054             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5056         int mb_xy = mb_x + mb_y*s->mb_stride;
5058         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5061     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5063     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5066     if( h->slice_type == B_TYPE )
5068     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra4x4 prediction mode: one bin for "use predicted
 * mode", otherwise a 3-bit rem_intra4x4_pred_mode adjusted against the
 * predicted mode.
 */
5071 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5074     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5077     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5078     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5079     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5081     if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC: context from non-zero
 * neighbour modes, then up to two more bins select modes 1..3
 * (truncated unary). Sparse listing: the intermediate returns for
 * modes 0..2 are not visible here.
 */
5087 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5088     const int mba_xy = h->left_mb_xy[0];
5089     const int mbb_xy = h->top_mb_xy;
5093     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5094     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5097     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5100     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5103     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5105     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC. Each bit's
 * context is derived from the corresponding 8x8 block of the left/top
 * neighbour cbp (-1 when the neighbour is outside the slice, which
 * makes the !(...) tests evaluate to 0).
 */
5111 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5112     int cbp_b, cbp_a, ctx, cbp = 0;
5114     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5115     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
5117     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5118     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5119     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
5120     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5121     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
5122     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5123     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
5124     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern (0, 1 or 2) with CABAC,
 * using the neighbours' chroma cbp (bits 4-5 of their cbp) as context.
 */
5127 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5131     cbp_a = (h->left_cbp>>4)&0x03;
5132     cbp_b = (h-> top_cbp>>4)&0x03;
5135     if( cbp_a > 0 ) ctx++;
5136     if( cbp_b > 0 ) ctx += 2;
5137     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5141     if( cbp_a == 2 ) ctx++;
5142     if( cbp_b == 2 ) ctx += 2;
5143     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC: unary-coded magnitude, capped at 102
 * to prevent an infinite loop, then mapped to a signed delta
 * (odd val -> positive, even -> negative per the visible return).
 */
5145 static int decode_cabac_mb_dqp( H264Context *h) {
5149     if( h->last_qscale_diff != 0 )
5152     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5158         if(val > 102) //prevent infinite loop
5165         return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type (tree over contexts 21-23).
 * Sparse listing: the leaf return values are not visible here.
 */
5167 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5168     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5170     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5172     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type (tree over contexts 36-39), returning
 * the index into the B sub-MB type table (0 = B_Direct_8x8).
 */
5176 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5178     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5179         return 0;   /* B_Direct_8x8 */
5180     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5181         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5183     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5184         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5185             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5188     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5189     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
/** Decodes transform_size_8x8_flag; context = count of 8x8 neighbours. */
5193 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5194     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC; context from the left/top
 * cached ref indices (direct-predicted neighbours count as 0 in B
 * slices). The unary loop is bounded: refs >= 32 are treated as a
 * bitstream error and clamped to 0.
 */
5197 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5198     int refa = h->ref_cache[list][scan8[n] - 1];
5199     int refb = h->ref_cache[list][scan8[n] - 8];
5203     if( h->slice_type == B_TYPE) {
5204         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5206         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5215     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5221     if(ref >= 32 /*h->ref_list[list]*/){
5222         av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5223         return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component (l: 0=x, 1=y) with
 * CABAC: context from the sum of neighbour mvd magnitudes, unary bins
 * up to 9, then an exp-Golomb-style bypass suffix (overflow-guarded),
 * and finally a bypass sign bit.
 */
5229 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5230     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5231                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5232     int ctxbase = (l == 0) ? 40 : 47;
5237     else if( amvd > 32 )
5242     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5247     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5255         while( get_cabac_bypass( &h->cabac ) ) {
5259                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5264             if( get_cabac_bypass( &h->cabac ) )
5268     return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context for block category cat
 * (0 = luma DC, 1/2 = luma AC/4x4, 3 = chroma DC, else chroma AC) from
 * the left/top non-zero information, offset by 4 per category.
 */
5271 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5276         nza = h->left_cbp&0x100;
5277         nzb = h-> top_cbp&0x100;
5278     } else if( cat == 1 || cat == 2 ) {
5279         nza = h->non_zero_count_cache[scan8[idx] - 1];
5280         nzb = h->non_zero_count_cache[scan8[idx] - 8];
5281     } else if( cat == 3 ) {
5282         nza = (h->left_cbp>>(6+idx))&0x01;
5283         nzb = (h-> top_cbp>>(6+idx))&0x01;
5286         nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5287         nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5296     return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the CABAC context offset used
 * for last_significant_coeff_flag in 8x8 blocks. */
5299 DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
5300     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5301     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5302     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5303     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decode one CABAC-coded residual block: coded_block_flag, the significance
 * map (significant/last flags), then the coefficient levels and signs in
 * reverse scan order. Decoded levels are dequantized through qmul (when
 * non-NULL) and written into block[] at the positions given by scantable.
 * Side effects: updates non_zero_count_cache and/or cbp_table per category.
 * @param block     output DCTELEM array for this block
 * @param cat       block category (0..5, see the table in the body)
 * @param n         block index, meaning depends on cat
 * @param scantable zigzag/field scan mapping coeff index -> block position
 * @param qmul      dequant table, or NULL for DC categories (no dequant here)
 * @param max_coeff number of coefficients in this block type (4/15/16/64)
 */
5306 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5307 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
     /* context bases for significant_coeff_flag, indexed [MB_FIELD][cat] */
5308 static const int significant_coeff_flag_offset[2][6] = {
5309 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5310 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
     /* context bases for last_significant_coeff_flag, indexed [MB_FIELD][cat] */
5312 static const int last_coeff_flag_offset[2][6] = {
5313 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5314 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
     /* context bases for coeff_abs_level_minus1, indexed [cat] */
5316 static const int coeff_abs_level_m1_offset[6] = {
5317 227+0, 227+10, 227+20, 227+30, 227+39, 426
     /* per-scan-position context offsets for 8x8 significance,
      * [0] = frame coding, [1] = field coding */
5319 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5320 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5321 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5322 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5323 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5324 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5325 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5326 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5327 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5333 int coeff_count = 0;
5336 int abslevelgt1 = 0;
5338 uint8_t *significant_coeff_ctx_base;
5339 uint8_t *last_coeff_ctx_base;
5340 uint8_t *abs_level_m1_ctx_base;
     /* keep a local copy of the CABAC state on the stack so the hot loop
      * avoids repeated indirection through h->cabac */
5343 #define CABAC_ON_STACK
5345 #ifdef CABAC_ON_STACK
5348 cc.range = h->cabac.range;
5349 cc.low = h->cabac.low;
5350 cc.bytestream= h->cabac.bytestream;
5352 #define CC &h->cabac
5356 /* cat: 0-> DC 16x16 n = 0
5357 * 1-> AC 16x16 n = luma4x4idx
5358 * 2-> Luma4x4 n = luma4x4idx
5359 * 3-> DC Chroma n = iCbCr
5360 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5361 * 5-> Luma8x8 n = 4 * luma8x8idx
5364 /* read coded block flag */
5366 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
     /* block entirely zero: record it and bail out early */
5367 if( cat == 1 || cat == 2 )
5368 h->non_zero_count_cache[scan8[n]] = 0;
5370 h->non_zero_count_cache[scan8[16+n]] = 0;
5371 #ifdef CABAC_ON_STACK
     /* write the local CABAC state back before returning */
5372 h->cabac.range = cc.range ;
5373 h->cabac.low = cc.low ;
5374 h->cabac.bytestream= cc.bytestream;
5380 significant_coeff_ctx_base = h->cabac_state
5381 + significant_coeff_flag_offset[MB_FIELD][cat];
5382 last_coeff_ctx_base = h->cabac_state
5383 + last_coeff_flag_offset[MB_FIELD][cat];
5384 abs_level_m1_ctx_base = h->cabac_state
5385 + coeff_abs_level_m1_offset[cat];
     /* significance-map loop: for each scan position decode the
      * significant flag and, if set, the "last coefficient" flag;
      * positions of significant coefficients are collected in index[] */
5388 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5389 for(last= 0; last < coefs; last++) { \
5390 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5391 if( get_cabac( CC, sig_ctx )) { \
5392 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5393 index[coeff_count++] = last; \
5394 if( get_cabac( CC, last_ctx ) ) { \
5400 if( last == max_coeff -1 ) {\
5401 index[coeff_count++] = last;\
5403 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
     /* hand-written x86 asm fast paths when available */
5404 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5405 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5407 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5409 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5411 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5414 assert(coeff_count > 0);
     /* record the non-zero status per category (see get_cabac_cbf_ctx) */
5417 h->cbp_table[mb_xy] |= 0x100;
5418 else if( cat == 1 || cat == 2 )
5419 h->non_zero_count_cache[scan8[n]] = coeff_count;
5421 h->cbp_table[mb_xy] |= 0x40 << n;
5423 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5426 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
     /* level decoding, in reverse scan order as required by the spec */
5429 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5430 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5431 int j= scantable[index[coeff_count]];
5433 if( get_cabac( CC, ctx ) == 0 ) {
     /* |level| == 1: only the sign remains */
5435 block[j] = get_cabac_bypass_sign( CC, -1);
5437 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
     /* |level| > 1: unary part with contexts 5..9 of the set */
5443 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5444 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5448 if( coeff_abs >= 15 ) {
     /* escape: exp-Golomb suffix decoded in bypass mode */
5450 while( get_cabac_bypass( CC ) ) {
5456 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5462 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5463 else block[j] = coeff_abs;
5465 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5466 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5472 #ifdef CABAC_ON_STACK
     /* flush the stack copy of the CABAC state back into the context */
5473 h->cabac.range = cc.range ;
5474 h->cabac.low = cc.low ;
5475 h->cabac.bytestream= cc.bytestream;
/**
 * Compute the mb_xy indices of the top and left neighbour macroblocks and
 * store them in h->top_mb_xy / h->left_mb_xy[0]. The plain frame-coded case
 * is the simple -stride / -1 offsets; for MBAFF the neighbours depend on
 * the field/frame coding of the current and neighbouring MB pairs.
 */
5480 static inline void compute_mb_neighbors(H264Context *h)
5482 MpegEncContext * const s = &h->s;
5483 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5484 h->top_mb_xy = mb_xy - s->mb_stride;
5485 h->left_mb_xy[0] = mb_xy - 1;
     /* MBAFF: work on MB pairs; pair_xy addresses the top MB of the pair */
5487 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5488 const int top_pair_xy = pair_xy - s->mb_stride;
5489 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5490 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5491 const int curr_mb_frame_flag = !MB_FIELD;
5492 const int bottom = (s->mb_y & 1);
5494 ? !curr_mb_frame_flag // bottom macroblock
5495 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5497 h->top_mb_xy -= s->mb_stride;
     /* if the left pair uses a different coding mode, the left neighbour
      * is the top MB of that pair */
5499 if (left_mb_frame_flag != curr_mb_frame_flag) {
5500 h->left_mb_xy[0] = pair_xy - 1;
5502 } else if (FIELD_PICTURE) {
     /* field pictures: rows are interleaved, so "top" is two rows up */
5503 h->top_mb_xy -= s->mb_stride;
5509 * decodes a macroblock
5510 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5512 static int decode_mb_cabac(H264Context *h) {
5513 MpegEncContext * const s = &h->s;
5514 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5515 int mb_type, partition_count, cbp = 0;
5516 int dct8x8_allowed= h->pps.transform_8x8_mode;
5518 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5520 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     /* ---- mb_skip_flag (only inter slices can skip) ---- */
5521 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5523 /* a skipped mb needs the aff flag from the following mb */
5524 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5525 predict_field_decoding_flag(h);
5526 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5527 skip = h->next_mb_skipped;
5529 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5530 /* read skip flags */
5532 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
     /* top MB of an MBAFF pair skipped: peek at the bottom MB's skip flag
      * so the pair's field/frame flag can be decided */
5533 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5534 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5535 if(h->next_mb_skipped)
5536 predict_field_decoding_flag(h);
5538 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
     /* skipped MB: reset per-MB state and return early */
5543 h->cbp_table[mb_xy] = 0;
5544 h->chroma_pred_mode_table[mb_xy] = 0;
5545 h->last_qscale_diff = 0;
5552 if( (s->mb_y&1) == 0 )
5554 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5556 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5558 h->prev_mb_skipped = 0;
     /* ---- mb_type ---- */
5560 compute_mb_neighbors(h);
5561 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5562 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
     /* translate the raw mb_type through the per-slice-type info tables */
5566 if( h->slice_type == B_TYPE ) {
5568 partition_count= b_mb_type_info[mb_type].partition_count;
5569 mb_type= b_mb_type_info[mb_type].type;
5572 goto decode_intra_mb;
5574 } else if( h->slice_type == P_TYPE ) {
5576 partition_count= p_mb_type_info[mb_type].partition_count;
5577 mb_type= p_mb_type_info[mb_type].type;
5580 goto decode_intra_mb;
5583 assert(h->slice_type == I_TYPE);
5585 partition_count = 0;
5586 cbp= i_mb_type_info[mb_type].cbp;
5587 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5588 mb_type= i_mb_type_info[mb_type].type;
5591 mb_type |= MB_TYPE_INTERLACED;
5593 h->slice_table[ mb_xy ]= h->slice_num;
     /* ---- I_PCM: raw samples follow in the bitstream ---- */
5595 if(IS_INTRA_PCM(mb_type)) {
5599 // We assume these blocks are very rare so we do not optimize it.
5600 // FIXME The two following lines get the bitstream position in the cabac
5601 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5602 ptr= h->cabac.bytestream;
5603 if(h->cabac.low&0x1) ptr--;
5605 if(h->cabac.low&0x1FF) ptr--;
5608 // The pixels are stored in the same order as levels in h->mb array.
5609 for(y=0; y<16; y++){
5610 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5611 for(x=0; x<16; x++){
5612 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5613 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5617 const int index= 256 + 4*(y&3) + 32*(y>>2);
5619 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5620 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5624 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5626 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5627 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
     /* restart the CABAC decoder after the raw PCM bytes */
5631 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5633 // All blocks are present
5634 h->cbp_table[mb_xy] = 0x1ef;
5635 h->chroma_pred_mode_table[mb_xy] = 0;
5636 // In deblocking, the quantizer is 0
5637 s->current_picture.qscale_table[mb_xy]= 0;
5638 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5639 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5640 // All coeffs are present
5641 memset(h->non_zero_count[mb_xy], 16, 16);
5642 s->current_picture.mb_type[mb_xy]= mb_type;
     /* MBAFF caches operate on doubled reference counts (per-field refs) */
5647 h->ref_count[0] <<= 1;
5648 h->ref_count[1] <<= 1;
5651 fill_caches(h, mb_type, 0);
     /* ---- prediction info: intra modes, or sub-mb types + refs + mvds ---- */
5653 if( IS_INTRA( mb_type ) ) {
5655 if( IS_INTRA4x4( mb_type ) ) {
5656 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5657 mb_type |= MB_TYPE_8x8DCT;
5658 for( i = 0; i < 16; i+=4 ) {
5659 int pred = pred_intra_mode( h, i );
5660 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5661 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5664 for( i = 0; i < 16; i++ ) {
5665 int pred = pred_intra_mode( h, i );
5666 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5668 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5671 write_back_intra_pred_mode(h);
5672 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5674 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5675 if( h->intra16x16_pred_mode < 0 ) return -1;
5677 h->chroma_pred_mode_table[mb_xy] =
5678 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5680 pred_mode= check_intra_pred_mode( h, pred_mode );
5681 if( pred_mode < 0 ) return -1;
5682 h->chroma_pred_mode= pred_mode;
5683 } else if( partition_count == 4 ) {
     /* 8x8 partitions: decode four sub_mb_types, then refs, then mvds */
5684 int i, j, sub_partition_count[4], list, ref[2][4];
5686 if( h->slice_type == B_TYPE ) {
5687 for( i = 0; i < 4; i++ ) {
5688 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5689 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5690 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5692 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5693 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5694 pred_direct_motion(h, &mb_type);
5695 h->ref_cache[0][scan8[4]] =
5696 h->ref_cache[1][scan8[4]] =
5697 h->ref_cache[0][scan8[12]] =
5698 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5699 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5700 for( i = 0; i < 4; i++ )
5701 if( IS_DIRECT(h->sub_mb_type[i]) )
5702 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5706 for( i = 0; i < 4; i++ ) {
5707 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5708 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5709 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
     /* reference indices for each non-direct 8x8 partition */
5713 for( list = 0; list < h->list_count; list++ ) {
5714 for( i = 0; i < 4; i++ ) {
5715 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5716 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5717 if( h->ref_count[list] > 1 )
5718 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5724 h->ref_cache[list][ scan8[4*i]+1 ]=
5725 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5730 dct8x8_allowed = get_dct8x8_allowed(h);
     /* motion vector differences per sub-partition */
5732 for(list=0; list<h->list_count; list++){
5734 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5735 if(IS_DIRECT(h->sub_mb_type[i])){
5736 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5740 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5741 const int sub_mb_type= h->sub_mb_type[i];
5742 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5743 for(j=0; j<sub_partition_count[i]; j++){
5746 const int index= 4*i + block_width*j;
5747 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5748 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5749 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5751 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5752 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5753 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
     /* replicate mv/mvd into the cache cells the sub-block covers */
5755 if(IS_SUB_8X8(sub_mb_type)){
5757 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5759 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5762 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5764 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5765 }else if(IS_SUB_8X4(sub_mb_type)){
5766 mv_cache[ 1 ][0]= mx;
5767 mv_cache[ 1 ][1]= my;
5769 mvd_cache[ 1 ][0]= mx - mpx;
5770 mvd_cache[ 1 ][1]= my - mpy;
5771 }else if(IS_SUB_4X8(sub_mb_type)){
5772 mv_cache[ 8 ][0]= mx;
5773 mv_cache[ 8 ][1]= my;
5775 mvd_cache[ 8 ][0]= mx - mpx;
5776 mvd_cache[ 8 ][1]= my - mpy;
5778 mv_cache[ 0 ][0]= mx;
5779 mv_cache[ 0 ][1]= my;
5781 mvd_cache[ 0 ][0]= mx - mpx;
5782 mvd_cache[ 0 ][1]= my - mpy;
5785 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5786 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5787 p[0] = p[1] = p[8] = p[9] = 0;
5788 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5792 } else if( IS_DIRECT(mb_type) ) {
5793 pred_direct_motion(h, &mb_type);
5794 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5795 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5796 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5798 int list, mx, my, i, mpx, mpy;
     /* whole-MB partitions: 16x16, 16x8, 8x16 */
5799 if(IS_16X16(mb_type)){
5800 for(list=0; list<h->list_count; list++){
5801 if(IS_DIR(mb_type, 0, list)){
5802 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5803 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5805 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5807 for(list=0; list<h->list_count; list++){
5808 if(IS_DIR(mb_type, 0, list)){
5809 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5811 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5812 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5813 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5815 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5816 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5818 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5821 else if(IS_16X8(mb_type)){
5822 for(list=0; list<h->list_count; list++){
5824 if(IS_DIR(mb_type, i, list)){
5825 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5826 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5828 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5831 for(list=0; list<h->list_count; list++){
5833 if(IS_DIR(mb_type, i, list)){
5834 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5835 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5836 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5837 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5839 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5840 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5842 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5843 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5848 assert(IS_8X16(mb_type));
5849 for(list=0; list<h->list_count; list++){
5851 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5852 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5853 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5855 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5858 for(list=0; list<h->list_count; list++){
5860 if(IS_DIR(mb_type, i, list)){
5861 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5862 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5863 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5865 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5866 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5867 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5869 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5870 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5877 if( IS_INTER( mb_type ) ) {
5878 h->chroma_pred_mode_table[mb_xy] = 0;
5879 write_back_motion( h, mb_type );
     /* ---- coded_block_pattern (intra16x16 carries its cbp in mb_type) ---- */
5882 if( !IS_INTRA16x16( mb_type ) ) {
5883 cbp = decode_cabac_mb_cbp_luma( h );
5884 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5887 h->cbp_table[mb_xy] = h->cbp = cbp;
5889 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5890 if( decode_cabac_mb_transform_size( h ) )
5891 mb_type |= MB_TYPE_8x8DCT;
5893 s->current_picture.mb_type[mb_xy]= mb_type;
     /* ---- residuals: dqp, then luma/chroma coefficient blocks ---- */
5895 if( cbp || IS_INTRA16x16( mb_type ) ) {
5896 const uint8_t *scan, *scan8x8, *dc_scan;
5897 const uint32_t *qmul;
5900 if(IS_INTERLACED(mb_type)){
5901 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5902 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5903 dc_scan= luma_dc_field_scan;
5905 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5906 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5907 dc_scan= luma_dc_zigzag_scan;
5910 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5911 if( dqp == INT_MIN ){
5912 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
     /* qscale wraps modulo 52 per the spec's mb_qp_delta handling */
5916 if(((unsigned)s->qscale) > 51){
5917 if(s->qscale<0) s->qscale+= 52;
5918 else s->qscale-= 52;
5920 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5921 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5923 if( IS_INTRA16x16( mb_type ) ) {
5925 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5926 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5929 qmul = h->dequant4_coeff[0][s->qscale];
5930 for( i = 0; i < 16; i++ ) {
5931 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5932 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5935 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5939 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5940 if( cbp & (1<<i8x8) ) {
5941 if( IS_8x8DCT(mb_type) ) {
5942 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5943 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5945 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5946 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5947 const int index = 4*i8x8 + i4x4;
5948 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5950 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5951 //STOP_TIMER("decode_residual")
5955 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5956 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5963 for( c = 0; c < 2; c++ ) {
5964 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5965 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5971 for( c = 0; c < 2; c++ ) {
5972 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5973 for( i = 0; i < 4; i++ ) {
5974 const int index = 16 + 4 * c + i;
5975 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5976 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5980 uint8_t * const nnz= &h->non_zero_count_cache[0];
5981 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5982 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
     /* no residual at all: clear the whole nnz cache and reset dqp */
5985 uint8_t * const nnz= &h->non_zero_count_cache[0];
5986 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5987 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5988 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5989 h->last_qscale_diff = 0;
5992 s->current_picture.qscale_table[mb_xy]= s->qscale;
5993 write_back_non_zero_count(h);
     /* undo the MBAFF ref_count doubling done before fill_caches() */
5996 h->ref_count[0] >>= 1;
5997 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge (filtering horizontally across columns).
 * For bS < 4 the DSP loop filter with per-4-line tc0 values is used; the
 * bS == 4 (intra) strong filter is done in C here.
 * @param pix    pointer to the first row of the edge (samples at pix[-1]/pix[0])
 * @param bS     boundary strengths for the four 4-sample segments
 * @param qp     average QP used to index the alpha/beta/tc0 tables
 */
6004 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
     /* tables are biased by +52 so negative offsets index validly */
6006 const int index_a = qp + h->slice_alpha_c0_offset;
6007 const int alpha = (alpha_table+52)[index_a];
6008 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6013 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6014 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6016 /* 16px edge length, because bS=4 is triggered by being at
6017 * the edge of an intra MB, so all 4 bS are the same */
6018 for( d = 0; d < 16; d++ ) {
6019 const int p0 = pix[-1];
6020 const int p1 = pix[-2];
6021 const int p2 = pix[-3];
6023 const int q0 = pix[0];
6024 const int q1 = pix[1];
6025 const int q2 = pix[2];
     /* filter only if the edge gradient is below the alpha/beta thresholds */
6027 if( FFABS( p0 - q0 ) < alpha &&
6028 FFABS( p1 - p0 ) < beta &&
6029 FFABS( q1 - q0 ) < beta ) {
     /* strong (3-tap each side) filtering when the step is small enough */
6031 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6032 if( FFABS( p2 - p0 ) < beta)
6034 const int p3 = pix[-4];
6036 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6037 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6038 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6041 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6043 if( FFABS( q2 - q0 ) < beta)
6045 const int q3 = pix[3];
6047 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6048 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6049 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6052 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
     /* weak fallback: only p0/q0 are modified */
6056 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6057 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6059 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblock one vertical chroma edge. Both the bS < 4 and the intra
 * (bS == 4) cases are delegated to the DSP chroma loop filters.
 * @param bS boundary strengths for the four segments (two rows each)
 * @param qp chroma QP used to index the alpha/beta/tc0 tables
 */
6065 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6067 const int index_a = qp + h->slice_alpha_c0_offset;
6068 const int alpha = (alpha_table+52)[index_a];
6069 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
     /* chroma uses tc0+1; 0 marks "do not filter" for the DSP routine */
6074 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6075 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6077 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock the vertical luma edge of an MBAFF macroblock pair, one row at a
 * time, because the boundary strength and QP can differ per row (8 bS
 * entries, 2 QPs). Same filter math as filter_mb_edgev, applied scalar.
 * @param bS eight boundary strengths (selection depends on field/frame mode)
 * @param qp two QPs, one per neighbouring MB of the pair
 */
6081 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6083 for( i = 0; i < 16; i++, pix += stride) {
6089 int bS_index = (i >> 1);
6092 bS_index |= (i & 1);
6095 if( bS[bS_index] == 0 ) {
     /* pick the QP of the MB this row belongs to */
6099 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6100 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6101 alpha = (alpha_table+52)[index_a];
6102 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6104 if( bS[bS_index] < 4 ) {
     /* normal filtering: clip the delta to +-tc */
6105 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6106 const int p0 = pix[-1];
6107 const int p1 = pix[-2];
6108 const int p2 = pix[-3];
6109 const int q0 = pix[0];
6110 const int q1 = pix[1];
6111 const int q2 = pix[2];
6113 if( FFABS( p0 - q0 ) < alpha &&
6114 FFABS( p1 - p0 ) < beta &&
6115 FFABS( q1 - q0 ) < beta ) {
6119 if( FFABS( p2 - p0 ) < beta ) {
6120 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6123 if( FFABS( q2 - q0 ) < beta ) {
6124 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6128 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6129 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6130 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6131 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
     /* bS == 4: strong intra filtering, same formulas as filter_mb_edgev */
6134 const int p0 = pix[-1];
6135 const int p1 = pix[-2];
6136 const int p2 = pix[-3];
6138 const int q0 = pix[0];
6139 const int q1 = pix[1];
6140 const int q2 = pix[2];
6142 if( FFABS( p0 - q0 ) < alpha &&
6143 FFABS( p1 - p0 ) < beta &&
6144 FFABS( q1 - q0 ) < beta ) {
6146 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6147 if( FFABS( p2 - p0 ) < beta)
6149 const int p3 = pix[-4];
6151 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6152 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6153 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6156 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6158 if( FFABS( q2 - q0 ) < beta)
6160 const int q3 = pix[3];
6162 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6163 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6164 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6167 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6171 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6172 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6174 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock the vertical chroma edge of an MBAFF macroblock pair, scalar,
 * one row at a time (chroma edge is 8 rows). Chroma only ever modifies
 * p0/q0; bS == 4 uses the bilinear intra formula.
 * @param bS eight boundary strengths shared with the luma edge
 * @param qp two chroma QPs, one per neighbouring MB of the pair
 */
6179 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6181 for( i = 0; i < 8; i++, pix += stride) {
6189 if( bS[bS_index] == 0 ) {
     /* pick the QP of the MB this row belongs to */
6193 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6194 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6195 alpha = (alpha_table+52)[index_a];
6196 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6198 if( bS[bS_index] < 4 ) {
     /* chroma normal filter uses tc0+1 as the clipping bound */
6199 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6200 const int p0 = pix[-1];
6201 const int p1 = pix[-2];
6202 const int q0 = pix[0];
6203 const int q1 = pix[1];
6205 if( FFABS( p0 - q0 ) < alpha &&
6206 FFABS( p1 - p0 ) < beta &&
6207 FFABS( q1 - q0 ) < beta ) {
6208 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6210 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6211 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6212 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
     /* bS == 4: intra strong filter for chroma */
6215 const int p0 = pix[-1];
6216 const int p1 = pix[-2];
6217 const int q0 = pix[0];
6218 const int q1 = pix[1];
6220 if( FFABS( p0 - q0 ) < alpha &&
6221 FFABS( p1 - p0 ) < beta &&
6222 FFABS( q1 - q0 ) < beta ) {
6224 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6225 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6226 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock one horizontal luma edge (filtering vertically across rows);
 * the horizontal counterpart of filter_mb_edgev. Samples are addressed
 * at multiples of pix_next (== stride) instead of +-1.
 * @param pix pointer to the first column of the edge row
 * @param bS  boundary strengths for the four 4-sample segments
 * @param qp  average QP used to index the alpha/beta/tc0 tables
 */
6232 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6234 const int index_a = qp + h->slice_alpha_c0_offset;
6235 const int alpha = (alpha_table+52)[index_a];
6236 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6237 const int pix_next = stride;
6242 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6243 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6245 /* 16px edge length, see filter_mb_edgev */
6246 for( d = 0; d < 16; d++ ) {
6247 const int p0 = pix[-1*pix_next];
6248 const int p1 = pix[-2*pix_next];
6249 const int p2 = pix[-3*pix_next];
6250 const int q0 = pix[0];
6251 const int q1 = pix[1*pix_next];
6252 const int q2 = pix[2*pix_next];
6254 if( FFABS( p0 - q0 ) < alpha &&
6255 FFABS( p1 - p0 ) < beta &&
6256 FFABS( q1 - q0 ) < beta ) {
6258 const int p3 = pix[-4*pix_next];
6259 const int q3 = pix[ 3*pix_next];
     /* strong (3-tap each side) filter when the step is small enough */
6261 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6262 if( FFABS( p2 - p0 ) < beta) {
6264 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6265 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6266 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6269 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6271 if( FFABS( q2 - q0 ) < beta) {
6273 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6274 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6275 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6278 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
     /* weak fallback: only p0/q0 are modified */
6282 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6283 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6285 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblock one horizontal chroma edge; the horizontal counterpart of
 * filter_mb_edgecv. Both strength cases go through the DSP routines.
 * @param bS boundary strengths for the four segments
 * @param qp chroma QP used to index the alpha/beta/tc0 tables
 */
6292 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6294 const int index_a = qp + h->slice_alpha_c0_offset;
6295 const int alpha = (alpha_table+52)[index_a];
6296 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
     /* chroma uses tc0+1; 0 marks "do not filter" for the DSP routine */
6301 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6302 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6304 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking filter for one macroblock.
 * Falls back to the generic filter_mb() whenever a condition the fast path
 * does not handle applies: picture-border MBs, no asm strength helper,
 * per-PPS chroma QP difference, or deblocking mode 2 across slice borders.
 * NOTE(review): this extract is missing interior lines (embedded original
 * line numbers are non-contiguous), so some braces/else branches are absent.
 */
6308 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6309 MpegEncContext * const s = &h->s;
/* first row that has a row above it: row 0 for frames/top fields, row 1 for bottom fields */
6310 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6312 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6314 mb_xy = mb_x + mb_y*s->mb_stride;
/* cases the fast path cannot handle -> delegate to the full filter */
6316 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6317 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6318 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6319 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6322 assert(!FRAME_MBAFF);
6324 mb_type = s->current_picture.mb_type[mb_xy];
/* qp0/qp1: QP of the left/top neighbour; edge QP is the rounded average */
6325 qp = s->current_picture.qscale_table[mb_xy];
6326 qp0 = s->current_picture.qscale_table[mb_xy-1];
6327 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6328 qpc = get_chroma_qp( h, 0, qp );
6329 qpc0 = get_chroma_qp( h, 0, qp0 );
6330 qpc1 = get_chroma_qp( h, 0, qp1 );
6331 qp0 = (qp + qp0 + 1) >> 1;
6332 qp1 = (qp + qp1 + 1) >> 1;
6333 qpc0 = (qpc + qpc0 + 1) >> 1;
6334 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this threshold the filter is a no-op, so the whole MB can be skipped */
6335 qp_thresh = 15 - h->slice_alpha_c0_offset;
6336 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6337 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs use fixed boundary strengths: 4 on outer edges, 3 on inner ones */
6340 if( IS_INTRA(mb_type) ) {
6341 int16_t bS4[4] = {4,4,4,4};
6342 int16_t bS3[4] = {3,3,3,3};
/* the horizontal MB-border edge gets bS 3 instead of 4 in field pictures */
6343 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* with the 8x8 transform only every second internal edge is filtered */
6344 if( IS_8x8DCT(mb_type) ) {
6345 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6346 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6347 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6348 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6350 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6351 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6352 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6353 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6354 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6355 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6356 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6357 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma has half the resolution: only edges 0 and 2 exist */
6359 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6360 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6361 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6362 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6363 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6364 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6365 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6366 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: boundary strengths computed per 4-pixel group; bSv aliases bS
 * so a whole edge (4 x int16) can be set/tested as one uint64 */
6369 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6370 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6372 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6374 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: how often mv-based bS must be rechecked, derived from partitioning */
6376 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6377 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6378 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6379 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6381 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6382 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
/* asm/dsp helper fills bS for all internal edges in one call */
6383 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6384 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force maximum strength on the shared MB edge */
6386 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6387 bSv[0][0] = 0x0004000400040004ULL;
6388 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6389 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6391 #define FILTER(hv,dir,edge)\
6392 if(bSv[dir][edge]) {\
6393 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6395 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6396 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6402 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Generic (slow-path) deblocking filter for one macroblock.
 * Handles all cases filter_mb_fast() cannot: picture borders, MBAFF
 * frames, mixed field/frame neighbour pairs, and per-plane chroma QP
 * offsets. Filters vertical edges first (dir==0), then horizontal
 * (dir==1), computing a boundary strength bS per 4-pixel edge segment.
 * NOTE(review): interior lines are missing from this extract (embedded
 * original line numbers are non-contiguous).
 */
6421 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6422 MpegEncContext * const s = &h->s;
6423 const int mb_xy= mb_x + mb_y*s->mb_stride;
6424 const int mb_type = s->current_picture.mb_type[mb_xy];
/* vertical-mv threshold for bS: halved for interlaced MBs (field motion) */
6425 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6426 int first_vertical_edge_done = 0;
6428 /* FIXME: A given frame may occupy more than one position in
6429 * the reference list. So ref2frm should be populated with
6430 * frame numbers, not indices. */
/* maps ref_cache values (which start at -2 for "none"/"intra") to frames */
6431 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6432 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6434 //for sufficiently low qp, filtering wouldn't do anything
6435 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6437 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6438 int qp = s->current_picture.qscale_table[mb_xy];
6440 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6441 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: left neighbour pair has a different field/frame
 * coding than the current MB, so the first vertical edge needs 8 bS
 * values and 2 QPs instead of 4/1 */
6447 // left mb is in picture
6448 && h->slice_table[mb_xy-1] != 255
6449 // and current and left pair do not have the same interlaced type
6450 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6451 // and left mb is in the same slice if deblocking_filter == 2
6452 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6453 /* First vertical edge is different in MBAFF frames
6454 * There are 8 different bS to compute and 2 different Qp
6456 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6457 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6462 int mb_qp, mbn0_qp, mbn1_qp;
6464 first_vertical_edge_done = 1;
6466 if( IS_INTRA(mb_type) )
6467 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6469 for( i = 0; i < 8; i++ ) {
/* pick which of the two left MBs each 2-pixel segment borders on */
6470 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6472 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6474 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6475 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6476 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* one luma QP and one QP per chroma plane, averaged per left MB */
6483 mb_qp = s->current_picture.qscale_table[mb_xy];
6484 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6485 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6486 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6487 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6488 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6489 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6490 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6491 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6492 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6493 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6494 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6495 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6498 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6499 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6500 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6501 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6502 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6504 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6505 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbour across the MB border for this direction */
6508 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6509 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table 255 == neighbour outside any slice -> skip edge 0 */
6510 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6512 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6513 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6514 // how often to recheck mv-based bS when iterating between edges
6515 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6516 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6517 // how often to recheck mv-based bS when iterating along each edge
6518 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6520 if (first_vertical_edge_done) {
6522 first_vertical_edge_done = 0;
/* deblocking mode 2: never filter across slice boundaries */
6525 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6528 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6529 && !IS_INTERLACED(mb_type)
6530 && IS_INTERLACED(mbm_type)
6532 // This is a special case in the norm where the filtering must
6533 // be done twice (one each of the field) even if we are in a
6534 // frame macroblock.
6536 static const int nnz_idx[4] = {4,5,6,3};
6537 unsigned int tmp_linesize = 2 * linesize;
6538 unsigned int tmp_uvlinesize = 2 * uvlinesize;
/* step back a full MB pair to reach the field neighbours above */
6539 int mbn_xy = mb_xy - 2 * s->mb_stride;
6544 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6545 if( IS_INTRA(mb_type) ||
6546 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6547 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6549 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6550 for( i = 0; i < 4; i++ ) {
6551 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6552 mbn_nnz[nnz_idx[i]] != 0 )
6558 // Do not use s->qscale as luma quantizer because it has not the same
6559 // value in IPCM macroblocks.
6560 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6561 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6562 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6563 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6564 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6565 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6566 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6567 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main per-edge loop: edge 0 is the MB border, 1..3 are internal */
6574 for( edge = start; edge < edges; edge++ ) {
6575 /* mbn_xy: neighbor macroblock */
6576 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6577 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: odd internal edges do not exist, skip them */
6581 if( (edge&1) && IS_8x8DCT(mb_type) )
6584 if( IS_INTRA(mb_type) ||
6585 IS_INTRA(mbn_type) ) {
6588 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6589 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6598 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inter/inter edge: bS depends on nnz, refs and mv differences */
6603 if( edge & mask_edge ) {
6604 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6607 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6608 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6611 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6612 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6613 int bn_idx= b_idx - (dir ? 8:1);
6615 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6616 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6617 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6618 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6620 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4-pixel-group bS: 2 if either side has residual, else mv test */
6626 for( i = 0; i < 4; i++ ) {
6627 int x = dir == 0 ? edge : i;
6628 int y = dir == 0 ? i : edge;
6629 int b_idx= 8 + 4 + x + 8*y;
6630 int bn_idx= b_idx - (dir ? 8:1);
6632 if( h->non_zero_count_cache[b_idx] != 0 ||
6633 h->non_zero_count_cache[bn_idx] != 0 ) {
6639 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6640 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6641 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6642 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* all-zero bS -> nothing to filter on this edge */
6650 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6655 // Do not use s->qscale as luma quantizer because it has not the same
6656 // value in IPCM macroblocks.
6657 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6658 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6659 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6660 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6662 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
/* chroma edges exist only at even luma edge positions (4:2:0) */
6663 if( (edge&1) == 0 ) {
6664 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6665 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6666 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6667 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6670 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6671 if( (edge&1) == 0 ) {
6672 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6673 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6674 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6675 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decode all macroblocks of one slice.
 *
 * Dispatches to the CABAC or CAVLC macroblock decoder according to
 * pps.cabac, advances s->mb_x/s->mb_y, reports decoded/errored MB ranges
 * to the error resilience layer via ff_er_add_slice(), and draws each
 * finished MB row with ff_draw_horiz_band().
 *
 * @param avctx codec context (matches the avctx->execute() callback shape)
 * @param h     slice-thread H264 context to decode with
 * @return 0 on slice end, -1 on decode error (per the visible paths)
 *
 * Fix: original line 6840 was corrupted ("s->?gb" / "s->gb?.size_in_bits",
 * stray '?' characters = syntax error); reconstructed from the identical
 * bit-position checks on lines 6828/6841.
 * NOTE(review): interior lines are missing from this extract (embedded
 * original line numbers are non-contiguous).
 */
6682 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6683 MpegEncContext * const s = &h->s;
/* in partitioned frames only AC end/error states are meaningful */
6684 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6688 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header */
6692 align_get_bits( &s->gb );
6695 ff_init_cabac_states( &h->cabac);
6696 ff_init_cabac_decoder( &h->cabac,
6697 s->gb.buffer + get_bits_count(&s->gb)/8,
6698 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6699 /* calculate pre-state */
/* initialize all 460 context states from the init tables, scaled by QP */
6700 for( i= 0; i < 460; i++ ) {
6702 if( h->slice_type == I_TYPE )
6703 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6705 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: <=63 -> MPS 0, else MPS 1 */
6708 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6710 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6715 int ret = decode_mb_cabac(h);
6717 //STOP_TIMER("decode_mb_cabac")
6719 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair immediately after the top */
6721 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6724 if(ret>=0) ret = decode_mb_cabac(h);
6726 if(ret>=0) hl_decode_mb(h);
6729 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun of more than 2 bytes means corrupt input */
6731 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6732 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6733 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6737 if( ++s->mb_x >= s->mb_width ) {
6739 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6741 if(FIELD_OR_MBAFF_PICTURE) {
6746 if( eos || s->mb_y >= s->mb_height ) {
6747 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6748 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6755 int ret = decode_mb_cavlc(h);
6757 if(ret>=0) hl_decode_mb(h);
6759 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6761 ret = decode_mb_cavlc(h);
6763 if(ret>=0) hl_decode_mb(h);
6768 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d", s->mb_x, s->mb_y);
6769 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6774 if(++s->mb_x >= s->mb_width){
6776 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6778 if(FIELD_OR_MBAFF_PICTURE) {
6781 if(s->mb_y >= s->mb_height){
6782 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact bit-position match -> clean slice end, else mark as error */
6784 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6785 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6789 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* pending skip_run MBs may remain even at the end of the bitstream */
6796 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6797 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6798 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6799 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6803 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6812 for(;s->mb_y < s->mb_height; s->mb_y++){
6813 for(;s->mb_x < s->mb_width; s->mb_x++){
6814 int ret= decode_mb(h);
6819 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6820 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6825 if(++s->mb_x >= s->mb_width){
6827 if(++s->mb_y >= s->mb_height){
6828 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6829 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6833 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6840 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6841 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6842 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6846 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6853 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6856 return -1; //not reached
/**
 * Parse an SEI "user data unregistered" payload.
 * Reads up to the first sizeof(user_data)-1 bytes; if the payload matches
 * the x264 version banner, extracts the build number into h->x264_build
 * (used elsewhere for bug workarounds). Remaining payload bytes are skipped.
 *
 * @param size payload size in bytes
 */
6859 static int decode_unregistered_user_data(H264Context *h, int size){
6860 MpegEncContext * const s = &h->s;
/* 16-byte UUID prefix + up to 256 bytes of payload text */
6861 uint8_t user_data[16+256];
6867 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6868 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
6872 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6873 if(e==1 && build>=0)
6874 h->x264_build= build;
6876 if(s->avctx->debug & FF_DEBUG_BUGS)
6877 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond the local buffer */
6880 skip_bits(&s->gb, 8);
/**
 * Parse an SEI NAL unit.
 * SEI messages encode type and size as runs of 0xFF bytes plus a final
 * byte (each run byte adds 255). Only unregistered user data is decoded;
 * all other payload types are skipped.
 */
6885 static int decode_sei(H264Context *h){
6886 MpegEncContext * const s = &h->s;
/* need at least 2 more bytes (type + size) for another message */
6888 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate type: each 0xFF byte adds 255, final byte terminates */
6893 type+= show_bits(&s->gb, 8);
6894 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payload size the same way */
6898 size+= show_bits(&s->gb, 8);
6899 }while(get_bits(&s->gb, 8) == 255);
6903 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled payload type: skip it wholesale */
6907 skip_bits(&s->gb, 8*size);
6910 //FIXME check bits here
6911 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters from the VUI.
 * All fields are read and discarded: this decoder only needs to advance
 * the bitstream position past them.
 */
6917 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6918 MpegEncContext * const s = &h->s;
6920 cpb_count = get_ue_golomb(&s->gb) + 1;
6921 get_bits(&s->gb, 4); /* bit_rate_scale */
6922 get_bits(&s->gb, 4); /* cpb_size_scale */
/* one (bit_rate, cpb_size, cbr) triple per coded picture buffer */
6923 for(i=0; i<cpb_count; i++){
6924 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6925 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6926 get_bits1(&s->gb); /* cbr_flag */
6928 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6929 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6930 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6931 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parse VUI (video usability information) from an SPS.
 * Stores sample aspect ratio, timing info and bitstream-restriction data
 * into *sps; most other VUI fields are read and discarded.
 */
6934 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6935 MpegEncContext * const s = &h->s;
6936 int aspect_ratio_info_present_flag;
6937 unsigned int aspect_ratio_idc;
6938 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6940 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6942 if( aspect_ratio_info_present_flag ) {
6943 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den; otherwise a table lookup */
6944 if( aspect_ratio_idc == EXTENDED_SAR ) {
6945 sps->sar.num= get_bits(&s->gb, 16);
6946 sps->sar.den= get_bits(&s->gb, 16);
6947 }else if(aspect_ratio_idc < 14){
6948 sps->sar= pixel_aspect[aspect_ratio_idc];
6950 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6957 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6959 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6960 get_bits1(&s->gb); /* overscan_appropriate_flag */
6963 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6964 get_bits(&s->gb, 3); /* video_format */
6965 get_bits1(&s->gb); /* video_full_range_flag */
6966 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6967 get_bits(&s->gb, 8); /* colour_primaries */
6968 get_bits(&s->gb, 8); /* transfer_characteristics */
6969 get_bits(&s->gb, 8); /* matrix_coefficients */
6973 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6974 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6975 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* timing info is kept: it defines the stream's nominal frame rate */
6978 sps->timing_info_present_flag = get_bits1(&s->gb);
6979 if(sps->timing_info_present_flag){
6980 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6981 sps->time_scale = get_bits_long(&s->gb, 32);
6982 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6985 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6986 if(nal_hrd_parameters_present_flag)
6987 decode_hrd_parameters(h, sps);
6988 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6989 if(vcl_hrd_parameters_present_flag)
6990 decode_hrd_parameters(h, sps);
6991 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6992 get_bits1(&s->gb); /* low_delay_hrd_flag */
6993 get_bits1(&s->gb); /* pic_struct_present_flag */
6995 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6996 if(sps->bitstream_restriction_flag){
6997 unsigned int num_reorder_frames;
6998 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6999 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7000 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7001 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7002 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7003 num_reorder_frames= get_ue_golomb(&s->gb);
7004 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* bound check before storing: reorder depth drives output delay */
7006 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7007 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7011 sps->num_reorder_frames= num_reorder_frames;
/**
 * Parse one quantization scaling list (4x4 or 8x8).
 * If the list is absent from the bitstream, copies fallback_list; if the
 * first delta yields 0, uses the JVT default list; otherwise decodes
 * delta-coded values in zigzag order (a 0 delta repeats the last value).
 *
 * @param factors       output matrix, size entries, raster order
 * @param size          16 for 4x4 lists, 64 for 8x8 lists
 * @param jvt_list      spec-defined default list
 * @param fallback_list list used when this one is not transmitted
 */
7017 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7018 const uint8_t *jvt_list, const uint8_t *fallback_list){
7019 MpegEncContext * const s = &h->s;
7020 int i, last = 8, next = 8;
7021 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7022 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7023 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7025 for(i=0;i<size;i++){
/* deltas are signed Golomb, accumulated modulo 256 */
7027 next = (last + get_se_golomb(&s->gb)) & 0xff;
7028 if(!i && !next){ /* matrix not written, we use the preset one */
7029 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 (after i==0) means "repeat previous value" */
7032 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback order per the spec: each list falls back to the previous list
 * of the same class, the first ones to the SPS lists (for a PPS) or to
 * the JVT defaults (for an SPS). With no PPS-level lists present, the SPS
 * matrices are copied wholesale.
 *
 * @param is_sps nonzero when parsing an SPS (pps may then be NULL)
 */
7036 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7037 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7038 MpegEncContext * const s = &h->s;
/* a PPS may fall back to SPS lists only if the SPS transmitted any */
7039 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7040 const uint8_t *fallback[4] = {
7041 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7042 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7043 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7044 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7046 if(get_bits1(&s->gb)){
7047 sps->scaling_matrix_present |= is_sps;
7048 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7049 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7050 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7051 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7052 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7053 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only when the 8x8 transform can be used */
7054 if(is_sps || pps->transform_8x8_mode){
7055 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7056 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7058 } else if(fallback_sps) {
7059 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7060 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'.
 * Validates id against max, allocates a zeroed entry on first use, and
 * returns the (new or existing) entry; errors are logged and signalled
 * by the paths visible below.
 * NOTE(review): the return-type line is missing from this extract.
 */
7068 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7069 const size_t size, const char *name)
7072 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
/* lazily allocate: ids may arrive in any order */
7077 vec[id] = av_mallocz(size);
7079 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parse a sequence parameter set (SPS) NAL unit into h->sps_buffers.
 * Reads profile/level, POC configuration, reference frame count, picture
 * dimensions in MBs, field/MBAFF flags, cropping and optional VUI data.
 * Validation failures are logged; value ranges are bounds-checked before
 * being stored.
 * NOTE(review): interior lines are missing from this extract (embedded
 * original line numbers are non-contiguous).
 */
7084 static inline int decode_seq_parameter_set(H264Context *h){
7085 MpegEncContext * const s = &h->s;
7086 int profile_idc, level_idc;
7087 unsigned int sps_id, tmp, mb_width, mb_height;
7091 profile_idc= get_bits(&s->gb, 8);
7092 get_bits1(&s->gb); //constraint_set0_flag
7093 get_bits1(&s->gb); //constraint_set1_flag
7094 get_bits1(&s->gb); //constraint_set2_flag
7095 get_bits1(&s->gb); //constraint_set3_flag
7096 get_bits(&s->gb, 4); // reserved
7097 level_idc= get_bits(&s->gb, 8);
7098 sps_id= get_ue_golomb(&s->gb);
7100 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7104 sps->profile_idc= profile_idc;
7105 sps->level_idc= level_idc;
/* high profile adds chroma format, bit depth and scaling matrices */
7107 if(sps->profile_idc >= 100){ //high profile
7108 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7109 get_bits1(&s->gb); //residual_color_transform_flag
7110 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7111 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7112 sps->transform_bypass = get_bits1(&s->gb);
7113 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7115 sps->scaling_matrix_present = 0;
7117 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7118 sps->poc_type= get_ue_golomb(&s->gb);
7120 if(sps->poc_type == 0){ //FIXME #define
7121 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7122 } else if(sps->poc_type == 1){//FIXME #define
7123 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7124 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7125 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7126 tmp= get_ue_golomb(&s->gb);
/* bound check before using tmp as a loop count / array length */
7128 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7129 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7132 sps->poc_cycle_length= tmp;
7134 for(i=0; i<sps->poc_cycle_length; i++)
7135 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7136 }else if(sps->poc_type != 2){
7137 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7141 tmp= get_ue_golomb(&s->gb);
7142 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7143 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7146 sps->ref_frame_count= tmp;
7147 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7148 mb_width= get_ue_golomb(&s->gb) + 1;
7149 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard the 16*mb_* multiplications and the global dimension limits */
7150 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7151 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7152 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7155 sps->mb_width = mb_width;
7156 sps->mb_height= mb_height;
7158 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7159 if(!sps->frame_mbs_only_flag)
7160 sps->mb_aff= get_bits1(&s->gb);
7164 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7166 #ifndef ALLOW_INTERLACE
7168 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7170 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7171 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7173 sps->crop= get_bits1(&s->gb);
7175 sps->crop_left = get_ue_golomb(&s->gb);
7176 sps->crop_right = get_ue_golomb(&s->gb);
7177 sps->crop_top = get_ue_golomb(&s->gb);
7178 sps->crop_bottom= get_ue_golomb(&s->gb);
7179 if(sps->crop_left || sps->crop_top){
7180 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7186 sps->crop_bottom= 0;
7189 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7190 if( sps->vui_parameters_present_flag )
7191 decode_vui_parameters(h, sps);
7193 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7194 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7195 sps_id, sps->profile_idc, sps->level_idc,
7197 sps->ref_frame_count,
7198 sps->mb_width, sps->mb_height,
7199 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7200 sps->direct_8x8_inference_flag ? "8B8" : "",
7201 sps->crop_left, sps->crop_right,
7202 sps->crop_top, sps->crop_bottom,
7203 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Precompute the luma-QP -> chroma-QP lookup table for one chroma plane.
 *
 * @param t     chroma plane index (0 = Cb table, 1 = Cr table)
 * @param index chroma_qp_index_offset to apply before clipping to [0,51]
 *
 * NOTE(review): the loop bound is 255, so table entry [t][255] is never
 * written here — likely benign since QP values stay far below 255, but
 * confirm against the table's declared size before relying on it.
 */
7210 build_qp_table(PPS *pps, int t, int index)
7213 for(i = 0; i < 255; i++)
7214 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set (PPS) NAL unit into h->pps_buffers.
 * Reads entropy-coding mode, slice groups (FMO unsupported), reference
 * counts, weighting, QP offsets and the optional 8x8-transform extension,
 * then precomputes the chroma QP tables.
 *
 * @param bit_length size of the RBSP in bits, used to detect the
 *                   optional trailing (high-profile) fields
 * NOTE(review): interior lines are missing from this extract (embedded
 * original line numbers are non-contiguous).
 */
7217 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7218 MpegEncContext * const s = &h->s;
7219 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7222 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* referenced SPS must already have been received */
7226 tmp= get_ue_golomb(&s->gb);
7227 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7228 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7233 pps->cabac= get_bits1(&s->gb);
7234 pps->pic_order_present= get_bits1(&s->gb);
7235 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7236 if(pps->slice_group_count > 1 ){
7237 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7238 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* the syntax tables below document the unparsed FMO bitstream layout */
7239 switch(pps->mb_slice_group_map_type){
7242 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7243 | run_length[ i ] |1 |ue(v) |
7248 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7250 | top_left_mb[ i ] |1 |ue(v) |
7251 | bottom_right_mb[ i ] |1 |ue(v) |
7259 | slice_group_change_direction_flag |1 |u(1) |
7260 | slice_group_change_rate_minus1 |1 |ue(v) |
7265 | slice_group_id_cnt_minus1 |1 |ue(v) |
7266 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7268 | slice_group_id[ i ] |1 |u(v) |
7273 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7274 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7275 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7276 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
/* reset to a safe value so later code cannot index out of range */
7277 pps->ref_count[0]= pps->ref_count[1]= 1;
7281 pps->weighted_pred= get_bits1(&s->gb);
7282 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7283 pps->init_qp= get_se_golomb(&s->gb) + 26;
7284 pps->init_qs= get_se_golomb(&s->gb) + 26;
7285 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7286 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7287 pps->constrained_intra_pred= get_bits1(&s->gb);
7288 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7290 pps->transform_8x8_mode= 0;
7291 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default scaling matrices: flat value 16 everywhere */
7292 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7293 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* trailing bits remain -> high-profile PPS extension is present */
7295 if(get_bits_count(&s->gb) < bit_length){
7296 pps->transform_8x8_mode= get_bits1(&s->gb);
7297 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7298 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7300 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7303 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
/* distinct Cb/Cr offsets need a second table and disable the fast filter path */
7304 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7305 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7306 h->pps.chroma_qp_diff= 1;
7308 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7310 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7311 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7312 pps_id, pps->sps_id,
7313 pps->cabac ? "CABAC" : "CAVLC",
7314 pps->slice_group_count,
7315 pps->ref_count[0], pps->ref_count[1],
7316 pps->weighted_pred ? "weighted" : "",
7317 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7318 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7319 pps->constrained_intra_pred ? "CONSTR" : "",
7320 pps->redundant_pic_cnt_present ? "REDU" : "",
7321 pps->transform_8x8_mode ? "8x8DCT" : ""
/**
 * Call decode_slice() for each context.
 * Single-context case runs inline; otherwise per-thread contexts are
 * prepared and dispatched through avctx->execute(), after which decode
 * position and state are pulled back into the master context.
 * @param h h264 master context
 * @param context_count number of contexts to execute
 */
7334 static void execute_decode_slices(H264Context *h, int context_count){
7335 MpegEncContext * const s = &h->s;
7336 AVCodecContext * const avctx= s->avctx;
7340 if(context_count == 1) {
7341 decode_slice(avctx, h);
/* start at 1: context 0 is the master and needs no reinit */
7343 for(i = 1; i < context_count; i++) {
7344 hx = h->thread_context[i];
7345 hx->s.error_resilience = avctx->error_resilience;
7346 hx->s.error_count = 0;
7349 avctx->execute(avctx, (void *)decode_slice,
7350 (void **)h->thread_context, NULL, context_count);
7352 /* pull back stuff from slices to master context */
/* the last context holds the final decode position */
7353 hx = h->thread_context[context_count - 1];
7354 s->mb_x = hx->s.mb_x;
7355 s->mb_y = hx->s.mb_y;
7356 s->dropable = hx->s.dropable;
7357 s->picture_structure = hx->s.picture_structure;
7358 for(i = 1; i < context_count; i++)
7359 h->s.error_count += h->thread_context[i]->s.error_count;
7364 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
/* Splits the input buffer into NAL units -- length-prefixed when
 * h->is_avc is set, Annex-B start-code separated otherwise -- unescapes
 * each one via decode_nal() and dispatches on the NAL unit type.  Slice
 * NALs are queued on per-thread contexts and flushed in batches through
 * execute_decode_slices(). */
7365 MpegEncContext * const s = &h->s;
7366 AVCodecContext * const avctx= s->avctx;
7368 H264Context *hx; ///< thread context
7369 int context_count = 0;
7371 h->max_contexts = avctx->thread_count;
/* Debug hex dump of the start of the buffer. */
7374 for(i=0; i<50; i++){
7375 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Unless decoding arbitrary chunks, a new call starts a new access unit:
 * reset the slice counter and (for a new frame) the current picture. */
7378 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7379 h->current_slice = 0;
7380 if (!s->first_field)
7381 s->current_picture_ptr= NULL;
7393 if(buf_index >= buf_size) break;
/* AVC mode: read the big-endian NAL size prefix (nal_length_size bytes). */
7395 for(i = 0; i < h->nal_length_size; i++)
7396 nalsize = (nalsize << 8) | buf[buf_index++];
7397 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7402 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7407 // start code prefix search
7408 for(; buf_index + 3 < buf_size; buf_index++){
7409 // This should always succeed in the first iteration.
7410 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7414 if(buf_index+3 >= buf_size) break;
7419 hx = h->thread_context[context_count];
/* Unescape the RBSP (remove emulation-prevention bytes). */
7421 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7422 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes before computing the bit length.
 * NOTE(review): the operands short-circuit in the wrong order here --
 * ptr[dst_length - 1] is evaluated before dst_length > 0 is checked, so a
 * zero-length RBSP reads one byte before the buffer.  The conditions
 * should be swapped: while(dst_length > 0 && ptr[dst_length - 1] == 0). */
7425 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7427 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7429 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7430 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7433 if (h->is_avc && (nalsize != consumed))
7434 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7436 buf_index += consumed;
/* Skip non-reference NALs when the caller asked to discard them. */
7438 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7439 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7444 switch(hx->nal_unit_type){
/* IDR slice: must not be mixed with non-IDR slices in one access unit. */
7446 if (h->nal_unit_type != NAL_IDR_SLICE) {
7447 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7450 idr(h); //FIXME ensure we don't lose some frames if there is reordering
/* Regular (non-partitioned) slice: single bitstream reader. */
7452 init_get_bits(&hx->s.gb, ptr, bit_length);
7454 hx->inter_gb_ptr= &hx->s.gb;
7455 hx->s.data_partitioning = 0;
7457 if((err = decode_slice_header(hx, h)))
7460 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Queue the slice for decoding unless skip/hurry settings discard it. */
7461 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7462 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7463 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7464 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7465 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partition A: slice header + partition A data. */
7469 init_get_bits(&hx->s.gb, ptr, bit_length);
7471 hx->inter_gb_ptr= NULL;
7472 hx->s.data_partitioning = 1;
7474 err = decode_slice_header(hx, h);
/* Data partition B: intra coefficients. */
7477 init_get_bits(&hx->intra_gb, ptr, bit_length);
7478 hx->intra_gb_ptr= &hx->intra_gb;
/* Data partition C: inter coefficients. */
7481 init_get_bits(&hx->inter_gb, ptr, bit_length);
7482 hx->inter_gb_ptr= &hx->inter_gb;
/* A partitioned slice is only decodable once partition B has arrived. */
7484 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7485 && s->context_initialized
7487 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7488 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7489 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7490 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI message. */
7494 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set. */
7498 init_get_bits(&s->gb, ptr, bit_length);
7499 decode_seq_parameter_set(h);
7501 if(s->flags& CODEC_FLAG_LOW_DELAY)
7504 if(avctx->has_b_frames < 2)
7505 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set. */
7508 init_get_bits(&s->gb, ptr, bit_length);
7510 decode_picture_parameter_set(h, bit_length);
7514 case NAL_END_SEQUENCE:
7515 case NAL_END_STREAM:
7516 case NAL_FILLER_DATA:
7518 case NAL_AUXILIARY_SLICE:
7521 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush a full batch of queued slice contexts. */
7524 if(context_count == h->max_contexts) {
7525 execute_decode_slices(h, context_count);
7530 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7532 /* Slice could not be decoded in parallel mode, copy down
7533 * NAL unit stuff to context 0 and restart. Note that
7534 * rbsp_buffer is not transferred, but since we no longer
7535 * run in parallel mode this should not be an issue. */
7536 h->nal_unit_type = hx->nal_unit_type;
7537 h->nal_ref_idc = hx->nal_ref_idc;
/* Flush any remaining queued slices before returning. */
7543 execute_decode_slices(h, context_count);
7548 * returns the number of bytes consumed for building the current frame
7550 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* Computes the number of input bytes consumed for the current frame.
 * In CODEC_FLAG_TRUNCATED mode the parse-context's carried-over bytes
 * are subtracted; otherwise the position is clamped into (0, buf_size]
 * so the caller always makes forward progress. */
7551 if(s->flags&CODEC_FLAG_TRUNCATED){
7552 pos -= s->parse_context.last_index;
7553 if(pos<0) pos=0; // FIXME remove (unneeded?)
7557 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7558 if(pos+10>buf_size) pos=buf_size; // oops ;)
7564 static int decode_frame(AVCodecContext *avctx,
7565 void *data, int *data_size,
7566 const uint8_t *buf, int buf_size)
/* Decoder entry point.  Parses extradata (avcC or Annex-B) on first use,
 * decodes the NAL units of the packet, then selects which delayed
 * picture to output in display (POC) order.  Returns the number of
 * bytes consumed. */
7568 H264Context *h = avctx->priv_data;
7569 MpegEncContext *s = &h->s;
7570 AVFrame *pict = data;
7573 s->flags= avctx->flags;
7574 s->flags2= avctx->flags2;
7576 /* no supplementary picture */
7577 if (buf_size == 0) {
/* End of stream: drain the delayed-picture queue, lowest POC first
 * (up to the next keyframe). */
7581 //FIXME factorize this with the output code below
7582 out = h->delayed_pic[0];
7584 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7585 if(h->delayed_pic[i]->poc < out->poc){
7586 out = h->delayed_pic[i];
7590 for(i=out_idx; h->delayed_pic[i]; i++)
7591 h->delayed_pic[i] = h->delayed_pic[i+1];
7594 *data_size = sizeof(AVFrame);
7595 *pict= *(AVFrame*)out;
/* Truncated-input mode: accumulate data until a full frame is present. */
7601 if(s->flags&CODEC_FLAG_TRUNCATED){
7602 int next= ff_h264_find_frame_end(h, buf, buf_size);
7604 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7606 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* One-time parse of the avcC extradata (AVCDecoderConfigurationRecord). */
7609 if(h->is_avc && !h->got_avcC) {
7610 int i, cnt, nalsize;
7611 unsigned char *p = avctx->extradata;
7612 if(avctx->extradata_size < 7) {
7613 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7617 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7620 /* sps and pps in the avcC always have length coded with 2 bytes,
7621 so put a fake nal_length_size = 2 while parsing them */
7622 h->nal_length_size = 2;
7623 // Decode sps from avcC
7624 cnt = *(p+5) & 0x1f; // Number of sps
7626 for (i = 0; i < cnt; i++) {
7627 nalsize = AV_RB16(p) + 2;
7628 if(decode_nal_units(h, p, nalsize) < 0) {
7629 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7634 // Decode pps from avcC
7635 cnt = *(p++); // Number of pps
7636 for (i = 0; i < cnt; i++) {
7637 nalsize = AV_RB16(p) + 2;
/* NOTE(review): the pps loop checks != nalsize while the sps loop above
 * only checks < 0 -- presumably intentional leniency for sps, but the
 * asymmetry looks accidental; worth confirming. */
7638 if(decode_nal_units(h, p, nalsize) != nalsize) {
7639 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7644 // Now store right nal length size, that will be used to parse all other nals
7645 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7646 // Do not reparse avcC
/* Annex-B extradata (e.g. from raw .h264) is fed through once at start. */
7650 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7651 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7655 buf_index=decode_nal_units(h, buf, buf_size);
7659 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7660 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7661 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A full frame (or the last chunk of one) has been decoded: finish the
 * picture, update POC/frame_num state and pick the output picture. */
7665 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7666 Picture *out = s->current_picture_ptr;
7667 Picture *cur = s->current_picture_ptr;
7668 Picture *prev = h->delayed_output_pic;
7669 int i, pics, cross_idr, out_of_order, out_idx;
7673 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7674 s->current_picture_ptr->pict_type= s->pict_type;
/* Save POC/frame_num state needed to decode the next picture's POC. */
7676 h->prev_frame_num_offset= h->frame_num_offset;
7677 h->prev_frame_num= h->frame_num;
7679 h->prev_poc_msb= h->poc_msb;
7680 h->prev_poc_lsb= h->poc_lsb;
7681 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7685 * FIXME: Error handling code does not seem to support interlaced
7686 * when slices span multiple rows
7687 * The ff_er_add_slice calls don't work right for bottom
7688 * fields; they cause massive erroneous error concealing
7689 * Error marking covers both fields (top and bottom).
7690 * This causes a mismatched s->error_count
7691 * and a bad error table. Further, the error count goes to
7692 * INT_MAX when called for bottom field, because mb_y is
7693 * past end by one (callers fault) and resync_mb_y != 0
7694 * causes problems for the first MB line, too.
7701 if (s->first_field) {
7702 /* Wait for second field. */
7706 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7707 /* Derive top_field_first from field pocs. */
7708 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7710 //FIXME do something with unavailable reference frames
7712 #if 0 //decode order
7713 *data_size = sizeof(AVFrame);
7715 /* Sort B-frames into display order */
7717 if(h->sps.bitstream_restriction_flag
7718 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7719 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the current picture to the delayed-output queue and mark it as
 * held for output so it is not recycled. */
7724 while(h->delayed_pic[pics]) pics++;
7726 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7728 h->delayed_pic[pics++] = cur;
7729 if(cur->reference == 0)
7730 cur->reference = DELAYED_PIC_REF;
7733 for(i=0; h->delayed_pic[i]; i++)
7734 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Candidate for output: lowest POC before the next keyframe. */
7737 out = h->delayed_pic[0];
7739 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7740 if(h->delayed_pic[i]->poc < out->poc){
7741 out = h->delayed_pic[i];
/* Decide whether outputting now would violate display order, and grow
 * has_b_frames (the reorder delay) when out-of-order output is seen. */
7745 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7746 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7748 else if(prev && pics <= s->avctx->has_b_frames)
7750 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7752 ((!cross_idr && prev && out->poc > prev->poc + 2)
7753 || cur->pict_type == B_TYPE)))
7756 s->avctx->has_b_frames++;
7759 else if(out_of_order)
/* Remove the chosen picture from the queue and output it. */
7762 if(out_of_order || pics > s->avctx->has_b_frames){
7763 for(i=out_idx; h->delayed_pic[i]; i++)
7764 h->delayed_pic[i] = h->delayed_pic[i+1];
7770 *data_size = sizeof(AVFrame);
/* Release the previously output picture's DELAYED_PIC_REF hold. */
7771 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7772 prev->reference = 0;
7773 h->delayed_output_pic = out;
7777 *pict= *(AVFrame*)out;
7779 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7783 assert(pict->data[0] || !*data_size);
7784 ff_print_debug_info(s, pict);
7785 //printf("out %d\n", (int)pict->data[0]);
7788 /* Return the Picture timestamp as the frame number */
7789 /* we subtract 1 because it is added on utils.c */
7790 avctx->frame_number = s->picture_number - 1;
7792 return get_consumed_bytes(s, buf_index, buf_size);
7795 static inline void fill_mb_avail(H264Context *h){
/* Fills h->mb_avail[] with neighbour-macroblock availability for the
 * current MB: a neighbour is available only if it belongs to the same
 * slice (same slice_num in slice_table) and lies inside the picture. */
7796 MpegEncContext * const s = &h->s;
7797 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* [0] top-left, [1] top, [2] top-right -- offsets relative to mb_xy show
 * the neighbour positions; left/right edges guarded by the mb_x checks. */
7800 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7801 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7802 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* [3] left neighbour. */
7808 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7809 h->mb_avail[4]= 1; //FIXME move out
7810 h->mb_avail[5]= 0; //FIXME move out
7818 #define SIZE (COUNT*40)
7824 // int int_temp[10000];
7826 AVCodecContext avctx;
7828 dsputil_init(&dsp, &avctx);
7830 init_put_bits(&pb, temp, SIZE);
7831 printf("testing unsigned exp golomb\n");
7832 for(i=0; i<COUNT; i++){
7834 set_ue_golomb(&pb, i);
7835 STOP_TIMER("set_ue_golomb");
7837 flush_put_bits(&pb);
7839 init_get_bits(&gb, temp, 8*SIZE);
7840 for(i=0; i<COUNT; i++){
7843 s= show_bits(&gb, 24);
7846 j= get_ue_golomb(&gb);
7848 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7851 STOP_TIMER("get_ue_golomb");
7855 init_put_bits(&pb, temp, SIZE);
7856 printf("testing signed exp golomb\n");
7857 for(i=0; i<COUNT; i++){
7859 set_se_golomb(&pb, i - COUNT/2);
7860 STOP_TIMER("set_se_golomb");
7862 flush_put_bits(&pb);
7864 init_get_bits(&gb, temp, 8*SIZE);
7865 for(i=0; i<COUNT; i++){
7868 s= show_bits(&gb, 24);
7871 j= get_se_golomb(&gb);
7872 if(j != i - COUNT/2){
7873 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7876 STOP_TIMER("get_se_golomb");
7880 printf("testing 4x4 (I)DCT\n");
7883 uint8_t src[16], ref[16];
7884 uint64_t error= 0, max_error=0;
7886 for(i=0; i<COUNT; i++){
7888 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7889 for(j=0; j<16; j++){
7890 ref[j]= random()%255;
7891 src[j]= random()%255;
7894 h264_diff_dct_c(block, src, ref, 4);
7897 for(j=0; j<16; j++){
7898 // printf("%d ", block[j]);
7899 block[j]= block[j]*4;
7900 if(j&1) block[j]= (block[j]*4 + 2)/5;
7901 if(j&4) block[j]= (block[j]*4 + 2)/5;
7905 s->dsp.h264_idct_add(ref, block, 4);
7906 /* for(j=0; j<16; j++){
7907 printf("%d ", ref[j]);
7911 for(j=0; j<16; j++){
7912 int diff= FFABS(src[j] - ref[j]);
7915 max_error= FFMAX(max_error, diff);
7918 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7919 printf("testing quantizer\n");
7920 for(qp=0; qp<52; qp++){
7922 src1_block[i]= src2_block[i]= random()%255;
7925 printf("Testing NAL layer\n");
7927 uint8_t bitstream[COUNT];
7928 uint8_t nal[COUNT*2];
7930 memset(&h, 0, sizeof(H264Context));
7932 for(i=0; i<COUNT; i++){
7940 for(j=0; j<COUNT; j++){
7941 bitstream[j]= (random() % 255) + 1;
7944 for(j=0; j<zeros; j++){
7945 int pos= random() % COUNT;
7946 while(bitstream[pos] == 0){
7955 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7957 printf("encoding failed\n");
7961 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7965 if(out_length != COUNT){
7966 printf("incorrect length %d %d\n", out_length, COUNT);
7970 if(consumed != nal_length){
7971 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7975 if(memcmp(bitstream, out, COUNT)){
7976 printf("mismatch\n");
7982 printf("Testing RBSP\n");
7990 static int decode_end(AVCodecContext *avctx)
/* Codec close callback: releases the per-context RBSP unescape buffers
 * and the per-picture decoding tables. */
7992 H264Context *h = avctx->priv_data;
7993 MpegEncContext *s = &h->s;
/* av_freep() frees and NULLs the pointers, so a double close is safe. */
7995 av_freep(&h->rbsp_buffer[0]);
7996 av_freep(&h->rbsp_buffer[1]);
7997 free_tables(h); //FIXME cleanup init stuff perhaps
8000 // memset(h, 0, sizeof(H264Context));
8006 AVCodec h264_decoder = {
8010 sizeof(H264Context),
8015 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,