2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
/* File-scope VLC tables; the VLC type is declared outside this listing.
 * NOTE(review): the embedded line numbers skip values between these
 * declarations, so further tables may be missing here - confirm against
 * the full file before relying on this list being complete. */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations of static functions whose definitions are not part
 * of this listing. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
/** Remainder lookup table: ff_rem6[i] == i % 6 for i in 0..51. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/** Quotient lookup table: ff_div6[i] == i / 6 for i in 0..51. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * Loads neighbour information for the current macroblock (h->mb_xy) into the
 * per-macroblock caches of the H264Context.
 *
 * From the visible code this: resolves the top, top-left, top-right and left
 * neighbour macroblock indices (stored in h->top_mb_xy / h->left_mb_xy),
 * looks up their types through h->slice_table, and then fills the
 * sample-availability masks, intra4x4_pred_mode_cache, non_zero_count_cache,
 * top/left cbp, the mv/ref/mvd caches, direct_cache and finally
 * h->neighbor_transform_size.
 *
 * @param h           decoder context; caches are written in place
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock flag tested when choosing how neighbour types are looked
 *                    up and which neighbours are loaded (presumably set by
 *                    the deblocking caller - confirm)
 *
 * NOTE(review): this listing has gaps - many control-flow lines (loop
 * headers, else branches, closing braces, short conditions) are missing, so
 * the nesting of the statements below cannot be read off this text alone.
 */
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
88 int topleft_partition= -1;
/* Top neighbour: one macroblock row up, with the stride shifted by FIELD_PICTURE. */
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
/* Initial neighbour positions; the code below adjusts them using the
 * frame/field flags of the surrounding macroblock pairs. */
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
/* NOTE(review): the guard that introduces the pair handling below is not
 * visible in this listing - presumably FRAME_MBAFF; confirm. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
/* Store the resolved top/left neighbour indices in the context. */
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
/* Neighbour types taken whenever the slice_table entry is < 255; a type of 0
 * is what the rest of this function treats as "neighbour not available". */
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF, non-intra macroblock: rebuild the nnz cache from the 16-bit mask kept
 * at non_zero_count[mb_xy][14] and reload this macroblock's own mv/ref data
 * from the picture (the copy statements inside the row loop are not visible). */
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Neighbour types restricted to macroblocks whose slice_table entry equals
 * the current h->slice_num; anything else is reported as type 0. */
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra macroblock: start from "all samples available" masks, then clear bits
 * for each neighbour that is missing (type 0) or is non-intra while
 * constrained_intra_pred is set. */
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: load the neighbours' prediction modes into row 0 (top) and
 * column 3 (left) of the cache, or a substitute value "pred" whose
 * computation is not visible in this listing. */
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: copy the top and left neighbours' counts, or a
 * default (0 for CABAC non-intra, 64 otherwise) on the path whose guard is
 * not visible here. */
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Coded-block-pattern of the top and left neighbours.
 * NOTE(review): the conditions surrounding these assignments and the
 * fallback values are missing from this listing. */
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: clear the direct flags of the current macroblock, then record for
 * the top and left neighbours whether they are direct (whole macroblock, or
 * per 8x8 block taken from h->direct_table). */
484 if(h->slice_type == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * Checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
/*
 * Validates the cached intra4x4 prediction modes against neighbour
 * availability. When bit 0x8000 of top_samples_available (resp.
 * left_samples_available) is clear, cache entries are looked up in the local
 * top[] (resp. left[]) table and the result is written back to the cache; an
 * error is logged through av_log() on a path whose guard is not visible here.
 *
 * NOTE(review): the loop headers, the tests on "status" and the return
 * statements are missing from this listing - presumably a negative table
 * value means the requested mode cannot be used and the function fails;
 * confirm against the full file.
 */
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
/* Lookup tables indexed by the cached mode; used only when the matching
 * neighbour is unavailable. */
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top neighbour unavailable: remap entries scan8[0] + i. */
573 if(!(h->top_samples_available&0x8000)){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left neighbour unavailable: remap entries scan8[0] + 8*i. */
585 if(!(h->left_samples_available&0x8000)){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
/**
 * Checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
/*
 * Validates an intra prediction mode (the first log message calls it the
 * "intra chroma pred mode") against top/left neighbour availability, using
 * the same 0x8000 availability bit as check_intra4x4_pred_mode().
 * mode is the requested prediction mode.
 *
 * NOTE(review): only the declarations, the availability tests and the error
 * logging are visible in this listing; the range check, the table lookups and
 * every return statement are missing, so the return contract cannot be stated
 * from here - confirm against the full file.
 */
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
/* Lookup tables for the unavailable-top and unavailable-left cases. */
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if(!(h->left_samples_available&0x8000)){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/*
 * Copies entries of h->non_zero_count_cache into h->non_zero_count[h->mb_xy].
 * Slots 0..6 take column 7 of cache rows 1..4 and columns 4..6 of row 4
 * (cache index = column + 8*row); slots 7..9 and 10..12 take entries from
 * cache columns 1..2 of rows 1..2 and rows 4..5 respectively.
 * The tail packs one bit per scan8[i] entry (set when the count is non-zero)
 * into a 16-bit value stored at slot 14.
 *
 * NOTE(review): the declaration of v, the loop header over i and any guard
 * around the bit-packing tail are missing from this listing - the matching
 * reader in fill_caches() is guarded by FRAME_MBAFF, so presumably this is
 * too; confirm against the full file.
 */
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
667 // store all luma nnzs, for deblocking
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/*
 * Selects the diagonal neighbour motion vector for the block at cache
 * index i and reports it through *C.
 *
 * h          - decoder context
 * C          - receives a pointer to the chosen vector inside h->mv_cache
 * i          - cache index of the current block
 * list       - reference list to read from
 * part_width - partition width in cache columns
 *
 * Visible behaviour: the top-right entry (i - 8 + part_width) is used when
 * its ref_cache value is not PART_NOT_AVAILABLE; otherwise the top-left entry
 * (i - 8 - 1) is used and its ref_cache value is returned. The SET_DIAG_MV
 * paths instead build a vector in the scratch slot
 * mv_cache[list][scan8[0]-2] from the picture's motion_val and return the
 * picture's ref_index, returning LIST_NOT_USED when the source macroblock
 * does not use this list.
 *
 * NOTE(review): several guards (presumably FRAME_MBAFF / field tests), the
 * declaration of mv and the return for the top-right case are missing from
 * this listing - confirm against the full file.
 */
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Point *C at the zeroed scratch slot before the special cases run. */
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Regular case: top-right if available, otherwise top-left. */
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * Gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
/*
 * Motion vector prediction from the left (A), top (B) and diagonal (C)
 * neighbours of block n in reference list "list"; the result is written to
 * *mx / *my. "ref" is the reference index the neighbours are compared to.
 *
 * Visible rule: when more than one neighbour uses "ref", the prediction is
 * the component-wise median (mid_pred) of A, B and C; when exactly one
 * matches, a single neighbour is selected; the median is also used on the
 * remaining fallback path.
 *
 * NOTE(review): the declaration of C and the assignments in the
 * single-neighbour branches are missing from this listing - confirm against
 * the full file.
 */
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
/* Count how many of the three neighbours use the requested reference. */
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
781 }else if(match_count==1){
785 }else if(top_ref==ref){
/* No neighbour matches: special-case "only the left neighbour exists". */
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * Gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
/*
 * Directional motion vector prediction for 16x8 partitions.
 * The visible code inspects the top neighbour (B) of block 0 and the left
 * neighbour (A) of block 8, and otherwise falls back to
 * pred_motion(h, n, 4, list, ref, mx, my).
 *
 * NOTE(review): the conditions choosing between the two neighbours and the
 * assignments to *mx / *my are missing from this listing - presumably the
 * neighbour is used directly when its reference index equals ref; confirm
 * against the full file.
 */
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Top neighbour of the macroblock (cache row above scan8[0]). */
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Left neighbour of block 8 (cache column left of scan8[8]). */
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Generic prediction over the full partition width. */
837 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * Gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
/*
 * Directional motion vector prediction for 8x16 partitions.
 * The visible code inspects the left neighbour (A) of block 0 and the
 * diagonal neighbour (C) fetched for block 4 with part_width 2, and
 * otherwise falls back to pred_motion(h, n, 2, list, ref, mx, my).
 *
 * NOTE(review): the declarations of C and diagonal_ref, the condition
 * choosing between the two halves and the assignments to *mx / *my are
 * missing from this listing - confirm against the full file.
 */
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Left neighbour of the macroblock (cache column left of scan8[0]). */
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Diagonal neighbour of block 4. */
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
/* Generic prediction over a two-column partition. */
874 pred_motion(h, n, 2, list, ref, mx, my);
/*
 * Motion vector prediction for a P-skip macroblock; the result goes to
 * *mx / *my. Only list 0 is consulted.
 * A special case is taken when the top or left neighbour is
 * PART_NOT_AVAILABLE, or when either of them has reference index 0 together
 * with an all-zero motion vector; otherwise the vector comes from
 * pred_motion(h, 0, 4, 0, 0, mx, my).
 *
 * NOTE(review): the body of the special case is missing from this listing -
 * presumably it sets the vector to zero and returns; confirm against the
 * full file.
 */
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
891 pred_motion(h, 0, 4, 0, 0, mx, my);
/*
 * Computes h->dist_scale_factor[] for every list-0 reference.
 * With poc the current picture's POC, poc1 the POC of h->ref_list[1][0] and
 * poc0 the POC of list-0 reference i:
 *   td = clip(poc1 - poc0, -128, 127)
 *   td == 0 -> factor 256
 *   else       tb = clip(poc - poc0, -128, 127)
 *              tx = (16384 + (|td| >> 1)) / td
 *              factor = clip((tb*tx + 32) >> 6, -1024, 1023)
 * The second loop copies each factor into entries 2*i and 2*i+1 of
 * h->dist_scale_factor_field.
 *
 * NOTE(review): the declaration of i, the else line and any guard around the
 * second loop are missing from this listing - confirm against the full file.
 */
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
904 h->dist_scale_factor[i] = 256;
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Field variant: both entries of a pair reuse the frame factor. */
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/*
 * Prepares the reference bookkeeping used by direct prediction.
 * 1. Records the current reference counts and the POC of every reference of
 *    both lists in the current picture (cur->ref_count / cur->ref_poc).
 * 2. For every reference recorded in ref1 (h->ref_list[1][0]), searches the
 *    current list of the same index for an entry with the same POC and
 *    stores its position in h->map_col_to_list0[list][i]; 0 is kept when no
 *    match is found.
 * 3. Derives h->map_col_to_list0_field entries 2*i and 2*i+1 as 2*j and
 *    2*j+1 from the frame mapping j.
 *
 * NOTE(review): the declarations of list/i/j, the statement following the
 * "not a B picture or spatial direct" test (presumably an early return) and
 * any guard around step 3 are missing from this listing - confirm against
 * the full file.
 */
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
/* Snapshot the active reference lists into the current picture. */
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
/* Map each reference of the co-located picture to an index in the current list by POC. */
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
/* Field variant of the mapping. */
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/*
 * Direct-mode motion prediction for the current macroblock.
 * Reads the co-located macroblock data of h->ref_list[1][0] (type, motion
 * vectors, reference indices), rewrites *mb_type and h->sub_mb_type[], and
 * fills h->ref_cache / h->mv_cache for both lists.  The spatial variant is
 * taken when h->direct_spatial_mv_pred is set, the temporal variant otherwise.
 *
 * NOTE(review): this listing omits a number of lines of the function (local
 * declarations, else/continue lines, closing braces), so the notes below only
 * describe statements that are actually visible.
 */
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
/* Offsets of this macroblock in the 8x8-block grid (2 per MB) and the 4x4-block grid (4 per MB). */
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* Co-located data, all taken from the first list-1 reference picture. */
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Choose the partitioning of the direct block from the co-located macroblock type. */
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
/* NOTE(review): the conditions guarding the next two flag updates are not shown in this listing. */
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
/* Candidates are read from the cache entries left of, above, and above-right of the first block. */
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* Alternative third candidate: the above-left entry (its guarding condition is not shown). */
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* A candidate replaces ref[list] when ref[list] is negative, or when it is non-negative and smaller. */
1005 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1007 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* Both list references negative: fall back to reference index 0 and zero vectors in both lists. */
1013 if(ref[0] < 0 && ref[1] < 0){
1014 ref[0] = ref[1] = 0;
1015 mv[0][0] = mv[0][1] =
1016 mv[1][0] = mv[1][1] = 0;
1018 for(list=0; list<2; list++){
1020 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1022 mv[list][0] = mv[list][1] = 0;
/* Drop the list flags of a list whose reference is negative (first condition not shown). */
1028 *mb_type &= ~MB_TYPE_L1;
1029 sub_mb_type &= ~MB_TYPE_L1;
1030 }else if(ref[0] < 0){
1032 *mb_type &= ~MB_TYPE_L0;
1033 sub_mb_type &= ~MB_TYPE_L0;
/* The interlaced flag of the current macroblock differs from that of the co-located one. */
1036 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1037 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1038 int mb_types_col[2];
1039 int b8_stride = h->b8_stride;
1040 int b4_stride = h->b_stride;
1042 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1044 if(IS_INTERLACED(*mb_type)){
/* Use the types of both macroblocks of the co-located pair and move the co-located pointers up. */
1045 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1046 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1048 l1ref0 -= 2*b8_stride;
1049 l1ref1 -= 2*b8_stride;
1050 l1mv0 -= 4*b4_stride;
1051 l1mv1 -= 4*b4_stride;
/* col_parity is 1 when field_poc[0] is at least as far from the current POC as field_poc[1]. */
1056 int cur_poc = s->current_picture_ptr->poc;
1057 int *col_poc = h->ref_list[1]->field_poc;
1058 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1059 int dy = 2*col_parity - (s->mb_y&1);
1061 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1062 l1ref0 += dy*b8_stride;
1063 l1ref1 += dy*b8_stride;
1064 l1mv0 += 2*dy*b4_stride;
1065 l1mv1 += 2*dy*b4_stride;
1069 for(i8=0; i8<4; i8++){
1072 int xy8 = x8+y8*b8_stride;
1073 int xy4 = 3*x8+y8*b4_stride;
1076 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1078 h->sub_mb_type[i8] = sub_mb_type;
1080 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1081 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Co-located block is not intra, has reference index 0 (list 0, or list 1 when the
   list-0 index is negative) and a vector within +-1 in both components. */
1082 if(!IS_INTRA(mb_types_col[y8])
1083 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1084 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1086 a= pack16to32(mv[0][0],mv[0][1]);
1088 b= pack16to32(mv[1][0],mv[1][1]);
1090 a= pack16to32(mv[0][0],mv[0][1]);
1091 b= pack16to32(mv[1][0],mv[1][1]);
1093 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1094 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1096 }else if(IS_16X16(*mb_type)){
1099 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1100 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* Same co-located test as above for the whole macroblock; the list-1 alternative is
   only accepted when x264_build is 0 or greater than 33. */
1101 if(!IS_INTRA(mb_type_col)
1102 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1103 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1104 && (h->x264_build>33 || !h->x264_build)))){
1106 a= pack16to32(mv[0][0],mv[0][1]);
1108 b= pack16to32(mv[1][0],mv[1][1]);
1110 a= pack16to32(mv[0][0],mv[0][1]);
1111 b= pack16to32(mv[1][0],mv[1][1]);
1113 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1114 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1116 for(i8=0; i8<4; i8++){
1117 const int x8 = i8&1;
1118 const int y8 = i8>>1;
1120 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1122 h->sub_mb_type[i8] = sub_mb_type;
/* Start from the predicted vectors and references for the whole 8x8 block ... */
1124 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1126 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1127 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* ... then zero them where the co-located vector is within +-1 (per 8x8 block or per 4x4 block). */
1130 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1131 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1132 && (h->x264_build>33 || !h->x264_build)))){
1133 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1134 if(IS_SUB_8X8(sub_mb_type)){
1135 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1136 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1138 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1140 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1143 for(i4=0; i4<4; i4++){
1144 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1145 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1147 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1149 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1155 }else{ /* direct temporal mv pred */
1156 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1157 const int *dist_scale_factor = h->dist_scale_factor;
/* Interlaced macroblocks use the field variants of the mapping and scale tables. */
1160 if(IS_INTERLACED(*mb_type)){
1161 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1162 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1163 dist_scale_factor = h->dist_scale_factor_field;
1165 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1166 /* FIXME assumes direct_8x8_inference == 1 */
1167 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1168 int mb_types_col[2];
1171 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1172 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1173 | (*mb_type & MB_TYPE_INTERLACED);
1174 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1176 if(IS_INTERLACED(*mb_type)){
1177 /* frame to field scaling */
1178 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1179 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1181 l1ref0 -= 2*h->b8_stride;
1182 l1ref1 -= 2*h->b8_stride;
1183 l1mv0 -= 4*h->b_stride;
1184 l1mv1 -= 4*h->b_stride;
1188 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1189 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1191 *mb_type |= MB_TYPE_16x8;
1193 *mb_type |= MB_TYPE_8x8;
1195 /* field to frame scaling */
1196 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1197 * but in MBAFF, top and bottom POC are equal */
1198 int dy = (s->mb_y&1) ? 1 : 2;
1200 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1201 l1ref0 += dy*h->b8_stride;
1202 l1ref1 += dy*h->b8_stride;
1203 l1mv0 += 2*dy*h->b_stride;
1204 l1mv1 += 2*dy*h->b_stride;
1207 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1209 *mb_type |= MB_TYPE_16x16;
1211 *mb_type |= MB_TYPE_8x8;
1214 for(i8=0; i8<4; i8++){
1215 const int x8 = i8&1;
1216 const int y8 = i8>>1;
1218 const int16_t (*l1mv)[2]= l1mv0;
1220 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1222 h->sub_mb_type[i8] = sub_mb_type;
/* The list-1 reference index is always 0 in this path. */
1224 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1225 if(IS_INTRA(mb_types_col[y8])){
1226 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1227 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* Map the co-located reference index to a list-0 index; y_shift is set in lines not shown here. */
1232 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1234 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1236 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1239 scale = dist_scale_factor[ref0];
1240 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* List 0 gets the co-located vector scaled by scale/256 (rounded); list 1 gets the
   scaled vector minus the co-located one. */
1243 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1244 int my_col = (mv_col[1]<<y_shift)/2;
1245 int mx = (scale * mv_col[0] + 128) >> 8;
1246 int my = (scale * my_col + 128) >> 8;
1247 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1248 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1255 /* one-to-one mv scaling */
1257 if(IS_16X16(*mb_type)){
1260 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1261 if(IS_INTRA(mb_type_col)){
/* The list-0 index of the co-located block is used when it is non-negative, the list-1 index otherwise. */
1264 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1265 : map_col_to_list0[1][l1ref1[0]];
1266 const int scale = dist_scale_factor[ref0];
1267 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1269 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1270 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1272 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1273 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1275 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1276 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1277 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1279 for(i8=0; i8<4; i8++){
1280 const int x8 = i8&1;
1281 const int y8 = i8>>1;
1283 const int16_t (*l1mv)[2]= l1mv0;
1285 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1287 h->sub_mb_type[i8] = sub_mb_type;
1288 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1289 if(IS_INTRA(mb_type_col)){
1290 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1291 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1296 ref0 = l1ref0[x8 + y8*h->b8_stride];
1298 ref0 = map_col_to_list0[0][ref0];
1300 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1303 scale = dist_scale_factor[ref0];
1305 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* One scaled vector per 8x8 block, or one per 4x4 block in the loop further down. */
1306 if(IS_SUB_8X8(sub_mb_type)){
1307 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1308 int mx = (scale * mv_col[0] + 128) >> 8;
1309 int my = (scale * mv_col[1] + 128) >> 8;
1310 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1311 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1313 for(i4=0; i4<4; i4++){
1314 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1315 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1316 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1317 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1318 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1319 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/*
 * Writes the motion data cached for the current macroblock back into the
 * picture-wide arrays: motion vectors into current_picture.motion_val,
 * motion vector differences into h->mvd_table (only when h->pps.cabac is
 * set), reference indices into current_picture.ref_index and, for B slices
 * with CABAC, per-8x8 direct flags into h->direct_table.
 * NOTE(review): the loop headers over y, some declarations and the closing
 * braces are missing from this listing.
 */
1326 static inline void write_back_motion(H264Context *h, int mb_type){
1327 MpegEncContext * const s = &h->s;
1328 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1329 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* List 0 unused by this macroblock: mark its 2x2 reference entries as LIST_NOT_USED. */
1332 if(!USES_LIST(mb_type, 0))
1333 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1335 for(list=0; list<h->list_count; list++){
1337 if(!USES_LIST(mb_type, list))
/* Each 64-bit copy transfers two adjacent cache entries; y is a row index whose loop header is not shown. */
1341 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1344 if( h->pps.cabac ) {
/* Skipped macroblocks get an all-zero mvd block; the copies below belong to a branch not shown here. */
1345 if(IS_SKIP(mb_type))
1346 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1349 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1350 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One reference index per 8x8 block, read from cache positions scan8[0], [4], [8] and [12]. */
1355 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1356 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1357 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1358 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1359 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1363 if(h->slice_type == FF_B_TYPE && h->pps.cabac){
1364 if(IS_8X8(mb_type)){
/* Only entries 1..3 are written by the visible lines; entry 0 is left untouched here. */
1365 uint8_t *direct_table = &h->direct_table[b8_xy];
1366 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1367 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1368 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1374 * Decodes a network abstraction layer unit.
1375 * @param consumed is the number of bytes used as input
1376 * @param length is the length of the array
1377 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1378 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Parses the NAL header byte (nal_ref_idc, nal_unit_type) and produces the
 * payload with escape sequences removed.  A first scan looks for a
 * 00 00 0x pattern (x <= 3); when the scan runs to the end of the data the
 * lengths are reported without copying.  Otherwise the payload is copied
 * into h->rbsp_buffer[], where 00 00 03 is recognised as an escape and any
 * other 00 00 0x as the next start code.
 * NOTE(review): the local declarations, the return statements and several
 * other short statements are not part of this listing.
 */
1380 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1385 // src[0]&0x80; //forbidden bit
/* nal_ref_idc comes from src[0]>>5, nal_unit_type from the low five bits. */
1386 h->nal_ref_idc= src[0]>>5;
1387 h->nal_unit_type= src[0]&0x1F;
/* NOTE(review): hex dump of the payload; presumably wrapped in a disabled preprocessor block not shown here - confirm. */
1391 for(i=0; i<length; i++)
1392 printf("%2X ", src[i]);
/* Scan two bytes at a time for a zero byte, stepping back one position when the previous byte is zero too. */
1394 for(i=0; i+1<length; i+=2){
1395 if(src[i]) continue;
1396 if(i>0 && src[i-1]==0) i--;
1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1399 /* startcode, so we must be past the end */
1406 if(i>=length-1){ //no escaped 0
1407 *dst_length= length;
1408 *consumed= length+1; //+1 for the header
1412 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/* NOTE(review): the buffer pointer is overwritten with the av_fast_realloc result; confirm that the
   lines missing from this listing check dst for NULL before the copy loop. */
1413 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1414 dst= h->rbsp_buffer[bufidx];
1420 //printf("decoding esc\n");
1423 //remove escapes (very rare 1:2^22)
1424 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1425 if(src[si+2]==3){ //escape
1430 }else //next start code
/* Ordinary byte: copy it through. */
1434 dst[di++]= src[si++];
1438 *consumed= si + 1;//+1 for the header
1439 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1444 * identifies the exact end of the bitstream
1445 * @return the length of the trailing, or 0 if damaged
/*
 * NOTE(review): only the signature and one trace statement of this function
 * are present in this listing; v is the value being traced, but the lines
 * that compute it and the return value are not shown.
 */
1447 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1451 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1461 * IDCT transforms the 16 DC values and dequantizes them.
1462 * @param qp quantization parameter
/*
 * Two-pass 4x4 butterfly transform over the 16 luma DC coefficients held in
 * block (located through the x_offset/y_offset tables, in units of stride),
 * with each final value scaled as (value*qmul + 128) >> 8.
 * qp is not referenced by any visible line.
 * NOTE(review): the definition of stride, the loop headers and the stores
 * into temp[] are not shown in this listing.
 */
1464 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1467 int temp[16]; //FIXME check if this is a good idea
1468 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1469 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1471 //memset(block, 64, 2*256);
/* First pass: sums and differences of the coefficients at offsets 0/4 and 1/5 (times stride). */
1474 const int offset= y_offset[i];
1475 const int z0= block[offset+stride*0] + block[offset+stride*4];
1476 const int z1= block[offset+stride*0] - block[offset+stride*4];
1477 const int z2= block[offset+stride*1] - block[offset+stride*5];
1478 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass: the same butterfly on temp[], read with a step of 4 entries. */
1487 const int offset= x_offset[i];
1488 const int z0= temp[4*0+i] + temp[4*2+i];
1489 const int z1= temp[4*0+i] - temp[4*2+i];
1490 const int z2= temp[4*1+i] - temp[4*3+i];
1491 const int z3= temp[4*1+i] + temp[4*3+i];
/* Scale by qmul with rounding (+128, then >> 8) and write back in place. */
1493 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1494 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1495 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1496 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1502 * DCT transforms the 16 DC values.
1503 * @param qp quantization parameter ??? FIXME
/*
 * Forward counterpart of the luma DC transform: the same two-pass 4x4
 * butterfly over the 16 DC coefficients in block, with each result halved
 * (>> 1) instead of being scaled by a quantiser.
 * NOTE(review): the definition of stride, the loop headers and the stores
 * into temp[] are not shown in this listing.
 */
1505 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1506 // const int qmul= dequant_coeff[qp][0];
1508 int temp[16]; //FIXME check if this is a good idea
1509 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1510 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* First pass: sums and differences of the coefficients at offsets 0/4 and 1/5 (times stride). */
1513 const int offset= y_offset[i];
1514 const int z0= block[offset+stride*0] + block[offset+stride*4];
1515 const int z1= block[offset+stride*0] - block[offset+stride*4];
1516 const int z2= block[offset+stride*1] - block[offset+stride*5];
1517 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass on temp[], then write back in place with a right shift by one. */
1526 const int offset= x_offset[i];
1527 const int z0= temp[4*0+i] + temp[4*2+i];
1528 const int z1= temp[4*0+i] - temp[4*2+i];
1529 const int z2= temp[4*1+i] - temp[4*3+i];
1530 const int z3= temp[4*1+i] + temp[4*3+i];
1532 block[stride*0 +offset]= (z0 + z3)>>1;
1533 block[stride*2 +offset]= (z1 + z2)>>1;
1534 block[stride*8 +offset]= (z1 - z2)>>1;
1535 block[stride*10+offset]= (z0 - z3)>>1;
/*
 * 2x2 transform of the four chroma DC coefficients, stored at block[0],
 * block[16], block[32] and block[48], followed by scaling with qmul and a
 * right shift by 7.  qp is not referenced by any visible line.
 * NOTE(review): the declarations and the intermediate step that defines e
 * (and presumably updates a, b and c) are not shown in this listing.
 */
1543 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1544 const int stride= 16*2;
1545 const int xStride= 16;
/* Load the 2x2 group. */
1548 a= block[stride*0 + xStride*0];
1549 b= block[stride*0 + xStride*1];
1550 c= block[stride*1 + xStride*0];
1551 d= block[stride*1 + xStride*1];
/* Combine, scale and store back in place. */
1558 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1559 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1560 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1561 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/*
 * 2x2 transform of the four chroma DC coefficients, stored at block[0],
 * block[16], block[32] and block[48]; the results are written back in place
 * without any scaling.
 * NOTE(review): the declarations and the intermediate step that defines e
 * (and presumably updates a, b and c) are not shown in this listing.
 */
1565 static void chroma_dc_dct_c(DCTELEM *block){
1566 const int stride= 16*2;
1567 const int xStride= 16;
/* Load the 2x2 group. */
1570 a= block[stride*0 + xStride*0];
1571 b= block[stride*0 + xStride*1];
1572 c= block[stride*1 + xStride*0];
1573 d= block[stride*1 + xStride*1];
/* Combine and store back in place. */
1580 block[stride*0 + xStride*0]= (a+c);
1581 block[stride*0 + xStride*1]= (e+b);
1582 block[stride*1 + xStride*0]= (a-c);
1583 block[stride*1 + xStride*1]= (e-b);
1588 * gets the chroma qp.
/*
 * Looks up the chroma QP in h->pps.chroma_qp_table.
 * t selects the table row; qscale is masked to its low 8 bits before it is
 * used as the column index.
 */
1590 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1591 return h->pps.chroma_qp_table[t][qscale & 0xff];
1594 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1595 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/*
 * Quantises the coefficients of block with the table quant_coeff[qscale],
 * visiting them in scantable order, and returns last_non_zero.
 * The rounding bias is 1/3 of the quantisation step for intra blocks and 1/6
 * otherwise.  The DC coefficient has two dedicated variants that use a
 * different shift (QUANT_SHIFT-2 with the table of qscale+18, or
 * QUANT_SHIFT+1).
 * NOTE(review): the conditions selecting the DC variants (presumably based on
 * separate_dc), the loop header and the stores into block[] are not shown in
 * this listing.
 */
1596 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1598 const int * const quant_table= quant_coeff[qscale];
1599 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold2 is twice threshold1, so the unsigned test (level+threshold1) > threshold2
   used below is true exactly when level lies outside [-threshold1, threshold1]. */
1600 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1601 const unsigned int threshold2= (threshold1<<1);
/* DC variant with shift QUANT_SHIFT-2, using the first entry of quant_coeff[qscale+18]. */
1607 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1608 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1609 const unsigned int dc_threshold2= (dc_threshold1<<1);
1611 int level= block[0]*quant_coeff[qscale+18][0];
1612 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1614 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1617 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1620 // last_non_zero = i;
/* DC variant with shift QUANT_SHIFT+1, using quant_table[0]. */
1625 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1626 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1627 const unsigned int dc_threshold2= (dc_threshold1<<1);
1629 int level= block[0]*quant_table[0];
1630 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1632 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1635 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1638 // last_non_zero = i;
/* Remaining coefficients: scale by the table entry, then round with the bias (two branches, presumably by sign). */
1651 const int j= scantable[i];
1652 int level= block[j]*quant_table[j];
1654 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1655 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1656 if(((unsigned)(level+threshold1))>threshold2){
1658 level= (bias + level)>>QUANT_SHIFT;
1661 level= (bias - level)>>QUANT_SHIFT;
1670 return last_non_zero;
/*
 * Motion compensation of partition n from a single reference picture pic.
 * The vector is read from h->mv_cache[list] and offset by src_x_offset*8 /
 * src_y_offset*8.  Luma uses mx>>2 / my>>2 as the integer position and the
 * low two bits of each component to select the qpix_op entry; chroma uses
 * mx>>3 / my>>3 and passes mx&7 / my&7 to chroma_op.  When the referenced
 * area reaches beyond the picture plus the available edge, the source is
 * first rebuilt in s->edge_emu_buffer through ff_emulated_edge_mc.
 * NOTE(review): some declarations (e.g. of emu), several conditions and the
 * closing braces are not shown in this listing.
 */
1673 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1674 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1675 int src_x_offset, int src_y_offset,
1676 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1677 MpegEncContext * const s = &h->s;
1678 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1679 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1680 const int luma_xy= (mx&3) + ((my&3)<<2);
1681 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1682 uint8_t * src_cb, * src_cr;
1683 int extra_width= h->emu_edge_width;
1684 int extra_height= h->emu_edge_height;
1686 const int full_mx= mx>>2;
1687 const int full_my= my>>2;
1688 const int pic_width = 16*s->mb_width;
1689 const int pic_height = 16*s->mb_height >> MB_FIELD;
1691 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* Shrink the usable margin by 3 when a component has any of its low three bits set
   (presumably because interpolation reads extra neighbouring pixels - confirm). */
1694 if(mx&7) extra_width -= 3;
1695 if(my&7) extra_height -= 3;
/* The 16x16 area at the integer position leaves the picture plus margin: emulate the edge
   on a 21x21 source area starting two pixels up and left. */
1697 if( full_mx < 0-extra_width
1698 || full_my < 0-extra_height
1699 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1700 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1701 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1702 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
/* Luma prediction; the second call handles the part at offset delta (its condition is not shown). */
1706 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1708 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* Chroma is skipped entirely in gray-only decoding. */
1711 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1714 // chroma offset when predicting from a field of opposite parity
1715 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1716 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1718 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1719 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
/* Cb, then Cr: optional edge emulation on a 9x9 area, followed by the chroma operation. */
1722 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1723 src_cb= s->edge_emu_buffer;
1725 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1728 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1729 src_cr= s->edge_emu_buffer;
1731 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/*
 * Unweighted motion compensation of partition n.
 * Destination pointers are advanced to the partition and the offsets are made
 * absolute; then mc_dir_part is called for list 0 and for list 1.  The
 * operations start out as the put variants and are switched to the avg
 * variants between the two calls.
 * NOTE(review): the conditions on list0/list1 that guard the two calls, and
 * the line switching qpix_op, are not shown in this listing.
 */
1734 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1735 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1736 int x_offset, int y_offset,
1737 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1738 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1739 int list0, int list1){
1740 MpegEncContext * const s = &h->s;
1741 qpel_mc_func *qpix_op= qpix_put;
1742 h264_chroma_mc_func chroma_op= chroma_put;
/* x_offset/y_offset arrive in chroma-sample units: luma destinations use twice the offset. */
1744 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1745 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1746 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* Add the macroblock position (8 units per macroblock) to get picture-relative offsets. */
1747 x_offset += 8*s->mb_x;
1748 y_offset += 8*(s->mb_y >> MB_FIELD);
1751 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1752 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1753 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1754 qpix_op, chroma_op);
1757 chroma_op= chroma_avg;
1761 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1762 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1763 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1764 qpix_op, chroma_op);
/*
 * Weighted motion compensation of partition n.
 * Two-reference path: list 0 is predicted into the destination and list 1
 * into temporaries inside s->obmc_scratchpad, then both are combined by the
 * *_weight_avg functions, using implicit weights when h->use_weight == 2
 * and the explicit luma/chroma weights and offsets otherwise.
 * Single-reference path: the prediction is written to the destination and
 * then weighted in place by luma_weight_op / chroma_weight_op.
 * NOTE(review): the condition choosing between the two paths and the closing
 * braces are not shown in this listing.
 */
1768 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1769 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770 int x_offset, int y_offset,
1771 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1773 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1774 int list0, int list1){
1775 MpegEncContext * const s = &h->s;
/* Same destination/offset set-up as in the unweighted path. */
1777 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1778 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1779 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1780 x_offset += 8*s->mb_x;
1781 y_offset += 8*(s->mb_y >> MB_FIELD);
1784 /* don't optimize for luma-only case, since B-frames usually
1785 * use implicit weights => chroma too. */
/* Scratchpad layout: Cb at the start, Cr 8 bytes later, luma after 8 chroma lines. */
1786 uint8_t *tmp_cb = s->obmc_scratchpad;
1787 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1788 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1789 int refn0 = h->ref_cache[0][ scan8[n] ];
1790 int refn1 = h->ref_cache[1][ scan8[n] ];
1792 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1793 dest_y, dest_cb, dest_cr,
1794 x_offset, y_offset, qpix_put, chroma_put);
1795 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1796 tmp_y, tmp_cb, tmp_cr,
1797 x_offset, y_offset, qpix_put, chroma_put);
/* Implicit weighting: the two weights sum to 64, the log2 denominator is 5 and the offset is 0. */
1799 if(h->use_weight == 2){
1800 int weight0 = h->implicit_weight[refn0][refn1];
1801 int weight1 = 64 - weight0;
1802 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1803 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1804 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* Explicit weighting: per-reference weights, with the offsets of both references added together. */
1806 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1807 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1808 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1809 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1810 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1811 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1812 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1813 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1814 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Single reference: list 1 when list1 is set, list 0 otherwise. */
1817 int list = list1 ? 1 : 0;
1818 int refn = h->ref_cache[list][ scan8[n] ];
1819 Picture *ref= &h->ref_list[list][refn];
1820 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1821 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1822 qpix_put, chroma_put);
1824 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1825 h->luma_weight[list][refn], h->luma_offset[list][refn]);
/* Chroma is only weighted when h->use_weight_chroma is set. */
1826 if(h->use_weight_chroma){
1827 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1828 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1829 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/*
 * Dispatches the motion compensation of partition n to the weighted or the
 * unweighted implementation.  The weighted one is used when h->use_weight is
 * 1, or when it is 2, both lists are used and the implicit weight of the
 * reference pair differs from 32.
 * weight_op[0]/weight_avg[0] are passed as the luma functions and
 * weight_op[3]/weight_avg[3] as the chroma functions.
 * NOTE(review): the else line between the two calls is not shown in this listing.
 */
1835 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1836 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1837 int x_offset, int y_offset,
1838 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1839 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1840 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1841 int list0, int list1){
1842 if((h->use_weight==2 && list0 && list1
1843 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1844 || h->use_weight==1)
1845 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put,
1847 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1849 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1850 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/*
 * Issues prefetches on the reference picture selected by the first cached
 * reference index of the given list, at a position derived from the first
 * cached motion vector and the macroblock coordinates.
 * NOTE(review): a guard on refn (presumably skipping unused references) is
 * not shown in this listing - confirm.
 */
1853 static inline void prefetch_motion(H264Context *h, int list){
1854 /* fetch pixels for estimated mv 4 macroblocks ahead
1855 * optimized for 64byte cache lines */
1856 MpegEncContext * const s = &h->s;
1857 const int refn = h->ref_cache[list][scan8[0]];
/* Integer part of the vector (>>2) plus the macroblock position in luma samples. */
1859 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1860 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1861 uint8_t **src= h->ref_list[list][refn].data;
/* Luma: the row offset varies with mb_x&3; the +64 moves the address 64 bytes further. */
1862 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1863 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma: the stride argument is the distance between the two chroma planes. */
1864 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1865 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Motion compensation of the current inter macroblock.
 * Calls mc_part once per partition according to the macroblock type
 * (16x16, 16x8, 8x16) or, for 8x8 macroblocks, per sub-partition of each
 * 8x8 block (8x8, 8x4, 4x8, 4x4).  List 0 is prefetched before and list 1
 * after the compensation.
 * NOTE(review): the loop headers over i and j, the definition of n, the
 * else lines and the closing braces are not shown in this listing.
 */
1869 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1870 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1871 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1872 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1873 MpegEncContext * const s = &h->s;
1874 const int mb_xy= h->mb_xy;
1875 const int mb_type= s->current_picture.mb_type[mb_xy];
1877 assert(IS_INTER(mb_type));
1879 prefetch_motion(h, 0);
/* One 16x16 partition. */
1881 if(IS_16X16(mb_type)){
1882 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1883 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1884 &weight_op[0], &weight_avg[0],
1885 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* Two 16x8 partitions: blocks 0 and 8, the second at y offset 4. */
1886 }else if(IS_16X8(mb_type)){
1887 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1891 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1892 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1893 &weight_op[1], &weight_avg[1],
1894 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* Two 8x16 partitions: blocks 0 and 4, the second at x offset 4. */
1895 }else if(IS_8X16(mb_type)){
1896 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1900 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1901 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1902 &weight_op[2], &weight_avg[2],
1903 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* Remaining case: four 8x8 blocks, each split according to its own sub type. */
1907 assert(IS_8X8(mb_type));
1910 const int sub_mb_type= h->sub_mb_type[i];
/* Offsets of 8x8 block i: 0 or 4 horizontally (bit 0 of i) and vertically (bit 1 of i). */
1912 int x_offset= (i&1)<<2;
1913 int y_offset= (i&2)<<1;
1915 if(IS_SUB_8X8(sub_mb_type)){
1916 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1917 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1918 &weight_op[3], &weight_avg[3],
1919 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1920 }else if(IS_SUB_8X4(sub_mb_type)){
1921 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1926 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1927 &weight_op[4], &weight_avg[4],
1928 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1929 }else if(IS_SUB_4X8(sub_mb_type)){
1930 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1934 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1935 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1936 &weight_op[5], &weight_avg[5],
1937 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1940 assert(IS_SUB_4X4(sub_mb_type));
/* Four 4x4 blocks: j selects the column (bit 0) and the row (bit 1) inside the 8x8 block. */
1942 int sub_x_offset= x_offset + 2*(j&1);
1943 int sub_y_offset= y_offset + (j&2);
1944 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1945 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1946 &weight_op[6], &weight_avg[6],
1947 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1953 prefetch_motion(h, 1);
/*
 * Builds the file-level VLC tables with init_vlc: the chroma DC coefficient
 * token table, the coefficient token tables, the chroma DC and regular
 * total_zeros tables, the run tables and run7_vlc (built from row 6 of the
 * run length/bits arrays).  The length and bits arrays are read with an
 * element stride and size of one byte.
 * NOTE(review): the check of the static done flag and most loop headers are
 * not shown in this listing.
 */
1956 static av_cold void decode_init_vlc(void){
1957 static int done = 0;
1963 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1964 &chroma_dc_coeff_token_len [0], 1, 1,
1965 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1968 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1969 &coeff_token_len [i][0], 1, 1,
1970 &coeff_token_bits[i][0], 1, 1, 1);
1974 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1975 &chroma_dc_total_zeros_len [i][0], 1, 1,
1976 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1978 for(i=0; i<15; i++){
1979 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1980 &total_zeros_len [i][0], 1, 1,
1981 &total_zeros_bits[i][0], 1, 1, 1);
1985 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1986 &run_len [i][0], 1, 1,
1987 &run_bits[i][0], 1, 1, 1);
1989 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1990 &run_len [6][0], 1, 1,
1991 &run_bits[6][0], 1, 1, 1);
/*
 * Releases the per-context tables with av_freep (which also resets the
 * pointers), the stored SPS/PPS buffers, and the top_borders and
 * obmc_scratchpad buffers of every thread context.
 * NOTE(review): no NULL check of hx is visible before it is dereferenced;
 * confirm that one exists in the lines missing from this listing.
 */
1995 static void free_tables(H264Context *h){
1998 av_freep(&h->intra4x4_pred_mode);
1999 av_freep(&h->chroma_pred_mode_table);
2000 av_freep(&h->cbp_table);
2001 av_freep(&h->mvd_table[0]);
2002 av_freep(&h->mvd_table[1]);
2003 av_freep(&h->direct_table);
2004 av_freep(&h->non_zero_count);
/* slice_table is cleared alongside slice_table_base (presumably it points into that allocation - confirm). */
2005 av_freep(&h->slice_table_base);
2006 h->slice_table= NULL;
2008 av_freep(&h->mb2b_xy);
2009 av_freep(&h->mb2b8_xy);
2011 for(i = 0; i < MAX_SPS_COUNT; i++)
2012 av_freep(h->sps_buffers + i);
2014 for(i = 0; i < MAX_PPS_COUNT; i++)
2015 av_freep(h->pps_buffers + i);
/* Per-thread buffers, one context per configured thread. */
2017 for(i = 0; i < h->s.avctx->thread_count; i++) {
2018 hx = h->thread_context[i];
2020 av_freep(&hx->top_borders[1]);
2021 av_freep(&hx->top_borders[0]);
2022 av_freep(&hx->s.obmc_scratchpad);
/**
 * Fills the two 8x8 dequantisation tables (h->dequant8_coeff[0..1]) for
 * all 52 values of q from the PPS 8x8 scaling matrices.
 *
 * @param h decoder context; reads h->pps.scaling_matrix8 and writes
 *          h->dequant8_coeff / h->dequant8_buffer
 */
2026 static void init_dequant8_coeff_table(H264Context *h){
// Coefficient positions are transposed whenever the selected 8x8 IDCT is
// not the C reference implementation.
2028 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2029 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2030 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2032 for(i=0; i<2; i++ ){
// If both scaling matrices are byte-identical, table 1 aliases buffer 0
// instead of being computed again.
2033 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2034 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2038 for(q=0; q<52; q++){
2039 int shift = ff_div6[q];
2040 int idx = ff_rem6[q];
// Entry = base coefficient (selected by idx and position x) times the
// scaling-matrix entry, shifted left by `shift`. With transpose set, the
// destination index swaps the low and high 3 bits of x.
2042 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2043 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2044 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Fills the six 4x4 dequantisation tables (h->dequant4_coeff[0..5]) for
 * all 52 values of q from the PPS 4x4 scaling matrices.
 *
 * @param h decoder context; reads h->pps.scaling_matrix4 and writes
 *          h->dequant4_coeff / h->dequant4_buffer
 */
2049 static void init_dequant4_coeff_table(H264Context *h){
// Coefficient positions are transposed whenever the selected 4x4 IDCT is
// not the C reference implementation.
2051 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2052 for(i=0; i<6; i++ ){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// A matrix that is byte-identical to matrix j reuses buffer j.
2055 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2056 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2063 for(q=0; q<52; q++){
// Unlike the 8x8 variant, the shift carries an extra +2.
2064 int shift = ff_div6[q] + 2;
2065 int idx = ff_rem6[q];
// Entry = base coefficient (selected by idx and position x) times the
// scaling-matrix entry, shifted left by `shift`. With transpose set, the
// destination index swaps the low and high 2 bits of x.
2067 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2068 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2069 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Builds the dequantisation tables for the current parameter sets:
 * the 4x4 tables always, the 8x8 tables only when
 * h->pps.transform_8x8_mode is set.
 *
 * @param h decoder context
 */
2074 static void init_dequant_tables(H264Context *h){
2076 init_dequant4_coeff_table(h);
2077 if(h->pps.transform_8x8_mode)
2078 init_dequant8_coeff_table(h);
// With SPS transform_bypass enabled, the q == 0 entries of the tables are
// overwritten with the constant 1<<6.
2079 if(h->sps.transform_bypass){
2082 h->dequant4_coeff[i][0][x] = 1<<6;
2083 if(h->pps.transform_8x8_mode)
2086 h->dequant8_coeff[i][0][x] = 1<<6;
2093 * needs width/height
/**
 * Allocates (zero-initialised, via CHECKED_ALLOCZ) the per-macroblock tables
 * of the context and fills the macroblock-index to block-index maps.
 * The sizes are derived from s->mb_stride and s->mb_height.
 *
 * @param h decoder context
 */
2095 static int alloc_tables(H264Context *h){
2096 MpegEncContext * const s = &h->s;
// One extra macroblock row beyond mb_height is included in every table size.
2097 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2100 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2106 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2107 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2108 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2109 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// Every slice_table byte starts out as (uint8_t)-1. slice_table itself is
// an offset view into slice_table_base (2 rows + 1 entry in), so indices
// slightly before the first macroblock still land inside the allocation.
2111 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2112 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2114 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2115 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// Precompute, for every macroblock (x, y), the index of its first entry in
// arrays laid out with b_stride (4 entries per macroblock per axis) and
// with b8_stride (2 entries per macroblock per axis).
2116 for(y=0; y<s->mb_height; y++){
2117 for(x=0; x<s->mb_width; x++){
2118 const int mb_xy= x + y*s->mb_stride;
2119 const int b_xy = 4*x + 4*y*h->b_stride;
2120 const int b8_xy= 2*x + 2*y*h->b8_stride;
2122 h->mb2b_xy [mb_xy]= b_xy;
2123 h->mb2b8_xy[mb_xy]= b8_xy;
// The scratchpad is allocated later, in frame_start().
2127 s->obmc_scratchpad = NULL;
// Build the dequantisation tables only if none have been set up yet.
2129 if(!h->dequant4_coeff[0])
2130 init_dequant_tables(h);
2139 * Mimic alloc_tables(), but for every context thread.
/**
 * Makes dst use the same tables as src by copying the table pointers
 * (no table data is duplicated), then initialises the parts of dst that
 * are not shared.
 *
 * @param dst context receiving the table pointers
 * @param src context that owns the tables
 */
2141 static void clone_tables(H264Context *dst, H264Context *src){
2142 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2143 dst->non_zero_count = src->non_zero_count;
2144 dst->slice_table = src->slice_table;
2145 dst->cbp_table = src->cbp_table;
2146 dst->mb2b_xy = src->mb2b_xy;
2147 dst->mb2b8_xy = src->mb2b8_xy;
2148 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2149 dst->mvd_table[0] = src->mvd_table[0];
2150 dst->mvd_table[1] = src->mvd_table[1];
2151 dst->direct_table = src->direct_table;
// Not shared: frame_start() allocates a scratchpad for each thread context
// whose pointer is still NULL.
2153 dst->s.obmc_scratchpad = NULL;
// dst gets its own prediction function table, set up for src's codec id.
2154 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2159 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocates the two zero-initialised top_borders rows of this context.
 * Each row holds 16+8+8 bytes per macroblock column; backup_mb_border()
 * stores luma at offsets 0..15, Cb at 16..23 and Cr at 24..31.
 *
 * @param h decoder (thread) context
 * @return -1 on the failure path shown below
 */
2161 static int context_init(H264Context *h){
2162 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2167 return -1; // free_tables will clean up for us
/**
 * Initialisation shared by users of H264Context: copies the picture size
 * and codec id from the AVCodecContext, sets up the intra prediction
 * functions and resets the scaling matrices.
 *
 * @param h decoder context; h->s.avctx must already be set
 */
2170 static av_cold void common_init(H264Context *h){
2171 MpegEncContext * const s = &h->s;
2173 s->width = s->avctx->width;
2174 s->height = s->avctx->height;
2175 s->codec_id= s->avctx->codec->id;
2177 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1: no PPS has had dequantisation tables built for it yet.
// NOTE(review): meaning inferred from the field name -- confirm.
2179 h->dequant_coeff_pps= -1;
2180 s->unrestricted_mv=1;
2181 s->decode=1; //FIXME
// Every entry of all six 4x4 and both 8x8 scaling matrices is set to 16.
2183 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2184 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * Decoder init callback: sets decoder defaults on the embedded
 * MpegEncContext and configures it for H.264 output.
 *
 * @param avctx codec context; avctx->priv_data is the H264Context
 */
2187 static av_cold int decode_init(AVCodecContext *avctx){
2188 H264Context *h= avctx->priv_data;
2189 MpegEncContext * const s = &h->s;
2191 MPV_decode_defaults(s);
2196 s->out_format = FMT_H264;
2197 s->workaround_bugs= avctx->workaround_bugs;
2200 // s->decode_mb= ff_h263_decode_mb;
2201 s->quarter_sample = 1;
2203 avctx->pix_fmt= PIX_FMT_YUV420P;
// Extradata whose first byte equals 1 is handled specially.
// NOTE(review): presumably this marks avcC-style (length-prefixed NAL)
// extradata -- confirm against the branch body.
2207 if(avctx->extradata_size > 0 && avctx->extradata &&
2208 *(char *)avctx->extradata == 1){
// The main context doubles as thread context 0.
2215 h->thread_context[0] = h;
/**
 * Per-frame setup: starts the frame in the MpegEncContext layer, computes
 * the block offset table from the current line sizes, makes sure every
 * thread context has a scratchpad, and optionally resets the slice table.
 *
 * @param h decoder context
 */
2219 static int frame_start(H264Context *h){
2220 MpegEncContext * const s = &h->s;
2223 if(MPV_frame_start(s, s->avctx) < 0)
2225 ff_er_frame_start(s);
2227 * MPV_frame_start uses pict_type to derive key_frame.
2228 * This is incorrect for H.264; IDR markings must be used.
2229 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2230 * See decode_nal_units().
2232 s->current_picture_ptr->key_frame= 0;
2234 assert(s->linesize && s->uvlinesize);
// Luma: byte offset of each of the 16 4x4 blocks inside the macroblock,
// derived from its position in the scan8 layout. Entries [0..15] step rows
// by 4*linesize; entries [24..39] step rows by 8*linesize and are the set
// hl_decode_mb_internal() selects for field macroblocks.
2236 for(i=0; i<16; i++){
2237 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2238 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// Chroma: the same computation with uvlinesize; the entries at 16+i and
// 20+i (and their counterparts at 24+16+i and 24+20+i) share one value.
2241 h->block_offset[16+i]=
2242 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243 h->block_offset[24+16+i]=
2244 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247 /* can't be in alloc_tables because linesize isn't known there.
2248 * FIXME: redo bipred weight to not require extra buffer? */
// NOTE(review): the av_malloc() result is stored without a NULL check in
// the lines visible here -- confirm that allocation failure is handled.
2249 for(i = 0; i < s->avctx->thread_count; i++)
2250 if(!h->thread_context[i]->s.obmc_scratchpad)
2251 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2253 /* some macroblocks will be accessed before they're available */
// Only for MBAFF frames or multi-threaded decoding: set the slice_table
// bytes to (uint8_t)-1, the same fill value alloc_tables() uses.
2254 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2255 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2257 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves border samples of the macroblock at column s->mb_x into
 * h->left_border[] and h->top_borders[0][s->mb_x].
 *
 * left_border layout written here: [0] and [1..16] luma, [17] and
 * [18..25] Cb, [17+9] and [17+9+1 ..] Cr; index 0 of each group is taken
 * from the previously stored top border rather than from the picture.
 *
 * @param src_y,src_cb,src_cr pointers to the macroblock in each plane
 * @param linesize,uvlinesize line strides of the luma / chroma planes
 * @param simple when non-zero the chroma part is always executed; the
 *               CODEC_FLAG_GRAY test is not consulted
 */
2261 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2262 MpegEncContext * const s = &h->s;
// The chroma pointers are moved up by one line, so row index i below
// addresses line i-1 of the macroblock.
2266 src_cb -= uvlinesize;
2267 src_cr -= uvlinesize;
2269 // There are two lines saved, the line above the top macroblock of a pair,
2270 // and the line above the bottom macroblock
// Luma: corner from byte 15 of the old top border, then byte 15 of each
// of 16 picture rows.
2271 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2272 for(i=1; i<17; i++){
2273 h->left_border[i]= src_y[15+i* linesize];
// New luma top border: 16 bytes of one picture row, copied as two
// 64-bit words.
2276 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2277 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2279 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
// Chroma: same scheme with 8-byte-wide rows (byte 7 is the last column).
2280 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2281 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2283 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2284 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2286 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2287 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Applies the XCHG macro between the saved borders (h->left_border,
 * h->top_borders[0]) and the picture samples directly left of and above
 * the current macroblock. hl_decode_mb_internal() calls it with xchg=1
 * before intra prediction and with xchg=0 afterwards.
 *
 * @param src_y,src_cb,src_cr pointers to the macroblock in each plane
 * @param linesize,uvlinesize line strides of the luma / chroma planes
 * @param xchg   forwarded as the last XCHG argument for the left column and
 *               for the first 8 luma top bytes; all other XCHG uses pass
 *               the constant 1.
 *               NOTE(review): the macro body decides what this flag
 *               means -- confirm there.
 * @param simple when non-zero the chroma part is always executed
 */
2291 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2292 MpegEncContext * const s = &h->s;
// deblocking_filter == 2: a neighbour counts only if it has the same
// slice_table value as the current macroblock; otherwise it counts as soon
// as it lies inside the picture.
2299 if(h->deblocking_filter == 2) {
2301 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2302 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2304 deblock_left = (s->mb_x > 0);
2305 deblock_top = (s->mb_y > 0);
// Step back to the sample above-left of the macroblock in every plane.
2308 src_y -= linesize + 1;
2309 src_cb -= uvlinesize + 1;
2310 src_cr -= uvlinesize + 1;
// The left-column loops below start at row 1 instead of row 0 when
// deblock_top is 0, leaving the corner sample alone.
2312 #define XCHG(a,b,t,xchg)\
2319 for(i = !deblock_top; i<17; i++){
2320 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Luma row above the macroblock: 16 bytes of this column, plus the first
// 8 bytes of the next column's border when there is a next column.
2325 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2326 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2327 if(s->mb_x+1 < s->mb_width){
2328 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2332 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2334 for(i = !deblock_top; i<9; i++){
2335 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2336 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2340 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2341 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Macroblock-pair variant of backup_mb_border(): saves border samples of a
 * vertical macroblock pair (32 luma rows, 16 chroma rows) into
 * h->left_border[] and into both h->top_borders[0] and h->top_borders[1]
 * at column s->mb_x.
 *
 * left_border layout written here: [0..33] luma, [34..51] Cb,
 * [34+18 ..] Cr; the first two entries of each group come from the
 * previously stored top borders.
 *
 * @param src_y,src_cb,src_cr pointers to the top macroblock of the pair
 * @param linesize,uvlinesize line strides of the luma / chroma planes
 */
2346 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2347 MpegEncContext * const s = &h->s;
// All pointers are moved up by two lines, so row index i below addresses
// line i-2 of the pair.
2350 src_y -= 2 * linesize;
2351 src_cb -= 2 * uvlinesize;
2352 src_cr -= 2 * uvlinesize;
2354 // There are two lines saved, the line above the top macroblock of a pair,
2355 // and the line above the bottom macroblock
2356 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2357 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2358 for(i=2; i<34; i++){
2359 h->left_border[i]= src_y[15+i* linesize];
// The last two luma rows of the pair become the new top borders
// (row 32 -> top_borders[0], row 33 -> top_borders[1]).
2362 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2363 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2364 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2365 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2367 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
// Chroma: same scheme with 8-byte-wide rows and 16 rows per pair.
2368 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2369 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2370 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2371 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2372 for(i=2; i<18; i++){
2373 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2374 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2376 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2377 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2378 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2379 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * Macroblock-pair variant of xchg_mb_border(): applies the XCHG macro
 * between the saved borders (h->left_border, h->top_borders[0] and [1])
 * and the picture samples left of and in the two lines above a
 * macroblock pair.
 *
 * @param src_y,src_cb,src_cr pointers to the top macroblock of the pair
 * @param linesize,uvlinesize line strides of the luma / chroma planes
 * @param xchg forwarded as the last XCHG argument for the left column and
 *             for the first 8 luma bytes of each top line; all other XCHG
 *             uses pass the constant 1.
 *             NOTE(review): the macro body decides what this flag
 *             means -- confirm there.
 */
2383 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2384 MpegEncContext * const s = &h->s;
// A pair spans two macroblock rows, hence the mb_y > 1 test for "top".
2387 int deblock_left = (s->mb_x > 0);
2388 int deblock_top = (s->mb_y > 1);
2390 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// Step back two lines and one sample in every plane.
2392 src_y -= 2 * linesize + 1;
2393 src_cb -= 2 * uvlinesize + 1;
2394 src_cr -= 2 * uvlinesize + 1;
// The left-column loops below start at row 2 instead of row 0 when
// deblock_top is 0, skipping the two rows above the pair.
2396 #define XCHG(a,b,t,xchg)\
2403 for(i = (!deblock_top)<<1; i<34; i++){
2404 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2409 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2410 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2411 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2412 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2413 if(s->mb_x+1 < s->mb_width){
2414 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2415 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2419 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2421 for(i = (!deblock_top) << 1; i<18; i++){
2422 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2423 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2427 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2428 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2429 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2430 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: prediction (intra, or motion compensation via
 * hl_motion()), residual addition for luma and chroma, and finally
 * deblocking when h->deblocking_filter is set.
 *
 * The function is av_always_inline and is only called with a constant
 * `simple` (see hl_decode_mb_simple() / hl_decode_mb_complex()).
 *
 * @param h      decoder context
 * @param simple non-zero: skip the tests for field macroblocks, MBAFF,
 *               intra PCM, gray-only decoding, encoding and non-H.264
 *               codec ids
 */
2435 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2436 MpegEncContext * const s = &h->s;
2437 const int mb_x= s->mb_x;
2438 const int mb_y= s->mb_y;
2439 const int mb_xy= h->mb_xy;
2440 const int mb_type= s->current_picture.mb_type[mb_xy];
2441 uint8_t *dest_y, *dest_cb, *dest_cr;
2442 int linesize, uvlinesize /*dct_offset*/;
2444 int *block_offset = &h->block_offset[0];
// bottom: set for the lower macroblock of a vertical pair (odd mb_y).
2445 const unsigned int bottom = mb_y & 1;
2446 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2447 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2448 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Top-left sample of this macroblock in the luma and both chroma planes.
2450 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2451 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2452 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2454 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2455 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblock: rows are two picture lines apart, the second set of
// block offsets (from index 24, built with doubled row steps in
// frame_start()) is used, and for the bottom macroblock of the pair the
// destination is moved up by 15 luma / 7 chroma lines.
2457 if (!simple && MB_FIELD) {
2458 linesize = h->mb_linesize = s->linesize * 2;
2459 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2460 block_offset = &h->block_offset[24];
2461 if(mb_y&1){ //FIXME move out of this func?
2462 dest_y -= s->linesize*15;
2463 dest_cb-= s->uvlinesize*7;
2464 dest_cr-= s->uvlinesize*7;
// Rewrite the cached reference indices of every list this macroblock
// uses to (16+ref)^(mb_y&1).
2468 for(list=0; list<h->list_count; list++){
2469 if(!USES_LIST(mb_type, list))
2471 if(IS_16X16(mb_type)){
2472 int8_t *ref = &h->ref_cache[list][scan8[0]];
2473 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2475 for(i=0; i<16; i+=4){
2476 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2477 int ref = h->ref_cache[list][scan8[i]];
2479 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
// Frame macroblock: plain picture strides.
2485 linesize = h->mb_linesize = s->linesize;
2486 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2487 // dct_offset = s->linesize * 16;
// Select the residual-add functions: plain pixel addition for transform
// bypass, otherwise the 8x8 or 4x4 IDCT pair depending on the macroblock.
2490 if(transform_bypass){
2492 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2493 }else if(IS_8x8DCT(mb_type)){
2494 idct_dc_add = s->dsp.h264_idct8_dc_add;
2495 idct_add = s->dsp.h264_idct8_add;
2497 idct_dc_add = s->dsp.h264_idct_dc_add;
2498 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblocking + intra: run xchg_pair_border() (xchg=1) on the pair
// before predicting, unless this is the bottom macroblock and the one
// above it is intra as well.
2501 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2502 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2503 int mbt_y = mb_y&~1;
2504 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2505 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2506 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2507 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// Intra PCM: the samples are copied straight from h->mb, one 4x4 block at
// a time (blocks 0..15 luma, 16..19 Cb, 20..23 Cr).
2510 if (!simple && IS_INTRA_PCM(mb_type)) {
2513 // The pixels are stored in h->mb array in the same order as levels,
2514 // copy them in output in the correct order.
2515 for(i=0; i<16; i++) {
2516 for (y=0; y<4; y++) {
2517 for (x=0; x<4; x++) {
2518 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2522 for(i=16; i<16+4; i++) {
2523 for (y=0; y<4; y++) {
2524 for (x=0; x<4; x++) {
2525 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2529 for(i=20; i<20+4; i++) {
2530 for (y=0; y<4; y++) {
2531 for (x=0; x<4; x++) {
2532 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra prediction. In the non-MBAFF case with deblocking enabled,
// xchg_mb_border() is called with xchg=1 here and again with xchg=0 once
// prediction is done.
2537 if(IS_INTRA(mb_type)){
2538 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2539 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2541 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2542 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2543 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2546 if(IS_INTRA4x4(mb_type)){
2547 if(simple || !s->encoding){
// 8x8 transform: four 8x8 blocks, each predicted and then immediately
// given its residual so later blocks predict from finished samples.
2548 if(IS_8x8DCT(mb_type)){
2549 for(i=0; i<16; i+=4){
2550 uint8_t * const ptr= dest_y + block_offset[i];
2551 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2552 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2553 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2554 (h->topright_samples_available<<i)&0x4000, linesize);
// A block whose only coefficient is a non-zero DC takes the DC-only add.
2556 if(nnz == 1 && h->mb[i*16])
2557 idct_dc_add(ptr, h->mb + i*16, linesize);
2559 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 transform: sixteen 4x4 blocks, same predict-then-add order.
2563 for(i=0; i<16; i++){
2564 uint8_t * const ptr= dest_y + block_offset[i];
2566 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// These two modes read top-right samples; when those are unavailable,
// the sample at ptr[3 - linesize] is replicated four times instead.
2569 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2570 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2571 assert(mb_y || linesize <= block_offset[i]);
2572 if(!topright_avail){
2573 tr= ptr[3 - linesize]*0x01010101;
2574 topright= (uint8_t*) &tr;
2576 topright= ptr + 4 - linesize;
2580 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2581 nnz = h->non_zero_count_cache[ scan8[i] ];
2584 if(nnz == 1 && h->mb[i*16])
2585 idct_dc_add(ptr, h->mb + i*16, linesize);
2587 idct_add(ptr, h->mb + i*16, linesize);
2589 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: predict the whole luma block, then dequantise and
// inverse-transform the luma DC coefficients (H.264 or SVQ3 variant).
2594 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2596 if(!transform_bypass)
2597 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2599 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2601 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2602 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// Inter macroblock: motion-compensated prediction.
2604 hl_motion(h, dest_y, dest_cb, dest_cr,
2605 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2606 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2607 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Luma residual for everything except intra 4x4 (which already added its
// residual block by block above).
2611 if(!IS_INTRA4x4(mb_type)){
2613 if(IS_INTRA16x16(mb_type)){
2614 for(i=0; i<16; i++){
2615 if(h->non_zero_count_cache[ scan8[i] ])
2616 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2617 else if(h->mb[i*16])
2618 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// Step 4 visits the four 8x8 blocks; step 1 visits all sixteen 4x4 blocks.
2621 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2622 for(i=0; i<16; i+=di){
2623 int nnz = h->non_zero_count_cache[ scan8[i] ];
2625 if(nnz==1 && h->mb[i*16])
2626 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2628 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2633 for(i=0; i<16; i++){
2634 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2635 uint8_t * const ptr= dest_y + block_offset[i];
2636 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: blocks 16..23; (i&4)>>2 selects Cb (0) or Cr (1).
2642 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2643 uint8_t *dest[2] = {dest_cb, dest_cr};
2644 if(transform_bypass){
2645 idct_add = idct_dc_add = s->dsp.add_pixels4;
2647 idct_add = s->dsp.h264_idct_add;
2648 idct_dc_add = s->dsp.h264_idct_dc_add;
// Chroma DC: Cb uses dequant table 1 (intra) or 4 (inter), Cr uses 2 or 5.
2649 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2650 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2653 for(i=16; i<16+8; i++){
2654 if(h->non_zero_count_cache[ scan8[i] ])
2655 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2656 else if(h->mb[i*16])
2657 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2660 for(i=16; i<16+8; i++){
2661 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2662 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2663 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking.
2669 if(h->deblocking_filter) {
// MBAFF: nothing is filtered until the bottom macroblock of the pair has
// been reconstructed; then both macroblocks are filtered, top first.
// Note that mb_y and mb_xy are shadowed here and refer to the TOP
// macroblock of the pair.
2670 if (!simple && FRAME_MBAFF) {
2671 //FIXME try deblocking one mb at a time?
2672 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2673 const int mb_y = s->mb_y - 1;
2674 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2675 const int mb_xy= mb_x + mb_y*s->mb_stride;
2676 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2677 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2678 if (!bottom) return;
2679 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2680 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2681 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2683 if(IS_INTRA(mb_type_top | mb_type_bottom))
2684 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2686 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// Temporarily make the top macroblock the "current" one in the context,
// filter it, then restore the context and filter the bottom macroblock.
2689 s->mb_y--; h->mb_xy -= s->mb_stride;
2690 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2691 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2692 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2693 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2694 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2696 s->mb_y++; h->mb_xy += s->mb_stride;
2697 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2698 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2699 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2700 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2701 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// Non-MBAFF: save the unfiltered borders first, then filter this macroblock.
2703 tprintf(h->s.avctx, "call filter_mb\n");
2704 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2705 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2706 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2712 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Instantiates hl_decode_mb_internal() with simple = 1. */
2714 static void hl_decode_mb_simple(H264Context *h){
2715 hl_decode_mb_internal(h, 1);
2719 * Process a macroblock; this handles edge cases, such as interlacing.
/* Instantiates hl_decode_mb_internal() with simple = 0; marked av_noinline. */
2721 static void av_noinline hl_decode_mb_complex(H264Context *h){
2722 hl_decode_mb_internal(h, 0);
/**
 * Reconstructs the current macroblock by dispatching to
 * hl_decode_mb_complex() or hl_decode_mb_simple().
 *
 * @param h decoder context
 */
2725 static void hl_decode_mb(H264Context *h){
2726 MpegEncContext * const s = &h->s;
2727 const int mb_xy= h->mb_xy;
2728 const int mb_type= s->current_picture.mb_type[mb_xy];
// The complex path is needed for MBAFF frames, field macroblocks, intra
// PCM, non-H.264 codec ids, gray-only decoding, encoding, and whenever
// ENABLE_SMALL is set.
2729 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2730 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
// NOTE(review): presumably an early exit when reconstruction is not
// wanted -- the statement under this condition is not shown here; confirm.
2732 if(ENABLE_H264_ENCODER && !s->decode)
2736 hl_decode_mb_complex(h);
2737 else hl_decode_mb_simple(h);
/**
 * Turns a Picture describing a frame into a view of one of its fields,
 * in place: every plane's line stride is doubled and, for the bottom
 * field, every plane pointer is advanced by one line.
 *
 * @param pic    picture to modify
 * @param parity field to select; also stored into pic->reference
 */
2740 static void pic_as_field(Picture *pic, const int parity){
2742 for (i = 0; i < 4; ++i) {
2743 if (parity == PICT_BOTTOM_FIELD)
2744 pic->data[i] += pic->linesize[i];
// Loop-invariant store: the same value is written on each of the four passes.
2745 pic->reference = parity;
2746 pic->linesize[i] *= 2;
/**
 * Helper for the field reference list construction: tests whether src is
 * marked as a reference for the given parity and prepares dest as a field
 * view of it.
 *
 * @param dest   output picture entry
 * @param src    source picture (frame or field pair)
 * @param parity field parity to extract
 * @param id_add value added to dest->pic_id
 */
2750 static int split_field_copy(Picture *dest, Picture *src,
2751 int parity, int id_add){
// 1 when src->reference contains the requested parity bit, else 0.
2752 int match = !!(src->reference & parity);
2756 pic_as_field(dest, parity);
2758 dest->pic_id += id_add;
2765 * Split one reference list into field parts, interleaving by parity
2766 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2767 * set to look at the actual start of data for that field.
2769 * @param dest output list
2770 * @param dest_len maximum number of fields to put in dest
2771 * @param src the source reference list containing fields and/or field pairs
2772 * (aka short_ref/long_ref, or
2773 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2774 * @param src_len number of Picture's in source (pairs and unmatched fields)
2775 * @param parity the parity of the picture being decoded/needing
2776 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2777 * @return number of fields placed in dest
/* Walks src with two cursors (same_i for same-parity fields, opp_i for
 * opposite-parity fields) and alternates between them while filling dest. */
2779 static int split_field_half_ref_list(Picture *dest, int dest_len,
2780 Picture *src, int src_len, int parity){
// Start by looking for a field of the same parity.
2781 int same_parity = 1;
// out_i only advances when split_field_copy() reported a field as output.
2787 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2788 if (same_parity && same_i < src_len) {
2789 field_output = split_field_copy(dest + out_i, src + same_i,
// After a same-parity field was output, the next one wanted is of the
// opposite parity; otherwise keep looking for the same parity.
2791 same_parity = !field_output;
2794 } else if (opp_i < src_len) {
// PICT_FRAME - parity is the opposite field's parity value.
2795 field_output = split_field_copy(dest + out_i, src + opp_i,
2796 PICT_FRAME - parity, 0);
2797 same_parity = field_output;
2809 * Split the reference frame list into a reference field list.
2810 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2811 * The input list contains both reference field pairs and
2812 * unmatched reference fields; it is ordered as spec describes
2813 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2814 * unmatched field pairs are also present. Conceptually this is equivalent
2815 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2817 * @param dest output reference list where ordered fields are to be placed
2818 * @param dest_len max number of fields to place at dest
2819 * @param src source reference list, as described above
2820 * @param src_len number of pictures (pairs and unmatched fields) in src
2821 * @param parity parity of field being currently decoded
2822 * (one of PICT_{TOP,BOTTOM}_FIELD)
2823 * @param long_i index into src array that holds first long reference picture,
2824 * or src_len if no long refs present.
/* Splits the first long_i entries of src and the remaining
 * src_len - long_i entries separately via split_field_half_ref_list()
 * and sums the number of fields produced. */
2826 static int split_field_ref_list(Picture *dest, int dest_len,
2827 Picture *src, int src_len,
2828 int parity, int long_i){
// Entries [0, long_i) of src.
2830 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
// Entries [long_i, src_len) of src.
2834 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2835 src_len - long_i, parity);
2840 * fills the default_ref_list.
/**
 * Builds h->default_ref_list[] for the current slice from h->short_ref[]
 * and h->long_ref[]. For field pictures the lists are first assembled in
 * temporary arrays and then split into fields with split_field_ref_list().
 *
 * @param h decoder context
 */
2842 static int fill_default_ref_list(H264Context *h){
2843 MpegEncContext * const s = &h->s;
2845 int smallest_poc_greater_than_current = -1;
2847 Picture sorted_short_ref[32];
2848 Picture field_entry_list[2][32];
2849 Picture *frame_list[2];
// Field picture: build into the local field_entry_list and accept
// references of any structure (structure_sel ORs all bits in).
// Frame picture: build directly into h->default_ref_list.
2851 if (FIELD_PICTURE) {
2852 structure_sel = PICT_FRAME;
2853 frame_list[0] = field_entry_list[0];
2854 frame_list[1] = field_entry_list[1];
2857 frame_list[0] = h->default_ref_list[0];
2858 frame_list[1] = h->default_ref_list[1];
2861 if(h->slice_type==FF_B_TYPE){
2868 /* sort frame according to poc in B slice */
// Selection sort: each pass picks the short-term reference with the
// smallest POC that is still greater than `limit`.
2869 for(out_i=0; out_i<h->short_ref_count; out_i++){
2871 int best_poc=INT_MAX;
2873 for(i=0; i<h->short_ref_count; i++){
2874 const int poc= h->short_ref[i]->poc;
2875 if(poc > limit && poc < best_poc){
2881 assert(best_i != INT_MIN);
2884 sorted_short_ref[out_i]= *h->short_ref[best_i];
2885 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// Remember the first sorted position whose POC is >= the current
// picture's POC.
2886 if (-1 == smallest_poc_greater_than_current) {
2887 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2888 smallest_poc_greater_than_current = out_i;
2893 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2895 // find the largest poc
// For each list, walk sorted_short_ref starting next to
// smallest_poc_greater_than_current; whenever j leaves the valid range
// it is re-seeded from that position.
2896 for(list=0; list<2; list++){
2899 int step= list ? -1 : 1;
2901 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2903 while(j<0 || j>= h->short_ref_count){
2904 if(j != -99 && step == (list ? -1 : 1))
2907 j= smallest_poc_greater_than_current + (step>>1);
// Skip entries that are not usable as a full frame reference (for field
// pictures structure_sel makes every entry pass).
2909 sel = sorted_short_ref[j].reference | structure_sel;
2910 if(sel != PICT_FRAME) continue;
// Short-term entries carry their frame_num as pic_id.
2911 frame_list[list][index ]= sorted_short_ref[j];
2912 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2914 short_len[list] = index;
// Long-term references follow; their pic_id is the long-term index.
2916 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2918 if(h->long_ref[i] == NULL) continue;
2919 sel = h->long_ref[i]->reference | structure_sel;
2920 if(sel != PICT_FRAME) continue;
2922 frame_list[ list ][index ]= *h->long_ref[i];
2923 frame_list[ list ][index++].pic_id= i;
2928 for(list=0; list<2; list++){
2930 len[list] = split_field_ref_list(h->default_ref_list[list],
2934 s->picture_structure,
2937 // swap the two first elements of L1 when L0 and L1 are identical
2938 if(list && len[0] > 1 && len[0] == len[1])
2939 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2941 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
// Zero any list entries beyond the ones that were filled.
2945 if(len[list] < h->ref_count[ list ])
2946 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// Non-B slices: only list 0, short-term references in h->short_ref order
// followed by long-term references.
2953 for(i=0; i<h->short_ref_count; i++){
2955 sel = h->short_ref[i]->reference | structure_sel;
2956 if(sel != PICT_FRAME) continue;
2957 frame_list[0][index ]= *h->short_ref[i];
2958 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2961 for(i = 0; i < 16; i++){
2963 if(h->long_ref[i] == NULL) continue;
2964 sel = h->long_ref[i]->reference | structure_sel;
2965 if(sel != PICT_FRAME) continue;
2966 frame_list[0][index ]= *h->long_ref[i];
2967 frame_list[0][index++].pic_id= i;
2971 index = split_field_ref_list(h->default_ref_list[0],
2972 h->ref_count[0], frame_list[0],
2973 index, s->picture_structure,
2976 if(index < h->ref_count[0])
2977 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
// Trace output of the resulting lists.
2980 for (i=0; i<h->ref_count[0]; i++) {
2981 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2983 if(h->slice_type==FF_B_TYPE){
2984 for (i=0; i<h->ref_count[1]; i++) {
2985 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2992 static void print_short_term(H264Context *h);
2993 static void print_long_term(H264Context *h);
2996 * Extract structure information about the picture described by pic_num in
2997 * the current decoding context (frame or field). Note that pic_num is
2998 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2999 * @param pic_num picture number for which to extract structure information
3000 * @param structure one of PICT_XXX describing structure of picture
3002 * @return frame number (short term) or long term index of picture
3003 * described by pic_num
/* Reports through *structure which picture structure pic_num refers to,
 * starting from the structure of the picture being decoded. */
3005 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3006 MpegEncContext * const s = &h->s;
// Default: same structure as the current picture.
3008 *structure = s->picture_structure;
3011 /* opposite field */
// XOR with PICT_FRAME flips between the two field parities.
3012 *structure ^= PICT_FRAME;
/**
 * Parse the reference picture list reordering syntax from the slice header
 * bitstream (s->gb) and build h->ref_list[] from h->default_ref_list[].
 *
 * Each list starts as a copy of the default list. When the reordering flag is
 * set, the requested short term or long term pictures are moved to successive
 * positions of the list. Entries that end up without picture data are
 * replaced by the current picture.
 *
 * NOTE(review): several lines of this function (local declarations, break and
 * return statements, closing braces) are not visible in this listing. The
 * caller tests the result with "< 0", so the error paths presumably return a
 * negative value - confirm against the complete file.
 */
3019 static int decode_ref_pic_list_reordering(H264Context *h){
3020 MpegEncContext * const s = &h->s;
3021 int list, index, pic_structure;
// Debug dump of the short term list (only prints with FF_DEBUG_MMCO set).
3023 print_short_term(h);
// Intra-only slices carry no reference lists, nothing to reorder.
3025 if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
3027 for(list=0; list<h->list_count; list++){
// Begin with the default ordering for this list.
3028 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// One flag bit per list tells whether reordering commands follow.
3030 if(get_bits1(&s->gb)){
// Prediction for the next picture number; commands are coded as differences.
3031 int pred= h->curr_pic_num;
// index is the list position the next selected picture is moved to.
3033 for(index=0; ; index++){
3034 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3035 unsigned int pic_id;
3037 Picture *ref = NULL;
// idc 3 terminates the command list for this reference list.
3039 if(reordering_of_pic_nums_idc==3)
// More commands than list entries: reject the bitstream.
3042 if(index >= h->ref_count[list]){
3043 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3047 if(reordering_of_pic_nums_idc<3){
// idc 0 and 1 address a short term picture by picture number difference.
3048 if(reordering_of_pic_nums_idc<2){
3049 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3052 if(abs_diff_pic_num > h->max_pic_num){
3053 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
// idc 0 subtracts the difference from the prediction, idc 1 adds it.
3057 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3058 else pred+= abs_diff_pic_num;
// Wrap into [0, max_pic_num); the mask relies on max_pic_num being a power of two.
3059 pred &= h->max_pic_num - 1;
// Split the picture number into a frame number and a field selection.
3061 frame_num = pic_num_extract(h, pred, &pic_structure);
// Search the short term list from its last entry towards the first.
3063 for(i= h->short_ref_count-1; i>=0; i--){
3064 ref = h->short_ref[i];
3065 assert(ref->reference);
3066 assert(!ref->long_ref);
3067 if(ref->data[0] != NULL &&
3068 ref->frame_num == frame_num &&
3069 (ref->reference & pic_structure) &&
3070 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
// idc 2 addresses a long term picture directly.
3077 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3079 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3082 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3085 ref = h->long_ref[long_idx];
3086 assert(!(ref && !ref->reference));
// Accept the long term picture only if the requested field is marked as reference.
3087 if(ref && (ref->reference & pic_structure)){
3088 ref->pic_id= pic_id;
3089 assert(ref->long_ref);
// No usable picture was found: log it and blank the list entry.
3097 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3098 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Look for the position at or after index that already holds this picture ...
3100 for(i=index; i+1<h->ref_count[list]; i++){
3101 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
// ... shift the entries between index and that position up by one ...
3104 for(; i > index; i--){
3105 h->ref_list[list][i]= h->ref_list[list][i-1];
// ... and place the selected picture at index.
3107 h->ref_list[list][index]= *ref;
3109 pic_as_field(&h->ref_list[list][index], pic_structure);
3113 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Any entry still lacking picture data is replaced by the current picture.
3119 for(list=0; list<h->list_count; list++){
3120 for(index= 0; index < h->ref_count[list]; index++){
3121 if(!h->ref_list[list][index].data[0])
3122 h->ref_list[list][index]= s->current_picture;
// B slices that do not use spatial direct prediction need the distance scale factors.
3126 if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
3127 direct_dist_scale_factor(h);
3128 direct_ref_list_init(h);
/**
 * Derive per-field reference entries from the frame reference lists.
 *
 * For frame entry i of each list, two field entries are written at indices
 * 16+2*i (top) and 16+2*i+1 (bottom) of the same list, and the explicit and
 * implicit prediction weights are duplicated to the same indices.
 *
 * NOTE(review): the local declarations, the inner "for j" loop headers and the
 * statement that initialises field[0] are not visible in this listing.
 */
3132 static void fill_mbaff_ref_list(H264Context *h){
3134 for(list=0; list<2; list++){ //FIXME try list_count
3135 for(i=0; i<h->ref_count[list]; i++){
3136 Picture *frame = &h->ref_list[list][i];
// The two field entries for frame i live at 16+2*i and 16+2*i+1.
3137 Picture *field = &h->ref_list[list][16+2*i];
// A field uses every second line of the frame, hence twice the line size.
3140 field[0].linesize[j] <<= 1;
3141 field[0].reference = PICT_TOP_FIELD;
3142 field[1] = field[0];
// The second field starts one frame line further into the plane.
3144 field[1].data[j] += frame->linesize[j];
3145 field[1].reference = PICT_BOTTOM_FIELD;
// Both fields inherit the weights and offsets of their frame.
3147 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3148 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3150 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3151 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Duplicate the implicit weights for the field entries as well.
// NOTE(review): the first index is bounded by ref_count[1] here, while
// implicit_weight_table() fills the table as [ref0][ref1] with ref0 bounded by
// ref_count[0] - confirm which index order is intended.
3155 for(j=0; j<h->ref_count[1]; j++){
3156 for(i=0; i<h->ref_count[0]; i++)
3157 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3158 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3159 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the explicit weighted prediction table from the slice header
 * bitstream (s->gb).
 *
 * Reads the luma and chroma log2 weight denominators, then for every entry of
 * each reference list an optional luma and an optional chroma weight/offset.
 * Entries without an explicit value get the default weight (1 << denominator)
 * and offset 0. h->use_weight_chroma is set when any chroma value differs from
 * its default.
 *
 * NOTE(review): the local declarations, the statements that set h->use_weight
 * for luma, the else branches and the return statement are not visible in
 * this listing.
 */
3163 static int pred_weight_table(H264Context *h){
3164 MpegEncContext * const s = &h->s;
3166 int luma_def, chroma_def;
3169 h->use_weight_chroma= 0;
// NOTE(review): both denominators are used as shift counts below without a
// visible range check - confirm that the bitstream value is bounded.
3170 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3171 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weights, i.e. a factor of 1.0 at the given denominators.
3172 luma_def = 1<<h->luma_log2_weight_denom;
3173 chroma_def = 1<<h->chroma_log2_weight_denom;
3175 for(list=0; list<2; list++){
3176 for(i=0; i<h->ref_count[list]; i++){
3177 int luma_weight_flag, chroma_weight_flag;
3179 luma_weight_flag= get_bits1(&s->gb);
3180 if(luma_weight_flag){
3181 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3182 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Detects an explicit value that differs from the default.
3183 if( h->luma_weight[list][i] != luma_def
3184 || h->luma_offset[list][i] != 0)
// No explicit luma weight coded: fall back to the defaults.
3187 h->luma_weight[list][i]= luma_def;
3188 h->luma_offset[list][i]= 0;
3191 chroma_weight_flag= get_bits1(&s->gb);
3192 if(chroma_weight_flag){
3195 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3196 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3197 if( h->chroma_weight[list][i][j] != chroma_def
3198 || h->chroma_offset[list][i][j] != 0)
3199 h->use_weight_chroma= 1;
// No explicit chroma weight coded: fall back to the defaults.
3204 h->chroma_weight[list][i][j]= chroma_def;
3205 h->chroma_offset[list][i][j]= 0;
// Only B slices carry a table for the second list.
3209 if(h->slice_type != FF_B_TYPE) break;
// Weighted prediction is active if either luma or chroma needs it.
3211 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Build the implicit bi-prediction weight table h->implicit_weight[ref0][ref1]
 * from the picture order counts of the current picture and of the entries of
 * both reference lists.
 *
 * NOTE(review): the local declarations, the statements that set h->use_weight,
 * the early return of the symmetric case and the branch structure around the
 * weight assignments are not visible in this listing.
 */
3215 static void implicit_weight_table(H264Context *h){
3216 MpegEncContext * const s = &h->s;
3218 int cur_poc = s->current_picture_ptr->poc;
// Special case: one reference per list and the current picture exactly midway
// between them in POC terms.
3220 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3221 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3223 h->use_weight_chroma= 0;
// General case: implicit weights with a fixed denominator of 1 << 5.
3228 h->use_weight_chroma= 2;
3229 h->luma_log2_weight_denom= 5;
3230 h->chroma_log2_weight_denom= 5;
3232 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3233 int poc0 = h->ref_list[0][ref0].poc;
3234 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3235 int poc1 = h->ref_list[1][ref1].poc;
// td: clipped POC distance between the two references.
3236 int td = av_clip(poc1 - poc0, -128, 127);
// tb: clipped POC distance from the list 0 reference to the current picture.
3238 int tb = av_clip(cur_poc - poc0, -128, 127);
// NOTE(review): this divides by td; the guard against td == 0 is not visible
// in this listing - confirm it exists in the complete file.
3239 int tx = (16384 + (FFABS(td) >> 1)) / td;
3240 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Scale factors outside [-64, 128] fall back to the equal weight 32.
3241 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3242 h->implicit_weight[ref0][ref1] = 32;
3244 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
// Equal weighting; presumably the td == 0 fallback.
3246 h->implicit_weight[ref0][ref1] = 32;
/**
 * Mark a picture as no longer needed for reference. The refmask
 * argument allows unreferencing of individual fields or the whole frame.
 * If the picture becomes entirely unreferenced, but is being held for
 * display purposes, it is marked as such.
 * @param refmask mask of the fields that stay referenced; it is bitwise
 *                anded with the reference marking of pic, so 0 unreferences
 *                the whole picture
 * @return non-zero if pic becomes entirely unreferenced (except possibly
 *         for display purposes), zero if one of the fields remains in
 *         reference
 */
/* Masks the reference marking of pic with refmask and, for a picture that is
 * still queued for output, stores DELAYED_PIC_REF in its reference field.
 * NOTE(review): the declaration of i, the return statements and the else
 * branches are not visible in this listing. */
3262 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// Keep only the reference bits selected by refmask; a non-zero result means
// part of the picture is still used for reference.
3264 if (pic->reference &= refmask) {
// The picture waiting to be output must be kept alive for display.
3267 if(pic == h->delayed_output_pic)
3268 pic->reference=DELAYED_PIC_REF;
// Same for pictures queued in delayed_pic[]; the scan stops at the first NULL entry.
3270 for(i = 0; h->delayed_pic[i]; i++)
3271 if(pic == h->delayed_pic[i]){
3272 pic->reference=DELAYED_PIC_REF;
3281 * instantaneous decoder refresh.
3283 static void idr(H264Context *h){
3286 for(i=0; i<16; i++){
3287 if (h->long_ref[i] != NULL) {
3288 unreference_pic(h, h->long_ref[i], 0);
3289 h->long_ref[i]= NULL;
3292 h->long_ref_count=0;
3294 for(i=0; i<h->short_ref_count; i++){
3295 unreference_pic(h, h->short_ref[i], 0);
3296 h->short_ref[i]= NULL;
3298 h->short_ref_count=0;
/**
 * Drop all pictures that are held for delayed output and clear the reference
 * marking of the current picture, then let ff_mpeg_flush() reset the generic
 * decoder state. Takes the codec context because the H264Context is reached
 * through avctx->priv_data.
 *
 * NOTE(review): the declaration of i and a statement at original line 3313
 * are not visible in this listing.
 */
3301 /* forget old pics after a seek */
3302 static void flush_dpb(AVCodecContext *avctx){
3303 H264Context *h= avctx->priv_data;
// Release every picture queued for delayed output.
3305 for(i=0; i<16; i++) {
3306 if(h->delayed_pic[i])
3307 h->delayed_pic[i]->reference= 0;
3308 h->delayed_pic[i]= NULL;
// Same for the picture selected as next output.
3310 if(h->delayed_output_pic)
3311 h->delayed_output_pic->reference= 0;
3312 h->delayed_output_pic= NULL;
// The picture being decoded is no longer a reference either.
3314 if(h->s.current_picture_ptr)
3315 h->s.current_picture_ptr->reference= 0;
// Forget any first field that was waiting for its second field.
3316 h->s.first_field= 0;
3317 ff_mpeg_flush(avctx);
/**
 * Find a Picture in the short term reference list by frame number.
 * @param frame_num frame number to search for
 * @param idx receives the index into h->short_ref at which the returned
 *            picture was found; undefined if no picture is found
 * @return pointer to the found picture, or NULL if no pic with the provided
 *         frame number is found
 */
/* Linear search of h->short_ref[0 .. short_ref_count-1] for a picture whose
 * frame_num equals the requested one.
 * NOTE(review): the declaration of i, the statements executed on a match and
 * the final return are not visible in this listing. */
3328 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3329 MpegEncContext * const s = &h->s;
3332 for(i=0; i<h->short_ref_count; i++){
3333 Picture *pic= h->short_ref[i];
// With FF_DEBUG_MMCO every inspected entry is logged (index, frame_num, pointer).
3334 if(s->avctx->debug&FF_DEBUG_MMCO)
3335 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
// The first entry with a matching frame number is the hit.
3336 if(pic->frame_num == frame_num) {
3345 * Remove a picture from the short term reference list by its index in
3346 * that list. This does no checking on the provided index; it is assumed
3347 * to be valid. Other list entries are shifted down.
3348 * @param i index into h->short_ref of picture to remove.
3350 static void remove_short_at_index(H264Context *h, int i){
3351 assert(i > 0 && i < h->short_ref_count);
3352 h->short_ref[i]= NULL;
3353 if (--h->short_ref_count)
3354 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3359 * @return the removed picture or NULL if an error occurs
/* Looks up the short term reference with the given frame number via
 * find_short() and removes it from the list via remove_short_at_index().
 * NOTE(review): the declarations of pic and i, the check of the find_short()
 * result and the return statement are not visible in this listing. */
3361 static Picture * remove_short(H264Context *h, int frame_num){
3362 MpegEncContext * const s = &h->s;
3366 if(s->avctx->debug&FF_DEBUG_MMCO)
3367 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
// find_short() stores the list position of the match in i.
3369 pic = find_short(h, frame_num, &i);
3371 remove_short_at_index(h, i);
3377 * Remove a picture from the long term reference list by its index in
3378 * that list. This does no checking on the provided index; it is assumed
3379 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3380 * @param i index into h->long_ref of picture to remove.
3382 static void remove_long_at_index(H264Context *h, int i){
3383 h->long_ref[i]= NULL;
3384 h->long_ref_count--;
3389 * @return the removed picture or NULL if an error occurs
/* Fetches the long term reference stored at index i and removes it from the
 * list via remove_long_at_index().
 * NOTE(review): the declaration of pic, the check for an empty slot and the
 * return statement are not visible in this listing. */
3391 static Picture * remove_long(H264Context *h, int i){
3394 pic= h->long_ref[i];
3396 remove_long_at_index(h, i);
3402 * print short term list
/* Debug helper: when FF_DEBUG_MMCO is set in avctx->debug, logs one line per
 * short term reference with its index, frame_num, poc and data[0] pointer.
 * NOTE(review): the declaration of i is not visible in this listing. */
3404 static void print_short_term(H264Context *h) {
3406 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3407 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3408 for(i=0; i<h->short_ref_count; i++){
3409 Picture *pic= h->short_ref[i];
3410 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3416 * print long term list
/* Debug helper: when FF_DEBUG_MMCO is set in avctx->debug, walks all 16 long
 * term slots and logs index, frame_num, poc and data[0] pointer of an entry.
 * NOTE(review): the declaration of i and the test that skips empty (NULL)
 * slots are not visible in this listing. */
3418 static void print_long_term(H264Context *h) {
3420 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3421 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3422 for(i = 0; i < 16; i++){
3423 Picture *pic= h->long_ref[i];
3425 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3432 * Executes the reference picture marking (memory management control operations).
/**
 * Apply the mmco_count memory management control operations in mmco[] to the
 * short term and long term reference lists of h. Unless one of the operations
 * already assigned the current picture, it is afterwards inserted at the head
 * of the short term list. Finally, if the lists together hold more pictures
 * than sps.ref_frame_count, one picture is discarded.
 *
 * NOTE(review): many lines of this function (local declarations, several case
 * labels, break and return statements, comment terminators, closing braces)
 * are not visible in this listing; the notes below describe only what the
 * visible lines show.
 */
3434 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3435 MpegEncContext * const s = &h->s;
// Set once the current picture has been placed on a reference list.
3437 int current_ref_assigned=0;
3440 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3441 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3443 for(i=0; i<mmco_count; i++){
3444 int structure, frame_num, unref_pic;
// NOTE(review): the debug output reads h->mmco[i] while the operations below
// use the mmco parameter - identical only if the caller passes h->mmco.
3445 if(s->avctx->debug&FF_DEBUG_MMCO)
3446 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3448 switch(mmco[i].opcode){
// Unreference one field or frame of a short term picture.
3449 case MMCO_SHORT2UNUSED:
3450 if(s->avctx->debug&FF_DEBUG_MMCO)
3451 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3452 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3453 pic = find_short(h, frame_num, &j);
// structure ^ PICT_FRAME is the mask of what stays referenced; the list entry
// is removed only when nothing of the picture remains in reference.
3455 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3456 remove_short_at_index(h, j);
3457 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3458 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
// Move a short term picture to the long term list at index long_arg.
3460 case MMCO_SHORT2LONG:
// NOTE(review): long_ref[] can hold NULL slots (remove_long_at_index clears
// single entries), so long_arg < long_ref_count does not guarantee that
// long_ref[long_arg] is non-NULL before it is dereferenced here - confirm.
3461 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3462 h->long_ref[mmco[i].long_arg]->frame_num ==
3463 mmco[i].short_pic_num / 2) {
3464 /* do nothing, we've already moved this field pair. */
// FIELD_PICTURE is used as a shift count here, presumably 0 or 1.
3466 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
// Evict whatever currently occupies the target long term slot.
3468 pic= remove_long(h, mmco[i].long_arg);
3469 if(pic) unreference_pic(h, pic, 0);
3471 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3472 if (h->long_ref[ mmco[i].long_arg ]){
3473 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3474 h->long_ref_count++;
// Unreference one field or frame of a long term picture.
3478 case MMCO_LONG2UNUSED:
3479 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3480 pic = h->long_ref[j];
3482 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3483 remove_long_at_index(h, j);
3484 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3485 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
// The lines below assign the current picture to long term index long_arg
// (the case label itself is not visible in this listing).
3489 if (FIELD_PICTURE && !s->first_field) {
3490 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3491 /* Just mark second field as referenced */
3493 } else if (s->current_picture_ptr->reference) {
3494 /* First field in pair is in short term list or
3495 * at a different long term index.
3496 * This is not allowed; see 7.4.3, notes 2 and 3.
3497 * Report the problem and keep the pair where it is,
3498 * and mark this field valid.
3500 av_log(h->s.avctx, AV_LOG_ERROR,
3501 "illegal long term reference assignment for second "
3502 "field in complementary field pair (first field is "
3503 "short term or has non-matching long index)\n");
3509 pic= remove_long(h, mmco[i].long_arg);
3510 if(pic) unreference_pic(h, pic, 0);
3512 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3513 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3514 h->long_ref_count++;
3517 s->current_picture_ptr->reference |= s->picture_structure;
3518 current_ref_assigned=1;
3520 case MMCO_SET_MAX_LONG:
3521 assert(mmco[i].long_arg <= 16);
3522 // just remove the long term which index is greater than new max
3523 for(j = mmco[i].long_arg; j<16; j++){
3524 pic = remove_long(h, j);
3525 if (pic) unreference_pic(h, pic, 0);
// The lines below empty both reference lists completely (the case label is
// not visible in this listing). Note that this loop always removes list index 0.
3529 while(h->short_ref_count){
3530 pic= remove_short(h, h->short_ref[0]->frame_num);
3531 if(pic) unreference_pic(h, pic, 0);
3533 for(j = 0; j < 16; j++) {
3534 pic= remove_long(h, j);
3535 if(pic) unreference_pic(h, pic, 0);
3542 if (!current_ref_assigned && FIELD_PICTURE &&
3543 !s->first_field && s->current_picture_ptr->reference) {
3545 /* Second field of complementary field pair; the first field of
3546 * which is already referenced. If short referenced, it
3547 * should be first entry in short_ref. If not, it must exist
3548 * in long_ref; trying to put it on the short list here is an
3549 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3551 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3552 /* Just mark the second field valid */
3553 s->current_picture_ptr->reference = PICT_FRAME;
3554 } else if (s->current_picture_ptr->long_ref) {
3555 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3556 "assignment for second field "
3557 "in complementary field pair "
3558 "(first field is long term)\n");
3561 * First field in reference, but not in any sensible place on our
3562 * reference lists. This shouldn't happen unless reference
3563 * handling somewhere else is wrong.
3567 current_ref_assigned = 1;
// Default marking: the current picture becomes the newest short term reference.
3570 if(!current_ref_assigned){
// A stale entry with the same frame number must not stay on the list.
3571 pic= remove_short(h, s->current_picture_ptr->frame_num);
3573 unreference_pic(h, pic, 0);
3574 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// Shift the existing entries up by one to free slot 0 for the current picture.
3577 if(h->short_ref_count)
3578 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3580 h->short_ref[0]= s->current_picture_ptr;
3581 h->short_ref[0]->long_ref=0;
3582 h->short_ref_count++;
3583 s->current_picture_ptr->reference |= s->picture_structure;
3586 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3588 /* We have too many reference frames, probably due to corrupted
3589 * stream. Need to discard one frame. Prevents overrun of the
3590 * short_ref and long_ref buffers.
3592 av_log(h->s.avctx, AV_LOG_ERROR,
3593 "number of reference frames exceeds max (probably "
3594 "corrupt input), discarding one\n");
// Prefer dropping a long term picture; otherwise drop the last short term entry.
3596 if (h->long_ref_count) {
3597 for (i = 0; i < 16; ++i)
3602 pic = h->long_ref[i];
3603 remove_long_at_index(h, i);
// With a single short term entry this removes list index 0.
3605 pic = h->short_ref[h->short_ref_count - 1];
3606 remove_short_at_index(h, h->short_ref_count - 1);
3608 unreference_pic(h, pic, 0);
3611 print_short_term(h);
/**
 * Parse the decoded reference picture marking syntax from gb into h->mmco[].
 *
 * For IDR slices two flag bits are read and at most an MMCO_LONG operation is
 * stored. Otherwise either the explicitly coded operations are read (adaptive
 * mode) or, in the visible sliding window branch, MMCO_SHORT2UNUSED operations
 * for the last short term entry are generated when the reference lists are
 * full.
 *
 * NOTE(review): the declaration of i, the statements that record the number
 * of operations, break and return statements and the terminator of the
 * commented-out check are not visible in this listing.
 */
3616 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3617 MpegEncContext * const s = &h->s;
3620 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// Both flags are stored as bit - 1, i.e. -1 when clear and 0 when set.
3621 s->broken_link= get_bits1(gb) -1;
3622 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3623 if(h->mmco[0].long_arg == -1)
// Flag set: the IDR picture becomes a long term reference at index 0.
3626 h->mmco[0].opcode= MMCO_LONG;
3630 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
// Read operations until MMCO_END or until the table is full.
3631 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3632 MMCOOpcode opcode= get_ue_golomb(gb);
3634 h->mmco[i].opcode= opcode;
3635 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// The picture number is coded as a difference to curr_pic_num and wrapped
// with a mask, which relies on max_pic_num being a power of two.
3636 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3637 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3638 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3642 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3643 unsigned int long_arg= get_ue_golomb(gb);
// Values 16..31 are accepted only for MMCO_LONG2UNUSED in field pictures.
3644 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3645 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3648 h->mmco[i].long_arg= long_arg;
// Opcodes above MMCO_LONG are not valid operations.
3651 if(opcode > (unsigned)MMCO_LONG){
3652 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3655 if(opcode == MMCO_END)
// Sliding window branch: the lists must not already exceed the SPS limit.
3660 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// When the lists are exactly full, schedule removal of the last short term
// entry, except for a second field whose first field is already a reference.
3662 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3663 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3664 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3665 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// In field mode two operations are generated with picture numbers
// 2*frame_num and 2*frame_num + 1, presumably one per field.
3667 if (FIELD_PICTURE) {
3668 h->mmco[0].short_pic_num *= 2;
3669 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3670 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) of the current picture and store it
 * in s->current_picture_ptr (field_poc[] and poc).
 *
 * The derivation depends on h->sps.poc_type: type 0 combines the coded lsb
 * with a tracked msb, type 1 derives an expected POC from the frame number and
 * the SPS offset table, and the remaining branch derives it directly from the
 * frame number.
 *
 * NOTE(review): the declarations of field_poc and i, several else branches,
 * the IDR special cases and the return statement are not visible in this
 * listing.
 */
3681 static int init_poc(H264Context *h){
3682 MpegEncContext * const s = &h->s;
3683 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset accumulates one max_frame_num per frame_num wrap-around
// and restarts at an IDR slice.
3686 if(h->nal_unit_type == NAL_IDR_SLICE){
3687 h->frame_num_offset= 0;
3689 if(h->frame_num < h->prev_frame_num)
3690 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3692 h->frame_num_offset= h->prev_frame_num_offset;
3695 if(h->sps.poc_type==0){
3696 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3698 if(h->nal_unit_type == NAL_IDR_SLICE){
// Detect lsb wrap-around in either direction relative to the previous
// picture and adjust the msb accordingly.
3703 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3704 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3705 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3706 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3708 h->poc_msb = h->prev_poc_msb;
3709 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3711 field_poc[1] = h->poc_msb + h->poc_lsb;
// For frame pictures the second field POC gets the coded delta added.
3712 if(s->picture_structure == PICT_FRAME)
3713 field_poc[1] += h->delta_poc_bottom;
3714 }else if(h->sps.poc_type==1){
3715 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3718 if(h->sps.poc_cycle_length != 0)
3719 abs_frame_num = h->frame_num_offset + h->frame_num;
3723 if(h->nal_ref_idc==0 && abs_frame_num > 0)
// Sum of all offsets of one POC cycle.
3726 expected_delta_per_poc_cycle = 0;
3727 for(i=0; i < h->sps.poc_cycle_length; i++)
3728 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3730 if(abs_frame_num > 0){
// Whole cycles completed so far, plus the position inside the current cycle.
3731 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3732 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3734 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3735 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3736 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
// Non-reference pictures get an additional fixed offset.
3740 if(h->nal_ref_idc == 0)
3741 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3743 field_poc[0] = expectedpoc + h->delta_poc[0];
3744 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3746 if(s->picture_structure == PICT_FRAME)
3747 field_poc[1] += h->delta_poc[1];
// Remaining POC type: derived directly from the frame number.
3750 if(h->nal_unit_type == NAL_IDR_SLICE){
// Reference pictures get even values, non-reference pictures the odd value below.
3753 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3754 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Store the POC of every field that is present in the current picture.
3760 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3761 s->current_picture_ptr->field_poc[0]= field_poc[0];
3762 s->current_picture_ptr->poc = field_poc[0];
3764 if(s->picture_structure != PICT_TOP_FIELD) {
3765 s->current_picture_ptr->field_poc[1]= field_poc[1];
3766 s->current_picture_ptr->poc = field_poc[1];
// For a frame, or once the second field has arrived, the picture POC is the
// smaller of the two field POCs.
3768 if(!FIELD_PICTURE || !s->first_field) {
3769 Picture *cur = s->current_picture_ptr;
3770 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3778 * initialize scan tables
/**
 * Fill the per-context coefficient scan tables of h.
 *
 * When the C reference IDCT is in use the static scan tables are copied
 * unchanged; otherwise every index is remapped by the local macro T(). The
 * *_q0 pointers select the unmodified static tables when
 * sps.transform_bypass is set and the per-context tables otherwise.
 *
 * NOTE(review): the declaration of i, the else keywords, the matching
 * "#undef T" lines and the closing braces are not visible in this listing.
 */
3780 static void init_scan_tables(H264Context *h){
3781 MpegEncContext * const s = &h->s;
// 4x4 tables: plain copy for the C IDCT, remapped indices otherwise.
3783 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3784 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3785 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
// T() exchanges the upper and lower 2 bits of a 4-bit index, presumably
// swapping row and column of the 4x4 position.
// NOTE(review): the macro body and its argument are not parenthesised; it is
// only safe with the simple array-element arguments used here.
3787 for(i=0; i<16; i++){
3788 #define T(x) (x>>2) | ((x<<2) & 0xF)
3789 h->zigzag_scan[i] = T(zigzag_scan[i]);
3790 h-> field_scan[i] = T( field_scan[i]);
// 8x8 tables: same scheme with 3-bit halves of a 6-bit index.
3794 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3795 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3796 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3797 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3798 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3800 for(i=0; i<64; i++){
3801 #define T(x) (x>>3) | ((x&7)<<3)
3802 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3803 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3804 h->field_scan8x8[i] = T(field_scan8x8[i]);
3805 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// With transform bypass the *_q0 pointers use the unmodified static tables.
3809 if(h->sps.transform_bypass){ //FIXME same ugly
3810 h->zigzag_scan_q0 = zigzag_scan;
3811 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3812 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3813 h->field_scan_q0 = field_scan;
3814 h->field_scan8x8_q0 = field_scan8x8;
3815 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
// Otherwise the *_q0 pointers alias the per-context tables filled above.
3817 h->zigzag_scan_q0 = h->zigzag_scan;
3818 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3819 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3820 h->field_scan_q0 = h->field_scan;
3821 h->field_scan8x8_q0 = h->field_scan8x8;
3822 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3827 * Replicates H264 "master" context to thread contexts.
3829 static void clone_slice(H264Context *dst, H264Context *src)
3831 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3832 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3833 dst->s.current_picture = src->s.current_picture;
3834 dst->s.linesize = src->s.linesize;
3835 dst->s.uvlinesize = src->s.uvlinesize;
3836 dst->s.first_field = src->s.first_field;
3838 dst->prev_poc_msb = src->prev_poc_msb;
3839 dst->prev_poc_lsb = src->prev_poc_lsb;
3840 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3841 dst->prev_frame_num = src->prev_frame_num;
3842 dst->short_ref_count = src->short_ref_count;
3844 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3845 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3846 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3847 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3849 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3850 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3854 * decodes a slice header.
3855 * This will also call MPV_common_init() and frame_start() as needed.
3857 * @param h h264context
3858 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3860 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3862 static int decode_slice_header(H264Context *h, H264Context *h0){
3863 MpegEncContext * const s = &h->s;
3864 MpegEncContext * const s0 = &h0->s;
3865 unsigned int first_mb_in_slice;
3866 unsigned int pps_id;
3867 int num_ref_idx_active_override_flag;
3868 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3869 unsigned int slice_type, tmp, i;
3870 int default_ref_list_done = 0;
3871 int last_pic_structure;
3873 s->dropable= h->nal_ref_idc == 0;
3875 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3876 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3877 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3879 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3880 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3883 first_mb_in_slice= get_ue_golomb(&s->gb);
3885 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3886 h0->current_slice = 0;
3887 if (!s0->first_field)
3888 s->current_picture_ptr= NULL;
3891 slice_type= get_ue_golomb(&s->gb);
3893 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3898 h->slice_type_fixed=1;
3900 h->slice_type_fixed=0;
3902 slice_type= slice_type_map[ slice_type ];
3903 if (slice_type == FF_I_TYPE
3904 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3905 default_ref_list_done = 1;
3907 h->slice_type= slice_type;
3909 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3910 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3911 av_log(h->s.avctx, AV_LOG_ERROR,
3912 "B picture before any references, skipping\n");
3916 pps_id= get_ue_golomb(&s->gb);
3917 if(pps_id>=MAX_PPS_COUNT){
3918 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3921 if(!h0->pps_buffers[pps_id]) {
3922 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3925 h->pps= *h0->pps_buffers[pps_id];
3927 if(!h0->sps_buffers[h->pps.sps_id]) {
3928 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3931 h->sps = *h0->sps_buffers[h->pps.sps_id];
3933 if(h == h0 && h->dequant_coeff_pps != pps_id){
3934 h->dequant_coeff_pps = pps_id;
3935 init_dequant_tables(h);
3938 s->mb_width= h->sps.mb_width;
3939 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3941 h->b_stride= s->mb_width*4;
3942 h->b8_stride= s->mb_width*2;
3944 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3945 if(h->sps.frame_mbs_only_flag)
3946 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3948 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3950 if (s->context_initialized
3951 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3953 return -1; // width / height changed during parallelized decoding
3957 if (!s->context_initialized) {
3959 return -1; // we cant (re-)initialize context during parallel decoding
3960 if (MPV_common_init(s) < 0)
3964 init_scan_tables(h);
3967 for(i = 1; i < s->avctx->thread_count; i++) {
3969 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3970 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3971 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3974 init_scan_tables(c);
3978 for(i = 0; i < s->avctx->thread_count; i++)
3979 if(context_init(h->thread_context[i]) < 0)
3982 s->avctx->width = s->width;
3983 s->avctx->height = s->height;
3984 s->avctx->sample_aspect_ratio= h->sps.sar;
3985 if(!s->avctx->sample_aspect_ratio.den)
3986 s->avctx->sample_aspect_ratio.den = 1;
3988 if(h->sps.timing_info_present_flag){
3989 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3990 if(h->x264_build > 0 && h->x264_build < 44)
3991 s->avctx->time_base.den *= 2;
3992 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3993 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3997 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4000 h->mb_aff_frame = 0;
4001 last_pic_structure = s0->picture_structure;
4002 if(h->sps.frame_mbs_only_flag){
4003 s->picture_structure= PICT_FRAME;
4005 if(get_bits1(&s->gb)) { //field_pic_flag
4006 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4008 s->picture_structure= PICT_FRAME;
4009 h->mb_aff_frame = h->sps.mb_aff;
4013 if(h0->current_slice == 0){
4014 /* See if we have a decoded first field looking for a pair... */
4015 if (s0->first_field) {
4016 assert(s0->current_picture_ptr);
4017 assert(s0->current_picture_ptr->data[0]);
4018 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4020 /* figure out if we have a complementary field pair */
4021 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4023 * Previous field is unmatched. Don't display it, but let it
4024 * remain for reference if marked as such.
4026 s0->current_picture_ptr = NULL;
4027 s0->first_field = FIELD_PICTURE;
4030 if (h->nal_ref_idc &&
4031 s0->current_picture_ptr->reference &&
4032 s0->current_picture_ptr->frame_num != h->frame_num) {
4034 * This and previous field were reference, but had
4035 * different frame_nums. Consider this field first in
4036 * pair. Throw away previous field except for reference
4039 s0->first_field = 1;
4040 s0->current_picture_ptr = NULL;
4043 /* Second field in complementary pair */
4044 s0->first_field = 0;
4049 /* Frame or first field in a potentially complementary pair */
4050 assert(!s0->current_picture_ptr);
4051 s0->first_field = FIELD_PICTURE;
4054 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4055 s0->first_field = 0;
4062 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4064 assert(s->mb_num == s->mb_width * s->mb_height);
4065 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4066 first_mb_in_slice >= s->mb_num){
4067 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4070 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4071 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4072 if (s->picture_structure == PICT_BOTTOM_FIELD)
4073 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4074 assert(s->mb_y < s->mb_height);
4076 if(s->picture_structure==PICT_FRAME){
4077 h->curr_pic_num= h->frame_num;
4078 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4080 h->curr_pic_num= 2*h->frame_num + 1;
4081 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4084 if(h->nal_unit_type == NAL_IDR_SLICE){
4085 get_ue_golomb(&s->gb); /* idr_pic_id */
4088 if(h->sps.poc_type==0){
4089 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4091 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4092 h->delta_poc_bottom= get_se_golomb(&s->gb);
4096 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4097 h->delta_poc[0]= get_se_golomb(&s->gb);
4099 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4100 h->delta_poc[1]= get_se_golomb(&s->gb);
4105 if(h->pps.redundant_pic_cnt_present){
4106 h->redundant_pic_count= get_ue_golomb(&s->gb);
4109 //set defaults, might be overriden a few line later
4110 h->ref_count[0]= h->pps.ref_count[0];
4111 h->ref_count[1]= h->pps.ref_count[1];
4113 if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
4114 if(h->slice_type == FF_B_TYPE){
4115 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4116 if(FIELD_PICTURE && h->direct_spatial_mv_pred)
4117 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF + spatial direct mode is not implemented\n");
4119 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4121 if(num_ref_idx_active_override_flag){
4122 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4123 if(h->slice_type==FF_B_TYPE)
4124 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4126 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4127 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4128 h->ref_count[0]= h->ref_count[1]= 1;
4132 if(h->slice_type == FF_B_TYPE)
4139 if(!default_ref_list_done){
4140 fill_default_ref_list(h);
4143 if(decode_ref_pic_list_reordering(h) < 0)
4146 if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
4147 || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
4148 pred_weight_table(h);
4149 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
4150 implicit_weight_table(h);
4155 decode_ref_pic_marking(h0, &s->gb);
4158 fill_mbaff_ref_list(h);
4160 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
4161 tmp = get_ue_golomb(&s->gb);
4163 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4166 h->cabac_init_idc= tmp;
4169 h->last_qscale_diff = 0;
4170 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4172 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4176 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4177 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4178 //FIXME qscale / qp ... stuff
4179 if(h->slice_type == FF_SP_TYPE){
4180 get_bits1(&s->gb); /* sp_for_switch_flag */
4182 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4183 get_se_golomb(&s->gb); /* slice_qs_delta */
4186 h->deblocking_filter = 1;
4187 h->slice_alpha_c0_offset = 0;
4188 h->slice_beta_offset = 0;
4189 if( h->pps.deblocking_filter_parameters_present ) {
4190 tmp= get_ue_golomb(&s->gb);
4192 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4195 h->deblocking_filter= tmp;
4196 if(h->deblocking_filter < 2)
4197 h->deblocking_filter^= 1; // 1<->0
4199 if( h->deblocking_filter ) {
4200 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4201 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4205 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4206 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
4207 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
4208 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4209 h->deblocking_filter= 0;
4211 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4212 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4213 /* Cheat slightly for speed:
4214 Do not bother to deblock across slices. */
4215 h->deblocking_filter = 2;
4217 h0->max_contexts = 1;
4218 if(!h0->single_decode_warning) {
4219 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4220 h0->single_decode_warning = 1;
4223 return 1; // deblocking switched inside frame
4228 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4229 slice_group_change_cycle= get_bits(&s->gb, ?);
4232 h0->last_slice_type = slice_type;
4233 h->slice_num = ++h0->current_slice;
4235 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4236 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4238 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4239 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4241 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4243 av_get_pict_type_char(h->slice_type),
4244 pps_id, h->frame_num,
4245 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4246 h->ref_count[0], h->ref_count[1],
4248 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4250 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4251 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Bitstream helper; decode_residual() uses its result as the prefix of a level code.
 * Only part of the body is visible in this listing: the lines shown peek at the
 * bit cache, derive a bit count from av_log2() of the cached bits and skip that
 * many bits. The local declarations and the return statement are not among the
 * visible lines.
 */
4261 static inline int get_level_prefix(GetBitContext *gb){
4265 OPEN_READER(re, gb);
4266 UPDATE_CACHE(re, gb);
4267 buf=GET_CACHE(re, gb);
// log = 32 - av_log2(buf); this is the number of bits skipped below
4269 log= 32 - av_log2(buf);
// debug trace of the bits about to be consumed
// NOTE(review): presumably guarded by a preprocessor conditional that is not visible here
4271 print_bin(buf>>(32-log), log);
4272 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// consume log bits, then close the reader
4275 LAST_SKIP_BITS(re, gb, log);
4276 CLOSE_READER(re, gb);
/**
 * Inspects h->sub_mb_type[] to decide whether the 8x8 transform is allowed;
 * decode_mb_cavlc() stores the result in its dct8x8_allowed variable.
 * The loop over i and the return statements are not visible in this listing.
 */
4281 static inline int get_dct8x8_allowed(H264Context *h){
// condition: sub-block i is not a SUB_8X8 partition, or it is direct while
// the SPS has direct_8x8_inference_flag cleared
// NOTE(review): presumably this is the case that disallows the 8x8 transform -- confirm
4284 if(!IS_SUB_8X8(h->sub_mb_type[i])
4285 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4292 * decodes a residual block.
4293 * @param n block index
4294 * @param scantable scantable
4295 * @param max_coeff number of coefficients in the block
4296 * @return <0 if an error occurred
/**
 * Decodes one VLC-coded residual block from gb into block[].
 * n selects the block; CHROMA_DC_BLOCK_INDEX and LUMA_DC_BLOCK_INDEX get
 * special handling when the coeff_token table is chosen.
 * scantable maps a coefficient position to an index into block[].
 * qmul, where used, scales each level as (level * qmul[j] + 32) >> 6.
 * max_coeff is the largest coefficient count accepted for this block.
 * Several lines of the body (else branches, returns, closing braces, the
 * declaration of level[]) are not visible in this listing.
 */
4298 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4299 MpegEncContext * const s = &h->s;
// maps a predicted non-zero count (0..16) to one of the four coeff_token_vlc tables
4300 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4302 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4304 //FIXME put trailing_ones into the context
// chroma DC uses its own coeff_token table
4306 if(n == CHROMA_DC_BLOCK_INDEX){
4307 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
// coeff_token packs total_coeff in the upper bits and trailing_ones in the low two bits
4308 total_coeff= coeff_token>>2;
// luma DC: table chosen from the predicted non-zero count of block 0
4310 if(n == LUMA_DC_BLOCK_INDEX){
4311 total_coeff= pred_non_zero_count(h, 0);
4312 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4313 total_coeff= coeff_token>>2;
// remaining case: table chosen from the predicted non-zero count of block n,
// and the decoded count is stored in the non-zero-count cache
4315 total_coeff= pred_non_zero_count(h, n);
4316 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4317 total_coeff= coeff_token>>2;
4318 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4322 //FIXME set last_non_zero?
// reject coefficient counts larger than the block can hold
4326 if(total_coeff > (unsigned)max_coeff) {
4327 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4331 trailing_ones= coeff_token&3;
4332 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4333 assert(total_coeff<=16);
// trailing ones: one sign bit each, giving +1 (bit 0) or -1 (bit 1)
4335 for(i=0; i<trailing_ones; i++){
4336 level[i]= 1 - 2*get_bits1(gb);
4340 int level_code, mask;
// suffix_length starts at 1 only when total_coeff > 10 and trailing_ones < 3
4341 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4342 int prefix= get_level_prefix(gb);
4344 //first coefficient has suffix_length equal to 0 or 1
4345 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4347 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4349 level_code= (prefix<<suffix_length); //part
4350 }else if(prefix==14){
4352 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4354 level_code= prefix + get_bits(gb, 4); //part
4356 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4357 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4359 level_code += (1<<(prefix-3))-4096;
4362 if(trailing_ones < 3) level_code += 2;
// level_code to signed level: even codes give positive values, odd codes negative ones
4367 mask= -(level_code&1);
4368 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4371 //remaining coefficients have suffix_length > 0
4372 for(;i<total_coeff;i++) {
4373 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4374 prefix = get_level_prefix(gb);
4376 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4378 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4380 level_code += (1<<(prefix-3))-4096;
4382 mask= -(level_code&1);
4383 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// NOTE(review): the statement controlled by this test is not visible; presumably it raises suffix_length
4384 if(level_code > suffix_limit[suffix_length])
// NOTE(review): the statement controlled by this test is not visible; presumably no zeros remain when the block is full
4389 if(total_coeff == max_coeff)
// otherwise total_zeros is read from a table selected by total_coeff
4392 if(n == CHROMA_DC_BLOCK_INDEX)
4393 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4395 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// level[0] is stored at scan position zeros_left + total_coeff - 1; each further
// level moves down by 1 + run_before positions
4398 coeff_num = zeros_left + total_coeff - 1;
4399 j = scantable[coeff_num];
// variant that stores levels without scaling
// NOTE(review): the condition choosing between this variant and the qmul variant is not visible
4401 block[j] = level[0];
4402 for(i=1;i<total_coeff;i++) {
4405 else if(zeros_left < 7){
4406 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4408 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4410 zeros_left -= run_before;
4411 coeff_num -= 1 + run_before;
4412 j= scantable[ coeff_num ];
// variant that scales each level with qmul and rounds: (level * qmul[j] + 32) >> 6
4417 block[j] = (level[0] * qmul[j] + 32)>>6;
4418 for(i=1;i<total_coeff;i++) {
4421 else if(zeros_left < 7){
4422 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4424 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4426 zeros_left -= run_before;
4427 coeff_num -= 1 + run_before;
4428 j= scantable[ coeff_num ];
4430 block[j]= (level[i] * qmul[j] + 32)>>6;
// error report; the guarding test is not visible in this listing
4435 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Sets h->mb_mbaff and h->mb_field_decoding_flag from a neighbouring macroblock.
 * The left neighbour (mb_xy-1) is used when it belongs to the current slice,
 * otherwise the neighbour one row up (mb_xy-s->mb_stride) when that one does.
 * The final fallback value of the conditional expression is not visible here.
 */
4442 static void predict_field_decoding_flag(H264Context *h){
4443 MpegEncContext * const s = &h->s;
4444 const int mb_xy= h->mb_xy;
4445 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4446 ? s->current_picture.mb_type[mb_xy-1]
4447 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4448 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// both flags become 1 when the chosen neighbour type is interlaced, 0 otherwise
4450 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4454 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Sets up the state of a skipped macroblock at h->mb_xy.
 * Clears the non-zero counts, derives the motion (pred_direct_motion() for B
 * slices, pred_pskip_motion() in the other visible path), writes the motion back
 * and records mb_type, qscale and slice number for this macroblock.
 * The declarations of mb_type, mx and my and some branch lines are not visible
 * in this listing.
 */
4456 static void decode_mb_skip(H264Context *h){
4457 MpegEncContext * const s = &h->s;
4458 const int mb_xy= h->mb_xy;
// a skipped macroblock has no coefficients
4461 memset(h->non_zero_count[mb_xy], 0, 16);
4462 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// NOTE(review): the condition guarding this line is not visible
4465 mb_type|= MB_TYPE_INTERLACED;
4467 if( h->slice_type == FF_B_TYPE )
4469 // just for fill_caches. pred_direct_motion will set the real mb_type
4470 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4472 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4473 pred_direct_motion(h, &mb_type);
4474 mb_type|= MB_TYPE_SKIP;
// other path: 16x16 block with the predicted skip vector, reference cache filled with 0
4479 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4481 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4482 pred_pskip_motion(h, &mx, &my);
4483 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4484 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// publish the per-macroblock results
4487 write_back_motion(h, mb_type);
4488 s->current_picture.mb_type[mb_xy]= mb_type;
4489 s->current_picture.qscale_table[mb_xy]= s->qscale;
4490 h->slice_table[ mb_xy ]= h->slice_num;
4491 h->prev_mb_skipped= 1;
4495 * decodes a macroblock
4496 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one macroblock at (s->mb_x, s->mb_y) from the VLC-coded bitstream.
 * Visible stages, in order: skip-run handling for non-intra slices, mb_type
 * parsing per slice type, raw PCM samples, intra prediction modes, reference
 * indices and motion vectors for the inter partitionings, coded block pattern,
 * optional 8x8 transform flag, qscale delta and the residual blocks.
 * A visible error path returns -1. Many lines (braces, else branches, returns,
 * some declarations) are not visible in this listing.
 */
4498 static int decode_mb_cavlc(H264Context *h){
4499 MpegEncContext * const s = &h->s;
4501 int partition_count;
4502 unsigned int mb_type, cbp;
4503 int dct8x8_allowed= h->pps.transform_8x8_mode;
// linear macroblock index: x + y * stride
4505 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4507 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4509 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4510 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4512 if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
4513 if(s->mb_skip_run==-1)
4514 s->mb_skip_run= get_ue_golomb(&s->gb);
4516 if (s->mb_skip_run--) {
4517 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4518 if(s->mb_skip_run==0)
4519 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4521 predict_field_decoding_flag(h);
4528 if( (s->mb_y&1) == 0 )
4529 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4531 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4533 h->prev_mb_skipped= 0;
4535 mb_type= get_ue_golomb(&s->gb);
4536 if(h->slice_type == FF_B_TYPE){
4538 partition_count= b_mb_type_info[mb_type].partition_count;
4539 mb_type= b_mb_type_info[mb_type].type;
4542 goto decode_intra_mb;
4544 }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
4546 partition_count= p_mb_type_info[mb_type].partition_count;
4547 mb_type= p_mb_type_info[mb_type].type;
4550 goto decode_intra_mb;
4553 assert(h->slice_type == FF_I_TYPE);
4556 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// intra macroblock: cbp, 16x16 prediction mode and type flags come from i_mb_type_info[]
4560 cbp= i_mb_type_info[mb_type].cbp;
4561 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4562 mb_type= i_mb_type_info[mb_type].type;
4566 mb_type |= MB_TYPE_INTERLACED;
4568 h->slice_table[ mb_xy ]= h->slice_num;
// PCM macroblock: byte-align the reader, then read raw 8-bit samples into h->mb
4570 if(IS_INTRA_PCM(mb_type)){
4573 // We assume these blocks are very rare so we do not optimize it.
4574 align_get_bits(&s->gb);
4576 // The pixels are stored in the same order as levels in h->mb array.
4577 for(y=0; y<16; y++){
4578 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4579 for(x=0; x<16; x++){
4580 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4581 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
// first chroma plane, stored from offset 256 in h->mb
4585 const int index= 256 + 4*(y&3) + 32*(y>>2);
4587 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4588 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
// second chroma plane, stored from offset 256 + 64 in h->mb
4592 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4594 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4595 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4599 // In deblocking, the quantizer is 0
4600 s->current_picture.qscale_table[mb_xy]= 0;
4601 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4602 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4603 // All coeffs are present
4604 memset(h->non_zero_count[mb_xy], 16, 16);
4606 s->current_picture.mb_type[mb_xy]= mb_type;
// reference counts are doubled here and halved again near the end of the function
// NOTE(review): the guarding condition is not visible in this listing
4611 h->ref_count[0] <<= 1;
4612 h->ref_count[1] <<= 1;
4615 fill_caches(h, mb_type, 0);
// intra macroblocks: prediction modes
4618 if(IS_INTRA(mb_type)){
4620 // init_top_left_availability(h);
4621 if(IS_INTRA4x4(mb_type)){
// one bit selects the 8x8 transform when it is allowed
4624 if(dct8x8_allowed && get_bits1(&s->gb)){
4625 mb_type |= MB_TYPE_8x8DCT;
4629 // fill_intra4x4_pred_table(h);
4630 for(i=0; i<16; i+=di){
4631 int mode= pred_intra_mode(h, i);
// flag bit 0: a 3-bit remainder replaces the predicted mode, skipping over the predicted value
4633 if(!get_bits1(&s->gb)){
4634 const int rem_mode= get_bits(&s->gb, 3);
4635 mode = rem_mode + (rem_mode >= mode);
4639 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4641 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4643 write_back_intra_pred_mode(h);
4644 if( check_intra4x4_pred_mode(h) < 0)
4647 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4648 if(h->intra16x16_pred_mode < 0)
// chroma prediction mode, validated by check_intra_pred_mode()
4652 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4655 h->chroma_pred_mode= pred_mode;
// four sub-macroblock partitions, each with its own sub type
4656 }else if(partition_count==4){
4657 int i, j, sub_partition_count[4], list, ref[2][4];
4659 if(h->slice_type == FF_B_TYPE){
4661 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4662 if(h->sub_mb_type[i] >=13){
4663 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4666 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4667 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// any direct sub-block triggers direct motion prediction for the macroblock
4669 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4670 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4671 pred_direct_motion(h, &mb_type);
4672 h->ref_cache[0][scan8[4]] =
4673 h->ref_cache[1][scan8[4]] =
4674 h->ref_cache[0][scan8[12]] =
4675 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4678 assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
4680 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4681 if(h->sub_mb_type[i] >=4){
4682 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4685 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4686 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices for every list and sub-block that uses that list
4690 for(list=0; list<h->list_count; list++){
4691 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4693 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4694 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4695 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4697 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4709 dct8x8_allowed = get_dct8x8_allowed(h);
// motion vectors for every list and sub-block
4711 for(list=0; list<h->list_count; list++){
4713 if(IS_DIRECT(h->sub_mb_type[i])) {
4714 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4717 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4718 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4720 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4721 const int sub_mb_type= h->sub_mb_type[i];
4722 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4723 for(j=0; j<sub_partition_count[i]; j++){
4725 const int index= 4*i + block_width*j;
4726 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// predicted vector plus the decoded signed difference
4727 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4728 mx += get_se_golomb(&s->gb);
4729 my += get_se_golomb(&s->gb);
4730 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// replicate the vector over the cache entries covered by the sub-partition shape
4732 if(IS_SUB_8X8(sub_mb_type)){
4734 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4736 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4737 }else if(IS_SUB_8X4(sub_mb_type)){
4738 mv_cache[ 1 ][0]= mx;
4739 mv_cache[ 1 ][1]= my;
4740 }else if(IS_SUB_4X8(sub_mb_type)){
4741 mv_cache[ 8 ][0]= mx;
4742 mv_cache[ 8 ][1]= my;
4744 mv_cache[ 0 ][0]= mx;
4745 mv_cache[ 0 ][1]= my;
4748 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4754 }else if(IS_DIRECT(mb_type)){
4755 pred_direct_motion(h, &mb_type);
4756 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4758 int list, mx, my, i;
4759 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16: one reference index and one vector per list
4760 if(IS_16X16(mb_type)){
4761 for(list=0; list<h->list_count; list++){
4763 if(IS_DIR(mb_type, 0, list)){
4764 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4765 if(val >= h->ref_count[list]){
4766 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4770 val= LIST_NOT_USED&0xFF;
4771 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4773 for(list=0; list<h->list_count; list++){
4775 if(IS_DIR(mb_type, 0, list)){
4776 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4777 mx += get_se_golomb(&s->gb);
4778 my += get_se_golomb(&s->gb);
4779 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4781 val= pack16to32(mx,my);
4784 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8: two partitions indexed by i, each filling a 4x2 area of the caches
4787 else if(IS_16X8(mb_type)){
4788 for(list=0; list<h->list_count; list++){
4791 if(IS_DIR(mb_type, i, list)){
4792 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4793 if(val >= h->ref_count[list]){
4794 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4798 val= LIST_NOT_USED&0xFF;
4799 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4802 for(list=0; list<h->list_count; list++){
4805 if(IS_DIR(mb_type, i, list)){
4806 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4807 mx += get_se_golomb(&s->gb);
4808 my += get_se_golomb(&s->gb);
4809 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4811 val= pack16to32(mx,my);
4814 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: two partitions indexed by i, each filling a 2x4 area of the caches
4818 assert(IS_8X16(mb_type));
4819 for(list=0; list<h->list_count; list++){
4822 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4823 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4824 if(val >= h->ref_count[list]){
4825 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4829 val= LIST_NOT_USED&0xFF;
4830 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4833 for(list=0; list<h->list_count; list++){
4836 if(IS_DIR(mb_type, i, list)){
4837 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4838 mx += get_se_golomb(&s->gb);
4839 my += get_se_golomb(&s->gb);
4840 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4842 val= pack16to32(mx,my);
4845 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4851 if(IS_INTER(mb_type))
4852 write_back_motion(h, mb_type);
// coded block pattern: read as a code number and mapped through a table,
// except for intra 16x16 where cbp was taken from i_mb_type_info[] above
4854 if(!IS_INTRA16x16(mb_type)){
4855 cbp= get_ue_golomb(&s->gb);
4857 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4861 if(IS_INTRA4x4(mb_type))
4862 cbp= golomb_to_intra4x4_cbp[cbp];
4864 cbp= golomb_to_inter_cbp[cbp];
// non-intra macroblocks with luma coefficients read the 8x8 transform flag here
4868 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4869 if(get_bits1(&s->gb))
4870 mb_type |= MB_TYPE_8x8DCT;
4872 s->current_picture.mb_type[mb_xy]= mb_type;
// residual data is present when cbp is non-zero or the macroblock is intra 16x16
4874 if(cbp || IS_INTRA16x16(mb_type)){
4875 int i8x8, i4x4, chroma_idx;
4877 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4878 const uint8_t *scan, *scan8x8, *dc_scan;
4880 // fill_non_zero_count_cache(h);
// scan tables: field or zigzag variants, with separate _q0 tables when qscale is 0
4882 if(IS_INTERLACED(mb_type)){
4883 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4884 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4885 dc_scan= luma_dc_field_scan;
4887 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4888 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4889 dc_scan= luma_dc_zigzag_scan;
// qscale delta, accepted only within -26..25
4892 dquant= get_se_golomb(&s->gb);
4894 if( dquant > 25 || dquant < -26 ){
4895 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// wrap qscale back into 0..51
4899 s->qscale += dquant;
4900 if(((unsigned)s->qscale) > 51){
4901 if(s->qscale<0) s->qscale+= 52;
4902 else s->qscale-= 52;
4905 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4906 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// intra 16x16: one luma DC block, then AC blocks of up to 15 coefficients each
4907 if(IS_INTRA16x16(mb_type)){
4908 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4909 return -1; //FIXME continue if partitioned and other return -1 too
4912 assert((cbp&15) == 0 || (cbp&15) == 15);
4915 for(i8x8=0; i8x8<4; i8x8++){
4916 for(i4x4=0; i4x4<4; i4x4++){
4917 const int index= i4x4 + 4*i8x8;
4918 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4924 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// other macroblock types: each 8x8 luma quadrant is coded when its cbp bit is set
4927 for(i8x8=0; i8x8<4; i8x8++){
4928 if(cbp & (1<<i8x8)){
4929 if(IS_8x8DCT(mb_type)){
4930 DCTELEM *buf = &h->mb[64*i8x8];
4932 for(i4x4=0; i4x4<4; i4x4++){
4933 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4934 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// the four 4x4 counts of the quadrant are summed into its first cache entry
4937 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4938 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4940 for(i4x4=0; i4x4<4; i4x4++){
4941 const int index= i4x4 + 4*i8x8;
4943 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// quadrant not coded: clear its non-zero counts
4949 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4950 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC blocks, decoded without a qmul table
// NOTE(review): the cbp test guarding the chroma stages is not visible
4956 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4957 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// chroma AC blocks, up to 15 coefficients each
4963 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4964 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4965 for(i4x4=0; i4x4<4; i4x4++){
4966 const int index= 16 + 4*chroma_idx + i4x4;
4967 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// chroma non-zero counts cleared (path without chroma AC data)
4973 uint8_t * const nnz= &h->non_zero_count_cache[0];
4974 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4975 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// path without residual data: all luma and chroma counts cleared
4978 uint8_t * const nnz= &h->non_zero_count_cache[0];
4979 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4980 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4981 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4983 s->current_picture.qscale_table[mb_xy]= s->qscale;
4984 write_back_non_zero_count(h);
// undo the doubling of the reference counts done earlier in this function
4987 h->ref_count[0] >>= 1;
4988 h->ref_count[1] >>= 1;
/**
 * Decodes the field decoding flag with CABAC.
 * The context index is built from two neighbours, addressed from the even row
 * of the current macroblock pair: the one to the left (mba_xy) and the one two
 * rows up (mbb_xy). Each is tested for being in the current slice and interlaced.
 * The lines that modify ctx inside the two tests are not visible in this listing.
 * @return the decoded bin, read with context state 70 + ctx
 */
4994 static int decode_cabac_field_decoding_flag(H264Context *h) {
4995 MpegEncContext * const s = &h->s;
4996 const int mb_x = s->mb_x;
// clear the low bit so that mb_y addresses the even row of the pair
4997 const int mb_y = s->mb_y & ~1;
4998 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4999 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5001 unsigned int ctx = 0;
5003 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5006 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5010 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra macroblock type with CABAC.
 * ctx_base selects the first context state used; intra_slice offsets the state
 * indices of the later bins.
 * Visible results: 0 for I4x4, 25 for PCM, otherwise a value built up from 1
 * (I16x16) plus offsets taken from further bins. The final return and the
 * branch on intra_slice are not visible in this listing.
 */
5013 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5014 uint8_t *state= &h->cabac_state[ctx_base];
5018 MpegEncContext * const s = &h->s;
5019 const int mba_xy = h->left_mb_xy[0];
5020 const int mbb_xy = h->top_mb_xy;
// neighbours in the same slice that are not I4x4 take part in the context choice
// NOTE(review): the statements controlled by these tests are not visible; presumably they increment ctx
5022 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5024 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5026 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5027 return 0; /* I4x4 */
// variant of the first bin that always uses state[0]
5030 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5031 return 0; /* I4x4 */
5034 if( get_cabac_terminate( &h->cabac ) )
5035 return 25; /* PCM */
// I16x16: add 12 for the luma bin, 4 or 8 for chroma, then two bins weighted 2 and 1
5037 mb_type = 1; /* I16x16 */
5038 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5039 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5040 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5041 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5042 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes the macroblock type with CABAC, dispatching on h->slice_type.
 * I slices delegate to decode_cabac_intra_mb_type(h, 3, 1).
 * P slices return 0..3 for inter types, or the intra type plus 5.
 * B slices return a B type index, or the intra type plus 23.
 * Some lines (else branches, declarations of ctx and bits, the final return)
 * are not visible in this listing.
 */
5046 static int decode_cabac_mb_type( H264Context *h ) {
5047 MpegEncContext * const s = &h->s;
5049 if( h->slice_type == FF_I_TYPE ) {
5050 return decode_cabac_intra_mb_type(h, 3, 1);
5051 } else if( h->slice_type == FF_P_TYPE ) {
// first bin 0: inter type, selected by up to two further bins
5052 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5054 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5055 /* P_L0_D16x16, P_8x8 */
5056 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5058 /* P_L0_D8x16, P_L0_D16x8 */
5059 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// intra type inside a P slice, offset by 5
5062 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5064 } else if( h->slice_type == FF_B_TYPE ) {
5065 const int mba_xy = h->left_mb_xy[0];
5066 const int mbb_xy = h->top_mb_xy;
// neighbours in the same slice that are not direct take part in the context choice
// NOTE(review): the statements controlled by these tests are not visible; presumably they increment ctx
5070 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5072 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5075 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5076 return 0; /* B_Direct_16x16 */
5078 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5079 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// four bins assembled into a 4-bit value, most significant bit first
5082 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5083 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5084 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5085 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
// NOTE(review): the test guarding this return is not visible
5087 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5088 else if( bits == 13 ) {
5089 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5090 } else if( bits == 14 )
5091 return 11; /* B_L1_L0_8x16 */
5092 else if( bits == 15 )
5093 return 22; /* B_8x8 */
// remaining values take one more bin as the new least significant bit
5095 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5096 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5098 /* TODO SI/SP frames? */
/**
 * Decodes the macroblock skip flag with CABAC for the macroblock at (mb_x, mb_y).
 * Locates the left (mba_xy) and top (mbb_xy) neighbours, with separate address
 * computations for the FRAME_MBAFF case and the other case, then tests whether
 * each neighbour is in the current slice and not skipped.
 * The lines that modify ctx and several condition and assignment lines are not
 * visible in this listing.
 * @return the decoded bin, read with context state 11 + ctx
 */
5103 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5104 MpegEncContext * const s = &h->s;
5108 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
// address of the even row of the current macroblock pair
5109 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5112 && h->slice_table[mba_xy] == h->slice_num
5113 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5114 mba_xy += s->mb_stride;
5116 mbb_xy = mb_xy - s->mb_stride;
5118 && h->slice_table[mbb_xy] == h->slice_num
5119 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5120 mbb_xy -= s->mb_stride;
5122 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5124 int mb_xy = h->mb_xy;
// the top neighbour is two rows up when FIELD_PICTURE is set, one row up otherwise
5126 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5129 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5131 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
// NOTE(review): the statement controlled by this test is not visible; presumably it offsets ctx for B slices
5134 if( h->slice_type == FF_B_TYPE )
5136 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC, given the predicted mode.
 * A first bin (state 68) is tested, then three bins (state 69) form a 3-bit
 * value, least significant bit first, which is compared against pred_mode.
 * The returns and the adjustment made when mode >= pred_mode are not visible
 * in this listing.
 */
5139 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
// NOTE(review): presumably returns pred_mode when this bin is set -- the return line is not visible
5142 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5145 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5146 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5147 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5149 if( mode >= pred_mode )
/**
 * Decodes the chroma prediction mode with CABAC.
 * The first bin uses state 64 + ctx, where ctx depends on whether the left and
 * top neighbours are in the current slice and have a non-zero entry in
 * h->chroma_pred_mode_table. Up to two further bins use state 64 + 3.
 * The lines that modify ctx and the return statements are not visible in this
 * listing.
 */
5155 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5156 const int mba_xy = h->left_mb_xy[0];
5157 const int mbb_xy = h->top_mb_xy;
5161 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5162 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5165 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5168 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5171 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5173 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the four luma bits of the coded block pattern with CABAC.
 * Each bit is read with context state 73 + ctx, where ctx is derived from the
 * cbp bits of the blocks to the left of and above the one being decoded.
 * Those come from the neighbouring macroblocks (cbp_a, cbp_b) or from bits
 * already decoded in this macroblock.
 * The return statement is not visible in this listing.
 */
5179 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5180 int cbp_b, cbp_a, ctx, cbp = 0;
// a neighbour outside the current slice contributes -1, that is, all bits set
5182 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5183 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
// bit 0: left from cbp_a bit 1, top from cbp_b bit 2
5185 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5186 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
// bit 1: left is bit 0 of this macroblock, top from cbp_b bit 3
5187 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5188 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
// bit 2: left from cbp_a bit 3, top is bit 0 of this macroblock
5189 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5190 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
// bit 3: left is bit 2 and top is bit 1 of this macroblock
5191 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5192 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma part of the coded block pattern with CABAC.
 * cbp_a and cbp_b are the two-bit chroma fields (bits 4 and 5) of the left and
 * top cbp values. The first bin uses a context built from whether they are
 * non-zero; the second bin uses a context built from whether they equal 2.
 * The initialisations of ctx and the return taken when the first bin is 0 are
 * not visible in this listing.
 * @return on the visible path, 1 plus the second bin
 */
5195 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5199 cbp_a = (h->left_cbp>>4)&0x03;
5200 cbp_b = (h-> top_cbp>>4)&0x03;
5203 if( cbp_a > 0 ) ctx++;
5204 if( cbp_b > 0 ) ctx += 2;
5205 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5209 if( cbp_a == 2 ) ctx++;
5210 if( cbp_b == 2 ) ctx += 2;
5211 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes the macroblock qscale delta with CABAC.
 * Bins are read with context state 60 + ctx for as long as they are set; the
 * count is capped (val > 102) to stop a corrupt stream from looping forever.
 * The visible return maps val to the negative value -(val + 1)/2.
 * The updates of ctx and val, the other return and the test selecting between
 * the returns are not visible in this listing.
 */
5213 static int decode_cabac_mb_dqp( H264Context *h) {
// the first context depends on whether the previous delta was non-zero
5217 if( h->last_qscale_diff != 0 )
5220 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5226 if(val > 102) //prevent infinite loop
5233 return -(val + 1)/2;
/**
 * Decodes a P sub-macroblock type with CABAC using up to three bins
 * (context states 21, 22 and 23).
 * None of the return statements are visible in this listing.
 */
5235 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5236 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5238 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5240 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B sub-macroblock type with CABAC (context states 36 to 39).
 * Visible results: 0 for B_Direct_8x8, 1 or 2 for B_L0_8x8 and B_L1_8x8,
 * 11 or 12 for B_L1_4x4 and B_Bi_4x4. The remaining types are accumulated in
 * the variable type from two further bins weighted 2 and 1.
 * The declaration and initial value of type and the final return are not
 * visible in this listing.
 */
5244 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5246 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5247 return 0; /* B_Direct_8x8 */
5248 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5249 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5251 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5252 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5253 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5256 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5257 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes the transform size flag with CABAC.
 * @return the decoded bin, read with context state 399 + h->neighbor_transform_size
 */
5261 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5262 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC (states 54 + ctx).
 * @param list reference list whose ref_cache is consulted
 * @param n    block index; scan8[n] - 1 and scan8[n] - 8 address the left and
 *             top neighbour in the caches
 * @returns the reference index; when the decoded value reaches 32 an error is
 *          logged and 0 is returned, so callers cannot detect the failure
 */
5265 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5266 int refa = h->ref_cache[list][scan8[n] - 1];
5267 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices a neighbour only counts when its reference is > 0 and its direct_cache entry is 0 */
5271 if( h->slice_type == FF_B_TYPE) {
5272 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5274 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5283 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* the limit is the constant 32, not the size of the reference list */
5289 if(ref >= 32 /*h->ref_list[list]*/){
5290 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5291 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component with CABAC.
 * @param list reference list whose mvd_cache is consulted
 * @param n    block index; scan8[n] - 1 and scan8[n] - 8 address the left and
 *             top neighbour in the cache
 * @param l    component selector: 0 uses context base 40, any other value 47
 * @returns the signed component, the sign being applied by get_cabac_bypass_sign()
 */
5297 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* the context of the first bin is chosen from the summed magnitudes of the neighbouring mvds */
5298 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5299 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5300 int ctxbase = (l == 0) ? 40 : 47;
5305 else if( amvd > 32 )
5310 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* context coded bins are read while mvd is below 9 */
5315 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* larger magnitudes continue with bypass coded bins */
5323 while( get_cabac_bypass( &h->cabac ) ) {
5327 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5332 if( get_cabac_bypass( &h->cabac ) )
5336 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the context offset of the coded block flag of a residual block
 * from the corresponding flags/counts of the left and top neighbours.
 * @param cat   block category, see the list in decode_cabac_residual_internal()
 * @param idx   block index within the category
 * @param is_dc NOTE(review): presumably selects the cbp based lookups below, confirm
 * @returns ctx + 4 * cat; decode_cabac_residual_internal() adds this to state 85
 */
5339 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* neighbour flags stored in the cbp words: bit 8 here, bit 6+idx below */
5345 nza = h->left_cbp&0x100;
5346 nzb = h-> top_cbp&0x100;
5348 nza = (h->left_cbp>>(6+idx))&0x01;
5349 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* otherwise the cached non zero counts of the left (-1) and top (-8) entries are used */
5353 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5354 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5356 assert(cat == 1 || cat == 2);
5357 nza = h->non_zero_count_cache[scan8[idx] - 1];
5358 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5368 return ctx + 4 * cat;
/* Offset from last_coeff_ctx_base for each of the first 63 coefficient
 * positions of an 8x8 block; decode_cabac_residual_internal() passes
 * last_coeff_flag_offset_8x8[last] as last_off to DECODE_SIGNIFICANCE. */
5371 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5372 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5373 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5374 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5375 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes the residual coefficients of one block with CABAC.
 * @param block     destination; levels are stored at block[scantable[...]]
 * @param cat       block category, see the list inside the function
 * @param n         block index, its meaning depends on cat
 * @param scantable maps the decoding order to positions inside block
 * @param qmul      dequantization factors, indexed like block; the callers in
 *                  this file pass NULL for the categories 0 and 3
 * @param max_coeff number of coefficients of the block
 * @param is_dc     non-zero for DC blocks; the callers pass a constant or cat == 0 || cat == 3
 */
5378 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* first state index of each flag group: [MB_FIELD][cat] for the first two tables, [cat] for the third */
5379 static const int significant_coeff_flag_offset[2][6] = {
5380 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5381 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5383 static const int last_coeff_flag_offset[2][6] = {
5384 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5385 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5387 static const int coeff_abs_level_m1_offset[6] = {
5388 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per position offset of the significance context of 8x8 blocks, indexed [MB_FIELD][position] */
5390 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5391 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5392 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5393 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5394 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5395 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5396 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5397 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5398 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5400 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5401 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5402 * map node ctx => cabac ctx for level=1 */
5403 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5404 /* map node ctx => cabac ctx for level>1 */
5405 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5406 static const uint8_t coeff_abs_level_transition[2][8] = {
5407 /* update node ctx after decoding a level=1 */
5408 { 1, 2, 3, 3, 4, 5, 6, 7 },
5409 /* update node ctx after decoding a level>1 */
5410 { 4, 4, 4, 4, 5, 6, 7, 7 }
5416 int coeff_count = 0;
5419 uint8_t *significant_coeff_ctx_base;
5420 uint8_t *last_coeff_ctx_base;
5421 uint8_t *abs_level_m1_ctx_base;
/* with CABAC_ON_STACK the decoder state is copied into the local cc and
 * written back to h->cabac before the function returns */
5424 #define CABAC_ON_STACK
5426 #ifdef CABAC_ON_STACK
5429 cc.range = h->cabac.range;
5430 cc.low = h->cabac.low;
5431 cc.bytestream= h->cabac.bytestream;
5433 #define CC &h->cabac
5437 /* cat: 0-> DC 16x16 n = 0
5438 * 1-> AC 16x16 n = luma4x4idx
5439 * 2-> Luma4x4 n = luma4x4idx
5440 * 3-> DC Chroma n = iCbCr
5441 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5442 * 5-> Luma8x8 n = 4 * luma8x8idx
5445 /* read coded block flag */
5446 if( is_dc || cat != 5 ) {
5447 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* flag is 0: the block has no coefficients, clear its cached count and store the decoder state back */
5450 h->non_zero_count_cache[scan8[16+n]] = 0;
5452 h->non_zero_count_cache[scan8[n]] = 0;
5455 #ifdef CABAC_ON_STACK
5456 h->cabac.range = cc.range ;
5457 h->cabac.low = cc.low ;
5458 h->cabac.bytestream= cc.bytestream;
/* select the state ranges used for this category */
5464 significant_coeff_ctx_base = h->cabac_state
5465 + significant_coeff_flag_offset[MB_FIELD][cat];
5466 last_coeff_ctx_base = h->cabac_state
5467 + last_coeff_flag_offset[MB_FIELD][cat];
5468 abs_level_m1_ctx_base = h->cabac_state
5469 + coeff_abs_level_m1_offset[cat];
/* significance map: index[] collects the positions of the coded coefficients */
5471 if( !is_dc && cat == 5 ) {
5472 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5473 for(last= 0; last < coefs; last++) { \
5474 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5475 if( get_cabac( CC, sig_ctx )) { \
5476 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5477 index[coeff_count++] = last; \
5478 if( get_cabac( CC, last_ctx ) ) { \
5484 if( last == max_coeff -1 ) {\
5485 index[coeff_count++] = last;\
5487 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5488 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5489 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5491 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5493 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5495 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5498 assert(coeff_count > 0);
/* record the coded state for later neighbours: flag bits in cbp_table, counts in non_zero_count_cache */
5502 h->cbp_table[h->mb_xy] |= 0x100;
5504 h->cbp_table[h->mb_xy] |= 0x40 << n;
5507 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5509 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5511 assert( cat == 1 || cat == 2 );
5512 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* the levels are decoded from the last recorded position back to the first */
5516 while( coeff_count-- ) {
5517 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5519 int j= scantable[index[coeff_count]];
/* bin 0: magnitude 1, stored either as +-1 or as (+-qmul[j] + 32) >> 6 */
5521 if( get_cabac( CC, ctx ) == 0 ) {
5522 node_ctx = coeff_abs_level_transition[0][node_ctx];
5524 block[j] = get_cabac_bypass_sign( CC, -1);
5526 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
/* magnitude above 1: context coded bins while coeff_abs is below 15, bypass coded bins beyond that */
5530 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5531 node_ctx = coeff_abs_level_transition[1][node_ctx];
5533 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5537 if( coeff_abs >= 15 ) {
5539 while( get_cabac_bypass( CC ) ) {
5545 coeff_abs += coeff_abs + get_cabac_bypass( CC );
/* store the signed level, either as is or scaled by qmul[j] and rounded with (x + 32) >> 6 */
5551 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5553 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5557 #ifdef CABAC_ON_STACK
5558 h->cabac.range = cc.range ;
5559 h->cabac.low = cc.low ;
5560 h->cabac.bytestream= cc.bytestream;
/* Wrappers around decode_cabac_residual_internal() with a constant is_dc of
 * 1 (_dc) and 0 (_nondc); the opening guard builds them only when
 * CONFIG_SMALL is not defined. */
5565 #ifndef CONFIG_SMALL
5566 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5567 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5570 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5571 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/**
 * Decodes one residual block; the categories 0 and 3 are handled as DC blocks.
 * The parameters are forwarded unchanged, see decode_cabac_residual_internal().
 * NOTE(review): the choice between the direct call and the _dc/_nondc
 * wrappers is presumably made on CONFIG_SMALL -- confirm.
 */
5575 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5577 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5579 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5580 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Sets h->top_mb_xy and h->left_mb_xy[0] for the macroblock at h->mb_xy.
 * They start as one row up and one column left, and are then adjusted from
 * the frame/field flags of the current, top and left macroblock (pair) or,
 * alternatively, for FIELD_PICTURE.
 */
5584 static inline void compute_mb_neighbors(H264Context *h)
5586 MpegEncContext * const s = &h->s;
5587 const int mb_xy = h->mb_xy;
5588 h->top_mb_xy = mb_xy - s->mb_stride;
5589 h->left_mb_xy[0] = mb_xy - 1;
/* pair_xy is the position in the even row, i.e. the upper macroblock of the current pair */
5591 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5592 const int top_pair_xy = pair_xy - s->mb_stride;
5593 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5594 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5595 const int curr_mb_frame_flag = !MB_FIELD;
5596 const int bottom = (s->mb_y & 1);
5598 ? !curr_mb_frame_flag // bottom macroblock
5599 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5601 h->top_mb_xy -= s->mb_stride;
/* when the left pair differs in frame/field mode, the left neighbour is taken from the pair's even row */
5603 if (left_mb_frame_flag != curr_mb_frame_flag) {
5604 h->left_mb_xy[0] = pair_xy - 1;
5606 } else if (FIELD_PICTURE) {
5607 h->top_mb_xy -= s->mb_stride;
5613 * decodes a macroblock
5614 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5616 static int decode_mb_cabac(H264Context *h) {
/* Stages: skip flag, macroblock type, prediction data (intra modes or
 * references and motion vectors), coded block pattern, quantizer delta,
 * residual blocks.
 * NOTE(review): the error paths visible in this function return -1. */
5617 MpegEncContext * const s = &h->s;
5619 int mb_type, partition_count, cbp = 0;
5620 int dct8x8_allowed= h->pps.transform_8x8_mode;
5622 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5624 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5626 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* a skip flag is only decoded in slices other than I and SI */
5627 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
5629 /* a skipped mb needs the aff flag from the following mb */
5630 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5631 predict_field_decoding_flag(h);
5632 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5633 skip = h->next_mb_skipped;
5635 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5636 /* read skip flags */
5638 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5639 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5640 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5641 if(h->next_mb_skipped)
5642 predict_field_decoding_flag(h);
5644 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5649 h->cbp_table[mb_xy] = 0;
5650 h->chroma_pred_mode_table[mb_xy] = 0;
5651 h->last_qscale_diff = 0;
5658 if( (s->mb_y&1) == 0 )
5660 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5662 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5664 h->prev_mb_skipped = 0;
5666 compute_mb_neighbors(h);
5667 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5668 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* the decoded number is translated through the table of the slice type;
 * some B and P types branch to decode_intra_mb instead */
5672 if( h->slice_type == FF_B_TYPE ) {
5674 partition_count= b_mb_type_info[mb_type].partition_count;
5675 mb_type= b_mb_type_info[mb_type].type;
5678 goto decode_intra_mb;
5680 } else if( h->slice_type == FF_P_TYPE ) {
5682 partition_count= p_mb_type_info[mb_type].partition_count;
5683 mb_type= p_mb_type_info[mb_type].type;
5686 goto decode_intra_mb;
5689 assert(h->slice_type == FF_I_TYPE);
/* intra types take cbp and the 16x16 prediction mode from i_mb_type_info */
5691 partition_count = 0;
5692 cbp= i_mb_type_info[mb_type].cbp;
5693 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5694 mb_type= i_mb_type_info[mb_type].type;
5697 mb_type |= MB_TYPE_INTERLACED;
5699 h->slice_table[ mb_xy ]= h->slice_num;
/* PCM macroblock: the samples are copied straight from the bytestream into
 * h->mb and the CABAC decoder is re-initialized behind them.
 * NOTE(review): no check is visible here that enough input bytes remain
 * before the reads through ptr -- confirm. */
5701 if(IS_INTRA_PCM(mb_type)) {
5705 // We assume these blocks are very rare so we do not optimize it.
5706 // FIXME The two following lines get the bitstream position in the cabac
5707 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5708 ptr= h->cabac.bytestream;
5709 if(h->cabac.low&0x1) ptr--;
5711 if(h->cabac.low&0x1FF) ptr--;
5714 // The pixels are stored in the same order as levels in h->mb array.
5715 for(y=0; y<16; y++){
5716 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5717 for(x=0; x<16; x++){
5718 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5719 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5723 const int index= 256 + 4*(y&3) + 32*(y>>2);
5725 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5726 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5730 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5732 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5733 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5737 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5739 // All blocks are present
5740 h->cbp_table[mb_xy] = 0x1ef;
5741 h->chroma_pred_mode_table[mb_xy] = 0;
5742 // In deblocking, the quantizer is 0
5743 s->current_picture.qscale_table[mb_xy]= 0;
5744 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5745 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5746 // All coeffs are present
5747 memset(h->non_zero_count[mb_xy], 16, 16);
5748 s->current_picture.mb_type[mb_xy]= mb_type;
5749 h->last_qscale_diff = 0;
/* the reference counts are doubled here and halved again at the end of the function */
5754 h->ref_count[0] <<= 1;
5755 h->ref_count[1] <<= 1;
5758 fill_caches(h, mb_type, 0);
/* intra macroblock: decode and validate the luma and chroma prediction modes */
5760 if( IS_INTRA( mb_type ) ) {
5762 if( IS_INTRA4x4( mb_type ) ) {
5763 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
/* with the 8x8 transform one mode is decoded per 8x8 block and copied to its four 4x4 cache entries */
5764 mb_type |= MB_TYPE_8x8DCT;
5765 for( i = 0; i < 16; i+=4 ) {
5766 int pred = pred_intra_mode( h, i );
5767 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5768 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5771 for( i = 0; i < 16; i++ ) {
5772 int pred = pred_intra_mode( h, i );
5773 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5775 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5778 write_back_intra_pred_mode(h);
5779 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5781 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5782 if( h->intra16x16_pred_mode < 0 ) return -1;
5784 h->chroma_pred_mode_table[mb_xy] =
5785 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5787 pred_mode= check_intra_pred_mode( h, pred_mode );
5788 if( pred_mode < 0 ) return -1;
5789 h->chroma_pred_mode= pred_mode;
/* four 8x8 partitions: sub types first, then reference indices, then motion vectors */
5790 } else if( partition_count == 4 ) {
5791 int i, j, sub_partition_count[4], list, ref[2][4];
5793 if( h->slice_type == FF_B_TYPE ) {
5794 for( i = 0; i < 4; i++ ) {
5795 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5796 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5797 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5799 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5800 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5801 pred_direct_motion(h, &mb_type);
5802 h->ref_cache[0][scan8[4]] =
5803 h->ref_cache[1][scan8[4]] =
5804 h->ref_cache[0][scan8[12]] =
5805 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5806 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5807 for( i = 0; i < 4; i++ )
5808 if( IS_DIRECT(h->sub_mb_type[i]) )
5809 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5813 for( i = 0; i < 4; i++ ) {
5814 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5815 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5816 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices of all partitions are decoded before any motion vector */
5820 for( list = 0; list < h->list_count; list++ ) {
5821 for( i = 0; i < 4; i++ ) {
5822 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5823 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5824 if( h->ref_count[list] > 1 )
5825 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5831 h->ref_cache[list][ scan8[4*i]+1 ]=
5832 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5837 dct8x8_allowed = get_dct8x8_allowed(h);
5839 for(list=0; list<h->list_count; list++){
5841 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5842 if(IS_DIRECT(h->sub_mb_type[i])){
5843 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5847 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5848 const int sub_mb_type= h->sub_mb_type[i];
5849 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5850 for(j=0; j<sub_partition_count[i]; j++){
5853 const int index= 4*i + block_width*j;
5854 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5855 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5856 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
/* motion vector = prediction (mpx, mpy) + decoded difference; both mv and mvd are cached */
5858 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5859 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5860 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5862 if(IS_SUB_8X8(sub_mb_type)){
5864 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5866 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5869 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5871 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5872 }else if(IS_SUB_8X4(sub_mb_type)){
5873 mv_cache[ 1 ][0]= mx;
5874 mv_cache[ 1 ][1]= my;
5876 mvd_cache[ 1 ][0]= mx - mpx;
5877 mvd_cache[ 1 ][1]= my - mpy;
5878 }else if(IS_SUB_4X8(sub_mb_type)){
5879 mv_cache[ 8 ][0]= mx;
5880 mv_cache[ 8 ][1]= my;
5882 mvd_cache[ 8 ][0]= mx - mpx;
5883 mvd_cache[ 8 ][1]= my - mpy;
5885 mv_cache[ 0 ][0]= mx;
5886 mv_cache[ 0 ][1]= my;
5888 mvd_cache[ 0 ][0]= mx - mpx;
5889 mvd_cache[ 0 ][1]= my - mpy;
/* each uint32_t store below clears one (x, y) pair of int16_t values */
5892 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5893 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5894 p[0] = p[1] = p[8] = p[9] = 0;
5895 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* direct macroblock: motion comes from pred_direct_motion(), all mvds are zero */
5899 } else if( IS_DIRECT(mb_type) ) {
5900 pred_direct_motion(h, &mb_type);
5901 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5902 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5903 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16, 16x8 and 8x16 partitions: per shape, all reference indices are decoded before the motion vectors */
5905 int list, mx, my, i, mpx, mpy;
5906 if(IS_16X16(mb_type)){
5907 for(list=0; list<h->list_count; list++){
5908 if(IS_DIR(mb_type, 0, list)){
5909 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5910 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5912 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5914 for(list=0; list<h->list_count; list++){
5915 if(IS_DIR(mb_type, 0, list)){
5916 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5918 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5919 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5920 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5922 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5923 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5925 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5928 else if(IS_16X8(mb_type)){
5929 for(list=0; list<h->list_count; list++){
5931 if(IS_DIR(mb_type, i, list)){
5932 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5933 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5935 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5938 for(list=0; list<h->list_count; list++){
5940 if(IS_DIR(mb_type, i, list)){
5941 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5942 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5943 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5944 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5946 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5947 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5949 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5950 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5955 assert(IS_8X16(mb_type));
5956 for(list=0; list<h->list_count; list++){
5958 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5959 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5960 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5962 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5965 for(list=0; list<h->list_count; list++){
5967 if(IS_DIR(mb_type, i, list)){
5968 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5969 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5970 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5972 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5973 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5974 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5976 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5977 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5984 if( IS_INTER( mb_type ) ) {
5985 h->chroma_pred_mode_table[mb_xy] = 0;
5986 write_back_motion( h, mb_type );
/* intra 16x16 types already took their cbp from i_mb_type_info; all other types decode it here */
5989 if( !IS_INTRA16x16( mb_type ) ) {
5990 cbp = decode_cabac_mb_cbp_luma( h );
5991 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5994 h->cbp_table[mb_xy] = h->cbp = cbp;
/* non-intra macroblocks with coded luma decode their transform size flag only now */
5996 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5997 if( decode_cabac_mb_transform_size( h ) )
5998 mb_type |= MB_TYPE_8x8DCT;
6000 s->current_picture.mb_type[mb_xy]= mb_type;
/* residual data: quantizer delta first, then the coefficient blocks */
6002 if( cbp || IS_INTRA16x16( mb_type ) ) {
6003 const uint8_t *scan, *scan8x8, *dc_scan;
6004 const uint32_t *qmul;
/* scan tables: field or zigzag order, with separate *_q0 variants for qscale 0 */
6007 if(IS_INTERLACED(mb_type)){
6008 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6009 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6010 dc_scan= luma_dc_field_scan;
6012 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6013 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6014 dc_scan= luma_dc_zigzag_scan;
6017 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6018 if( dqp == INT_MIN ){
6019 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap the quantizer by 52 when it left the range 0..51 */
6023 if(((unsigned)s->qscale) > 51){
6024 if(s->qscale<0) s->qscale+= 52;
6025 else s->qscale-= 52;
6027 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6028 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* intra 16x16: one luma DC block (category 0), then 16 AC blocks (category 1) */
6030 if( IS_INTRA16x16( mb_type ) ) {
6032 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6033 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6036 qmul = h->dequant4_coeff[0][s->qscale];
6037 for( i = 0; i < 16; i++ ) {
6038 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6039 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6042 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other types: per 8x8 block selected by cbp either one 8x8 block (category 5) or four 4x4 blocks (category 2) */
6046 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6047 if( cbp & (1<<i8x8) ) {
6048 if( IS_8x8DCT(mb_type) ) {
6049 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6050 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6052 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6053 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6054 const int index = 4*i8x8 + i4x4;
6055 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6057 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6058 //STOP_TIMER("decode_residual")
6062 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6063 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC blocks (category 3), one per chroma plane */
6070 for( c = 0; c < 2; c++ ) {
6071 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6072 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* chroma AC blocks (category 4), four per chroma plane */
6078 for( c = 0; c < 2; c++ ) {
6079 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6080 for( i = 0; i < 4; i++ ) {
6081 const int index = 16 + 4 * c + i;
6082 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6083 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6087 uint8_t * const nnz= &h->non_zero_count_cache[0];
6088 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6089 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* clear all cached non zero counts (luma and both chroma planes) and the quantizer delta */
6092 uint8_t * const nnz= &h->non_zero_count_cache[0];
6093 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6094 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6095 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6096 h->last_qscale_diff = 0;
6099 s->current_picture.qscale_table[mb_xy]= s->qscale;
6100 write_back_non_zero_count(h);
6103 h->ref_count[0] >>= 1;
6104 h->ref_count[1] >>= 1;
/**
 * Deblocks a vertical luma edge.
 * @param pix    first pixel on the q side of the edge; pix[-1] is on the p side
 * @param stride line stride, passed on to the dsp function
 * @param bS     boundary strengths (4 entries)
 * @param qp     quantizer used, together with the slice offsets, to look up
 *               alpha, beta and tc0
 * NOTE(review): the dsp function presumably handles strengths below 4 and the
 * loop below the strong bS=4 case -- the branch condition should be confirmed.
 */
6111 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* the tables are addressed with an offset of 52, which allows negative indices */
6113 const int index_a = qp + h->slice_alpha_c0_offset;
6114 const int alpha = (alpha_table+52)[index_a];
6115 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc is -1 where the strength is 0 */
6120 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6121 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6123 /* 16px edge length, because bS=4 is triggered by being at
6124 * the edge of an intra MB, so all 4 bS are the same */
6125 for( d = 0; d < 16; d++ ) {
6126 const int p0 = pix[-1];
6127 const int p1 = pix[-2];
6128 const int p2 = pix[-3];
6130 const int q0 = pix[0];
6131 const int q1 = pix[1];
6132 const int q2 = pix[2];
/* a line is only filtered when the step across and next to the edge is small enough */
6134 if( FFABS( p0 - q0 ) < alpha &&
6135 FFABS( p1 - p0 ) < beta &&
6136 FFABS( q1 - q0 ) < beta ) {
/* for a very small step up to three pixels per side are rewritten, otherwise only p0 and q0 */
6138 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6139 if( FFABS( p2 - p0 ) < beta)
6141 const int p3 = pix[-4];
6143 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6144 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6145 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6148 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6150 if( FFABS( q2 - q0 ) < beta)
6152 const int q3 = pix[3];
6154 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6155 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6156 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6159 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6163 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6164 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6166 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks a vertical chroma edge through the dsp functions
 * h264_h_loop_filter_chroma / h264_h_loop_filter_chroma_intra.
 * @param pix    pixel pointer handed to the dsp function
 * @param stride line stride handed to the dsp function
 * @param bS     boundary strengths (4 entries)
 * @param qp     quantizer used, together with the slice offsets, to look up
 *               alpha, beta and tc0
 */
6172 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6174 const int index_a = qp + h->slice_alpha_c0_offset;
6175 const int alpha = (alpha_table+52)[index_a];
6176 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0 + 1, and 0 where the strength is 0 */
6181 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6182 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6184 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks a vertical luma edge line by line, with a strength and quantizer
 * selected separately for each of the 16 lines.
 * @param pix    first pixel on the q side of the edge in the first line
 * @param stride distance between two lines
 * @param bS     boundary strengths (8 entries)
 * @param qp     two quantizers; the one used for a line depends on MB_FIELD
 */
6188 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6190 for( i = 0; i < 16; i++, pix += stride) {
6196 int bS_index = (i >> 1);
6199 bS_index |= (i & 1);
6202 if( bS[bS_index] == 0 ) {
/* field macroblocks switch the quantizer after 8 lines, otherwise it alternates per line */
6206 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6207 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6208 alpha = (alpha_table+52)[index_a];
6209 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* strengths 1..3: normal filter with corrections clipped to +-tc0 / +-tc */
6211 if( bS[bS_index] < 4 ) {
6212 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6213 const int p0 = pix[-1];
6214 const int p1 = pix[-2];
6215 const int p2 = pix[-3];
6216 const int q0 = pix[0];
6217 const int q1 = pix[1];
6218 const int q2 = pix[2];
6220 if( FFABS( p0 - q0 ) < alpha &&
6221 FFABS( p1 - p0 ) < beta &&
6222 FFABS( q1 - q0 ) < beta ) {
6226 if( FFABS( p2 - p0 ) < beta ) {
6227 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6230 if( FFABS( q2 - q0 ) < beta ) {
6231 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6235 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6236 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6237 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6238 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strength 4: strong filter, same arithmetic as the bS=4 loop in filter_mb_edgev() */
6241 const int p0 = pix[-1];
6242 const int p1 = pix[-2];
6243 const int p2 = pix[-3];
6245 const int q0 = pix[0];
6246 const int q1 = pix[1];
6247 const int q2 = pix[2];
6249 if( FFABS( p0 - q0 ) < alpha &&
6250 FFABS( p1 - p0 ) < beta &&
6251 FFABS( q1 - q0 ) < beta ) {
6253 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6254 if( FFABS( p2 - p0 ) < beta)
6256 const int p3 = pix[-4];
6258 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6259 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6260 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6263 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6265 if( FFABS( q2 - q0 ) < beta)
6267 const int q3 = pix[3];
6269 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6270 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6271 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6274 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6278 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6279 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6281 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks a vertical chroma edge line by line, with a strength and quantizer
 * selected separately for each of the 8 lines. Only p0 and q0 are modified.
 * @param pix    first pixel on the q side of the edge in the first line
 * @param stride distance between two lines
 * @param bS     boundary strengths (8 entries)
 * @param qp     two quantizers; the one used for a line depends on MB_FIELD
 */
6286 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6288 for( i = 0; i < 8; i++, pix += stride) {
6296 if( bS[bS_index] == 0 ) {
/* field macroblocks switch the quantizer after 4 lines, otherwise it alternates per line */
6300 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6301 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6302 alpha = (alpha_table+52)[index_a];
6303 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* strengths 1..3: correction clipped to +-(tc0 + 1) */
6305 if( bS[bS_index] < 4 ) {
6306 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6307 const int p0 = pix[-1];
6308 const int p1 = pix[-2];
6309 const int q0 = pix[0];
6310 const int q1 = pix[1];
6312 if( FFABS( p0 - q0 ) < alpha &&
6313 FFABS( p1 - p0 ) < beta &&
6314 FFABS( q1 - q0 ) < beta ) {
6315 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6317 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6318 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6319 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strength 4: unclipped 3-tap filter */
6322 const int p0 = pix[-1];
6323 const int p1 = pix[-2];
6324 const int q0 = pix[0];
6325 const int q1 = pix[1];
6327 if( FFABS( p0 - q0 ) < alpha &&
6328 FFABS( p1 - p0 ) < beta &&
6329 FFABS( q1 - q0 ) < beta ) {
6331 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6332 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6333 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks a horizontal luma edge; the counterpart of filter_mb_edgev() with
 * the p and q samples taken from the lines above and below the edge.
 * @param pix    first pixel of the line below the edge (q0)
 * @param stride distance between two lines
 * @param bS     boundary strengths (4 entries)
 * @param qp     quantizer used, together with the slice offsets, to look up
 *               alpha, beta and tc0
 */
6339 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6341 const int index_a = qp + h->slice_alpha_c0_offset;
6342 const int alpha = (alpha_table+52)[index_a];
6343 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6344 const int pix_next = stride;
/* tc is -1 where the strength is 0 */
6349 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6350 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6352 /* 16px edge length, see filter_mb_edgev */
6353 for( d = 0; d < 16; d++ ) {
6354 const int p0 = pix[-1*pix_next];
6355 const int p1 = pix[-2*pix_next];
6356 const int p2 = pix[-3*pix_next];
6357 const int q0 = pix[0];
6358 const int q1 = pix[1*pix_next];
6359 const int q2 = pix[2*pix_next];
6361 if( FFABS( p0 - q0 ) < alpha &&
6362 FFABS( p1 - p0 ) < beta &&
6363 FFABS( q1 - q0 ) < beta ) {
/* unlike in filter_mb_edgev(), p3 and q3 are loaded before the inner conditions are known */
6365 const int p3 = pix[-4*pix_next];
6366 const int q3 = pix[ 3*pix_next];
6368 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6369 if( FFABS( p2 - p0 ) < beta) {
6371 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6372 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6373 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6376 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6378 if( FFABS( q2 - q0 ) < beta) {
6380 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6381 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6382 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6385 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6389 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6390 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6392 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one chroma edge through the DSP h264_v_loop_filter_chroma routines.
 * filter_mb() uses this function for dir == 1 ("horizontal edge").
 *
 * @param h      decoder context; provides slice_alpha_c0_offset,
 *               slice_beta_offset and the DSP function table
 * @param pix    pointer handed unchanged to the DSP routine
 * @param stride stride handed unchanged to the DSP routine
 * @param bS     four bS (filter strength) values for this edge
 * @param qp     chroma quantiser used to index the alpha, beta and tc0 tables
 *
 * NOTE(review): the lines selecting between the two DSP calls (and the
 * declarations of i and tc) are not visible in this listing; presumably the
 * _intra variant is used for the strongest bS value -- TODO confirm.
 */
6399 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6401 const int index_a = qp + h->slice_alpha_c0_offset;
6402 const int alpha = (alpha_table+52)[index_a];
6403 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma clipping value is the tc0_table entry plus one; 0 when bS[i] is 0 */
6408 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6409 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* variant without a tc argument */
6411 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks one macroblock on a simplified path. It calls filter_mb() instead
 * when the macroblock is in column 0 or in the first row, when no
 * h264_loop_filter_strength DSP routine is set, when pps.chroma_qp_diff is
 * non-zero, or when deblocking_filter == 2 and the top or left neighbour has
 * a different slice_table entry.
 *
 * @param h          decoder context
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param img_y      luma samples of this macroblock
 * @param img_cb     Cb samples of this macroblock
 * @param img_cr     Cr samples of this macroblock
 * @param linesize   luma line stride
 * @param uvlinesize chroma line stride
 *
 * NOTE(review): several short lines (declarations of mb_xy, mb_type and
 * edges, return statements, else branches, the FILTER() invocations) are not
 * visible in this listing; confirm control flow against the complete file.
 */
6415 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6416 MpegEncContext * const s = &h->s;
/* the comparison yields 1 for a bottom field picture, 0 otherwise */
6417 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6419 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6423 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6424 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6425 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6426 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
/* MBAFF frames are not handled on this path */
6429 assert(!FRAME_MBAFF);
6431 mb_type = s->current_picture.mb_type[mb_xy];
/* qp: this MB, qp0: left neighbour, qp1: top neighbour; qpc* are the
 * corresponding chroma values from get_chroma_qp() with index 0 */
6432 qp = s->current_picture.qscale_table[mb_xy];
6433 qp0 = s->current_picture.qscale_table[mb_xy-1];
6434 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6435 qpc = get_chroma_qp( h, 0, qp );
6436 qpc0 = get_chroma_qp( h, 0, qp0 );
6437 qpc1 = get_chroma_qp( h, 0, qp1 );
/* neighbour values are replaced by the rounded average with this MB */
6438 qp0 = (qp + qp0 + 1) >> 1;
6439 qp1 = (qp + qp1 + 1) >> 1;
6440 qpc0 = (qpc + qpc0 + 1) >> 1;
6441 qpc1 = (qpc + qpc1 + 1) >> 1;
/* the statement guarded by the threshold test is not visible here;
 * presumably an early return when every QP is at or below it -- TODO confirm */
6442 qp_thresh = 15 - h->slice_alpha_c0_offset;
6443 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6444 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: strength 4 on the left MB edge, 3 on inner edges; the top MB edge
 * uses 3 in field pictures and 4 otherwise (bSH) */
6447 if( IS_INTRA(mb_type) ) {
6448 int16_t bS4[4] = {4,4,4,4};
6449 int16_t bS3[4] = {3,3,3,3};
6450 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only luma edges 0 and 2 are filtered */
6451 if( IS_8x8DCT(mb_type) ) {
6452 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6453 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6454 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6455 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
/* all four luma edges in each direction */
6457 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6458 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6459 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6460 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6461 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6462 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6463 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6464 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: edges 0 and 2 of both planes, in both directions */
6466 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6467 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6468 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6469 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6470 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6471 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6472 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6473 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* bS[dir][edge][4]; bSv views each row of four int16 strengths as one 64-bit
 * word so a whole edge can be written or tested at once */
6476 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6477 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6479 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
/* strength 2 on edges 0 and 2 of both directions */
6481 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* otherwise the DSP routine computes the strengths from the non-zero-count,
 * reference and motion-vector caches */
6483 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6484 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6485 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6486 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6488 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6489 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6490 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6491 (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* an intra neighbour overrides the strength of the shared MB edge */
6493 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6494 bSv[0][0] = 0x0004000400040004ULL;
6495 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6496 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): when any strength of the edge is non-zero, filter luma
 * and both chroma planes; edge 0 uses the neighbour-averaged QP (qp0/qp1,
 * qpc0/qpc1), inner edges use the MB's own QP */
6498 #define FILTER(hv,dir,edge)\
6499 if(bSv[dir][edge]) {\
6500 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6502 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6503 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6509 } else if( IS_8x8DCT(mb_type) ) {
/**
 * General deblocking of one macroblock: derives the four bS values of every
 * edge in both directions (dir 0 = vertical edges, dir 1 = horizontal edges)
 * and calls the luma and chroma edge filters. This function carries the
 * FRAME_MBAFF handling that filter_mb_fast() asserts it never sees.
 *
 * @param h          decoder context
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param img_y      luma samples of this macroblock
 * @param img_cb     Cb samples of this macroblock
 * @param img_cr     Cr samples of this macroblock
 * @param linesize   luma line stride
 * @param uvlinesize chroma line stride
 *
 * NOTE(review): many short lines (declarations such as dir, edge, bS, qp, v,
 * l; assignments inside branches; continue/return statements; braces) are
 * not visible in this listing. Comments below only describe visible lines.
 */
6528 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6529 MpegEncContext * const s = &h->s;
6530 const int mb_xy= mb_x + mb_y*s->mb_stride;
6531 const int mb_type = s->current_picture.mb_type[mb_xy];
/* limit for the vertical motion vector difference test: 2 for interlaced
 * macroblocks, 4 otherwise */
6532 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6533 int first_vertical_edge_done = 0;
6535 /* FIXME: A given frame may occupy more than one position in
6536 * the reference list. So ref2frm should be populated with
6537 * frame numbers, not indexes. */
/* looked up below as ref2frm[ref + 2] */
6538 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6539 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6541 //for sufficiently low qp, filtering wouldn't do anything
6542 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6544 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6545 int qp = s->current_picture.qscale_table[mb_xy];
6547 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6548 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6554 // left mb is in picture
6555 && h->slice_table[mb_xy-1] != 255
6556 // and current and left pair do not have the same interlaced type
6557 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6558 // and left mb is in the same slice if deblocking_filter == 2
6559 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6560 /* First vertical edge is different in MBAFF frames
6561 * There are 8 different bS to compute and 2 different Qp
6563 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6564 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6569 int mb_qp, mbn0_qp, mbn1_qp;
6571 first_vertical_edge_done = 1;
6573 if( IS_INTRA(mb_type) )
6574 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6576 for( i = 0; i < 8; i++ ) {
6577 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6579 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6581 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6582 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6583 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* rounded averages of this MB's QP with each of the two left MBs, for luma
 * (qp), Cb (bqp, chroma index 0) and Cr (rqp, chroma index 1) */
6590 mb_qp = s->current_picture.qscale_table[mb_xy];
6591 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6592 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6593 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6594 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6595 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6596 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6597 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6598 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6599 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6600 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6601 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6602 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6605 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6606 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* filter the first vertical edge of all three planes with the 8 strengths and
 * the two QPs computed above */
6607 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6608 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6609 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6611 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6612 for( dir = 0; dir < 2; dir++ )
/* mbm: the left (dir 0) or top (dir 1) neighbour sharing edge 0 */
6615 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6616 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* edge 0 is skipped when the neighbour's slice_table entry is 255 (the
 * condition above treats 255 as "not in picture") */
6617 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* a skipped 16x16 macroblock only has its edge 0 examined */
6619 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6620 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6621 // how often to recheck mv-based bS when iterating between edges
6622 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6623 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6624 // how often to recheck mv-based bS when iterating along each edge
6625 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the flag set by the MBAFF first-edge code is consumed here; the other
 * statement of this branch is not visible (presumably it sets start to 1) */
6627 if (first_vertical_edge_done) {
6629 first_vertical_edge_done = 0;
/* statement guarded by this test is not visible; presumably edge 0 is skipped
 * when the neighbour is in another slice -- TODO confirm */
6632 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6635 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6636 && !IS_INTERLACED(mb_type)
6637 && IS_INTERLACED(mbm_type)
6639 // This is a special case in the norm where the filtering must
6640 // be done twice (one each of the field) even if we are in a
6641 // frame macroblock.
/* NOTE(review): nnz_idx presumably picks the neighbour's non_zero_count
 * entries adjacent to the shared edge -- confirm against the table layout */
6643 static const int nnz_idx[4] = {4,5,6,3};
/* doubled strides: each pass j below filters every second line */
6644 unsigned int tmp_linesize = 2 * linesize;
6645 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6646 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* two passes, one per macroblock of the pair above */
6651 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6652 if( IS_INTRA(mb_type) ||
6653 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6654 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6656 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6657 for( i = 0; i < 4; i++ ) {
6658 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6659 mbn_nnz[nnz_idx[i]] != 0 )
6665 // Do not use s->qscale as luma quantizer because it has not the same
6666 // value in IPCM macroblocks.
6667 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6668 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6669 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6670 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6671 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6672 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6673 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6674 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* regular per-edge processing */
6681 for( edge = start; edge < edges; edge++ ) {
6682 /* mbn_xy: neighbor macroblock */
6683 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6684 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* statement guarded by this test is not visible; presumably odd edges are
 * skipped for 8x8-transform macroblocks -- TODO confirm */
6688 if( (edge&1) && IS_8x8DCT(mb_type) )
/* intra on either side: all four strengths get one common value (its
 * derivation is only partly visible here) */
6691 if( IS_INTRA(mb_type) ||
6692 IS_INTRA(mbn_type) ) {
6695 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6696 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6705 bS[0] = bS[1] = bS[2] = bS[3] = value;
6710 if( edge & mask_edge ) {
6711 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6714 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6715 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* one motion check for the whole edge */
6718 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6719 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6720 int bn_idx= b_idx - (dir ? 8:1);
/* compares reference and motion vector of the two blocks for list 0, and for
 * list 1 as well in B slices; stops at the first difference */
6722 for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6723 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6724 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6725 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6727 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-block checks along the edge: coefficients first, then motion */
6733 for( i = 0; i < 4; i++ ) {
6734 int x = dir == 0 ? edge : i;
6735 int y = dir == 0 ? i : edge;
6736 int b_idx= 8 + 4 + x + 8*y;
6737 int bn_idx= b_idx - (dir ? 8:1);
6739 if( h->non_zero_count_cache[b_idx] != 0 ||
6740 h->non_zero_count_cache[bn_idx] != 0 ) {
6746 for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6747 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6748 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6749 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* statement guarded by this test is not visible; presumably the edge is
 * skipped when every strength is zero -- TODO confirm */
6757 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6762 // Do not use s->qscale as luma quantizer because it has not the same
6763 // value in IPCM macroblocks.
6764 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6765 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6766 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6767 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* vertical-edge filters; chroma is filtered on even luma edges only */
6769 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6770 if( (edge&1) == 0 ) {
6771 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6772 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6773 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6774 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* horizontal-edge filters; chroma is filtered on even luma edges only */
6777 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6778 if( (edge&1) == 0 ) {
6779 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6780 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6781 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6782 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decodes the macroblocks of one slice, using decode_mb_cabac() when
 * h->pps.cabac is set and decode_mb_cavlc() in the other visible path.
 * Each decoded macroblock is reconstructed with hl_decode_mb(); slice ends
 * and errors are reported through ff_er_add_slice().
 *
 * @param avctx codec context (not referenced in the visible lines)
 * @param h     decoder context
 *
 * NOTE(review): loop headers, return statements, braces and other short lines
 * are not visible in this listing, so the return values for each outcome
 * cannot be stated from here.
 */
6789 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6790 MpegEncContext * const s = &h->s;
/* mask applied to the status flags passed to ff_er_add_slice() */
6791 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6795 if( h->pps.cabac ) {
6799 align_get_bits( &s->gb );
/* the CABAC decoder starts at the current byte position of the bit reader
 * and is given the remaining length rounded up to whole bytes */
6802 ff_init_cabac_states( &h->cabac);
6803 ff_init_cabac_decoder( &h->cabac,
6804 s->gb.buffer + get_bits_count(&s->gb)/8,
6805 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6806 /* calculate pre-state */
6807 for( i= 0; i < 460; i++ ) {
/* I slices use their own init table; other slice types use the table
 * selected by cabac_init_idc. pre is clipped to 1..126 */
6809 if( h->slice_type == FF_I_TYPE )
6810 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6812 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* two encodings of pre into the state byte (low bit 0 or 1); the condition
 * choosing between them is not visible here */
6815 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6817 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6822 int ret = decode_mb_cabac(h);
6824 //STOP_TIMER("decode_mb_cabac")
6826 if(ret>=0) hl_decode_mb(h);
/* in MBAFF frames a second macroblock is decoded in the same iteration */
6828 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6831 if(ret>=0) ret = decode_mb_cabac(h);
6833 if(ret>=0) hl_decode_mb(h);
6836 eos = get_cabac_terminate( &h->cabac );
/* error: decode failure, or the CABAC read pointer is more than 2 bytes past
 * the end of the buffer */
6838 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6839 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6840 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* end of a macroblock row: hand the finished band to the caller */
6844 if( ++s->mb_x >= s->mb_width ) {
6846 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6848 if(FIELD_OR_MBAFF_PICTURE) {
/* terminate bit seen or last row passed: record a clean slice end */
6853 if( eos || s->mb_y >= s->mb_height ) {
6854 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6855 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6862 int ret = decode_mb_cavlc(h);
6864 if(ret>=0) hl_decode_mb(h);
6866 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6868 ret = decode_mb_cavlc(h);
6870 if(ret>=0) hl_decode_mb(h);
6875 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6876 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6881 if(++s->mb_x >= s->mb_width){
6883 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6885 if(FIELD_OR_MBAFF_PICTURE) {
/* past the last row: the slice end is recorded with two different
 * ff_er_add_slice() calls depending on whether the bit reader sits exactly
 * at the end of the data */
6888 if(s->mb_y >= s->mb_height){
6889 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6891 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6892 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6896 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* data exhausted with no skip run pending: a clean end if the position is
 * exactly at the end, otherwise reported with the error flags */
6903 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6904 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6905 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6906 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6910 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below calls decode_mb(), passes s->gb by value and
 * contains "s->?gb", which is not valid C. Presumably this region is disabled
 * by preprocessor lines that are not visible in this listing -- confirm
 * before touching it. */
6919 for(;s->mb_y < s->mb_height; s->mb_y++){
6920 for(;s->mb_x < s->mb_width; s->mb_x++){
6921 int ret= decode_mb(h);
6926 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6927 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6932 if(++s->mb_x >= s->mb_width){
6934 if(++s->mb_y >= s->mb_height){
6935 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6936 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6940 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6947 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6948 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6949 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6953 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6960 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6963 return -1; //not reached
/**
 * Reads an unregistered user data payload into a local buffer and looks for
 * an x264 version banner in the text that follows the first 16 bytes. A
 * parsed, non-negative build number is stored in h->x264_build.
 *
 * @param h    decoder context; the payload is read from h->s.gb
 * @param size payload size in bytes
 *
 * NOTE(review): the return statements, the declarations of e, build and i,
 * and the buffer termination are not visible in this listing. decode_sei()
 * treats a negative return value as failure.
 */
6966 static int decode_unregistered_user_data(H264Context *h, int size){
6967 MpegEncContext * const s = &h->s;
6968 uint8_t user_data[16+256];
/* copy at most sizeof(user_data)-1 payload bytes, leaving one byte spare */
6974 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6975 user_data[i]= get_bits(&s->gb, 8);
/* the text part starts after the first 16 bytes */
6979 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6980 if(e==1 && build>=0)
6981 h->x264_build= build;
6983 if(s->avctx->debug & FF_DEBUG_BUGS)
6984 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skips one payload byte; presumably inside a loop over the bytes that did
 * not fit into user_data (loop header not visible) -- TODO confirm */
6987 skip_bits(&s->gb, 8);
/**
 * Walks the SEI messages in h->s.gb while more than 16 bits remain. For each
 * message it reads the type and size, then either passes the payload to
 * decode_unregistered_user_data() or skips it.
 *
 * @param h decoder context
 *
 * NOTE(review): the switch/case lines and the return statements are not
 * visible in this listing, so which type value selects the user-data handler
 * and what is returned cannot be stated from here.
 */
6992 static int decode_sei(H264Context *h){
6993 MpegEncContext * const s = &h->s;
6995 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* type is the sum of consecutive bytes; a byte equal to 255 means another
 * byte follows */
7000 type+= show_bits(&s->gb, 8);
7001 }while(get_bits(&s->gb, 8) == 255);
/* size uses the same byte-sum coding */
7005 size+= show_bits(&s->gb, 8);
7006 }while(get_bits(&s->gb, 8) == 255);
7010 if(decode_unregistered_user_data(h, size) < 0)
/* payloads that are not handled are skipped, size bytes at once */
7014 skip_bits(&s->gb, 8*size);
7017 //FIXME check bits here
7018 align_get_bits(&s->gb);
/**
 * Consumes one hrd_parameters structure from h->s.gb. In the visible lines
 * every value except cpb_count is read and discarded; the syntax element
 * names are given in the trailing comments.
 *
 * @param h   decoder context
 * @param sps sequence parameter set being parsed (not written in the visible
 *            lines)
 */
7024 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7025 MpegEncContext * const s = &h->s;
/* cpb_count only bounds the loop below.
 * NOTE(review): no upper limit on cpb_count is visible here */
7027 cpb_count = get_ue_golomb(&s->gb) + 1;
7028 get_bits(&s->gb, 4); /* bit_rate_scale */
7029 get_bits(&s->gb, 4); /* cpb_size_scale */
7030 for(i=0; i<cpb_count; i++){
7031 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7032 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7033 get_bits1(&s->gb); /* cbr_flag */
7035 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7036 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7037 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7038 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses the VUI parameters from h->s.gb into sps. The fields stored are the
 * sample aspect ratio (sps->sar), the timing information, the
 * bitstream_restriction_flag and num_reorder_frames; all other syntax
 * elements are read and discarded.
 *
 * @param h   decoder context
 * @param sps sequence parameter set that receives the parsed values
 *
 * NOTE(review): return statements and else branches are not visible in this
 * listing; decode_seq_parameter_set() ignores the return value in its
 * visible call.
 */
7041 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7042 MpegEncContext * const s = &h->s;
7043 int aspect_ratio_info_present_flag;
7044 unsigned int aspect_ratio_idc;
7045 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7047 aspect_ratio_info_present_flag= get_bits1(&s->gb);
/* the aspect ratio is either given explicitly as two 16-bit values
 * (EXTENDED_SAR) or looked up in pixel_aspect; an index outside the table is
 * logged as illegal */
7049 if( aspect_ratio_info_present_flag ) {
7050 aspect_ratio_idc= get_bits(&s->gb, 8);
7051 if( aspect_ratio_idc == EXTENDED_SAR ) {
7052 sps->sar.num= get_bits(&s->gb, 16);
7053 sps->sar.den= get_bits(&s->gb, 16);
7054 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7055 sps->sar= pixel_aspect[aspect_ratio_idc];
7057 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7064 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7066 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7067 get_bits1(&s->gb); /* overscan_appropriate_flag */
7070 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7071 get_bits(&s->gb, 3); /* video_format */
7072 get_bits1(&s->gb); /* video_full_range_flag */
7073 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7074 get_bits(&s->gb, 8); /* colour_primaries */
7075 get_bits(&s->gb, 8); /* transfer_characteristics */
7076 get_bits(&s->gb, 8); /* matrix_coefficients */
7080 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7081 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7082 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* timing information is kept in the SPS */
7085 sps->timing_info_present_flag = get_bits1(&s->gb);
7086 if(sps->timing_info_present_flag){
7087 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7088 sps->time_scale = get_bits_long(&s->gb, 32);
7089 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* both HRD parameter sets are consumed by decode_hrd_parameters() */
7092 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7093 if(nal_hrd_parameters_present_flag)
7094 decode_hrd_parameters(h, sps);
7095 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7096 if(vcl_hrd_parameters_present_flag)
7097 decode_hrd_parameters(h, sps);
7098 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7099 get_bits1(&s->gb); /* low_delay_hrd_flag */
7100 get_bits1(&s->gb); /* pic_struct_present_flag */
7102 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7103 if(sps->bitstream_restriction_flag){
7104 unsigned int num_reorder_frames;
7105 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7106 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7107 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7108 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7109 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7110 num_reorder_frames= get_ue_golomb(&s->gb);
7111 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* values above 16 are logged as illegal; the statement following the log
 * call is not visible (presumably an error return) */
7113 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7114 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7118 sps->num_reorder_frames= num_reorder_frames;
/**
 * Reads one scaling list from h->s.gb into factors.
 *
 * @param h             decoder context
 * @param factors       destination array with size entries
 * @param size          number of entries; 16 selects zigzag_scan, any other
 *                      value selects zigzag_scan8x8
 * @param jvt_list      list copied when the first coded value is zero
 * @param fallback_list list copied when the leading flag bit is zero
 *
 * NOTE(review): the else line and the lines guarding the delta read and
 * ending the loop are not visible in this listing.
 */
7124 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7125 const uint8_t *jvt_list, const uint8_t *fallback_list){
7126 MpegEncContext * const s = &h->s;
7127 int i, last = 8, next = 8;
7128 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7129 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7130 memcpy(factors, fallback_list, size*sizeof(uint8_t));
/* values are coded as signed deltas to the previous value, modulo 256, and
 * stored in the order given by scan */
7132 for(i=0;i<size;i++){
7134 next = (last + get_se_golomb(&s->gb)) & 0xff;
7135 if(!i && !next){ /* matrix not written, we use the preset one */
7136 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* a zero next repeats the previous value */
7139 last = factors[scan[i]] = next ? next : last;
/**
 * Reads the scaling matrices of an SPS or PPS: six 4x4 lists and, when
 * is_sps is set or pps->transform_8x8_mode is set, two 8x8 lists.
 *
 * @param h               decoder context
 * @param sps             SPS; source of the fallback lists when parsing a
 *                        PPS, and its scaling_matrix_present flag is set
 *                        when parsing an SPS that carries matrices
 * @param pps             PPS; only read when is_sps is 0 (the SPS parser
 *                        passes NULL)
 * @param is_sps          1 when called for an SPS, 0 for a PPS
 * @param scaling_matrix4 destination for the six 16-entry lists
 * @param scaling_matrix8 destination for the two 64-entry lists
 */
7143 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7144 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7145 MpegEncContext * const s = &h->s;
/* a PPS falls back to the matrices of its SPS if the SPS had any; in every
 * other case the fallback is the default table */
7146 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7147 const uint8_t *fallback[4] = {
7148 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7149 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7150 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7151 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* matrices present: within each group of three 4x4 lists, the second and
 * third list fall back to the list decoded just before them */
7153 if(get_bits1(&s->gb)){
7154 sps->scaling_matrix_present |= is_sps;
7155 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7156 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7157 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7158 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7159 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7160 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7161 if(is_sps || pps->transform_8x8_mode){
7162 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7163 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* no matrices in a PPS whose SPS has them: copy all of the SPS matrices */
7165 } else if(fallback_sps) {
7166 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7167 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7172 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/*
 * @param h    decoder context, used for logging
 * @param vec  array of parameter set pointers, indexed by id
 * @param id   index of the requested entry
 * @param max  limit for id (the range test itself is not visible here;
 *             presumably id >= max is rejected -- TODO confirm)
 * @param size size in bytes of a newly allocated, zeroed entry
 * @param name short name used in the log messages ("sps" / "pps" at the
 *             visible call sites)
 *
 * NOTE(review): the return type, the conditions and the return statements
 * are not visible in this listing. Both callers assign the result to their
 * parameter set pointer.
 * NOTE(review): id is unsigned int but is logged with %d.
 */
7175 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7176 const size_t size, const char *name)
7179 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
/* presumably only executed when vec[id] is still empty -- TODO confirm */
7184 vec[id] = av_mallocz(size);
7186 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parses a sequence parameter set from h->s.gb into the entry of
 * h->sps_buffers selected by the coded sps_id, which is obtained through
 * alloc_parameter_set().
 *
 * @param h decoder context
 *
 * NOTE(review): return statements, else branches and some declarations (sps,
 * i) are not visible in this listing, so the return values cannot be stated
 * from here; each visible error log is presumably followed by an error
 * return -- TODO confirm.
 */
7191 static inline int decode_seq_parameter_set(H264Context *h){
7192 MpegEncContext * const s = &h->s;
7193 int profile_idc, level_idc;
7194 unsigned int sps_id, tmp, mb_width, mb_height;
7198 profile_idc= get_bits(&s->gb, 8);
7199 get_bits1(&s->gb); //constraint_set0_flag
7200 get_bits1(&s->gb); //constraint_set1_flag
7201 get_bits1(&s->gb); //constraint_set2_flag
7202 get_bits1(&s->gb); //constraint_set3_flag
7203 get_bits(&s->gb, 4); // reserved
7204 level_idc= get_bits(&s->gb, 8);
7205 sps_id= get_ue_golomb(&s->gb);
7207 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7211 sps->profile_idc= profile_idc;
7212 sps->level_idc= level_idc;
/* profiles >= 100 carry the chroma format, bit depths, transform bypass flag
 * and scaling matrices; only the last two are stored */
7214 if(sps->profile_idc >= 100){ //high profile
7215 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7216 get_bits1(&s->gb); //residual_color_transform_flag
7217 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7218 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7219 sps->transform_bypass = get_bits1(&s->gb);
7220 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7222 sps->scaling_matrix_present = 0;
7224 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7225 sps->poc_type= get_ue_golomb(&s->gb);
/* the fields that follow depend on the picture order count type */
7227 if(sps->poc_type == 0){ //FIXME #define
7228 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7229 } else if(sps->poc_type == 1){//FIXME #define
7230 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7231 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7232 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7233 tmp= get_ue_golomb(&s->gb);
/* the cycle length must fit the offset_for_ref_frame array */
7235 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7236 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7239 sps->poc_cycle_length= tmp;
7241 for(i=0; i<sps->poc_cycle_length; i++)
7242 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7243 }else if(sps->poc_type != 2){
7244 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
/* reference frame count, bounded by MAX_PICTURE_COUNT-2 and by 31 */
7248 tmp= get_ue_golomb(&s->gb);
7249 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7250 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7253 sps->ref_frame_count= tmp;
7254 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* picture size in macroblocks, checked before 16*mb_width and 16*mb_height
 * are formed and validated */
7255 mb_width= get_ue_golomb(&s->gb) + 1;
7256 mb_height= get_ue_golomb(&s->gb) + 1;
7257 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7258 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7259 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7262 sps->mb_width = mb_width;
7263 sps->mb_height= mb_height;
/* mb_aff is only coded when frame_mbs_only_flag is 0 */
7265 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7266 if(!sps->frame_mbs_only_flag)
7267 sps->mb_aff= get_bits1(&s->gb);
7271 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7273 #ifndef ALLOW_INTERLACE
7275 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7277 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7278 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
/* cropping offsets: unsupported combinations are only logged in the visible
 * lines */
7280 sps->crop= get_bits1(&s->gb);
7282 sps->crop_left = get_ue_golomb(&s->gb);
7283 sps->crop_right = get_ue_golomb(&s->gb);
7284 sps->crop_top = get_ue_golomb(&s->gb);
7285 sps->crop_bottom= get_ue_golomb(&s->gb);
7286 if(sps->crop_left || sps->crop_top){
7287 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* NOTE(review): this test reads h->sps.frame_mbs_only_flag rather than the
 * flag of the sps being parsed (sps->frame_mbs_only_flag) -- confirm which
 * one is intended */
7289 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7290 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7296 sps->crop_bottom= 0;
/* the return value of decode_vui_parameters() is not checked here */
7299 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7300 if( sps->vui_parameters_present_flag )
7301 decode_vui_parameters(h, sps);
/* one-line summary of the parsed SPS when picture info debugging is on */
7303 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7304 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7305 sps_id, sps->profile_idc, sps->level_idc,
7307 sps->ref_frame_count,
7308 sps->mb_width, sps->mb_height,
7309 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7310 sps->direct_8x8_inference_flag ? "8B8" : "",
7311 sps->crop_left, sps->crop_right,
7312 sps->crop_top, sps->crop_bottom,
7313 sps->vui_parameters_present_flag ? "VUI" : ""
/*
 * Fills row t of pps->chroma_qp_table: entry i receives the chroma_qp[] value
 * for i + index clipped to 0..51.
 *
 * @param pps   picture parameter set whose table row is written
 * @param t     row of chroma_qp_table to fill
 * @param index offset added to i before the lookup
 *
 * NOTE(review): the return type line and the declaration of i are not visible
 * in this listing.
 * NOTE(review): the loop stops at i == 254, so entry 255 is not written here
 * even though the index is masked with 0xff -- confirm whether 256 entries
 * were intended.
 */
7320 build_qp_table(PPS *pps, int t, int index)
7323 for(i = 0; i < 255; i++)
7324 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
7327 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7328 MpegEncContext * const s = &h->s;
7329 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7332 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7336 tmp= get_ue_golomb(&s->gb);
7337 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7338 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7343 pps->cabac= get_bits1(&s->gb);
7344 pps->pic_order_present= get_bits1(&s->gb);
7345 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7346 if(pps->slice_group_count > 1 ){
7347 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7348 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7349 switch(pps->mb_slice_group_map_type){
7352 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7353 | run_length[ i ] |1 |ue(v) |
7358 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7360 | top_left_mb[ i ] |1 |ue(v) |
7361 | bottom_right_mb[ i ] |1 |ue(v) |
7369 | slice_group_change_direction_flag |1 |u(1) |
7370 | slice_group_change_rate_minus1 |1 |ue(v) |
7375 | slice_group_id_cnt_minus1 |1 |ue(v) |
7376 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7378 | slice_group_id[ i ] |1 |u(v) |
7383 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7384 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7385 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7386 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7387 pps->ref_count[0]= pps->ref_count[1]= 1;
7391 pps->weighted_pred= get_bits1(&s->gb);
7392 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7393 pps->init_qp= get_se_golomb(&s->gb) + 26;
7394 pps->init_qs= get_se_golomb(&s->gb) + 26;
7395 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7396 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7397 pps->constrained_intra_pred= get_bits1(&s->gb);
7398 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7400 pps->transform_8x8_mode= 0;
7401 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7402 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7403 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7405 if(get_bits_count(&s->gb) < bit_length){
7406 pps->transform_8x8_mode= get_bits1(&s->gb);
7407 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7408 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7410 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7413 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7414 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7415 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7416 h->pps.chroma_qp_diff= 1;
7418 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7420 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7421 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7422 pps_id, pps->sps_id,
7423 pps->cabac ? "CABAC" : "CAVLC",
7424 pps->slice_group_count,
7425 pps->ref_count[0], pps->ref_count[1],
7426 pps->weighted_pred ? "weighted" : "",
7427 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7428 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7429 pps->constrained_intra_pred ? "CONSTR" : "",
7430 pps->redundant_pic_cnt_present ? "REDU" : "",
7431 pps->transform_8x8_mode ? "8x8DCT" : ""
7439 * Call decode_slice() for each context.
7441 * @param h h264 master context
7442 * @param context_count number of contexts to execute
7444 static void execute_decode_slices(H264Context *h, int context_count){
7445 MpegEncContext * const s = &h->s;
7446 AVCodecContext * const avctx= s->avctx;
7450 if(context_count == 1) {
7451 decode_slice(avctx, h);
7453 for(i = 1; i < context_count; i++) {
7454 hx = h->thread_context[i];
7455 hx->s.error_resilience = avctx->error_resilience;
7456 hx->s.error_count = 0;
7459 avctx->execute(avctx, (void *)decode_slice,
7460 (void **)h->thread_context, NULL, context_count);
7462 /* pull back stuff from slices to master context */
7463 hx = h->thread_context[context_count - 1];
7464 s->mb_x = hx->s.mb_x;
7465 s->mb_y = hx->s.mb_y;
7466 s->dropable = hx->s.dropable;
7467 s->picture_structure = hx->s.picture_structure;
7468 for(i = 1; i < context_count; i++)
7469 h->s.error_count += h->thread_context[i]->s.error_count;
// decode_nal_units(): walk through buf, split it into NAL units and handle
// each one according to its type. NAL boundaries come either from length
// prefixes (h->is_avc) or from 00 00 01 start codes. Slice NALs are parsed
// into the context h->thread_context[context_count] and decoded through
// execute_decode_slices(); SPS and PPS NALs are parsed on the master context.
// NOTE(review): several lines of this function (case labels, returns, braces,
// local declarations) are not visible in this excerpt; the comments below only
// describe the statements that are.
7474 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7475 MpegEncContext * const s = &h->s;
7476 AVCodecContext * const avctx= s->avctx;
7478 H264Context *hx; ///< thread context
7479 int context_count = 0;
// number of slice contexts that may be collected before a batch is executed
// (compared against context_count further down)
7481 h->max_contexts = avctx->thread_count;
// hex dump of the first 50 input bytes
// NOTE(review): presumably a disabled debug aid; its guard is not visible here
7484 for(i=0; i<50; i++){
7485 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// unless the caller feeds data in chunks, reset the slice counter and, when
// not in the middle of a field pair, forget the current picture
7488 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7489 h->current_slice = 0;
7490 if (!s->first_field)
7491 s->current_picture_ptr= NULL;
7503 if(buf_index >= buf_size) break;
// length-prefixed framing: assemble a big-endian NAL size from
// nal_length_size bytes
7505 for(i = 0; i < h->nal_length_size; i++)
7506 nalsize = (nalsize << 8) | buf[buf_index++];
// NOTE(review): nalsize+buf_index is computed on values taken from the input;
// if both are signed int the sum can overflow before the comparison - confirm
7507 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7512 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7517 // start code prefix search
7518 for(; buf_index + 3 < buf_size; buf_index++){
7519 // This should always succeed in the first iteration.
7520 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
// fewer than 4 bytes left: nothing more to parse
7524 if(buf_index+3 >= buf_size) break;
// the NAL is unpacked into the next unused slice context
7529 hx = h->thread_context[context_count];
7531 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7532 if (ptr==NULL || dst_length < 0){
// strip trailing zero bytes from the unpacked payload
// NOTE(review): ptr[dst_length - 1] is read before dst_length > 0 is tested,
// so dst_length == 0 reads ptr[-1]; the two operands look swapped
7535 while(ptr[dst_length - 1] == 0 && dst_length > 0)
// usable payload bits: 8 per byte minus what decode_rbsp_trailing() reports
// for the last byte
7537 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7539 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7540 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
// with length prefixes decode_nal() is expected to consume exactly the
// declared size
7543 if (h->is_avc && (nalsize != consumed)){
7544 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7548 buf_index += consumed;
// NOTE(review): the NAL header was parsed into hx, but these tests read
// h->nal_ref_idc; confirm this is intended when hx != h
7550 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7551 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7556 switch(hx->nal_unit_type){
// NOTE(review): unlike the other messages this one has no trailing "\n"
7558 if (h->nal_unit_type != NAL_IDR_SLICE) {
7559 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7562 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// slice without data partitioning: header and data share the main bit reader
7564 init_get_bits(&hx->s.gb, ptr, bit_length);
7566 hx->inter_gb_ptr= &hx->s.gb;
7567 hx->s.data_partitioning = 0;
7569 if((err = decode_slice_header(hx, h)))
// the picture becomes a key frame as soon as one of its slices is an IDR slice
7572 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// conditions under which the slice is kept: primary picture only, and not
// dropped by the hurry_up / skip_frame settings
7573 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7574 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7575 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7576 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7577 && avctx->skip_frame < AVDISCARD_ALL)
// data partitioned slice: the header is read from the main bit reader and
// both partition readers start out unset
7581 init_get_bits(&hx->s.gb, ptr, bit_length);
7583 hx->inter_gb_ptr= NULL;
7584 hx->s.data_partitioning = 1;
7586 err = decode_slice_header(hx, h);
// separate bit reader for the intra partition
7589 init_get_bits(&hx->intra_gb, ptr, bit_length);
7590 hx->intra_gb_ptr= &hx->intra_gb;
// separate bit reader for the inter partition
7593 init_get_bits(&hx->inter_gb, ptr, bit_length);
7594 hx->inter_gb_ptr= &hx->inter_gb;
// a partitioned slice is kept only once its intra partition is present and
// the decoder context has been initialized, subject to the same skip rules
7596 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7597 && s->context_initialized
7599 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7600 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7601 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7602 && avctx->skip_frame < AVDISCARD_ALL)
7606 init_get_bits(&s->gb, ptr, bit_length);
// sequence parameter set: parsed on the master context
7610 init_get_bits(&s->gb, ptr, bit_length);
7611 decode_seq_parameter_set(h);
7613 if(s->flags& CODEC_FLAG_LOW_DELAY)
// only values below 2 are replaced by the low_delay derived setting
7616 if(avctx->has_b_frames < 2)
7617 avctx->has_b_frames= !s->low_delay;
// picture parameter set: parsed on the master context
// NOTE(review): the return value of decode_picture_parameter_set() is ignored
7620 init_get_bits(&s->gb, ptr, bit_length);
7622 decode_picture_parameter_set(h, bit_length);
7626 case NAL_END_SEQUENCE:
7627 case NAL_END_STREAM:
7628 case NAL_FILLER_DATA:
7630 case NAL_AUXILIARY_SLICE:
7633 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// every available context holds a slice: decode the batch now
7636 if(context_count == h->max_contexts) {
7637 execute_decode_slices(h, context_count);
7642 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7644 /* Slice could not be decoded in parallel mode, copy down
7645 * NAL unit stuff to context 0 and restart. Note that
7646 * rbsp_buffer is not transfered, but since we no longer
7647 * run in parallel mode this should not be an issue. */
7648 h->nal_unit_type = hx->nal_unit_type;
7649 h->nal_ref_idc = hx->nal_ref_idc;
// decode the slice contexts collected so far
7655 execute_decode_slices(h, context_count);
7660 * returns the number of bytes consumed for building the current frame
7662 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7663 if(s->flags&CODEC_FLAG_TRUNCATED){
7664 pos -= s->parse_context.last_index;
7665 if(pos<0) pos=0; // FIXME remove (unneeded?)
7669 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7670 if(pos+10>buf_size) pos=buf_size; // oops ;)
// decode_frame(): decode one input buffer. The buffer is parsed with
// decode_nal_units(); when a picture is due for output it is copied to
// *data and *data_size is set to sizeof(AVFrame). The value returned at the
// end comes from get_consumed_bytes().
// NOTE(review): presumably this is the codec's decode callback; the
// registration is not visible in this excerpt. Many short lines (braces,
// returns, assignments inside branches) are also not visible, so the comments
// below only describe the statements that are.
7676 static int decode_frame(AVCodecContext *avctx,
7677 void *data, int *data_size,
7678 const uint8_t *buf, int buf_size)
7680 H264Context *h = avctx->priv_data;
7681 MpegEncContext *s = &h->s;
7682 AVFrame *pict = data;
// take over the caller's current flag settings on every call
7685 s->flags= avctx->flags;
7686 s->flags2= avctx->flags2;
// truncated input: locate the end of the frame and let ff_combine_frame()
// update buf / buf_size through the parse context
7688 if(s->flags&CODEC_FLAG_TRUNCATED){
7689 const int next= ff_h264_find_frame_end(h, buf, buf_size);
7690 assert((buf_size > 0) || (next == END_NOT_FOUND));
7692 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7694 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
// empty input: output one of the pictures still waiting in delayed_pic[]
7697 /* no supplementary picture */
7698 if (buf_size == 0) {
7702 //FIXME factorize this with the output code below
// pick the lowest poc among the entries before the first key frame that
// follows entry 0
7703 out = h->delayed_pic[0];
7705 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7706 if(h->delayed_pic[i]->poc < out->poc){
7707 out = h->delayed_pic[i];
// close the gap in the NULL terminated list
7711 for(i=out_idx; h->delayed_pic[i]; i++)
7712 h->delayed_pic[i] = h->delayed_pic[i+1];
7715 *data_size = sizeof(AVFrame);
7716 *pict= *(AVFrame*)out;
// length-prefixed streams: parse the parameter sets stored in extradata once
7722 if(h->is_avc && !h->got_avcC) {
7723 int i, cnt, nalsize;
7724 unsigned char *p = avctx->extradata;
7725 if(avctx->extradata_size < 7) {
7726 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7730 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7733 /* sps and pps in the avcC always have length coded with 2 bytes,
7734 so put a fake nal_length_size = 2 while parsing them */
7735 h->nal_length_size = 2;
7736 // Decode sps from avcC
// the SPS count is the low 5 bits of byte 5
7737 cnt = *(p+5) & 0x1f; // Number of sps
// each entry is a 16 bit big-endian length followed by the NAL itself
// NOTE(review): nalsize is not checked against the bytes remaining in
// extradata in the visible lines - confirm
7739 for (i = 0; i < cnt; i++) {
7740 nalsize = AV_RB16(p) + 2;
7741 if(decode_nal_units(h, p, nalsize) < 0) {
7742 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7747 // Decode pps from avcC
7748 cnt = *(p++); // Number of pps
// NOTE(review): the SPS loop above fails only on a negative return, this loop
// requires the return value to equal nalsize; confirm the asymmetry is intended
7749 for (i = 0; i < cnt; i++) {
7750 nalsize = AV_RB16(p) + 2;
7751 if(decode_nal_units(h, p, nalsize) != nalsize) {
7752 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
// real prefix size: low 2 bits of extradata byte 4, plus one
7757 // Now store right nal length size, that will be use to parse all other nals
7758 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7759 // Do not reparse avcC
// start code streams: on the first frame, feed extradata through the normal
// NAL parser
7763 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7764 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7768 buf_index=decode_nal_units(h, buf, buf_size);
// no picture was started by this buffer; this is silent when frames are being
// skipped on purpose
7772 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7773 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7774 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// finish the picture: always when not in chunk mode, otherwise only once
// mb_y has reached mb_height
7778 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7779 Picture *out = s->current_picture_ptr;
7780 Picture *cur = s->current_picture_ptr;
7781 Picture *prev = h->delayed_output_pic;
7782 int i, pics, cross_idr, out_of_order, out_idx;
7786 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7787 s->current_picture_ptr->pict_type= s->pict_type;
// remember this picture's frame_num / poc state as the "previous" values
7789 h->prev_frame_num_offset= h->frame_num_offset;
7790 h->prev_frame_num= h->frame_num;
7792 h->prev_poc_msb= h->poc_msb;
7793 h->prev_poc_lsb= h->poc_lsb;
// apply the memory management control operations collected in h->mmco
7794 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7798 * FIXME: Error handling code does not seem to support interlaced
7799 * when slices span multiple rows
7800 * The ff_er_add_slice calls don't work right for bottom
7801 * fields; they cause massive erroneous error concealing
7802 * Error marking covers both fields (top and bottom).
7803 * This causes a mismatched s->error_count
7804 * and a bad error table. Further, the error count goes to
7805 * INT_MAX when called for bottom field, because mb_y is
7806 * past end by one (callers fault) and resync_mb_y != 0
7807 * causes problems for the first MB line, too.
7814 if (s->first_field) {
7815 /* Wait for second field. */
7819 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7820 /* Derive top_field_first from field pocs. */
7821 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7823 //FIXME do something with unavailable reference frames
7825 #if 0 //decode order
7826 *data_size = sizeof(AVFrame);
7828 /* Sort B-frames into display order */
// raise has_b_frames to the reorder depth announced in the SPS
7830 if(h->sps.bitstream_restriction_flag
7831 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7832 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// count the pictures already waiting for output
7837 while(h->delayed_pic[pics]) pics++;
7839 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
// append the current picture; a non-reference picture is tagged
// DELAYED_PIC_REF while it is held for delayed output
7841 h->delayed_pic[pics++] = cur;
7842 if(cur->reference == 0)
7843 cur->reference = DELAYED_PIC_REF;
// look for a key frame or a picture with poc 0 among the waiting pictures
// NOTE(review): the statement executed on a match is not visible here;
// presumably it sets cross_idr - confirm
7846 for(i=0; h->delayed_pic[i]; i++)
7847 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// output candidate: lowest poc among the entries before the first key frame
// that follows entry 0
7850 out = h->delayed_pic[0];
7852 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7853 if(h->delayed_pic[i]->poc < out->poc){
7854 out = h->delayed_pic[i];
// the candidate would be shown before the previously output picture
7858 out_of_order = !cross_idr && prev && out->poc < prev->poc;
// decide whether the reorder depth needs adjusting; the branch bodies are
// only partly visible in this excerpt
7859 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7861 else if(prev && pics <= s->avctx->has_b_frames)
7863 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7865 ((!cross_idr && prev && out->poc > prev->poc + 2)
7866 || cur->pict_type == FF_B_TYPE)))
7869 s->avctx->has_b_frames++;
7872 else if(out_of_order)
// remove the chosen picture from the waiting list
7875 if(out_of_order || pics > s->avctx->has_b_frames){
7876 for(i=out_idx; h->delayed_pic[i]; i++)
7877 h->delayed_pic[i] = h->delayed_pic[i+1];
// a picture is output: clear the DELAYED_PIC_REF tag of the previously output
// picture and remember the new one
7883 *data_size = sizeof(AVFrame);
7884 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7885 prev->reference = 0;
7886 h->delayed_output_pic = out;
7890 *pict= *(AVFrame*)out;
7892 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7896 assert(pict->data[0] || !*data_size);
7897 ff_print_debug_info(s, pict);
7898 //printf("out %d\n", (int)pict->data[0]);
7901 /* Return the Picture timestamp as the frame number */
7902 /* we subtract 1 because it is added on utils.c */
7903 avctx->frame_number = s->picture_number - 1;
7905 return get_consumed_bytes(s, buf_index, buf_size);
// fill_mb_avail(): compute the neighbour availability flags h->mb_avail[]
// for the macroblock at (s->mb_x, s->mb_y). A neighbour counts as available
// when its slice_table entry equals the current h->slice_num.
7908 static inline void fill_mb_avail(H264Context *h){
7909 MpegEncContext * const s = &h->s;
// linear index of the current macroblock in slice_table
7910 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// [0] top-left, [1] top, [2] top-right: entries of the row above
// (mb_xy - mb_stride); the left and right ones also require that the
// column exists
7913 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7914 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7915 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// NOTE(review): lines between the entries above and below are not visible in
// this excerpt; presumably the three top-row entries are guarded for the
// first macroblock row - confirm
// [3] left neighbour in the same row
7921 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
// [4] and [5] are constants
7922 h->mb_avail[4]= 1; //FIXME move out
7923 h->mb_avail[5]= 0; //FIXME move out
// Self-test harness body. According to its printf() banners it exercises, in
// order: unsigned exp golomb coding, signed exp golomb coding, the 4x4 (I)DCT,
// the quantizer, the NAL layer and RBSP handling.
// NOTE(review): the enclosing function header, its local declarations and
// many short statements are not visible in this excerpt.
// size in bytes of the scratch bitstream buffer used by the golomb tests
7931 #define SIZE (COUNT*40)
7937 // int int_temp[10000];
7939 AVCodecContext avctx;
7941 dsputil_init(&dsp, &avctx);
// write the values 0 .. COUNT-1 as unsigned exp golomb codes ...
7943 init_put_bits(&pb, temp, SIZE);
7944 printf("testing unsigned exp golomb\n");
7945 for(i=0; i<COUNT; i++){
7947 set_ue_golomb(&pb, i);
7948 STOP_TIMER("set_ue_golomb");
7950 flush_put_bits(&pb);
// ... and read them back, reporting every value that does not round-trip
7952 init_get_bits(&gb, temp, 8*SIZE);
7953 for(i=0; i<COUNT; i++){
// keep the next 24 bits for the mismatch message
7956 s= show_bits(&gb, 24);
7959 j= get_ue_golomb(&gb);
7961 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7964 STOP_TIMER("get_ue_golomb");
// same round trip for signed codes, using the values -COUNT/2 .. COUNT/2-1
7968 init_put_bits(&pb, temp, SIZE);
7969 printf("testing signed exp golomb\n");
7970 for(i=0; i<COUNT; i++){
7972 set_se_golomb(&pb, i - COUNT/2);
7973 STOP_TIMER("set_se_golomb");
7975 flush_put_bits(&pb);
7977 init_get_bits(&gb, temp, 8*SIZE);
7978 for(i=0; i<COUNT; i++){
7981 s= show_bits(&gb, 24);
7984 j= get_se_golomb(&gb);
7985 if(j != i - COUNT/2){
7986 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7989 STOP_TIMER("get_se_golomb");
// transform test: forward transform of src - ref, rescale, add the inverse
// transform onto ref, then compare ref with src
7993 printf("testing 4x4 (I)DCT\n");
7996 uint8_t src[16], ref[16];
7997 uint64_t error= 0, max_error=0;
7999 for(i=0; i<COUNT; i++){
8001 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
// random()%255 yields sample values 0 .. 254
8002 for(j=0; j<16; j++){
8003 ref[j]= random()%255;
8004 src[j]= random()%255;
8007 h264_diff_dct_c(block, src, ref, 4);
// rescale the coefficients; positions with bit 0 or bit 2 set in their index
// get an extra factor of 4/5 each
8010 for(j=0; j<16; j++){
8011 // printf("%d ", block[j]);
8012 block[j]= block[j]*4;
8013 if(j&1) block[j]= (block[j]*4 + 2)/5;
8014 if(j&4) block[j]= (block[j]*4 + 2)/5;
// NOTE(review): s receives the int result of show_bits() above but is
// dereferenced like a struct pointer here; this harness may not compile as
// written - confirm
8018 s->dsp.h264_idct_add(ref, block, 4);
8019 /* for(j=0; j<16; j++){
8020 printf("%d ", ref[j]);
8024 for(j=0; j<16; j++){
8025 int diff= FFABS(src[j] - ref[j]);
8028 max_error= FFMAX(max_error, diff);
8031 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8032 printf("testing quantizer\n");
8033 for(qp=0; qp<52; qp++){
8035 src1_block[i]= src2_block[i]= random()%255;
8038 printf("Testing NAL layer\n");
8040 uint8_t bitstream[COUNT];
8041 uint8_t nal[COUNT*2];
8043 memset(&h, 0, sizeof(H264Context));
8045 for(i=0; i<COUNT; i++){
// start from a payload without zero bytes (values 1 .. 255) ...
8053 for(j=0; j<COUNT; j++){
8054 bitstream[j]= (random() % 255) + 1;
// ... then pick random positions for the zero bytes
8057 for(j=0; j<zeros; j++){
8058 int pos= random() % COUNT;
8059 while(bitstream[pos] == 0){
// encode, decode again and check length, consumed byte count and content
8068 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8070 printf("encoding failed\n");
8074 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8078 if(out_length != COUNT){
8079 printf("incorrect length %d %d\n", out_length, COUNT);
8083 if(consumed != nal_length){
8084 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8088 if(memcmp(bitstream, out, COUNT)){
8089 printf("mismatch\n");
8095 printf("Testing RBSP\n");
// decode_end(): release what the decoder context owns, namely both rbsp
// buffers (through av_freep) and the tables handled by free_tables().
// NOTE(review): presumably this is the codec's close callback; the
// registration is not visible in this excerpt.
8103 static av_cold int decode_end(AVCodecContext *avctx)
8105 H264Context *h = avctx->priv_data;
// NOTE(review): s is not used in any visible line of this function; a
// teardown call taking it may be missing from this excerpt - confirm
8106 MpegEncContext *s = &h->s;
8108 av_freep(&h->rbsp_buffer[0]);
8109 av_freep(&h->rbsp_buffer[1]);
8110 free_tables(h); //FIXME cleanup init stuff perhaps
8113 // memset(h, 0, sizeof(H264Context));
8119 AVCodec h264_decoder = {
8123 sizeof(H264Context),
8128 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8130 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),