2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* VLC tables for CAVLC residual decoding (coeff_token, total_zeros, run).
 * File-scope statics; their initialization is not visible in this excerpt. */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations for static helpers defined later in this file
 * (SVQ3 IDCT variants and the in-loop deblocking filters). */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* Fallback so this block is self-contained if the project-wide inline hint
 * is not in scope; normally provided by the common headers. */
#ifndef av_always_inline
#define av_always_inline inline
#endif

/**
 * Packs two 16-bit halves into one 32-bit word such that 'a' occupies the
 * bytes that come first in memory on either endianness, allowing the pair
 * to be stored with a single 32-bit write.
 * @param a first 16-bit value (only the low 16 bits are used)
 * @param b second 16-bit value (only the low 16 bits are used)
 * @return packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    /* cast to uint32_t: left-shifting a negative int is undefined behavior */
    return (b&0xFFFF) + ((uint32_t)a<<16);
#else
    return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
/* Lookup table: ff_rem6[q] == q % 6 for q in [0,51] (presumably the H.264
 * quantizer range — the file is an H.264 codec); avoids a runtime modulo. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* Lookup table: ff_div6[q] == q / 6 for q in [0,51]; companion of ff_rem6,
 * avoids a runtime division. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
/* Populates the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero-count, mv/ref, mvd, B-direct flags) from the already-decoded
 * neighbouring macroblocks, honouring slice boundaries and MBAFF pairing.
 * NOTE(review): this excerpt has elided lines (the embedded original line
 * numbers are discontinuous); code below is annotated only, left unmodified. */
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
88 int topleft_partition= -1;
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
/* Fast path for deblocking when the whole top neighbourhood is in the same
 * slice and MBAFF is off (the elided body presumably bypasses the general
 * neighbour derivation — confirm against the full source). */
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour macroblock addresses. */
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbour addresses depend on the frame/field coding decision of
 * each macroblock pair; recompute them from the pair-level mb_type flags. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
/* Publish the resolved neighbour indices for later users (deblocking etc.). */
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbour is usable if its slice_table entry is valid
 * (< 255); unavailable neighbours get type 0. */
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF inter deblocking: reload this MB's own nnz bits and motion data
 * back into the caches from the frame-wide arrays. */
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
/* *0x0101 replicates each packed ref index into two byte lanes. */
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decode path: neighbours are usable only within the current slice. */
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction: per-4x4-block sample availability bitmasks; bits are
 * cleared when a neighbour is missing or excluded by constrained intra. */
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 prediction-mode cache: copy the bottom row / right column of
 * the neighbours' mode arrays into the 8-wide cache layout. */
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: import neighbour nnz values; missing neighbours
 * get 0 under CABAC-inter and 64 otherwise. */
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of the top/left neighbours, used by CABAC context derivation. */
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector / reference index caches for inter and direct MBs. */
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
/* Top row of the mv/ref cache from the bottom row of the top neighbour. */
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left column of the cache from the right column of the left neighbour(s). */
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* Topleft / topright corners of the cache. */
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: cache of motion-vector differences for context modelling. */
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the per-8x8 "direct" flags of the neighbours. */
484 if(h->slice_type == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of cached neighbour mvs/refs: MAP_F2F is
 * redefined twice below (frame-to-field, then field-to-frame) and applied
 * over the edge cache entries listed here. */
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for the transform-size CABAC context. */
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Returns 0 on success; on an impossible mode/availability combination the
 * elided error paths presumably return a negative value — confirm against
 * the full source. NOTE(review): loop headers and returns are elided in
 * this excerpt; code left unmodified. */
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
/* Remap tables: index is the requested mode; -1 = keep, other values are
 * the substitute DC mode when the corresponding neighbour is missing. */
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
573 if(!(h->top_samples_available&0x8000)){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
585 if(!(h->left_samples_available&0x8000)){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Full-MB (16x16 luma / chroma) variant of the check above: validates and
 * remaps 'mode' according to neighbour availability. NOTE(review): the
 * range check, remap assignments and returns are elided in this excerpt;
 * code left unmodified. */
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
/* -1 entries mark mode/availability combinations that are errors. */
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if(!(h->left_samples_available&0x8000)){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/* Writes the current MB's right-column / bottom-row non-zero coefficient
 * counts from the cache back to the per-MB array for use by neighbours.
 * NOTE(review): the surrounding conditional and loop header for the
 * deblocking bitmask below are elided in this excerpt; code unmodified. */
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma nnz entries */
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
667 // store all luma nnzs, for deblocking
/* 16-bit bitmask: one presence bit per 4x4 luma block, consumed by the
 * MBAFF reload path in fill_caches(). */
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
/* Prediction is derived from the left and top neighbours' cached counts;
 * the declaration of the accumulator and the final return are elided in
 * this excerpt. Code left unmodified. */
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
/* values >= 64 act as "unavailable" markers and skip the averaging */
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" (top-right, or top-left as fallback) predictor MV for
 * motion vector prediction, returning its reference index. The MBAFF
 * branches rescale the vertical component when crossing a frame/field
 * boundary. NOTE(review): several conditionals and closing braces are
 * elided in this excerpt; code left unmodified. */
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scan8[0]-2 is used as scratch space for the synthesized predictor */
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV reads the MV straight from the colocated picture data and
 * applies MV_OP/REF_OP to convert between frame and field scaling. */
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use topright if available, else fall back to topleft. */
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
/* Standard H.264 median MV prediction (spec 8.4.1.3) over neighbours
 * A (left), B (top), C (diagonal). NOTE(review): the single-match branches
 * between the two mid_pred calls are elided in this excerpt. */
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
/* >1 neighbours share the target ref: component-wise median */
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
781 }else if(match_count==1){
785 }else if(top_ref==ref){
/* no neighbour matches: left-only special case, else median again */
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
/* 16x8 partitions use a directional shortcut (spec 8.4.1.3.2): top
 * partition prefers the B (top) neighbour, bottom partition the A (left)
 * neighbour, falling back to median prediction. NOTE(review): the ref
 * comparison branches and returns are elided in this excerpt. */
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* fallback: general median prediction */
837 pred_motion(h, n, 4, list, ref, mx, my);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
/* 8x16 partitions: left partition prefers the A (left) neighbour, right
 * partition the C (diagonal) neighbour, falling back to median prediction
 * (spec 8.4.1.3.2). NOTE(review): the ref comparison branches and returns
 * are elided in this excerpt. */
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
/* fallback: general median prediction */
874 pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P_Skip macroblocks (spec 8.4.1.1): the MV is zero when
 * a neighbour is unavailable or a ref-0 neighbour has a zero MV; otherwise
 * the ordinary median prediction with ref 0 is used. NOTE(review): the
 * zero-assignment branch is elided in this excerpt. */
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
891 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct distance scale factors (spec 8.4.1.2.3):
 * for each list-0 reference, dist_scale_factor = clip((tb*tx+32)>>6) with
 * tx = (16384 + |td|/2) / td, where td/tb are clipped POC distances.
 * NOTE(review): some braces and the MBAFF conditional around the field
 * table are elided in this excerpt; code left unmodified. */
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
904 h->dist_scale_factor[i] = 256;
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* duplicate per field for MBAFF field pairs */
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the current picture's reference counts/POCs and builds the
 * map_col_to_list0 tables used by temporal direct mode: for each reference
 * of the colocated (list-1) picture, find the list-0 index with the same
 * POC (0 when missing). NOTE(review): loop variable declarations, some
 * braces and the MBAFF conditional are elided in this excerpt. */
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* spatial direct (and non-B pictures) do not need the mapping tables */
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
/* field variants for MBAFF: each frame index maps to two field indices */
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1005 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1007 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1013 if(ref[0] < 0 && ref[1] < 0){
1014 ref[0] = ref[1] = 0;
1015 mv[0][0] = mv[0][1] =
1016 mv[1][0] = mv[1][1] = 0;
1018 for(list=0; list<2; list++){
1020 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1022 mv[list][0] = mv[list][1] = 0;
1028 *mb_type &= ~MB_TYPE_L1;
1029 sub_mb_type &= ~MB_TYPE_L1;
1030 }else if(ref[0] < 0){
1032 *mb_type &= ~MB_TYPE_L0;
1033 sub_mb_type &= ~MB_TYPE_L0;
1036 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1037 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1038 int mb_types_col[2];
1039 int b8_stride = h->b8_stride;
1040 int b4_stride = h->b_stride;
1042 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1044 if(IS_INTERLACED(*mb_type)){
1045 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1046 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1048 l1ref0 -= 2*b8_stride;
1049 l1ref1 -= 2*b8_stride;
1050 l1mv0 -= 4*b4_stride;
1051 l1mv1 -= 4*b4_stride;
1056 int cur_poc = s->current_picture_ptr->poc;
1057 int *col_poc = h->ref_list[1]->field_poc;
1058 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1059 int dy = 2*col_parity - (s->mb_y&1);
1061 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1062 l1ref0 += dy*b8_stride;
1063 l1ref1 += dy*b8_stride;
1064 l1mv0 += 2*dy*b4_stride;
1065 l1mv1 += 2*dy*b4_stride;
1069 for(i8=0; i8<4; i8++){
1072 int xy8 = x8+y8*b8_stride;
1073 int xy4 = 3*x8+y8*b4_stride;
1076 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1078 h->sub_mb_type[i8] = sub_mb_type;
1080 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1081 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1082 if(!IS_INTRA(mb_types_col[y8])
1083 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1084 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1086 a= pack16to32(mv[0][0],mv[0][1]);
1088 b= pack16to32(mv[1][0],mv[1][1]);
1090 a= pack16to32(mv[0][0],mv[0][1]);
1091 b= pack16to32(mv[1][0],mv[1][1]);
1093 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1094 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1096 }else if(IS_16X16(*mb_type)){
1099 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1100 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1101 if(!IS_INTRA(mb_type_col)
1102 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1103 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1104 && (h->x264_build>33 || !h->x264_build)))){
1106 a= pack16to32(mv[0][0],mv[0][1]);
1108 b= pack16to32(mv[1][0],mv[1][1]);
1110 a= pack16to32(mv[0][0],mv[0][1]);
1111 b= pack16to32(mv[1][0],mv[1][1]);
1113 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1114 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1116 for(i8=0; i8<4; i8++){
1117 const int x8 = i8&1;
1118 const int y8 = i8>>1;
1120 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1122 h->sub_mb_type[i8] = sub_mb_type;
1124 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1126 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1127 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1130 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1131 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1132 && (h->x264_build>33 || !h->x264_build)))){
1133 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1134 if(IS_SUB_8X8(sub_mb_type)){
1135 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1136 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1138 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1140 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1143 for(i4=0; i4<4; i4++){
1144 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1145 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1147 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1149 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1155 }else{ /* direct temporal mv pred */
1156 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1157 const int *dist_scale_factor = h->dist_scale_factor;
1160 if(IS_INTERLACED(*mb_type)){
1161 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1162 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1163 dist_scale_factor = h->dist_scale_factor_field;
1165 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1166 /* FIXME assumes direct_8x8_inference == 1 */
1167 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1168 int mb_types_col[2];
1171 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1172 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1173 | (*mb_type & MB_TYPE_INTERLACED);
1174 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1176 if(IS_INTERLACED(*mb_type)){
1177 /* frame to field scaling */
1178 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1179 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1181 l1ref0 -= 2*h->b8_stride;
1182 l1ref1 -= 2*h->b8_stride;
1183 l1mv0 -= 4*h->b_stride;
1184 l1mv1 -= 4*h->b_stride;
1188 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1189 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1191 *mb_type |= MB_TYPE_16x8;
1193 *mb_type |= MB_TYPE_8x8;
1195 /* field to frame scaling */
1196 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1197 * but in MBAFF, top and bottom POC are equal */
1198 int dy = (s->mb_y&1) ? 1 : 2;
1200 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1201 l1ref0 += dy*h->b8_stride;
1202 l1ref1 += dy*h->b8_stride;
1203 l1mv0 += 2*dy*h->b_stride;
1204 l1mv1 += 2*dy*h->b_stride;
1207 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1209 *mb_type |= MB_TYPE_16x16;
1211 *mb_type |= MB_TYPE_8x8;
1214 for(i8=0; i8<4; i8++){
1215 const int x8 = i8&1;
1216 const int y8 = i8>>1;
1218 const int16_t (*l1mv)[2]= l1mv0;
1220 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1222 h->sub_mb_type[i8] = sub_mb_type;
1224 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1225 if(IS_INTRA(mb_types_col[y8])){
1226 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1227 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1232 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1234 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1236 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1239 scale = dist_scale_factor[ref0];
1240 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1243 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1244 int my_col = (mv_col[1]<<y_shift)/2;
1245 int mx = (scale * mv_col[0] + 128) >> 8;
1246 int my = (scale * my_col + 128) >> 8;
1247 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1248 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1255 /* one-to-one mv scaling */
1257 if(IS_16X16(*mb_type)){
1260 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1261 if(IS_INTRA(mb_type_col)){
1264 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1265 : map_col_to_list0[1][l1ref1[0]];
1266 const int scale = dist_scale_factor[ref0];
1267 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1269 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1270 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1272 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1273 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1275 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1276 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1277 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1279 for(i8=0; i8<4; i8++){
1280 const int x8 = i8&1;
1281 const int y8 = i8>>1;
1283 const int16_t (*l1mv)[2]= l1mv0;
1285 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1287 h->sub_mb_type[i8] = sub_mb_type;
1288 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1289 if(IS_INTRA(mb_type_col)){
1290 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1291 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1296 ref0 = l1ref0[x8 + y8*h->b8_stride];
1298 ref0 = map_col_to_list0[0][ref0];
1300 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1303 scale = dist_scale_factor[ref0];
1305 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1306 if(IS_SUB_8X8(sub_mb_type)){
1307 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1308 int mx = (scale * mv_col[0] + 128) >> 8;
1309 int my = (scale * mv_col[1] + 128) >> 8;
1310 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1311 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1313 for(i4=0; i4<4; i4++){
1314 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1315 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1316 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1317 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1318 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1319 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data (MVs, ref indices, CABAC mvd and
 * direct flags) from the decode caches back into the picture-wide tables.
 * NOTE(review): interior lines appear elided in this extraction.
 */
1326 static inline void write_back_motion(H264Context *h, int mb_type){
1327 MpegEncContext * const s = &h->s;
1328 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1329 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* mark list0 refs unused so that MV prediction of later MBs works */
1332 if(!USES_LIST(mb_type, 0))
1333 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1335 for(list=0; list<h->list_count; list++){
1337 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row from the cache */
1341 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1344 if( h->pps.cabac ) {
1345 if(IS_SKIP(mb_type))
1346 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1349 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1350 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one ref index per 8x8 block */
1355 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1356 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1357 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1358 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1359 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct-mode flags are only needed for CABAC B slices */
1363 if(h->slice_type == FF_B_TYPE && h->pps.cabac){
1364 if(IS_8X8(mb_type)){
1365 uint8_t *direct_table = &h->direct_table[b8_xy];
1366 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1367 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1368 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1374 * Decodes a network abstraction layer unit.
1375 * @param consumed is the number of bytes used as input
1376 * @param length is the length of the array
1377  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1378 * @returns decoded bytes, might be src+1 if no escapes
/* Unescapes a NAL unit: strips emulation-prevention 0x03 bytes and parses
 * the one-byte NAL header. Returns src+1 directly when no escapes occur.
 * NOTE(review): interior lines appear elided in this extraction. */
1380 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1385 // src[0]&0x80; //forbidden bit
1386 h->nal_ref_idc= src[0]>>5;
1387 h->nal_unit_type= src[0]&0x1F;
1391 for(i=0; i<length; i++)
1392 printf("%2X ", src[i]);
/* scan for the first 00 00 (00|01|02|03) pattern */
1394 for(i=0; i+1<length; i+=2){
1395 if(src[i]) continue;
1396 if(i>0 && src[i-1]==0) i--;
1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1399 /* startcode, so we must be past the end */
/* fast path: no escaped zeros, the input can be returned as-is */
1406 if(i>=length-1){ //no escaped 0
1407 *dst_length= length;
1408 *consumed= length+1; //+1 for the header
/* DPC partitions get their own buffer so A/B data is not clobbered */
1412 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1413 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1414 dst= h->rbsp_buffer[bufidx];
1420 //printf("decoding esc\n");
1423 //remove escapes (very rare 1:2^22)
1424 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1425 if(src[si+2]==3){ //escape
1430 }else //next start code
1434 dst[di++]= src[si++];
1438 *consumed= si + 1;//+1 for the header
1439 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1444  * identifies the exact end of the bitstream
1445  * @return the length of the trailing, or 0 if damaged
/* NOTE(review): function body appears truncated in this extraction. */
1447 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1451 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1461  * idct tranforms the 16 dc values and dequantize them.
1462  * @param qp quantization parameter
/* 4x4 Hadamard-style inverse transform over the 16 luma DC coefficients
 * stored at strided offsets inside the 16x16 block array, followed by
 * dequantization by qmul with rounding. NOTE(review): the z4..z7 column/row
 * pass lines appear elided in this extraction. */
1464 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1467 int temp[16]; //FIXME check if this is a good idea
/* offsets of the 4 DC positions along each axis within the block layout */
1468 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1469 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1471 //memset(block, 64, 2*256);
/* horizontal (row) butterfly pass into temp[] */
1474 const int offset= y_offset[i];
1475 const int z0= block[offset+stride*0] + block[offset+stride*4];
1476 const int z1= block[offset+stride*0] - block[offset+stride*4];
1477 const int z2= block[offset+stride*1] - block[offset+stride*5];
1478 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical (column) butterfly pass, then dequant: (x*qmul + 128) >> 8 */
1487 const int offset= x_offset[i];
1488 const int z0= temp[4*0+i] + temp[4*2+i];
1489 const int z1= temp[4*0+i] - temp[4*2+i];
1490 const int z2= temp[4*1+i] - temp[4*3+i];
1491 const int z3= temp[4*1+i] + temp[4*3+i];
1493 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1494 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1495 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1496 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1502  * dct tranforms the 16 dc values.
1503  * @param qp quantization parameter ??? FIXME
/* Forward counterpart of h264_luma_dc_dequant_idct_c: Hadamard transform
 * of the 16 luma DC values with a final >>1 (no quantization here).
 * NOTE(review): interior pass lines appear elided in this extraction. */
1505 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1506 // const int qmul= dequant_coeff[qp][0];
1508 int temp[16]; //FIXME check if this is a good idea
1509 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1510 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* row pass */
1513 const int offset= y_offset[i];
1514 const int z0= block[offset+stride*0] + block[offset+stride*4];
1515 const int z1= block[offset+stride*0] - block[offset+stride*4];
1516 const int z2= block[offset+stride*1] - block[offset+stride*5];
1517 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* column pass with >>1 normalization */
1526 const int offset= x_offset[i];
1527 const int z0= temp[4*0+i] + temp[4*2+i];
1528 const int z1= temp[4*0+i] - temp[4*2+i];
1529 const int z2= temp[4*1+i] - temp[4*3+i];
1530 const int z3= temp[4*1+i] + temp[4*3+i];
1532 block[stride*0 +offset]= (z0 + z3)>>1;
1533 block[stride*2 +offset]= (z1 + z2)>>1;
1534 block[stride*8 +offset]= (z1 - z2)>>1;
1535 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse transform + dequantization of the 4 chroma DC coefficients.
 * NOTE(review): the intermediate e/f computation lines appear elided in
 * this extraction; code kept byte-identical. */
1543 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1544 const int stride= 16*2;
1545 const int xStride= 16;
1548 a= block[stride*0 + xStride*0];
1549 b= block[stride*0 + xStride*1];
1550 c= block[stride*1 + xStride*0];
1551 d= block[stride*1 + xStride*1];
/* 2x2 butterfly, then scale by qmul and >>7 */
1558 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1559 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1560 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1561 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 transform of the 4 chroma DC coefficients (no quantization).
 * NOTE(review): intermediate e/f lines appear elided in this extraction. */
1565 static void chroma_dc_dct_c(DCTELEM *block){
1566 const int stride= 16*2;
1567 const int xStride= 16;
1570 a= block[stride*0 + xStride*0];
1571 b= block[stride*0 + xStride*1];
1572 c= block[stride*1 + xStride*0];
1573 d= block[stride*1 + xStride*1];
/* 2x2 butterfly written back in place */
1580 block[stride*0 + xStride*0]= (a+c);
1581 block[stride*0 + xStride*1]= (e+b);
1582 block[stride*1 + xStride*0]= (a-c);
1583 block[stride*1 + xStride*1]= (e-b);
1588  * gets the chroma qp.
/* Maps the luma qscale to the chroma QP via the PPS lookup table;
 * t selects the Cb/Cr table. The &0xff guards against out-of-range input. */
1590 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1591 return h->pps.chroma_qp_table[t][qscale & 0xff];
1594 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1595 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Encoder-side quantization of one block in scan order. Uses the
 * threshold1/threshold2 trick to cheaply detect |level| > deadzone with a
 * single unsigned compare. separate_dc applies a different shift to
 * block[0]. Returns the index of the last non-zero coefficient.
 * NOTE(review): interior lines appear elided in this extraction. */
1596 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1598 const int * const quant_table= quant_coeff[qscale];
/* intra dead-zone is 1/3, inter 1/6 of the quant step */
1599 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1600 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1601 const unsigned int threshold2= (threshold1<<1);
/* DC path with QUANT_SHIFT-2 (luma DC case, presumably — confirm) */
1607 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1608 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1609 const unsigned int dc_threshold2= (dc_threshold1<<1);
1611 int level= block[0]*quant_coeff[qscale+18][0];
1612 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1614 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1617 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1620 // last_non_zero = i;
/* DC path with QUANT_SHIFT+1 (the other DC case) */
1625 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1626 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1627 const unsigned int dc_threshold2= (dc_threshold1<<1);
1629 int level= block[0]*quant_table[0];
1630 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1632 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1635 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1638 // last_non_zero = i;
/* AC coefficients in scan order */
1651 const int j= scantable[i];
1652 int level= block[j]*quant_table[j];
1654 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1655 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1656 if(((unsigned)(level+threshold1))>threshold2){
1658 level= (bias + level)>>QUANT_SHIFT;
1661 level= (bias - level)>>QUANT_SHIFT;
1670 return last_non_zero;
/* Motion compensation for one partition in one direction (one list):
 * quarter-pel luma via qpix_op, eighth-pel chroma via chroma_op, with
 * edge emulation when the MV points outside the picture.
 * NOTE(review): interior lines appear elided in this extraction. */
1673 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1674 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1675 int src_x_offset, int src_y_offset,
1676 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1677 MpegEncContext * const s = &h->s;
/* mv in quarter-pel units, offset to this partition's position */
1678 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1679 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1680 const int luma_xy= (mx&3) + ((my&3)<<2);
1681 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1682 uint8_t * src_cb, * src_cr;
1683 int extra_width= h->emu_edge_width;
1684 int extra_height= h->emu_edge_height;
1686 const int full_mx= mx>>2;
1687 const int full_my= my>>2;
1688 const int pic_width = 16*s->mb_width;
1689 const int pic_height = 16*s->mb_height >> MB_FIELD;
1691 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
/* sub-pel filtering reads 3 extra pixels on each side */
1694 if(mx&7) extra_width -= 3;
1695 if(my&7) extra_height -= 3;
/* MV points (partly) outside the padded picture -> emulate edges */
1697 if( full_mx < 0-extra_width
1698 || full_my < 0-extra_height
1699 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1700 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1701 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1702 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1706 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1708 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1711 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1714 // chroma offset when predicting from a field of opposite parity
1715 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1716 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1718 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1719 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1722 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1723 src_cb= s->edge_emu_buffer;
1725 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1728 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1729 src_cr= s->edge_emu_buffer;
1731 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation for a partition: put from list0, then
 * (when bi-predicted) average in list1 using the *_avg operators.
 * NOTE(review): interior lines appear elided in this extraction. */
1734 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1735 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1736 int x_offset, int y_offset,
1737 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1738 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1739 int list0, int list1){
1740 MpegEncContext * const s = &h->s;
1741 qpel_mc_func *qpix_op= qpix_put;
1742 h264_chroma_mc_func chroma_op= chroma_put;
/* offsets are in chroma-sample units; luma uses 2x */
1744 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1745 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1746 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1747 x_offset += 8*s->mb_x;
1748 y_offset += 8*(s->mb_y >> MB_FIELD);
1751 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1752 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1753 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1754 qpix_op, chroma_op);
/* second pass averages on top of the first */
1757 chroma_op= chroma_avg;
1761 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1762 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1763 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1764 qpix_op, chroma_op);
/* Weighted-prediction motion compensation for a partition. Bi-prediction
 * renders list1 into a scratchpad and blends with implicit (use_weight==2)
 * or explicit weights; uni-prediction applies explicit weights in place.
 * NOTE(review): interior lines appear elided in this extraction. */
1768 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1769 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770 int x_offset, int y_offset,
1771 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1773 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1774 int list0, int list1){
1775 MpegEncContext * const s = &h->s;
1777 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1778 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1779 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1780 x_offset += 8*s->mb_x;
1781 y_offset += 8*(s->mb_y >> MB_FIELD);
1784 /* don't optimize for luma-only case, since B-frames usually
1785 * use implicit weights => chroma too. */
1786 uint8_t *tmp_cb = s->obmc_scratchpad;
1787 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1788 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1789 int refn0 = h->ref_cache[0][ scan8[n] ];
1790 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 direct to dest, list1 into the scratchpad, then blend */
1792 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1793 dest_y, dest_cb, dest_cr,
1794 x_offset, y_offset, qpix_put, chroma_put);
1795 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1796 tmp_y, tmp_cb, tmp_cr,
1797 x_offset, y_offset, qpix_put, chroma_put);
1799 if(h->use_weight == 2){
/* implicit weights: w0 + w1 == 64, log2 denom 5 */
1800 int weight0 = h->implicit_weight[refn0][refn1];
1801 int weight1 = 64 - weight0;
1802 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1803 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1804 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1806 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1807 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1808 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1809 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1810 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1811 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1812 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1813 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1814 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-prediction: render then weight in place */
1817 int list = list1 ? 1 : 0;
1818 int refn = h->ref_cache[list][ scan8[n] ];
1819 Picture *ref= &h->ref_list[list][refn];
1820 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1821 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1822 qpix_put, chroma_put);
1824 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1825 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1826 if(h->use_weight_chroma){
1827 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1828 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1829 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatches one partition to weighted or standard MC. The weighted path
 * is taken for explicit weighting (use_weight==1) or implicit weighting
 * whose weight differs from a plain average (!= 32). */
1835 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1836 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1837 int x_offset, int y_offset,
1838 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1839 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1840 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1841 int list0, int list1){
1842 if((h->use_weight==2 && list0 && list1
1843 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1844 || h->use_weight==1)
/* weight_op/weight_avg index 0 = luma op, 3 = chroma op for this size */
1845 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put,
1847 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1849 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1850 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1853 static inline void prefetch_motion(H264Context *h, int list){
1854 /* fetch pixels for estimated mv 4 macroblocks ahead
1855 * optimized for 64byte cache lines */
1856 MpegEncContext * const s = &h->s;
1857 const int refn = h->ref_cache[list][scan8[0]];
/* estimate the target address from the top-left MV of this MB */
1859 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1860 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1861 uint8_t **src= h->ref_list[list][refn].data;
1862 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1863 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous: src[2]-src[1] is the Cb->Cr stride */
1864 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1865 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Performs inter prediction for a whole macroblock: selects partition
 * layout from mb_type (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions)
 * and issues one mc_part() per partition and per prediction list.
 * NOTE(review): loop headers for the 8x8 branch appear elided in this
 * extraction. */
1869 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1870 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1871 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1872 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1873 MpegEncContext * const s = &h->s;
1874 const int mb_xy= h->mb_xy;
1875 const int mb_type= s->current_picture.mb_type[mb_xy];
1877 assert(IS_INTER(mb_type));
/* hide reference-fetch latency for both lists */
1879 prefetch_motion(h, 0);
1881 if(IS_16X16(mb_type)){
1882 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1883 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1884 &weight_op[0], &weight_avg[0],
1885 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1886 }else if(IS_16X8(mb_type)){
1887 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1891 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1892 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1893 &weight_op[1], &weight_avg[1],
1894 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1895 }else if(IS_8X16(mb_type)){
1896 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1900 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1901 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1902 &weight_op[2], &weight_avg[2],
1903 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8: each quadrant carries its own sub-partition type */
1907 assert(IS_8X8(mb_type));
1910 const int sub_mb_type= h->sub_mb_type[i];
1912 int x_offset= (i&1)<<2;
1913 int y_offset= (i&2)<<1;
1915 if(IS_SUB_8X8(sub_mb_type)){
1916 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1917 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1918 &weight_op[3], &weight_avg[3],
1919 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1920 }else if(IS_SUB_8X4(sub_mb_type)){
1921 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1926 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1927 &weight_op[4], &weight_avg[4],
1928 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1929 }else if(IS_SUB_4X8(sub_mb_type)){
1930 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1934 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1935 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1936 &weight_op[5], &weight_avg[5],
1937 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1940 assert(IS_SUB_4X4(sub_mb_type));
1942 int sub_x_offset= x_offset + 2*(j&1);
1943 int sub_y_offset= y_offset + (j&2);
1944 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1945 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1946 &weight_op[6], &weight_avg[6],
1947 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1953 prefetch_motion(h, 1);
/* One-time initialization of the static CAVLC tables (coeff_token,
 * total_zeros, run VLCs) guarded by the 'done' flag.
 * NOTE(review): the guard/loop lines appear partly elided in this
 * extraction; run from a single thread before decoding starts. */
1956 static av_cold void decode_init_vlc(void){
1957 static int done = 0;
1963 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1964 &chroma_dc_coeff_token_len [0], 1, 1,
1965 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1968 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1969 &coeff_token_len [i][0], 1, 1,
1970 &coeff_token_bits[i][0], 1, 1, 1);
1974 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1975 &chroma_dc_total_zeros_len [i][0], 1, 1,
1976 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1978 for(i=0; i<15; i++){
1979 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1980 &total_zeros_len [i][0], 1, 1,
1981 &total_zeros_bits[i][0], 1, 1, 1);
1985 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1986 &run_len [i][0], 1, 1,
1987 &run_bits[i][0], 1, 1, 1);
/* runs >= 7 share a separate, larger table */
1989 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1990 &run_len [6][0], 1, 1,
1991 &run_bits[6][0], 1, 1, 1);
/* Frees all per-context tables (av_freep NULLs each pointer, so a later
 * re-init or double call is safe) plus per-thread-context scratch buffers.
 * NOTE(review): interior lines appear elided in this extraction. */
1995 static void free_tables(H264Context *h){
1998 av_freep(&h->intra4x4_pred_mode);
1999 av_freep(&h->chroma_pred_mode_table);
2000 av_freep(&h->cbp_table);
2001 av_freep(&h->mvd_table[0]);
2002 av_freep(&h->mvd_table[1]);
2003 av_freep(&h->direct_table);
2004 av_freep(&h->non_zero_count);
2005 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear the alias too */
2006 h->slice_table= NULL;
2008 av_freep(&h->mb2b_xy);
2009 av_freep(&h->mb2b8_xy);
2011 for(i = 0; i < MAX_SPS_COUNT; i++)
2012 av_freep(h->sps_buffers + i);
2014 for(i = 0; i < MAX_PPS_COUNT; i++)
2015 av_freep(h->pps_buffers + i);
2017 for(i = 0; i < h->s.avctx->thread_count; i++) {
2018 hx = h->thread_context[i];
2020 av_freep(&hx->top_borders[1]);
2021 av_freep(&hx->top_borders[0]);
2022 av_freep(&hx->s.obmc_scratchpad);
/*
 * Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If the intra and inter 8x8 matrices are identical,
 * both dequant8_coeff entries share one buffer to save memory.
 */
2026 static void init_dequant8_coeff_table(H264Context *h){
/* optimized IDCTs use a permuted coefficient order, so store transposed */
2028 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2029 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2030 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2032 for(i=0; i<2; i++ ){
/* alias table 1 onto table 0 when the two scaling matrices match */
2033 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2034 h->dequant8_coeff[1] = h->dequant8_buffer[0];
/* per-QP entries: factor = norm * scaling_matrix, scaled by 2^(QP/6) */
2038 for(q=0; q<52; q++){
2039 int shift = ff_div6[q];
2040 int idx = ff_rem6[q];
/* (x>>3)|((x&7)<<3) swaps row/column when the IDCT wants transposed input */
2042 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2043 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2044 h->pps.scaling_matrix8[i][x]) << shift;
/*
 * Precompute the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb,
 * Cr) for all 52 QP values. Matrices with identical scaling lists share a
 * single buffer; the inner memcmp-based aliasing loop header is not
 * visible in this extract.
 */
2049 static void init_dequant4_coeff_table(H264Context *h){
/* optimized IDCT variants expect transposed coefficient order */
2051 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2052 for(i=0; i<6; i++ ){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier buffer j when its scaling list equals this one */
2055 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2056 h->dequant4_coeff[i] = h->dequant4_buffer[j];
/* +2 matches the 4x4 dequant normalization used by the IDCT */
2063 for(q=0; q<52; q++){
2064 int shift = ff_div6[q] + 2;
2065 int idx = ff_rem6[q];
/* (x>>2)|((x<<2)&0xF) transposes the 4x4 position when required */
2067 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2068 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2069 h->pps.scaling_matrix4[i][x]) << shift;
/*
 * (Re)build all dequant tables for the current PPS/SPS. With lossless
 * transform bypass enabled, QP 0 entries are forced to the identity
 * factor 1<<6 so dequantization becomes a no-op at that QP.
 */
2074 static void init_dequant_tables(H264Context *h){
2076 init_dequant4_coeff_table(h);
/* 8x8 tables are only needed when the PPS allows the 8x8 transform */
2077 if(h->pps.transform_8x8_mode)
2078 init_dequant8_coeff_table(h);
2079 if(h->sps.transform_bypass){
/* 1<<6 cancels the <<shift normalization, i.e. identity dequant at QP 0 */
2082 h->dequant4_coeff[i][0][x] = 1<<6;
2083 if(h->pps.transform_8x8_mode)
2086 h->dequant8_coeff[i][0][x] = 1<<6;
2093 * needs width/height (mb_stride/mb_height must already be set)
/*
 * Allocate all per-stream tables sized by the macroblock count.
 * CHECKED_ALLOCZ presumably jumps to a cleanup label on failure -- the
 * `fail:` path is not visible in this extract. Returns 0 on success.
 */
2095 static int alloc_tables(H264Context *h){
2096 MpegEncContext * const s = &h->s;
/* +1 row so edge macroblocks can safely address one row beyond the frame */
2097 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2100 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2106 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2107 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2108 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2109 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so negative neighbors are valid */
2111 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2112 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* lookup tables mapping macroblock index to 4x4 (b) and 8x8 (b8) indices */
2114 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2115 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2116 for(y=0; y<s->mb_height; y++){
2117 for(x=0; x<s->mb_width; x++){
2118 const int mb_xy= x + y*s->mb_stride;
2119 const int b_xy = 4*x + 4*y*h->b_stride;
2120 const int b8_xy= 2*x + 2*y*h->b8_stride;
2122 h->mb2b_xy [mb_xy]= b_xy;
2123 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is linesize-dependent; allocated later in frame_start() */
2127 s->obmc_scratchpad = NULL;
2129 if(!h->dequant4_coeff[0])
2130 init_dequant_tables(h);
2139 * Mimic alloc_tables(), but for every context thread.
/* Share the read-mostly tables of `src` with a per-thread context `dst`
 * instead of reallocating them; only truly per-thread state (scratchpad,
 * prediction function pointers) is set up individually. No ownership is
 * transferred -- free_tables() on the master frees the shared tables. */
2141 static void clone_tables(H264Context *dst, H264Context *src){
2142 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2143 dst->non_zero_count = src->non_zero_count;
2144 dst->slice_table = src->slice_table;
2145 dst->cbp_table = src->cbp_table;
2146 dst->mb2b_xy = src->mb2b_xy;
2147 dst->mb2b8_xy = src->mb2b8_xy;
2148 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2149 dst->mvd_table[0] = src->mvd_table[0];
2150 dst->mvd_table[1] = src->mvd_table[1];
2151 dst->direct_table = src->direct_table;
/* per-thread scratchpad is allocated lazily in frame_start() */
2153 dst->s.obmc_scratchpad = NULL;
2154 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2159 * Allocate buffers which are not shared amongst multiple threads.
/* Per-thread init: top_borders holds one saved row of Y (16) + Cb (8) +
 * Cr (8) samples per macroblock column, two rows for MBAFF pairs.
 * Returns 0 on success, -1 on allocation failure. */
2161 static int context_init(H264Context *h){
2162 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2167 return -1; // free_tables will clean up for us
/*
 * Initialization shared by the H.264 decoder and SVQ3: geometry, intra
 * prediction tables and flat (all-16) default scaling matrices, which are
 * overridden if the SPS/PPS carry explicit scaling lists.
 */
2170 static av_cold void common_init(H264Context *h){
2171 MpegEncContext * const s = &h->s;
2173 s->width = s->avctx->width;
2174 s->height = s->avctx->height;
2175 s->codec_id= s->avctx->codec->id;
2177 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 forces the dequant tables to be rebuilt on the first PPS seen */
2179 h->dequant_coeff_pps= -1;
2180 s->unrestricted_mv=1;
2181 s->decode=1; //FIXME
/* flat value 16 == spec default "Flat_4x4_16"/"Flat_8x8_16" scaling lists */
2183 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2184 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/*
 * AVCodec init callback: set up the MpegEncContext defaults, choose the
 * output pixel format, and detect AVCC-style ("avc1") extradata.
 * NOTE(review): partial extract -- the is_avc/nal_length handling inside
 * the extradata branch is not visible here.
 */
2187 static av_cold int decode_init(AVCodecContext *avctx){
2188 H264Context *h= avctx->priv_data;
2189 MpegEncContext * const s = &h->s;
2191 MPV_decode_defaults(s);
2196 s->out_format = FMT_H264;
2197 s->workaround_bugs= avctx->workaround_bugs;
2200 // s->decode_mb= ff_h263_decode_mb;
2201 s->quarter_sample = 1;
/* SVQ3 uses full-range (JPEG) YUV; H.264 uses limited-range YUV420P */
2204 if(avctx->codec_id == CODEC_ID_SVQ3)
2205 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2207 avctx->pix_fmt= PIX_FMT_YUV420P;
/* first extradata byte == 1 identifies AVCC (length-prefixed) bitstreams */
2211 if(avctx->extradata_size > 0 && avctx->extradata &&
2212 *(char *)avctx->extradata == 1){
/* single-thread default: context 0 is the master context itself */
2219 h->thread_context[0] = h;
/*
 * Per-frame setup: start the MPV frame, precompute block -> pixel offset
 * tables for frame and field decoding, and lazily allocate the per-thread
 * bi-prediction scratchpad (it needs linesize, unknown at alloc_tables()
 * time). Returns via MPV_frame_start's error path on failure.
 */
2223 static int frame_start(H264Context *h){
2224 MpegEncContext * const s = &h->s;
2227 if(MPV_frame_start(s, s->avctx) < 0)
2229 ff_er_frame_start(s);
2231 * MPV_frame_start uses pict_type to derive key_frame.
2232 * This is incorrect for H.264; IDR markings must be used.
2233 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2234 * See decode_nal_units().
2236 s->current_picture_ptr->key_frame= 0;
2238 assert(s->linesize && s->uvlinesize);
/* luma offsets: [0..15] frame (PAFF), [24..39] field (MBAFF, doubled stride) */
2240 for(i=0; i<16; i++){
2241 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2242 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets: Cb at [16..], Cr at [20..]; field variants at +24 */
2245 h->block_offset[16+i]=
2246 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247 h->block_offset[24+16+i]=
2248 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2251 /* can't be in alloc_tables because linesize isn't known there.
2252 * FIXME: redo bipred weight to not require extra buffer? */
2253 for(i = 0; i < s->avctx->thread_count; i++)
2254 if(!h->thread_context[i]->s.obmc_scratchpad)
2255 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2257 /* some macroblocks will be accessed before they're available */
2258 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2259 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2261 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* not a reference yet; marked as reference later if the slices say so */
2262 s->current_picture_ptr->reference= 0;
/*
 * Save the left column and bottom row of the just-decoded macroblock into
 * h->left_border / h->top_borders[0], so the deblocking filter of the
 * neighboring macroblocks can still read the unfiltered samples.
 */
2266 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2267 MpegEncContext * const s = &h->s;
2271 src_cb -= uvlinesize;
2272 src_cr -= uvlinesize;
2274 // There are two lines saved, the line above the top macroblock of a pair,
2275 // and the line above the bottom macroblock
/* left_border[0] keeps the top-left corner sample (from the old top border) */
2276 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2277 for(i=1; i<17; i++){
2278 h->left_border[i]= src_y[15+i* linesize];
/* copy the macroblock's bottom luma row (16 bytes) into the top border */
2281 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2282 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma borders are skipped in grayscale-only mode */
2284 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2285 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2286 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2288 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2289 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2291 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2292 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/*
 * Swap (xchg=1) or restore (xchg=0) the saved unfiltered border samples
 * with the current macroblock's edge pixels, so intra prediction sees the
 * deblocked neighbors while the filter itself keeps unfiltered input.
 * deblocking_filter==2 means "don't filter across slice boundaries".
 */
2296 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2297 MpegEncContext * const s = &h->s;
2304 if(h->deblocking_filter == 2) {
/* only treat neighbors in the same slice as filterable */
2306 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2307 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2309 deblock_left = (s->mb_x > 0);
2310 deblock_top = (s->mb_y > 0);
/* back up by one row+column so index 0 addresses the border samples */
2313 src_y -= linesize + 1;
2314 src_cb -= uvlinesize + 1;
2315 src_cr -= uvlinesize + 1;
/* conditional swap macro; body (lines 2318-2323) not visible in extract */
2317 #define XCHG(a,b,t,xchg)\
2324 for(i = !deblock_top; i<17; i++){
2325 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2330 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2331 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right neighbor border, needed for diagonal intra prediction */
2332 if(s->mb_x+1 < s->mb_width){
2333 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2337 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2339 for(i = !deblock_top; i<9; i++){
2340 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2341 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2345 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2346 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/*
 * MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock pair (two rows in top_borders[0]/[1], 34-entry left column).
 */
2351 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2352 MpegEncContext * const s = &h->s;
2355 src_y -= 2 * linesize;
2356 src_cb -= 2 * uvlinesize;
2357 src_cr -= 2 * uvlinesize;
2359 // There are two lines saved, the line above the top macroblock of a pair,
2360 // and the line above the bottom macroblock
/* corner samples come from the previous pair's saved top borders */
2361 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2362 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2363 for(i=2; i<34; i++){
2364 h->left_border[i]= src_y[15+i* linesize];
/* bottom two luma rows of the pair (rows 32 and 33 below src_y) */
2367 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2368 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2369 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2370 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2372 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2373 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2374 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2375 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2376 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2377 for(i=2; i<18; i++){
2378 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2379 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2381 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2382 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2383 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2384 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/*
 * MBAFF variant of xchg_mb_border(): swap/restore the saved border
 * samples of a macroblock pair (both top_borders rows, 34-sample left
 * column). `xchg` selects swap (1) vs one-way restore (0).
 */
2388 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2389 MpegEncContext * const s = &h->s;
2392 int deblock_left = (s->mb_x > 0);
/* > 1 because the top neighbor of a pair is two macroblock rows up */
2393 int deblock_top = (s->mb_y > 1);
2395 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2397 src_y -= 2 * linesize + 1;
2398 src_cb -= 2 * uvlinesize + 1;
2399 src_cr -= 2 * uvlinesize + 1;
/* conditional swap macro; body (lines 2402-2407) not visible in extract */
2401 #define XCHG(a,b,t,xchg)\
2408 for(i = (!deblock_top)<<1; i<34; i++){
2409 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2414 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2415 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2416 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2417 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
/* top-right neighbor pair, needed for diagonal intra prediction */
2418 if(s->mb_x+1 < s->mb_width){
2419 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2420 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2424 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2426 for(i = (!deblock_top) << 1; i<18; i++){
2427 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2428 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2432 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2433 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2434 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2435 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/*
 * Reconstruct one macroblock: destination setup, intra prediction or
 * inter motion compensation, IDCT/residual addition, and deblocking.
 * `simple` (a compile-time constant at the two call sites) strips the
 * MBAFF / grayscale / PCM / SVQ3 paths from the generated code.
 * NOTE(review): partial extract -- several else-branches and closing
 * braces are not visible here.
 */
2440 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2441 MpegEncContext * const s = &h->s;
2442 const int mb_x= s->mb_x;
2443 const int mb_y= s->mb_y;
2444 const int mb_xy= h->mb_xy;
2445 const int mb_type= s->current_picture.mb_type[mb_xy];
2446 uint8_t *dest_y, *dest_cb, *dest_cr;
2447 int linesize, uvlinesize /*dct_offset*/;
2449 int *block_offset = &h->block_offset[0];
/* bottom macroblock of an MBAFF pair */
2450 const unsigned int bottom = mb_y & 1;
/* qscale==0 + SPS flag means lossless bypass; is_h264 excludes SVQ3 */
2451 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2452 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2453 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers for this macroblock in the current picture */
2455 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2456 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2457 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2459 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2460 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the stride and use the field block offsets */
2462 if (!simple && MB_FIELD) {
2463 linesize = h->mb_linesize = s->linesize * 2;
2464 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2465 block_offset = &h->block_offset[24];
2466 if(mb_y&1){ //FIXME move out of this func?
2467 dest_y -= s->linesize*15;
2468 dest_cb-= s->uvlinesize*7;
2469 dest_cr-= s->uvlinesize*7;
/* remap ref indices to per-field references for MBAFF field macroblocks */
2473 for(list=0; list<h->list_count; list++){
2474 if(!USES_LIST(mb_type, list))
2476 if(IS_16X16(mb_type)){
2477 int8_t *ref = &h->ref_cache[list][scan8[0]];
2478 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2480 for(i=0; i<16; i+=4){
2481 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2482 int ref = h->ref_cache[list][scan8[i]];
2484 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2490 linesize = h->mb_linesize = s->linesize;
2491 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2492 // dct_offset = s->linesize * 16;
/* select the IDCT-add functions matching the transform size / bypass */
2495 if(transform_bypass){
2497 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2498 }else if(IS_8x8DCT(mb_type)){
2499 idct_dc_add = s->dsp.h264_idct8_dc_add;
2500 idct_add = s->dsp.h264_idct8_add;
2502 idct_dc_add = s->dsp.h264_idct_dc_add;
2503 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: temporarily expose the unfiltered pair border for prediction */
2506 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2507 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2508 int mbt_y = mb_y&~1;
2509 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2510 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2511 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2512 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were parsed into h->mb; copy them straight out */
2515 if (!simple && IS_INTRA_PCM(mb_type)) {
2518 // The pixels are stored in h->mb array in the same order as levels,
2519 // copy them in output in the correct order.
2520 for(i=0; i<16; i++) {
2521 for (y=0; y<4; y++) {
2522 for (x=0; x<4; x++) {
2523 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2527 for(i=16; i<16+4; i++) {
2528 for (y=0; y<4; y++) {
2529 for (x=0; x<4; x++) {
2530 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2534 for(i=20; i<20+4; i++) {
2535 for (y=0; y<4; y++) {
2536 for (x=0; x<4; x++) {
2537 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra prediction path */
2542 if(IS_INTRA(mb_type)){
2543 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2544 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2546 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2547 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2548 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2551 if(IS_INTRA4x4(mb_type)){
2552 if(simple || !s->encoding){
/* intra 8x8: predict and add residual per 8x8 luma block */
2553 if(IS_8x8DCT(mb_type)){
2554 for(i=0; i<16; i+=4){
2555 uint8_t * const ptr= dest_y + block_offset[i];
2556 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2557 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2558 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2559 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only a DC coefficient: cheaper DC-only add */
2561 if(nnz == 1 && h->mb[i*16])
2562 idct_dc_add(ptr, h->mb + i*16, linesize);
2564 idct_add(ptr, h->mb + i*16, linesize);
/* intra 4x4: predict and add residual per 4x4 luma block */
2568 for(i=0; i<16; i++){
2569 uint8_t * const ptr= dest_y + block_offset[i];
2571 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* these modes read top-right samples; synthesize them if unavailable */
2574 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2575 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2576 assert(mb_y || linesize <= block_offset[i]);
2577 if(!topright_avail){
/* replicate the rightmost top sample across all four positions */
2578 tr= ptr[3 - linesize]*0x01010101;
2579 topright= (uint8_t*) &tr;
2581 topright= ptr + 4 - linesize;
2585 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2586 nnz = h->non_zero_count_cache[ scan8[i] ];
2589 if(nnz == 1 && h->mb[i*16])
2590 idct_dc_add(ptr, h->mb + i*16, linesize);
2592 idct_add(ptr, h->mb + i*16, linesize);
/* SVQ3 uses its own residual transform */
2594 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: full-plane prediction, then luma DC transform */
2599 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2601 if(!transform_bypass)
2602 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2604 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2606 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2607 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter path: motion compensation */
2609 hl_motion(h, dest_y, dest_cb, dest_cr,
2610 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2611 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2612 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals (intra4x4 already added them above) */
2616 if(!IS_INTRA4x4(mb_type)){
2618 if(IS_INTRA16x16(mb_type)){
2619 for(i=0; i<16; i++){
2620 if(h->non_zero_count_cache[ scan8[i] ])
2621 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2622 else if(h->mb[i*16])
2623 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* step 4 blocks at a time for 8x8 transform, 1 otherwise */
2626 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2627 for(i=0; i<16; i+=di){
2628 int nnz = h->non_zero_count_cache[ scan8[i] ];
2630 if(nnz==1 && h->mb[i*16])
2631 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2633 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2638 for(i=0; i<16; i++){
2639 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2640 uint8_t * const ptr= dest_y + block_offset[i];
2641 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals (skipped entirely in grayscale-only mode) */
2647 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2648 uint8_t *dest[2] = {dest_cb, dest_cr};
2649 if(transform_bypass){
2650 idct_add = idct_dc_add = s->dsp.add_pixels4;
2652 idct_add = s->dsp.h264_idct_add;
2653 idct_dc_add = s->dsp.h264_idct_dc_add;
/* chroma DC 2x2 transform, separate QP and dequant table per plane */
2654 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2655 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
/* blocks 16-19 are Cb, 20-23 are Cr; (i&4)>>2 selects the plane */
2658 for(i=16; i<16+8; i++){
2659 if(h->non_zero_count_cache[ scan8[i] ])
2660 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2661 else if(h->mb[i*16])
2662 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2665 for(i=16; i<16+8; i++){
2666 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2667 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2668 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: MBAFF filters a whole pair once the bottom MB is decoded */
2674 if(h->deblocking_filter) {
2675 if (!simple && FRAME_MBAFF) {
2676 //FIXME try deblocking one mb at a time?
2677 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2678 const int mb_y = s->mb_y - 1;
2679 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2680 const int mb_xy= mb_x + mb_y*s->mb_stride;
2681 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2682 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
/* wait for the bottom MB of the pair before filtering */
2683 if (!bottom) return;
2684 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2685 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2686 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2688 if(IS_INTRA(mb_type_top | mb_type_bottom))
2689 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2691 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
/* temporarily rewind state to the top MB of the pair to filter it */
2694 s->mb_y--; h->mb_xy -= s->mb_stride;
2695 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2696 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2697 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2698 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2699 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
/* then restore and filter the bottom MB */
2701 s->mb_y++; h->mb_xy += s->mb_stride;
2702 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2703 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2704 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2705 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2706 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* non-MBAFF: back up borders, then run the fast per-MB filter */
2708 tprintf(h->s.avctx, "call filter_mb\n");
2709 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2710 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2711 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2717 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple==1 lets the always-inlined worker drop MBAFF/PCM/gray/SVQ3 code */
2719 static void hl_decode_mb_simple(H264Context *h){
2720 hl_decode_mb_internal(h, 1);
2724 * Process a macroblock; this handles edge cases, such as interlacing.
/* noinline keeps the rarely-taken complex path out of the hot caller */
2726 static void av_noinline hl_decode_mb_complex(H264Context *h){
2727 hl_decode_mb_internal(h, 0);
/*
 * Dispatch macroblock reconstruction to the simple (fast) or complex
 * variant, depending on whether any uncommon feature is in use
 * (MBAFF/field coding, PCM, non-H.264 codec, grayscale, encoder).
 */
2730 static void hl_decode_mb(H264Context *h){
2731 MpegEncContext * const s = &h->s;
2732 const int mb_xy= h->mb_xy;
2733 const int mb_type= s->current_picture.mb_type[mb_xy];
2734 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2735 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
/* encoder may skip pixel reconstruction entirely */
2737 if(ENABLE_H264_ENCODER && !s->decode)
2741 hl_decode_mb_complex(h);
2742 else hl_decode_mb_simple(h);
/*
 * Convert a frame Picture into a single-field view in place: double the
 * strides, offset the data pointers by one line for the bottom field, and
 * mark the picture as referencing only that field parity.
 * NOTE(review): pic->reference is (redundantly) reassigned each loop
 * iteration in the original code; behavior is unaffected.
 */
2745 static void pic_as_field(Picture *pic, const int parity){
2747 for (i = 0; i < 4; ++i) {
/* bottom field starts one line below the top field */
2748 if (parity == PICT_BOTTOM_FIELD)
2749 pic->data[i] += pic->linesize[i];
2750 pic->reference = parity;
2751 pic->linesize[i] *= 2;
/*
 * Copy `src` into `dest` as a field picture if src contains a field of
 * the requested parity. Returns 1 if a field was emitted, 0 otherwise.
 * NOTE(review): the copy (*dest = *src) inside the match branch is not
 * visible in this extract.
 */
2755 static int split_field_copy(Picture *dest, Picture *src,
2756 int parity, int id_add){
/* does src reference the wanted field parity at all? */
2757 int match = !!(src->reference & parity);
2761 pic_as_field(dest, parity);
/* long-term field pic_ids are 2*idx(+1); id_add selects the field */
2763 dest->pic_id += id_add;
2770 * Split one reference list into field parts, interleaving by parity
2771 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2772 * set to look at the actual start of data for that field.
2774 * @param dest output list
2775 * @param dest_len maximum number of fields to put in dest
2776 * @param src the source reference list containing fields and/or field pairs
2777 * (aka short_ref/long_ref, or
2778 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2779 * @param src_len number of Picture's in source (pairs and unmatched fields)
2780 * @param parity the parity of the picture being decoded/needing
2781 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2782 * @return number of fields placed in dest
/*
 * Interleave one half (short-term or long-term) of a frame reference list
 * into fields, alternating parities starting with `parity`, per
 * H.264 8.2.4.2.5. Returns the number of fields written to dest.
 * NOTE(review): loop-local declarations and the index advances are not
 * visible in this extract.
 */
2784 static int split_field_half_ref_list(Picture *dest, int dest_len,
2785 Picture *src, int src_len, int parity){
/* start by looking for a field of the same parity as the current picture */
2786 int same_parity = 1;
2792 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2793 if (same_parity && same_i < src_len) {
2794 field_output = split_field_copy(dest + out_i, src + same_i,
/* if no same-parity field was emitted, try the opposite parity next */
2796 same_parity = !field_output;
2799 } else if (opp_i < src_len) {
2800 field_output = split_field_copy(dest + out_i, src + opp_i,
2801 PICT_FRAME - parity, 0);
2802 same_parity = field_output;
2814 * Split the reference frame list into a reference field list.
2815 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2816 * The input list contains both reference field pairs and
2817 * unmatched reference fields; it is ordered as spec describes
2818 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2819 * unmatched field pairs are also present. Conceptually this is equivalent
2820 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2822 * @param dest output reference list where ordered fields are to be placed
2823 * @param dest_len max number of fields to place at dest
2824 * @param src source reference list, as described above
2825 * @param src_len number of pictures (pairs and unmatched fields) in src
2826 * @param parity parity of field being currently decoded
2827 * (one of PICT_{TOP,BOTTOM}_FIELD)
2828 * @param long_i index into src array that holds first long reference picture,
2829 * or src_len if no long refs present.
/*
 * Split a combined (short-term + long-term) frame reference list into a
 * field reference list: the short-term part [0, long_i) and long-term
 * part [long_i, src_len) are interleaved independently, as required by
 * H.264 8.2.4.2.5. Returns the total number of fields placed in dest.
 */
2831 static int split_field_ref_list(Picture *dest, int dest_len,
2832 Picture *src, int src_len,
2833 int parity, int long_i){
/* short-term refs first ... */
2835 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
/* ... then long-term refs into the remaining slots */
2839 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2840 src_len - long_i, parity);
2845 * fills the default_ref_list.
/*
 * Build the initial (pre-reordering) reference lists per H.264 8.2.4.2:
 * B slices order short-term refs by POC distance (L0 backwards, L1
 * forwards), P slices by decreasing frame_num; long-term refs are
 * appended by index. For field pictures the frame lists are then split
 * into field lists via split_field_ref_list().
 * NOTE(review): partial extract -- several declarations, else-branches
 * and closing braces are not visible here.
 */
2847 static int fill_default_ref_list(H264Context *h){
2848 MpegEncContext * const s = &h->s;
2850 int smallest_poc_greater_than_current = -1;
2852 Picture sorted_short_ref[32];
2853 Picture field_entry_list[2][32];
2854 Picture *frame_list[2];
/* field decoding: build frame lists in a temp buffer, split afterwards */
2856 if (FIELD_PICTURE) {
/* with structure_sel==PICT_FRAME, any reference field/frame qualifies */
2857 structure_sel = PICT_FRAME;
2858 frame_list[0] = field_entry_list[0];
2859 frame_list[1] = field_entry_list[1];
2862 frame_list[0] = h->default_ref_list[0];
2863 frame_list[1] = h->default_ref_list[1];
2866 if(h->slice_type==FF_B_TYPE){
2873 /* sort frame according to poc in B slice */
/* selection sort of short-term refs into ascending POC order */
2874 for(out_i=0; out_i<h->short_ref_count; out_i++){
2876 int best_poc=INT_MAX;
2878 for(i=0; i<h->short_ref_count; i++){
2879 const int poc= h->short_ref[i]->poc;
2880 if(poc > limit && poc < best_poc){
2886 assert(best_i != INT_MIN);
2889 sorted_short_ref[out_i]= *h->short_ref[best_i];
2890 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
/* remember where refs switch from "past" to "future" of current POC */
2891 if (-1 == smallest_poc_greater_than_current) {
2892 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2893 smallest_poc_greater_than_current = out_i;
2898 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2900 // find the largest poc
/* walk the sorted array outward from the current POC: L0 descending
 * first, L1 ascending first (step = +1 / -1) */
2901 for(list=0; list<2; list++){
2904 int step= list ? -1 : 1;
2906 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2908 while(j<0 || j>= h->short_ref_count){
2909 if(j != -99 && step == (list ? -1 : 1))
/* reverse direction once one end of the sorted array is exhausted */
2912 j= smallest_poc_greater_than_current + (step>>1);
2914 sel = sorted_short_ref[j].reference | structure_sel;
2915 if(sel != PICT_FRAME) continue;
2916 frame_list[list][index ]= sorted_short_ref[j];
2917 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2919 short_len[list] = index;
/* append long-term refs in ascending long-term index order */
2921 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2923 if(h->long_ref[i] == NULL) continue;
2924 sel = h->long_ref[i]->reference | structure_sel;
2925 if(sel != PICT_FRAME) continue;
2927 frame_list[ list ][index ]= *h->long_ref[i];
2928 frame_list[ list ][index++].pic_id= i;
/* field pictures: split each frame list into an interleaved field list */
2933 for(list=0; list<2; list++){
2935 len[list] = split_field_ref_list(h->default_ref_list[list],
2939 s->picture_structure,
2942 // swap the two first elements of L1 when L0 and L1 are identical
2943 if(list && len[0] > 1 && len[0] == len[1])
2944 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2946 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* zero unused tail entries so stale Pictures are never referenced */
2950 if(len[list] < h->ref_count[ list ])
2951 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* P/SP slices: single list, short-term refs then long-term refs */
2958 for(i=0; i<h->short_ref_count; i++){
2960 sel = h->short_ref[i]->reference | structure_sel;
2961 if(sel != PICT_FRAME) continue;
2962 frame_list[0][index ]= *h->short_ref[i];
2963 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2966 for(i = 0; i < 16; i++){
2968 if(h->long_ref[i] == NULL) continue;
2969 sel = h->long_ref[i]->reference | structure_sel;
2970 if(sel != PICT_FRAME) continue;
2971 frame_list[0][index ]= *h->long_ref[i];
2972 frame_list[0][index++].pic_id= i;
2976 index = split_field_ref_list(h->default_ref_list[0],
2977 h->ref_count[0], frame_list[0],
2978 index, s->picture_structure,
2981 if(index < h->ref_count[0])
2982 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
/* debug dump of the constructed lists */
2985 for (i=0; i<h->ref_count[0]; i++) {
2986 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2988 if(h->slice_type==FF_B_TYPE){
2989 for (i=0; i<h->ref_count[1]; i++) {
2990 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2997 static void print_short_term(H264Context *h);
2998 static void print_long_term(H264Context *h);
3001 * Extract structure information about the picture described by pic_num in
3002 * the current decoding context (frame or field). Note that pic_num is
3003 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3004 * @param pic_num picture number for which to extract structure information
3005 * @param structure one of PICT_XXX describing structure of picture
3007 * @return frame number (short term) or long term index of picture
3008 * described by pic_num
/* For field decoding, pic_num's low bit encodes same/opposite parity
 * (H.264 8.2.4.1); the frame_num/long-term index is pic_num >> 1.
 * NOTE(review): the condition and shift on the field path (lines
 * 3014-3018) are not visible in this extract. */
3010 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3011 MpegEncContext * const s = &h->s;
3013 *structure = s->picture_structure;
3016 /* opposite field */
3017 *structure ^= PICT_FRAME;
/**
 * Parse the ref_pic_list_reordering() slice-header syntax and apply it to
 * h->ref_list[], starting from the default reference lists.
 * @return 0 for I/SI slices (no lists); other return paths are elided in
 *         this excerpt — NOTE(review): presumably <0 on bitstream errors.
 */
3024 static int decode_ref_pic_list_reordering(H264Context *h){
3025 MpegEncContext * const s = &h->s;
3026 int list, index, pic_structure;
3028 print_short_term(h);
3030 if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
3032 for(list=0; list<h->list_count; list++){
/* Start from the default list; reordering below permutes it in place. */
3033 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
3035 if(get_bits1(&s->gb)){
/* pred carries the running short-term picture-number prediction. */
3036 int pred= h->curr_pic_num;
3038 for(index=0; ; index++){
3039 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3040 unsigned int pic_id;
3042 Picture *ref = NULL;
/* idc==3 ends the reordering command list (loop exit elided here). */
3044 if(reordering_of_pic_nums_idc==3)
3047 if(index >= h->ref_count[list]){
3048 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3052 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: reorder by short-term picture number difference. */
3053 if(reordering_of_pic_nums_idc<2){
3054 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3057 if(abs_diff_pic_num > h->max_pic_num){
3058 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3062 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3063 else pred+= abs_diff_pic_num;
/* Modular wrap; max_pic_num is a power of two so masking works. */
3064 pred &= h->max_pic_num - 1;
3066 frame_num = pic_num_extract(h, pred, &pic_structure);
/* Search the short-term list from the end for a matching frame_num. */
3068 for(i= h->short_ref_count-1; i>=0; i--){
3069 ref = h->short_ref[i];
3070 assert(ref->reference);
3071 assert(!ref->long_ref);
3072 if(ref->data[0] != NULL &&
3073 ref->frame_num == frame_num &&
3074 (ref->reference & pic_structure) &&
3075 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc 2: reorder by long-term index. */
3082 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3084 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3087 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3090 ref = h->long_ref[long_idx];
3091 assert(!(ref && !ref->reference));
3092 if(ref && (ref->reference & pic_structure)){
3093 ref->pic_id= pic_id;
3094 assert(ref->long_ref);
3102 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3103 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Shift duplicate-down: find any existing occurrence of ref past index,
 * slide entries up by one, then insert ref at index. */
3105 for(i=index; i+1<h->ref_count[list]; i++){
3106 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3109 for(; i > index; i--){
3110 h->ref_list[list][i]= h->ref_list[list][i-1];
3112 h->ref_list[list][index]= *ref;
3114 pic_as_field(&h->ref_list[list][index], pic_structure);
3118 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* Fill any holes (missing references) with the current picture so later
 * code never dereferences a NULL data[0]. */
3124 for(list=0; list<h->list_count; list++){
3125 for(index= 0; index < h->ref_count[list]; index++){
3126 if(!h->ref_list[list][index].data[0])
3127 h->ref_list[list][index]= s->current_picture;
3131 if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
3132 direct_dist_scale_factor(h);
3133 direct_ref_list_init(h);
/**
 * Build the per-field reference entries (stored at index 16+2*i) used for
 * MBAFF decoding: each frame reference is split into a top and a bottom
 * field view, and the weighted-prediction tables are replicated to match.
 */
3137 static void fill_mbaff_ref_list(H264Context *h){
3139 for(list=0; list<2; list++){ //FIXME try list_count
3140 for(i=0; i<h->ref_count[list]; i++){
3141 Picture *frame = &h->ref_list[list][i];
3142 Picture *field = &h->ref_list[list][16+2*i];
/* field[0] is the top field: same data pointers, doubled line stride. */
3145 field[0].linesize[j] <<= 1;
3146 field[0].reference = PICT_TOP_FIELD;
3147 field[1] = field[0];
/* The bottom field starts one frame line below the top field. */
3149 field[1].data[j] += frame->linesize[j];
3150 field[1].reference = PICT_BOTTOM_FIELD;
/* Duplicate luma/chroma weights and offsets for both field entries. */
3152 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3153 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3155 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3156 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* Replicate the implicit bi-prediction weight table in both dimensions. */
3160 for(j=0; j<h->ref_count[1]; j++){
3161 for(i=0; i<h->ref_count[0]; i++)
3162 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3163 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3164 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the pred_weight_table() slice-header syntax: explicit per-reference
 * luma/chroma weights and offsets for weighted prediction. Entries without
 * a coded weight fall back to the default (1 << log2_weight_denom, offset 0).
 */
3168 static int pred_weight_table(H264Context *h){
3169 MpegEncContext * const s = &h->s;
3171 int luma_def, chroma_def;
3174 h->use_weight_chroma= 0;
3175 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3176 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* Default weight = 1.0 in fixed point at the coded denominator. */
3177 luma_def = 1<<h->luma_log2_weight_denom;
3178 chroma_def = 1<<h->chroma_log2_weight_denom;
3180 for(list=0; list<2; list++){
3181 for(i=0; i<h->ref_count[list]; i++){
3182 int luma_weight_flag, chroma_weight_flag;
3184 luma_weight_flag= get_bits1(&s->gb);
3185 if(luma_weight_flag){
3186 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3187 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* Only mark weighting as in-use when it differs from the identity. */
3188 if( h->luma_weight[list][i] != luma_def
3189 || h->luma_offset[list][i] != 0)
3192 h->luma_weight[list][i]= luma_def;
3193 h->luma_offset[list][i]= 0;
3196 chroma_weight_flag= get_bits1(&s->gb);
3197 if(chroma_weight_flag){
3200 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3201 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3202 if( h->chroma_weight[list][i][j] != chroma_def
3203 || h->chroma_offset[list][i][j] != 0)
3204 h->use_weight_chroma= 1;
3209 h->chroma_weight[list][i][j]= chroma_def;
3210 h->chroma_offset[list][i][j]= 0;
/* Only B slices carry a second (list 1) weight table. */
3214 if(h->slice_type != FF_B_TYPE) break;
3216 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Compute the implicit weighted-prediction table for B slices
 * (pps.weighted_bipred_idc == 2): weights are derived from the POC
 * distances between the current picture and each reference pair.
 */
3220 static void implicit_weight_table(H264Context *h){
3221 MpegEncContext * const s = &h->s;
3223 int cur_poc = s->current_picture_ptr->poc;
/* Shortcut: a single, temporally symmetric reference pair needs no
 * weighting at all (the early return is elided in this excerpt). */
3225 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3226 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3228 h->use_weight_chroma= 0;
3233 h->use_weight_chroma= 2;
3234 h->luma_log2_weight_denom= 5;
3235 h->chroma_log2_weight_denom= 5;
3237 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3238 int poc0 = h->ref_list[0][ref0].poc;
3239 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3240 int poc1 = h->ref_list[1][ref1].poc;
3241 int td = av_clip(poc1 - poc0, -128, 127);
/* NOTE(review): the td==0 guard is on an elided line; the division by td
 * below relies on it — confirm against the complete source. */
3243 int tb = av_clip(cur_poc - poc0, -128, 127);
3244 int tx = (16384 + (FFABS(td) >> 1)) / td;
3245 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range scale factors fall back to the equal weight 32/32. */
3246 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3247 h->implicit_weight[ref0][ref1] = 32;
3249 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3251 h->implicit_weight[ref0][ref1] = 32;
3257 * Mark a picture as no longer needed for reference. The refmask
3258 * argument allows unreferencing of individual fields or the whole frame.
3259 * If the picture becomes entirely unreferenced, but is being held for
3260 * display purposes, it is marked as such.
3261 * @param refmask mask of fields to unreference; the mask is bitwise
3262 * anded with the reference marking of pic
3263 * @return non-zero if pic becomes entirely unreferenced (except possibly
3264 * for display purposes) zero if one of the fields remains in
3267 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* Clear the masked-out field bits; nonzero result means some field of the
 * picture is still a reference (the return statement is elided here). */
3269 if (pic->reference &= refmask) {
/* Otherwise: if the picture is still queued for display, keep it alive
 * with the special DELAYED_PIC_REF marking instead of reference 0. */
3272 for(i = 0; h->delayed_pic[i]; i++)
3273 if(pic == h->delayed_pic[i]){
3274 pic->reference=DELAYED_PIC_REF;
3282 * instantaneous decoder refresh.
/* Empties both the long-term and short-term reference lists, as required
 * when an IDR slice is decoded. */
3284 static void idr(H264Context *h){
3287 for(i=0; i<16; i++){
3288 if (h->long_ref[i] != NULL) {
/* refmask 0 fully unreferences the picture (modulo delayed-output hold). */
3289 unreference_pic(h, h->long_ref[i], 0);
3290 h->long_ref[i]= NULL;
3293 h->long_ref_count=0;
3295 for(i=0; i<h->short_ref_count; i++){
3296 unreference_pic(h, h->short_ref[i], 0);
3297 h->short_ref[i]= NULL;
3299 h->short_ref_count=0;
3302 /* forget old pics after a seek */
3303 static void flush_dpb(AVCodecContext *avctx){
3304 H264Context *h= avctx->priv_data;
/* Drop all pictures queued for delayed output. */
3306 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3307 if(h->delayed_pic[i])
3308 h->delayed_pic[i]->reference= 0;
3309 h->delayed_pic[i]= NULL;
/* Reset output-order tracking so the first post-seek POC always passes. */
3311 h->outputed_poc= INT_MIN;
3313 if(h->s.current_picture_ptr)
3314 h->s.current_picture_ptr->reference= 0;
3315 h->s.first_field= 0;
/* Let the generic MPEG layer release its own buffers as well. */
3316 ff_mpeg_flush(avctx);
3320 * Find a Picture in the short term reference list by frame number.
3321 * @param frame_num frame number to search for
3322 * @param idx the index into h->short_ref where returned picture is found
3323 * undefined if no picture found.
3324 * @return pointer to the found picture, or NULL if no pic with the provided
3325 * frame number is found
3327 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3328 MpegEncContext * const s = &h->s;
/* Linear scan; the short-term list is small (at most 16 entries). */
3331 for(i=0; i<h->short_ref_count; i++){
3332 Picture *pic= h->short_ref[i];
3333 if(s->avctx->debug&FF_DEBUG_MMCO)
3334 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3335 if(pic->frame_num == frame_num) {
3344 * Remove a picture from the short term reference list by its index in
3345 * that list. This does no checking on the provided index; it is assumed
3346 * to be valid. Other list entries are shifted down.
3347 * @param i index into h->short_ref of picture to remove.
3349 static void remove_short_at_index(H264Context *h, int i){
3350 assert(i >= 0 && i < h->short_ref_count);
3351 h->short_ref[i]= NULL;
/* Close the gap; the count is decremented first, so (count - i) is the
 * number of trailing entries to move. */
3352 if (--h->short_ref_count)
3353 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3358 * @return the removed picture or NULL if an error occurs
/* Remove the short-term reference with the given frame number, if any. */
3360 static Picture * remove_short(H264Context *h, int frame_num){
3361 MpegEncContext * const s = &h->s;
3365 if(s->avctx->debug&FF_DEBUG_MMCO)
3366 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3368 pic = find_short(h, frame_num, &i);
/* Only compact the list when the picture was actually found. */
3370 remove_short_at_index(h, i);
3376 * Remove a picture from the long term reference list by its index in
3377 * that list. This does no checking on the provided index; it is assumed
3378 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3379 * @param i index into h->long_ref of picture to remove.
3381 static void remove_long_at_index(H264Context *h, int i){
/* Unlike the short-term list, long_ref is indexed by long-term index, so
 * the slot is NULLed rather than compacted. */
3382 h->long_ref[i]= NULL;
3383 h->long_ref_count--;
3388 * @return the removed picture or NULL if an error occurs
/* Remove and return the long-term reference at index i (NULL if empty). */
3390 static Picture * remove_long(H264Context *h, int i){
3393 pic= h->long_ref[i];
/* Slot bookkeeping happens only when the entry exists. */
3395 remove_long_at_index(h, i);
3401 * print short term list
/* Debug helper: dumps the short-term list when FF_DEBUG_MMCO is enabled. */
3403 static void print_short_term(H264Context *h) {
3405 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3406 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3407 for(i=0; i<h->short_ref_count; i++){
3408 Picture *pic= h->short_ref[i];
3409 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3415 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is on;
 * empty (NULL) slots are skipped by an elided check before line 3424. */
3417 static void print_long_term(H264Context *h) {
3419 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3420 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3421 for(i = 0; i < 16; i++){
3422 Picture *pic= h->long_ref[i];
3424 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3431 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO commands to the short-/long-term lists, then
 * performs the implicit sliding-window insertion of the current picture
 * and enforces the SPS reference-frame limit. */
3433 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3434 MpegEncContext * const s = &h->s;
3436 int current_ref_assigned=0;
3439 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3440 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3442 for(i=0; i<mmco_count; i++){
3443 int structure, frame_num, unref_pic;
3444 if(s->avctx->debug&FF_DEBUG_MMCO)
3445 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3447 switch(mmco[i].opcode){
/* MMCO 1: mark a short-term picture (or one of its fields) unused. */
3448 case MMCO_SHORT2UNUSED:
3449 if(s->avctx->debug&FF_DEBUG_MMCO)
3450 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3451 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3452 pic = find_short(h, frame_num, &j);
/* structure ^ PICT_FRAME keeps the *other* field referenced, if any. */
3454 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3455 remove_short_at_index(h, j);
3456 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3457 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* MMCO 3: move a short-term picture to a long-term index. */
3459 case MMCO_SHORT2LONG:
3460 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3461 h->long_ref[mmco[i].long_arg]->frame_num ==
3462 mmco[i].short_pic_num / 2) {
3463 /* do nothing, we've already moved this field pair. */
3465 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* Evict whatever currently occupies the target long-term slot. */
3467 pic= remove_long(h, mmco[i].long_arg);
3468 if(pic) unreference_pic(h, pic, 0);
3470 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3471 if (h->long_ref[ mmco[i].long_arg ]){
3472 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3473 h->long_ref_count++;
/* MMCO 2: mark a long-term picture (or field) unused. */
3477 case MMCO_LONG2UNUSED:
3478 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3479 pic = h->long_ref[j];
3481 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3482 remove_long_at_index(h, j);
3483 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3484 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* MMCO 6 (case label elided): assign a long-term index to the current
 * picture; the second field of a pair needs special handling. */
3488 if (FIELD_PICTURE && !s->first_field) {
3489 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3490 /* Just mark second field as referenced */
3492 } else if (s->current_picture_ptr->reference) {
3493 /* First field in pair is in short term list or
3494 * at a different long term index.
3495 * This is not allowed; see 7.4.3, notes 2 and 3.
3496 * Report the problem and keep the pair where it is,
3497 * and mark this field valid.
3499 av_log(h->s.avctx, AV_LOG_ERROR,
3500 "illegal long term reference assignment for second "
3501 "field in complementary field pair (first field is "
3502 "short term or has non-matching long index)\n");
3508 pic= remove_long(h, mmco[i].long_arg);
3509 if(pic) unreference_pic(h, pic, 0);
3511 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3512 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3513 h->long_ref_count++;
3516 s->current_picture_ptr->reference |= s->picture_structure;
3517 current_ref_assigned=1;
/* MMCO 4: shrink the maximum long-term index. */
3519 case MMCO_SET_MAX_LONG:
3520 assert(mmco[i].long_arg <= 16);
3521 // just remove the long term which index is greater than new max
3522 for(j = mmco[i].long_arg; j<16; j++){
3523 pic = remove_long(h, j);
3524 if (pic) unreference_pic(h, pic, 0);
/* MMCO 5 (case label elided): reset — empty both reference lists. */
3528 while(h->short_ref_count){
3529 pic= remove_short(h, h->short_ref[0]->frame_num);
3530 if(pic) unreference_pic(h, pic, 0);
3532 for(j = 0; j < 16; j++) {
3533 pic= remove_long(h, j);
3534 if(pic) unreference_pic(h, pic, 0);
3541 if (!current_ref_assigned && FIELD_PICTURE &&
3542 !s->first_field && s->current_picture_ptr->reference) {
3544 /* Second field of complementary field pair; the first field of
3545 * which is already referenced. If short referenced, it
3546 * should be first entry in short_ref. If not, it must exist
3547 * in long_ref; trying to put it on the short list here is an
3548 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3550 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3551 /* Just mark the second field valid */
3552 s->current_picture_ptr->reference = PICT_FRAME;
3553 } else if (s->current_picture_ptr->long_ref) {
3554 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3555 "assignment for second field "
3556 "in complementary field pair "
3557 "(first field is long term)\n");
3560 * First field in reference, but not in any sensible place on our
3561 * reference lists. This shouldn't happen unless reference
3562 * handling somewhere else is wrong.
3566 current_ref_assigned = 1;
/* Implicit sliding-window: push the current picture onto the front of
 * the short-term list when no MMCO already assigned it. */
3569 if(!current_ref_assigned){
3570 pic= remove_short(h, s->current_picture_ptr->frame_num);
3572 unreference_pic(h, pic, 0);
3573 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3576 if(h->short_ref_count)
3577 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3579 h->short_ref[0]= s->current_picture_ptr;
3580 h->short_ref[0]->long_ref=0;
3581 h->short_ref_count++;
3582 s->current_picture_ptr->reference |= s->picture_structure;
/* Enforce sps.ref_frame_count: discard one reference on overflow,
 * preferring a long-term entry when one exists. */
3585 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3587 /* We have too many reference frames, probably due to corrupted
3588 * stream. Need to discard one frame. Prevents overrun of the
3589 * short_ref and long_ref buffers.
3591 av_log(h->s.avctx, AV_LOG_ERROR,
3592 "number of reference frames exceeds max (probably "
3593 "corrupt input), discarding one\n");
3595 if (h->long_ref_count) {
3596 for (i = 0; i < 16; ++i)
3601 pic = h->long_ref[i];
3602 remove_long_at_index(h, i);
3604 pic = h->short_ref[h->short_ref_count - 1];
3605 remove_short_at_index(h, h->short_ref_count - 1);
3607 unreference_pic(h, pic, 0);
3610 print_short_term(h);
/**
 * Parse the dec_ref_pic_marking() slice-header syntax into h->mmco[].
 * For IDR slices this synthesizes the implicit marking; otherwise it reads
 * the explicit MMCO list or sets up sliding-window removal.
 */
3615 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3616 MpegEncContext * const s = &h->s;
3619 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, stored as 0/-1. */
3620 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag - 1: -1 means "not long term". */
3621 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3622 if(h->mmco[0].long_arg == -1)
3625 h->mmco[0].opcode= MMCO_LONG;
3629 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3630 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3631 MMCOOpcode opcode= get_ue_golomb(gb);
3633 h->mmco[i].opcode= opcode;
3634 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute picture number, wrapped. */
3635 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3636 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3637 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3641 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3642 unsigned int long_arg= get_ue_golomb(gb);
/* Field pictures may address up to 32 long-term field indices for
 * LONG2UNUSED; everything else is capped at 16. */
3643 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3644 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3647 h->mmco[i].long_arg= long_arg;
3650 if(opcode > (unsigned)MMCO_LONG){
3651 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3654 if(opcode == MMCO_END)
/* Sliding-window mode: if the buffer is full, queue removal of the
 * oldest short-term reference (both fields for field pictures). */
3659 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3661 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3662 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3663 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3664 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3666 if (FIELD_PICTURE) {
3667 h->mmco[0].short_pic_num *= 2;
3668 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3669 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) of the current picture for all three
 * poc_type modes (0, 1, 2) and store the per-field and combined values in
 * s->current_picture_ptr.
 */
3680 static int init_poc(H264Context *h){
3681 MpegEncContext * const s = &h->s;
3682 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
/* frame_num_offset tracks frame_num wraparound across the sequence. */
3685 if(h->nal_unit_type == NAL_IDR_SLICE){
3686 h->frame_num_offset= 0;
3688 if(h->frame_num < h->prev_frame_num)
3689 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3691 h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: POC is carried explicitly as poc_lsb; infer the MSB from
 * wraparound relative to the previous picture. */
3694 if(h->sps.poc_type==0){
3695 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3697 if(h->nal_unit_type == NAL_IDR_SLICE){
3702 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3703 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3704 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3705 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3707 h->poc_msb = h->prev_poc_msb;
3708 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3710 field_poc[1] = h->poc_msb + h->poc_lsb;
3711 if(s->picture_structure == PICT_FRAME)
3712 field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC is derived from frame_num via the SPS-coded expected
 * delta-per-cycle table plus slice-level delta_poc corrections. */
3713 }else if(h->sps.poc_type==1){
3714 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3717 if(h->sps.poc_cycle_length != 0)
3718 abs_frame_num = h->frame_num_offset + h->frame_num;
3722 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3725 expected_delta_per_poc_cycle = 0;
3726 for(i=0; i < h->sps.poc_cycle_length; i++)
3727 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3729 if(abs_frame_num > 0){
3730 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3731 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3733 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3734 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3735 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3739 if(h->nal_ref_idc == 0)
3740 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3742 field_poc[0] = expectedpoc + h->delta_poc[0];
3743 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3745 if(s->picture_structure == PICT_FRAME)
3746 field_poc[1] += h->delta_poc[1];
/* poc_type 2: POC follows decoding order; non-reference pictures get
 * odd values one below the surrounding reference pictures. */
3749 if(h->nal_unit_type == NAL_IDR_SLICE){
3752 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3753 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* Store the computed field POCs on the current picture; a frame picture
 * takes the minimum of both fields as its overall POC. */
3759 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3760 s->current_picture_ptr->field_poc[0]= field_poc[0];
3761 s->current_picture_ptr->poc = field_poc[0];
3763 if(s->picture_structure != PICT_TOP_FIELD) {
3764 s->current_picture_ptr->field_poc[1]= field_poc[1];
3765 s->current_picture_ptr->poc = field_poc[1];
3767 if(!FIELD_PICTURE || !s->first_field) {
3768 Picture *cur = s->current_picture_ptr;
3769 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3777 * initialize scan tables
/* Builds the zigzag/field scan tables, permuting them to match the element
 * order expected by non-reference (optimized) IDCT implementations, and
 * sets up the _q0 aliases used when transform bypass (lossless) is active. */
3779 static void init_scan_tables(H264Context *h){
3780 MpegEncContext * const s = &h->s;
/* Reference C IDCT: use the canonical scan order unchanged. */
3782 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3783 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3784 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3786 for(i=0; i<16; i++){
/* Permutation for the optimized 4x4 IDCT: swap row/column nibbles. */
3787 #define T(x) (x>>2) | ((x<<2) & 0xF)
3788 h->zigzag_scan[i] = T(zigzag_scan[i]);
3789 h-> field_scan[i] = T( field_scan[i]);
3793 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3794 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3795 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3796 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3797 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3799 for(i=0; i<64; i++){
/* Permutation for the optimized 8x8 IDCT: swap row/column 3-bit fields. */
3800 #define T(x) (x>>3) | ((x&7)<<3)
3801 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3802 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3803 h->field_scan8x8[i] = T(field_scan8x8[i]);
3804 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* With transform bypass, qp==0 blocks skip the IDCT, so they must use
 * the unpermuted scan tables; otherwise alias the permuted ones. */
3808 if(h->sps.transform_bypass){ //FIXME same ugly
3809 h->zigzag_scan_q0 = zigzag_scan;
3810 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3811 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3812 h->field_scan_q0 = field_scan;
3813 h->field_scan8x8_q0 = field_scan8x8;
3814 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3816 h->zigzag_scan_q0 = h->zigzag_scan;
3817 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3818 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3819 h->field_scan_q0 = h->field_scan;
3820 h->field_scan8x8_q0 = h->field_scan8x8;
3821 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3826 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-frame decoding state that slice-thread workers
 * need; dst keeps its own bitstream/position state. */
3828 static void clone_slice(H264Context *dst, H264Context *src)
3830 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3831 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3832 dst->s.current_picture = src->s.current_picture;
3833 dst->s.linesize = src->s.linesize;
3834 dst->s.uvlinesize = src->s.uvlinesize;
3835 dst->s.first_field = src->s.first_field;
/* POC / frame-number history used by init_poc and reference handling. */
3837 dst->prev_poc_msb = src->prev_poc_msb;
3838 dst->prev_poc_lsb = src->prev_poc_lsb;
3839 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3840 dst->prev_frame_num = src->prev_frame_num;
3841 dst->short_ref_count = src->short_ref_count;
/* Reference lists and dequant tables are copied wholesale; entries are
 * Picture values / pointers shared with the master context. */
3843 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3844 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3845 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3846 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3848 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3849 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3853 * decodes a slice header.
3854 * This will also call MPV_common_init() and frame_start() as needed.
3856 * @param h h264context
3857 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3859 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3861 static int decode_slice_header(H264Context *h, H264Context *h0){
3862 MpegEncContext * const s = &h->s;
3863 MpegEncContext * const s0 = &h0->s;
3864 unsigned int first_mb_in_slice;
3865 unsigned int pps_id;
3866 int num_ref_idx_active_override_flag;
3867 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3868 unsigned int slice_type, tmp, i;
3869 int default_ref_list_done = 0;
3870 int last_pic_structure;
3872 s->dropable= h->nal_ref_idc == 0;
3874 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3875 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3876 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3878 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3879 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3882 first_mb_in_slice= get_ue_golomb(&s->gb);
3884 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3885 h0->current_slice = 0;
3886 if (!s0->first_field)
3887 s->current_picture_ptr= NULL;
3890 slice_type= get_ue_golomb(&s->gb);
3892 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3897 h->slice_type_fixed=1;
3899 h->slice_type_fixed=0;
3901 slice_type= slice_type_map[ slice_type ];
3902 if (slice_type == FF_I_TYPE
3903 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3904 default_ref_list_done = 1;
3906 h->slice_type= slice_type;
3908 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3909 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3910 av_log(h->s.avctx, AV_LOG_ERROR,
3911 "B picture before any references, skipping\n");
3915 pps_id= get_ue_golomb(&s->gb);
3916 if(pps_id>=MAX_PPS_COUNT){
3917 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3920 if(!h0->pps_buffers[pps_id]) {
3921 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3924 h->pps= *h0->pps_buffers[pps_id];
3926 if(!h0->sps_buffers[h->pps.sps_id]) {
3927 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3930 h->sps = *h0->sps_buffers[h->pps.sps_id];
3932 if(h == h0 && h->dequant_coeff_pps != pps_id){
3933 h->dequant_coeff_pps = pps_id;
3934 init_dequant_tables(h);
3937 s->mb_width= h->sps.mb_width;
3938 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3940 h->b_stride= s->mb_width*4;
3941 h->b8_stride= s->mb_width*2;
3943 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3944 if(h->sps.frame_mbs_only_flag)
3945 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3947 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3949 if (s->context_initialized
3950 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3952 return -1; // width / height changed during parallelized decoding
3956 if (!s->context_initialized) {
3958 return -1; // we cant (re-)initialize context during parallel decoding
3959 if (MPV_common_init(s) < 0)
3963 init_scan_tables(h);
3966 for(i = 1; i < s->avctx->thread_count; i++) {
3968 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3969 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3970 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3973 init_scan_tables(c);
3977 for(i = 0; i < s->avctx->thread_count; i++)
3978 if(context_init(h->thread_context[i]) < 0)
3981 s->avctx->width = s->width;
3982 s->avctx->height = s->height;
3983 s->avctx->sample_aspect_ratio= h->sps.sar;
3984 if(!s->avctx->sample_aspect_ratio.den)
3985 s->avctx->sample_aspect_ratio.den = 1;
3987 if(h->sps.timing_info_present_flag){
3988 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3989 if(h->x264_build > 0 && h->x264_build < 44)
3990 s->avctx->time_base.den *= 2;
3991 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3992 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3996 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3999 h->mb_aff_frame = 0;
4000 last_pic_structure = s0->picture_structure;
4001 if(h->sps.frame_mbs_only_flag){
4002 s->picture_structure= PICT_FRAME;
4004 if(get_bits1(&s->gb)) { //field_pic_flag
4005 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4007 s->picture_structure= PICT_FRAME;
4008 h->mb_aff_frame = h->sps.mb_aff;
4012 if(h0->current_slice == 0){
4013 /* See if we have a decoded first field looking for a pair... */
4014 if (s0->first_field) {
4015 assert(s0->current_picture_ptr);
4016 assert(s0->current_picture_ptr->data[0]);
4017 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4019 /* figure out if we have a complementary field pair */
4020 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4022 * Previous field is unmatched. Don't display it, but let it
4023 * remain for reference if marked as such.
4025 s0->current_picture_ptr = NULL;
4026 s0->first_field = FIELD_PICTURE;
4029 if (h->nal_ref_idc &&
4030 s0->current_picture_ptr->reference &&
4031 s0->current_picture_ptr->frame_num != h->frame_num) {
4033 * This and previous field were reference, but had
4034 * different frame_nums. Consider this field first in
4035 * pair. Throw away previous field except for reference
4038 s0->first_field = 1;
4039 s0->current_picture_ptr = NULL;
4042 /* Second field in complementary pair */
4043 s0->first_field = 0;
4048 /* Frame or first field in a potentially complementary pair */
4049 assert(!s0->current_picture_ptr);
4050 s0->first_field = FIELD_PICTURE;
4053 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4054 s0->first_field = 0;
4061 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4063 assert(s->mb_num == s->mb_width * s->mb_height);
4064 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4065 first_mb_in_slice >= s->mb_num){
4066 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4069 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4070 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4071 if (s->picture_structure == PICT_BOTTOM_FIELD)
4072 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4073 assert(s->mb_y < s->mb_height);
4075 if(s->picture_structure==PICT_FRAME){
4076 h->curr_pic_num= h->frame_num;
4077 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4079 h->curr_pic_num= 2*h->frame_num + 1;
4080 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4083 if(h->nal_unit_type == NAL_IDR_SLICE){
4084 get_ue_golomb(&s->gb); /* idr_pic_id */
4087 if(h->sps.poc_type==0){
4088 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4090 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4091 h->delta_poc_bottom= get_se_golomb(&s->gb);
4095 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4096 h->delta_poc[0]= get_se_golomb(&s->gb);
4098 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4099 h->delta_poc[1]= get_se_golomb(&s->gb);
4104 if(h->pps.redundant_pic_cnt_present){
4105 h->redundant_pic_count= get_ue_golomb(&s->gb);
4108 //set defaults, might be overriden a few line later
4109 h->ref_count[0]= h->pps.ref_count[0];
4110 h->ref_count[1]= h->pps.ref_count[1];
4112 if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
4113 if(h->slice_type == FF_B_TYPE){
4114 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4115 if(FIELD_PICTURE && h->direct_spatial_mv_pred)
4116 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF + spatial direct mode is not implemented\n");
4118 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4120 if(num_ref_idx_active_override_flag){
4121 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4122 if(h->slice_type==FF_B_TYPE)
4123 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4125 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4126 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4127 h->ref_count[0]= h->ref_count[1]= 1;
4131 if(h->slice_type == FF_B_TYPE)
4138 if(!default_ref_list_done){
4139 fill_default_ref_list(h);
4142 if(decode_ref_pic_list_reordering(h) < 0)
4145 if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
4146 || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
4147 pred_weight_table(h);
4148 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
4149 implicit_weight_table(h);
4154 decode_ref_pic_marking(h0, &s->gb);
4157 fill_mbaff_ref_list(h);
4159 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
4160 tmp = get_ue_golomb(&s->gb);
4162 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4165 h->cabac_init_idc= tmp;
4168 h->last_qscale_diff = 0;
4169 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4171 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4175 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4176 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4177 //FIXME qscale / qp ... stuff
4178 if(h->slice_type == FF_SP_TYPE){
4179 get_bits1(&s->gb); /* sp_for_switch_flag */
4181 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4182 get_se_golomb(&s->gb); /* slice_qs_delta */
4185 h->deblocking_filter = 1;
4186 h->slice_alpha_c0_offset = 0;
4187 h->slice_beta_offset = 0;
4188 if( h->pps.deblocking_filter_parameters_present ) {
4189 tmp= get_ue_golomb(&s->gb);
4191 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4194 h->deblocking_filter= tmp;
4195 if(h->deblocking_filter < 2)
4196 h->deblocking_filter^= 1; // 1<->0
4198 if( h->deblocking_filter ) {
4199 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4200 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4204 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4205 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
4206 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
4207 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4208 h->deblocking_filter= 0;
4210 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4211 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4212 /* Cheat slightly for speed:
4213 Do not bother to deblock across slices. */
4214 h->deblocking_filter = 2;
4216 h0->max_contexts = 1;
4217 if(!h0->single_decode_warning) {
4218 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4219 h0->single_decode_warning = 1;
4222 return 1; // deblocking switched inside frame
4227 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4228 slice_group_change_cycle= get_bits(&s->gb, ?);
4231 h0->last_slice_type = slice_type;
4232 h->slice_num = ++h0->current_slice;
4234 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4235 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4237 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4238 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4240 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4242 av_get_pict_type_char(h->slice_type),
4243 pps_id, h->frame_num,
4244 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4245 h->ref_count[0], h->ref_count[1],
4247 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4249 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4250 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: the count of leading zero bits before the
 * first one-bit in the bitstream (H.264 spec 9.2.2.1).
 * NOTE(review): this chunk is line-sampled — the declarations of buf/log
 * and the final return statement are not visible here; confirm against the
 * full file before relying on this view.
 */
4260 static inline int get_level_prefix(GetBitContext *gb){
4264 OPEN_READER(re, gb); /* raw bit-reader macro block: re is the local read state */
4265 UPDATE_CACHE(re, gb);
4266 buf=GET_CACHE(re, gb); /* peek the cached 32-bit window */
4268 log= 32 - av_log2(buf); /* bit position of the leading one-bit => prefix length + 1 */
4270 print_bin(buf>>(32-log), log); /* debug-only bit trace */
4271 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4274 LAST_SKIP_BITS(re, gb, log); /* consume the prefix zeros plus the terminating one-bit */
4275 CLOSE_READER(re, gb);
/**
 * Decides whether the 8x8 transform may be used for the current macroblock,
 * based on its sub-macroblock partition types.
 * NOTE(review): line-sampled view — the loop header over i and the return
 * statement(s) are missing here; only the per-partition test is visible.
 */
4280 static inline int get_dct8x8_allowed(H264Context *h){
4283 if(!IS_SUB_8X8(h->sub_mb_type[i]) /* any partition smaller than 8x8 forbids the 8x8 DCT */
4284 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i]))) /* direct w/o 8x8 inference also forbids it */
4291 * decodes a residual block.
4292 * @param n block index
4293 * @param scantable scantable
4294 * @param max_coeff number of coefficients in the block
4295 * @return <0 if an error occurred
/* CAVLC residual decoding (H.264 spec 9.2): reads coeff_token, trailing-one
 * signs, level prefixes/suffixes, total_zeros and run_before, then scatters
 * the levels into `block` via `scantable` (optionally dequantizing by qmul).
 * NOTE(review): this chunk is line-sampled — several branch/else/return
 * lines are absent from this view. */
4297 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4298 MpegEncContext * const s = &h->s;
/* maps total_coeff of the neighbourhood to one of the 4 coeff_token VLC tables */
4299 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4301 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4303 //FIXME put trailing_onex into the context
/* --- coeff_token: chroma DC, luma DC and regular blocks use different VLCs --- */
4305 if(n == CHROMA_DC_BLOCK_INDEX){
4306 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4307 total_coeff= coeff_token>>2;
4309 if(n == LUMA_DC_BLOCK_INDEX){
4310 total_coeff= pred_non_zero_count(h, 0); /* neighbour-predicted nC selects the VLC table */
4311 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4312 total_coeff= coeff_token>>2;
4314 total_coeff= pred_non_zero_count(h, n);
4315 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4316 total_coeff= coeff_token>>2;
4317 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4321 //FIXME set last_non_zero?
4325 if(total_coeff > (unsigned)max_coeff) {
4326 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* --- levels: trailing ones are sign-only, the rest use prefix/suffix codes --- */
4330 trailing_ones= coeff_token&3;
4331 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4332 assert(total_coeff<=16);
4334 for(i=0; i<trailing_ones; i++){
4335 level[i]= 1 - 2*get_bits1(gb); /* one sign bit: 0 -> +1, 1 -> -1 */
4339 int level_code, mask;
4340 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4341 int prefix= get_level_prefix(gb);
4343 //first coefficient has suffix_length equal to 0 or 1
4344 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4346 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4348 level_code= (prefix<<suffix_length); //part
4349 }else if(prefix==14){
4351 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4353 level_code= prefix + get_bits(gb, 4); //part
4355 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4356 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4358 level_code += (1<<(prefix-3))-4096;
4361 if(trailing_ones < 3) level_code += 2;
4366 mask= -(level_code&1); /* branchless: even codes -> positive, odd -> negative */
4367 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4370 //remaining coefficients have suffix_length > 0
4371 for(;i<total_coeff;i++) {
4372 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4373 prefix = get_level_prefix(gb);
4375 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4377 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4379 level_code += (1<<(prefix-3))-4096;
4381 mask= -(level_code&1);
4382 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4383 if(level_code > suffix_limit[suffix_length]) /* adapt suffix_length as magnitudes grow */
/* --- total_zeros / run_before: place levels into the scan order --- */
4388 if(total_coeff == max_coeff)
4391 if(n == CHROMA_DC_BLOCK_INDEX)
4392 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4394 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4397 coeff_num = zeros_left + total_coeff - 1;
4398 j = scantable[coeff_num];
/* qmul==NULL path: store raw levels (used for DC blocks dequantized later) */
4400 block[j] = level[0];
4401 for(i=1;i<total_coeff;i++) {
4404 else if(zeros_left < 7){
4405 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4407 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4409 zeros_left -= run_before;
4410 coeff_num -= 1 + run_before;
4411 j= scantable[ coeff_num ];
/* qmul path: same scatter loop but dequantize each level in place */
4416 block[j] = (level[0] * qmul[j] + 32)>>6;
4417 for(i=1;i<total_coeff;i++) {
4420 else if(zeros_left < 7){
4421 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4423 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4425 zeros_left -= run_before;
4426 coeff_num -= 1 + run_before;
4427 j= scantable[ coeff_num ];
4429 block[j]= (level[i] * qmul[j] + 32)>>6;
4434 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts the MBAFF field-decoding flag for a skipped macroblock pair from
 * the left neighbour if it belongs to this slice, otherwise from the top
 * neighbour; defaults to frame mode when neither is available (the fallback
 * value sits on a line not visible in this sampled view).
 */
4441 static void predict_field_decoding_flag(H264Context *h){
4442 MpegEncContext * const s = &h->s;
4443 const int mb_xy= h->mb_xy;
4444 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4445 ? s->current_picture.mb_type[mb_xy-1] /* left neighbour in same slice */
4446 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4447 ? s->current_picture.mb_type[mb_xy-s->mb_stride] /* else top neighbour */
4449 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4453 * decodes a P_SKIP or B_SKIP macroblock
/* Builds a synthetic mb_type for the skipped MB, derives its motion
 * (spatial/temporal direct for B_SKIP, pskip median prediction for P_SKIP),
 * then writes motion, mb_type, qscale and slice id back to the picture
 * arrays. NOTE(review): line-sampled — the mb_type initialisation and some
 * else branches are not visible here. */
4455 static void decode_mb_skip(H264Context *h){
4456 MpegEncContext * const s = &h->s;
4457 const int mb_xy= h->mb_xy;
4460 memset(h->non_zero_count[mb_xy], 0, 16); /* a skipped MB carries no residual */
4461 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4464 mb_type|= MB_TYPE_INTERLACED;
4466 if( h->slice_type == FF_B_TYPE )
4468 // just for fill_caches. pred_direct_motion will set the real mb_type
4469 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4471 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4472 pred_direct_motion(h, &mb_type); /* B_SKIP: direct-mode motion */
4473 mb_type|= MB_TYPE_SKIP;
4478 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4480 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4481 pred_pskip_motion(h, &mx, &my); /* P_SKIP: predicted MV, ref index 0 */
4482 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4483 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4486 write_back_motion(h, mb_type);
4487 s->current_picture.mb_type[mb_xy]= mb_type;
4488 s->current_picture.qscale_table[mb_xy]= s->qscale;
4489 h->slice_table[ mb_xy ]= h->slice_num;
4490 h->prev_mb_skipped= 1; /* remembered for dquant context / MBAFF handling */
4494 * decodes a macroblock
4495 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* CAVLC macroblock-layer parser (H.264 spec 7.3.5 / 7.4.5): handles skip
 * runs, mb_type, intra prediction modes, sub-MB partitioning, reference
 * indices, motion vector deltas, CBP, dquant and residual decoding.
 * NOTE(review): this chunk is line-sampled — many else branches, loop
 * headers, returns and closing braces are missing from this view; the
 * comments below annotate only the statements that are visible. */
4497 static int decode_mb_cavlc(H264Context *h){
4498 MpegEncContext * const s = &h->s;
4500 int partition_count;
4501 unsigned int mb_type, cbp;
4502 int dct8x8_allowed= h->pps.transform_8x8_mode;
4504 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4506 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4508 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4509 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run handling (P/B slices only) --- */
4511 if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
4512 if(s->mb_skip_run==-1)
4513 s->mb_skip_run= get_ue_golomb(&s->gb);
4515 if (s->mb_skip_run--) {
4516 if(FRAME_MBAFF && (s->mb_y&1) == 0){ /* top MB of an MBAFF pair */
4517 if(s->mb_skip_run==0)
4518 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4520 predict_field_decoding_flag(h);
4527 if( (s->mb_y&1) == 0 )
4528 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4530 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4532 h->prev_mb_skipped= 0;
/* --- mb_type: slice-type dependent lookup tables --- */
4534 mb_type= get_ue_golomb(&s->gb);
4535 if(h->slice_type == FF_B_TYPE){
4537 partition_count= b_mb_type_info[mb_type].partition_count;
4538 mb_type= b_mb_type_info[mb_type].type;
4541 goto decode_intra_mb; /* large mb_type values fall through to the intra path */
4543 }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
4545 partition_count= p_mb_type_info[mb_type].partition_count;
4546 mb_type= p_mb_type_info[mb_type].type;
4549 goto decode_intra_mb;
4552 assert(h->slice_type == FF_I_TYPE);
4555 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4559 cbp= i_mb_type_info[mb_type].cbp;
4560 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4561 mb_type= i_mb_type_info[mb_type].type;
4565 mb_type |= MB_TYPE_INTERLACED;
4567 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw pixel samples, byte-aligned --- */
4569 if(IS_INTRA_PCM(mb_type)){
4572 // We assume these blocks are very rare so we do not optimize it.
4573 align_get_bits(&s->gb);
4575 // The pixels are stored in the same order as levels in h->mb array.
4576 for(y=0; y<16; y++){
4577 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4578 for(x=0; x<16; x++){
4579 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4580 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4584 const int index= 256 + 4*(y&3) + 32*(y>>2);
4586 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4587 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4591 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4593 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4594 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4598 // In deblocking, the quantizer is 0
4599 s->current_picture.qscale_table[mb_xy]= 0;
4600 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4601 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4602 // All coeffs are present
4603 memset(h->non_zero_count[mb_xy], 16, 16);
4605 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field mode: double ref counts while parsing this MB pair */
4610 h->ref_count[0] <<= 1;
4611 h->ref_count[1] <<= 1;
4614 fill_caches(h, mb_type, 0);
/* --- intra prediction mode parsing --- */
4617 if(IS_INTRA(mb_type)){
4619 // init_top_left_availability(h);
4620 if(IS_INTRA4x4(mb_type)){
4623 if(dct8x8_allowed && get_bits1(&s->gb)){
4624 mb_type |= MB_TYPE_8x8DCT;
4628 // fill_intra4x4_pred_table(h);
4629 for(i=0; i<16; i+=di){
4630 int mode= pred_intra_mode(h, i);
4632 if(!get_bits1(&s->gb)){ /* prev_intra4x4_pred_mode_flag == 0 */
4633 const int rem_mode= get_bits(&s->gb, 3);
4634 mode = rem_mode + (rem_mode >= mode); /* skip the predicted mode */
4638 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4640 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4642 write_back_intra_pred_mode(h);
4643 if( check_intra4x4_pred_mode(h) < 0)
4646 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4647 if(h->intra16x16_pred_mode < 0)
4651 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4654 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-macroblock partitioning (P_8x8 / B_8x8) --- */
4655 }else if(partition_count==4){
4656 int i, j, sub_partition_count[4], list, ref[2][4];
4658 if(h->slice_type == FF_B_TYPE){
4660 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4661 if(h->sub_mb_type[i] >=13){
4662 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4665 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4666 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4668 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4669 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4670 pred_direct_motion(h, &mb_type);
4671 h->ref_cache[0][scan8[4]] =
4672 h->ref_cache[1][scan8[4]] =
4673 h->ref_cache[0][scan8[12]] =
4674 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4677 assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
4679 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4680 if(h->sub_mb_type[i] >=4){
4681 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4684 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4685 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 partition and list */
4689 for(list=0; list<h->list_count; list++){
4690 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4692 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4693 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4694 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4696 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4708 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector deltas per sub-partition */
4710 for(list=0; list<h->list_count; list++){
4712 if(IS_DIRECT(h->sub_mb_type[i])) {
4713 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4716 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4717 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4719 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4720 const int sub_mb_type= h->sub_mb_type[i];
4721 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4722 for(j=0; j<sub_partition_count[i]; j++){
4724 const int index= 4*i + block_width*j;
4725 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4726 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4727 mx += get_se_golomb(&s->gb);
4728 my += get_se_golomb(&s->gb);
4729 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV over the 4x4 cells this sub-partition covers */
4731 if(IS_SUB_8X8(sub_mb_type)){
4733 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4735 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4736 }else if(IS_SUB_8X4(sub_mb_type)){
4737 mv_cache[ 1 ][0]= mx;
4738 mv_cache[ 1 ][1]= my;
4739 }else if(IS_SUB_4X8(sub_mb_type)){
4740 mv_cache[ 8 ][0]= mx;
4741 mv_cache[ 8 ][1]= my;
4743 mv_cache[ 0 ][0]= mx;
4744 mv_cache[ 0 ][1]= my;
4747 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4753 }else if(IS_DIRECT(mb_type)){
4754 pred_direct_motion(h, &mb_type);
4755 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- whole-MB inter partitions: 16x16, 16x8, 8x16 --- */
4757 int list, mx, my, i;
4758 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4759 if(IS_16X16(mb_type)){
4760 for(list=0; list<h->list_count; list++){
4762 if(IS_DIR(mb_type, 0, list)){
4763 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4764 if(val >= h->ref_count[list]){
4765 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4769 val= LIST_NOT_USED&0xFF;
4770 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4772 for(list=0; list<h->list_count; list++){
4774 if(IS_DIR(mb_type, 0, list)){
4775 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4776 mx += get_se_golomb(&s->gb);
4777 my += get_se_golomb(&s->gb);
4778 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4780 val= pack16to32(mx,my);
4783 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4786 else if(IS_16X8(mb_type)){
4787 for(list=0; list<h->list_count; list++){
4790 if(IS_DIR(mb_type, i, list)){
4791 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4792 if(val >= h->ref_count[list]){
4793 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4797 val= LIST_NOT_USED&0xFF;
4798 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4801 for(list=0; list<h->list_count; list++){
4804 if(IS_DIR(mb_type, i, list)){
4805 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4806 mx += get_se_golomb(&s->gb);
4807 my += get_se_golomb(&s->gb);
4808 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4810 val= pack16to32(mx,my);
4813 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4817 assert(IS_8X16(mb_type));
4818 for(list=0; list<h->list_count; list++){
4821 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4822 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4823 if(val >= h->ref_count[list]){
4824 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4828 val= LIST_NOT_USED&0xFF;
4829 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4832 for(list=0; list<h->list_count; list++){
4835 if(IS_DIR(mb_type, i, list)){
4836 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4837 mx += get_se_golomb(&s->gb);
4838 my += get_se_golomb(&s->gb);
4839 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4841 val= pack16to32(mx,my);
4844 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4850 if(IS_INTER(mb_type))
4851 write_back_motion(h, mb_type);
/* --- coded block pattern --- */
4853 if(!IS_INTRA16x16(mb_type)){
4854 cbp= get_ue_golomb(&s->gb);
4856 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4860 if(IS_INTRA4x4(mb_type))
4861 cbp= golomb_to_intra4x4_cbp[cbp];
4863 cbp= golomb_to_inter_cbp[cbp];
4867 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4868 if(get_bits1(&s->gb)) /* transform_size_8x8_flag */
4869 mb_type |= MB_TYPE_8x8DCT;
4871 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: dquant, scan selection, per-block CAVLC decode --- */
4873 if(cbp || IS_INTRA16x16(mb_type)){
4874 int i8x8, i4x4, chroma_idx;
4876 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4877 const uint8_t *scan, *scan8x8, *dc_scan;
4879 // fill_non_zero_count_cache(h);
4881 if(IS_INTERLACED(mb_type)){
4882 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4883 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4884 dc_scan= luma_dc_field_scan;
4886 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4887 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4888 dc_scan= luma_dc_zigzag_scan;
4891 dquant= get_se_golomb(&s->gb);
4893 if( dquant > 25 || dquant < -26 ){
4894 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4898 s->qscale += dquant;
4899 if(((unsigned)s->qscale) > 51){ /* wrap QP back into [0,51] */
4900 if(s->qscale<0) s->qscale+= 52;
4901 else s->qscale-= 52;
4904 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4905 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4906 if(IS_INTRA16x16(mb_type)){
4907 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4908 return -1; //FIXME continue if partitioned and other return -1 too
4911 assert((cbp&15) == 0 || (cbp&15) == 15);
4914 for(i8x8=0; i8x8<4; i8x8++){
4915 for(i4x4=0; i4x4<4; i4x4++){
4916 const int index= i4x4 + 4*i8x8;
4917 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4923 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4926 for(i8x8=0; i8x8<4; i8x8++){
4927 if(cbp & (1<<i8x8)){
4928 if(IS_8x8DCT(mb_type)){
4929 DCTELEM *buf = &h->mb[64*i8x8];
4931 for(i4x4=0; i4x4<4; i4x4++){
4932 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4933 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4936 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4937 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4939 for(i4x4=0; i4x4<4; i4x4++){
4940 const int index= i4x4 + 4*i8x8;
4942 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4948 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4949 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4955 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4956 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4962 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4963 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4964 for(i4x4=0; i4x4<4; i4x4++){
4965 const int index= 16 + 4*chroma_idx + i4x4;
4966 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4972 uint8_t * const nnz= &h->non_zero_count_cache[0];
4973 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4974 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4977 uint8_t * const nnz= &h->non_zero_count_cache[0];
4978 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4979 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4980 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4982 s->current_picture.qscale_table[mb_xy]= s->qscale;
4983 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling from above */
4986 h->ref_count[0] >>= 1;
4987 h->ref_count[1] >>= 1;
/**
 * Decodes the CABAC mb_field_decoding_flag for an MBAFF pair.
 * Context (0..2) counts how many of the left/top neighbouring pairs in the
 * same slice are field-coded (the ctx increments sit on lines not visible
 * in this sampled view).
 */
4993 static int decode_cabac_field_decoding_flag(H264Context *h) {
4994 MpegEncContext * const s = &h->s;
4995 const int mb_x = s->mb_x;
4996 const int mb_y = s->mb_y & ~1; /* top MB of the current pair */
4997 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride; /* left pair */
4998 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride; /* pair above */
5000 unsigned int ctx = 0;
5002 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5005 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5009 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * Returns 0 for I4x4, 25 for I_PCM, or 1..24 encoding the I16x16 variant
 * (prediction mode, cbp_luma and cbp_chroma packed into the value).
 * @param ctx_base  offset into cabac_state for this slice type's contexts
 * @param intra_slice nonzero in I slices (uses neighbour-derived context)
 */
5012 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5013 uint8_t *state= &h->cabac_state[ctx_base];
5017 MpegEncContext * const s = &h->s;
5018 const int mba_xy = h->left_mb_xy[0];
5019 const int mbb_xy = h->top_mb_xy;
/* ctx grows for each non-I4x4 neighbour in the same slice (increments on sampled-out lines) */
5021 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5023 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5025 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5026 return 0; /* I4x4 */
5029 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5030 return 0; /* I4x4 */
5033 if( get_cabac_terminate( &h->cabac ) )
5034 return 25; /* PCM */
5036 mb_type = 1; /* I16x16 */
5037 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5038 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5039 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5040 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5041 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on slice type:
 * I slices delegate to decode_cabac_intra_mb_type; P slices use a small
 * binary tree; B slices decode a variable-length bin string whose value
 * selects among the B partition types (or falls through to intra).
 */
5045 static int decode_cabac_mb_type( H264Context *h ) {
5046 MpegEncContext * const s = &h->s;
5048 if( h->slice_type == FF_I_TYPE ) {
5049 return decode_cabac_intra_mb_type(h, 3, 1);
5050 } else if( h->slice_type == FF_P_TYPE ) {
5051 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5053 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5054 /* P_L0_D16x16, P_8x8 */
5055 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5057 /* P_L0_D8x16, P_L0_D16x8 */
5058 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5061 return decode_cabac_intra_mb_type(h, 17, 0) + 5; /* intra MB inside a P slice */
5063 } else if( h->slice_type == FF_B_TYPE ) {
5064 const int mba_xy = h->left_mb_xy[0];
5065 const int mbb_xy = h->top_mb_xy;
/* context from non-direct neighbours in the same slice (increments sampled out) */
5069 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5071 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5074 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5075 return 0; /* B_Direct_16x16 */
5077 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5078 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bin suffix selecting among the remaining B types */
5081 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5082 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5083 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5084 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5086 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5087 else if( bits == 13 ) {
5088 return decode_cabac_intra_mb_type(h, 32, 0) + 23; /* intra MB inside a B slice */
5089 } else if( bits == 14 )
5090 return 11; /* B_L1_L0_8x16 */
5091 else if( bits == 15 )
5092 return 22; /* B_8x8 */
5094 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5095 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5097 /* TODO SI/SP frames? */
/**
 * Decodes the CABAC mb_skip_flag.
 * Context (plus 13 offset for B slices) counts non-skipped left/top
 * neighbours in the same slice; the MBAFF branch first adjusts the
 * neighbour addresses for field/frame coded pairs.
 */
5102 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5103 MpegEncContext * const s = &h->s;
5107 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5108 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride; /* top MB of the pair */
5111 && h->slice_table[mba_xy] == h->slice_num
5112 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5113 mba_xy += s->mb_stride; /* same coding mode: use the bottom MB of the left pair */
5115 mbb_xy = mb_xy - s->mb_stride;
5117 && h->slice_table[mbb_xy] == h->slice_num
5118 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5119 mbb_xy -= s->mb_stride; /* field-coded pair above: step to its top MB */
5121 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5123 int mb_xy = h->mb_xy;
5125 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
/* ctx increments for non-skipped neighbours live on sampled-out lines */
5128 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5130 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5133 if( h->slice_type == FF_B_TYPE )
5135 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes a CABAC intra4x4 prediction mode: one bin selects "use the
 * predicted mode", otherwise three fixed bins give rem_intra4x4_pred_mode,
 * adjusted upward when it reaches the predicted mode (return statements are
 * on lines not visible in this sampled view).
 */
5138 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5141 if( get_cabac( &h->cabac, &h->cabac_state[68] ) ) /* prev_intra4x4_pred_mode_flag */
5144 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5145 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5146 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5148 if( mode >= pred_mode ) /* skip over the predicted mode */
/**
 * Decodes the CABAC intra chroma prediction mode (0..3) as a truncated
 * unary code; the first bin's context depends on whether the left/top
 * neighbours used a nonzero chroma mode.
 */
5154 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5155 const int mba_xy = h->left_mb_xy[0];
5156 const int mbb_xy = h->top_mb_xy;
5160 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5161 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5164 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5167 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) /* mode 0 */
5170 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) /* mode 1 */
5172 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) /* mode 2, else 3 */
/**
 * Decodes the 4-bit luma coded-block-pattern with CABAC.
 * Each 8x8 block's bin uses a context built from whether the spatially
 * adjacent 8x8 blocks (left/top, possibly in neighbouring MBs) were coded;
 * -1 for out-of-slice neighbours makes the !(cbp & mask) tests read "coded".
 */
5178 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5179 int cbp_b, cbp_a, ctx, cbp = 0;
5181 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5182 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5184 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5185 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]); /* bit 0: top-left 8x8 */
5186 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5187 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1; /* bit 1: top-right */
5188 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5189 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2; /* bit 2: bottom-left */
5190 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5191 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3; /* bit 3: bottom-right */
/**
 * Decodes the chroma CBP (0 = none, 1 = DC only, 2 = DC+AC) with CABAC.
 * Both bins derive their context from the neighbours' chroma CBP values
 * stored in the high nibble of left_cbp/top_cbp.
 */
5194 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5198 cbp_a = (h->left_cbp>>4)&0x03;
5199 cbp_b = (h-> top_cbp>>4)&0x03;
5202 if( cbp_a > 0 ) ctx++;
5203 if( cbp_b > 0 ) ctx += 2;
5204 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5208 if( cbp_a == 2 ) ctx++; /* second bin: neighbours with DC+AC */
5209 if( cbp_b == 2 ) ctx += 2;
5210 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC: a unary bin string whose length is mapped
 * back to a signed delta (odd counts negative). First-bin context depends on
 * whether the previous MB changed QP. NOTE(review): line-sampled — the val
 * initialisation, loop body and positive-return path are not visible here.
 */
5212 static int decode_cabac_mb_dqp( H264Context *h) {
5216 if( h->last_qscale_diff != 0 )
5219 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5225 if(val > 102) //prevent infinite loop
5232 return -(val + 1)/2; /* odd unary lengths map to negative deltas */
/**
 * Decodes a P-slice sub_mb_type (8x8/8x4/4x8/4x4) from a 3-level CABAC
 * binary tree using contexts 21..23. NOTE(review): line-sampled — the
 * return statements for each leaf are not visible in this view.
 */
5234 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5235 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5237 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5239 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type with CABAC: direct, single-list 8x8, or a
 * deeper tree (contexts 36..39) selecting the smaller partitions and
 * bi-predicted shapes (the final return of `type` is on a sampled-out line).
 */
5243 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5245 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5246 return 0; /* B_Direct_8x8 */
5247 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5248 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5250 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5251 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5252 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5255 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5256 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag; context 399..401 is selected by how many
 * neighbouring MBs already use the 8x8 transform (h->neighbor_transform_size).
 */
5260 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5261 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5264 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5265 int refa = h->ref_cache[list][scan8[n] - 1];
5266 int refb = h->ref_cache[list][scan8[n] - 8];
5270 if( h->slice_type == FF_B_TYPE) {
5271 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5273 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5282 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5288 if(ref >= 32 /*h->ref_list[list]*/){
5289 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5290 return 0; //FIXME we should return -1 and check the return everywhere
5296 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5297 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5298 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5299 int ctxbase = (l == 0) ? 40 : 47;
5304 else if( amvd > 32 )
5309 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5314 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5322 while( get_cabac_bypass( &h->cabac ) ) {
5326 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5331 if( get_cabac_bypass( &h->cabac ) )
5335 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Computes the coded_block_flag context increment for a block of category
 * `cat` at index `idx`: looks up whether the left (nza) and top (nzb)
 * neighbouring blocks of the same category had nonzero coefficients,
 * either from the packed left/top_cbp bits (DC cases) or from the
 * non_zero_count cache (AC/luma cases). Returns ctx + 4*cat, matching
 * the 4-contexts-per-category layout at cabac_state[85+...].
 * NOTE(review): the branch structure and final ctx computation between
 * the visible lines are elided in this excerpt. */
5338 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5344 nza = h->left_cbp&0x100; /* luma DC flag stored in bit 8 of the packed cbp */
5345 nzb = h-> top_cbp&0x100;
5347 nza = (h->left_cbp>>(6+idx))&0x01; /* chroma DC flags in bits 6..7 */
5348 nzb = (h-> top_cbp>>(6+idx))&0x01;
5352 nza = h->non_zero_count_cache[scan8[16+idx] - 1]; /* chroma AC */
5353 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5355 assert(cat == 1 || cat == 2);
5356 nza = h->non_zero_count_cache[scan8[idx] - 1]; /* luma AC / 4x4 */
5357 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5367 return ctx + 4 * cat;
/* Context-increment table for last_significant_coeff_flag in 8x8 blocks:
 * maps scan position 0..62 to the shared ctxIdxInc used by both C and
 * asm significance decoders (referenced via DECODE_SIGNIFICANCE below).
 * Declared with asm-visible alignment/linkage for the x86 path. */
5370 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5371 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5372 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5373 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5374 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Core CABAC residual decoder: reads the coded_block_flag, the
 * significance map, and the coefficient levels/signs for one block of
 * category `cat` (see the cat table below), writing dequantized levels
 * into `block` at scantable positions, and updating the
 * non_zero_count_cache / cbp_table bookkeeping.
 * `is_dc` selects the DC paths (no qmul dequant); forced inline so the
 * is_dc and cat branches constant-fold in the specialized wrappers.
 * NOTE(review): this excerpt elides a number of lines (loop bodies,
 * #else/#endif branches, some declarations) — treat the structure below
 * as a partial view. */
5377 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* Context base offsets per [MB_FIELD][cat], per the H.264 ctxIdx layout. */
5378 static const int significant_coeff_flag_offset[2][6] = {
5379 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5380 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5382 static const int last_coeff_flag_offset[2][6] = {
5383 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5384 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5386 static const int coeff_abs_level_m1_offset[6] = {
5387 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* Per-scan-position ctxIdxInc for significant_coeff_flag in 8x8 blocks,
 * frame ([0]) vs field ([1]) scan. */
5389 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5390 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5391 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5392 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5393 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5394 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5395 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5396 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5397 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5399 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5400 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5401 * map node ctx => cabac ctx for level=1 */
5402 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5403 /* map node ctx => cabac ctx for level>1 */
5404 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5405 static const uint8_t coeff_abs_level_transition[2][8] = {
5406 /* update node ctx after decoding a level=1 */
5407 { 1, 2, 3, 3, 4, 5, 6, 7 },
5408 /* update node ctx after decoding a level>1 */
5409 { 4, 4, 4, 4, 5, 6, 7, 7 }
5415 int coeff_count = 0;
5418 uint8_t *significant_coeff_ctx_base;
5419 uint8_t *last_coeff_ctx_base;
5420 uint8_t *abs_level_m1_ctx_base;
/* Copy the CABAC state to a stack-local context (cc) so the hot loops
 * work on registers/stack instead of dereferencing h->cabac. */
5423 #define CABAC_ON_STACK
5425 #ifdef CABAC_ON_STACK
5428 cc.range = h->cabac.range;
5429 cc.low = h->cabac.low;
5430 cc.bytestream= h->cabac.bytestream;
5432 #define CC &h->cabac
5436 /* cat: 0-> DC 16x16 n = 0
5437 * 1-> AC 16x16 n = luma4x4idx
5438 * 2-> Luma4x4 n = luma4x4idx
5439 * 3-> DC Chroma n = iCbCr
5440 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5441 * 5-> Luma8x8 n = 4 * luma8x8idx
5444 /* read coded block flag */
5445 if( is_dc || cat != 5 ) { /* 8x8 luma blocks have no coded_block_flag */
5446 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* Flag absent: zero the nnz cache entry and bail out early. */
5449 h->non_zero_count_cache[scan8[16+n]] = 0;
5451 h->non_zero_count_cache[scan8[n]] = 0;
5454 #ifdef CABAC_ON_STACK
/* Write the (advanced) stack CABAC state back before returning. */
5455 h->cabac.range = cc.range ;
5456 h->cabac.low = cc.low ;
5457 h->cabac.bytestream= cc.bytestream;
5463 significant_coeff_ctx_base = h->cabac_state
5464 + significant_coeff_flag_offset[MB_FIELD][cat];
5465 last_coeff_ctx_base = h->cabac_state
5466 + last_coeff_flag_offset[MB_FIELD][cat];
5467 abs_level_m1_ctx_base = h->cabac_state
5468 + coeff_abs_level_m1_offset[cat];
5470 if( !is_dc && cat == 5 ) {
/* Significance-map loop: one significant_coeff_flag per position, and a
 * last_coeff_flag after each significant one; stores positions in index[]. */
5471 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5472 for(last= 0; last < coefs; last++) { \
5473 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5474 if( get_cabac( CC, sig_ctx )) { \
5475 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5476 index[coeff_count++] = last; \
5477 if( get_cabac( CC, last_ctx ) ) { \
5483 if( last == max_coeff -1 ) {\
5484 index[coeff_count++] = last;\
5486 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5487 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5488 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5490 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5492 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5494 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5497 assert(coeff_count > 0);
/* Record which blocks have coefficients: bit 8 = luma DC, 0x40<<n = chroma DC. */
5501 h->cbp_table[h->mb_xy] |= 0x100;
5503 h->cbp_table[h->mb_xy] |= 0x40 << n;
5506 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5508 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5510 assert( cat == 1 || cat == 2 );
5511 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level loop, back-to-front: a level==1 bin, optional unary >1 magnitude
 * (capped at 15, then bypass exp-golomb suffix), then a bypass sign. */
5515 while( coeff_count-- ) {
5516 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5518 int j= scantable[index[coeff_count]];
5520 if( get_cabac( CC, ctx ) == 0 ) {
5521 node_ctx = coeff_abs_level_transition[0][node_ctx];
5523 block[j] = get_cabac_bypass_sign( CC, -1); /* |level|==1, DC: no dequant */
5525 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5529 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5530 node_ctx = coeff_abs_level_transition[1][node_ctx];
5532 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5536 if( coeff_abs >= 15 ) {
5538 while( get_cabac_bypass( CC ) ) { /* exp-golomb prefix */
5544 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5550 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5552 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5556 #ifdef CABAC_ON_STACK
5557 h->cabac.range = cc.range ;
5558 h->cabac.low = cc.low ;
5559 h->cabac.bytestream= cc.bytestream;
/* Size/speed trade-off: unless CONFIG_SMALL, instantiate two specialized
 * copies of decode_cabac_residual_internal (is_dc constant-folded to 1/0)
 * and dispatch on cat at the call site; with CONFIG_SMALL a single
 * generic entry point computes is_dc from cat (0 and 3 are the DC cats). */
5564 #ifndef CONFIG_SMALL
5565 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5566 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5569 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5570 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5574 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5576 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5578 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5579 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5583 static inline void compute_mb_neighbors(H264Context *h)
5585 MpegEncContext * const s = &h->s;
5586 const int mb_xy = h->mb_xy;
5587 h->top_mb_xy = mb_xy - s->mb_stride;
5588 h->left_mb_xy[0] = mb_xy - 1;
5590 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5591 const int top_pair_xy = pair_xy - s->mb_stride;
5592 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5593 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5594 const int curr_mb_frame_flag = !MB_FIELD;
5595 const int bottom = (s->mb_y & 1);
5597 ? !curr_mb_frame_flag // bottom macroblock
5598 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5600 h->top_mb_xy -= s->mb_stride;
5602 if (left_mb_frame_flag != curr_mb_frame_flag) {
5603 h->left_mb_xy[0] = pair_xy - 1;
5605 } else if (FIELD_PICTURE) {
5606 h->top_mb_xy -= s->mb_stride;
/* Top-level CABAC macroblock decoder: skip flags, mb_type, IPCM handling,
 * intra prediction modes or inter refs/MVs, CBP, qp delta and residuals.
 * NOTE(review): this excerpt elides many lines (else branches, closing
 * braces, some declarations); comments below annotate only the visible
 * code. */
5612 * decodes a macroblock
5613 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5615 static int decode_mb_cabac(H264Context *h) {
5616 MpegEncContext * const s = &h->s;
5618 int mb_type, partition_count, cbp = 0;
5619 int dct8x8_allowed= h->pps.transform_8x8_mode;
5621 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5623 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5625 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (P/B slices only; I/SI have no skip) --- */
5626 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
5628 /* a skipped mb needs the aff flag from the following mb */
5629 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5630 predict_field_decoding_flag(h);
5631 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5632 skip = h->next_mb_skipped; /* bottom mb's skip was pre-read with the top mb */
5634 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5635 /* read skip flags */
5637 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5638 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5639 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5640 if(h->next_mb_skipped)
5641 predict_field_decoding_flag(h);
5643 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped mb: clear per-mb state and return early (elided). */
5648 h->cbp_table[mb_xy] = 0;
5649 h->chroma_pred_mode_table[mb_xy] = 0;
5650 h->last_qscale_diff = 0;
5657 if( (s->mb_y&1) == 0 )
5659 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5661 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5663 h->prev_mb_skipped = 0;
/* --- mb_type decoding and mapping through the per-slice-type tables --- */
5665 compute_mb_neighbors(h);
5666 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5667 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5671 if( h->slice_type == FF_B_TYPE ) {
5673 partition_count= b_mb_type_info[mb_type].partition_count;
5674 mb_type= b_mb_type_info[mb_type].type;
5677 goto decode_intra_mb;
5679 } else if( h->slice_type == FF_P_TYPE ) {
5681 partition_count= p_mb_type_info[mb_type].partition_count;
5682 mb_type= p_mb_type_info[mb_type].type;
5685 goto decode_intra_mb;
5688 assert(h->slice_type == FF_I_TYPE);
5690 partition_count = 0;
5691 cbp= i_mb_type_info[mb_type].cbp;
5692 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5693 mb_type= i_mb_type_info[mb_type].type;
5696 mb_type |= MB_TYPE_INTERLACED;
5698 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw samples follow; realign the CABAC engine around them --- */
5700 if(IS_INTRA_PCM(mb_type)) {
5704 // We assume these blocks are very rare so we do not optimize it.
5705 // FIXME The two following lines get the bitstream position in the cabac
5706 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5707 ptr= h->cabac.bytestream;
5708 if(h->cabac.low&0x1) ptr--;
5710 if(h->cabac.low&0x1FF) ptr--;
5713 // The pixels are stored in the same order as levels in h->mb array.
5714 for(y=0; y<16; y++){
5715 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5716 for(x=0; x<16; x++){
5717 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5718 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5722 const int index= 256 + 4*(y&3) + 32*(y>>2);
5724 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5725 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5729 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5731 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5732 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* Restart CABAC after the raw payload. */
5736 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5738 // All blocks are present
5739 h->cbp_table[mb_xy] = 0x1ef;
5740 h->chroma_pred_mode_table[mb_xy] = 0;
5741 // In deblocking, the quantizer is 0
5742 s->current_picture.qscale_table[mb_xy]= 0;
5743 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5744 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5745 // All coeffs are present
5746 memset(h->non_zero_count[mb_xy], 16, 16);
5747 s->current_picture.mb_type[mb_xy]= mb_type;
5748 h->last_qscale_diff = 0;
/* MBAFF: refs are per-field, so double the counts for this mb (undone at the end). */
5753 h->ref_count[0] <<= 1;
5754 h->ref_count[1] <<= 1;
5757 fill_caches(h, mb_type, 0);
/* --- intra prediction mode decoding --- */
5759 if( IS_INTRA( mb_type ) ) {
5761 if( IS_INTRA4x4( mb_type ) ) {
5762 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5763 mb_type |= MB_TYPE_8x8DCT;
5764 for( i = 0; i < 16; i+=4 ) { /* 8x8: one mode per 2x2 group of 4x4 blocks */
5765 int pred = pred_intra_mode( h, i );
5766 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5767 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5770 for( i = 0; i < 16; i++ ) {
5771 int pred = pred_intra_mode( h, i );
5772 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5774 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5777 write_back_intra_pred_mode(h);
5778 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5780 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5781 if( h->intra16x16_pred_mode < 0 ) return -1;
5783 h->chroma_pred_mode_table[mb_xy] =
5784 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5786 pred_mode= check_intra_pred_mode( h, pred_mode );
5787 if( pred_mode < 0 ) return -1;
5788 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions: sub_mb_types, refs, then MVDs per sub-block --- */
5789 } else if( partition_count == 4 ) {
5790 int i, j, sub_partition_count[4], list, ref[2][4];
5792 if( h->slice_type == FF_B_TYPE ) {
5793 for( i = 0; i < 4; i++ ) {
5794 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5795 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5796 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5798 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5799 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5800 pred_direct_motion(h, &mb_type);
5801 h->ref_cache[0][scan8[4]] =
5802 h->ref_cache[1][scan8[4]] =
5803 h->ref_cache[0][scan8[12]] =
5804 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5805 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5806 for( i = 0; i < 4; i++ )
5807 if( IS_DIRECT(h->sub_mb_type[i]) )
5808 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5812 for( i = 0; i < 4; i++ ) {
5813 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5814 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5815 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5819 for( list = 0; list < h->list_count; list++ ) {
5820 for( i = 0; i < 4; i++ ) {
5821 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5822 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5823 if( h->ref_count[list] > 1 )
5824 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5830 h->ref_cache[list][ scan8[4*i]+1 ]=
5831 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5836 dct8x8_allowed = get_dct8x8_allowed(h);
5838 for(list=0; list<h->list_count; list++){
5840 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5841 if(IS_DIRECT(h->sub_mb_type[i])){
5842 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5846 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5847 const int sub_mb_type= h->sub_mb_type[i];
5848 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5849 for(j=0; j<sub_partition_count[i]; j++){
5852 const int index= 4*i + block_width*j;
5853 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5854 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5855 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5857 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5858 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5859 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the decoded mv/mvd into every 4x4 cell the sub-partition covers. */
5861 if(IS_SUB_8X8(sub_mb_type)){
5863 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5865 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5868 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5870 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5871 }else if(IS_SUB_8X4(sub_mb_type)){
5872 mv_cache[ 1 ][0]= mx;
5873 mv_cache[ 1 ][1]= my;
5875 mvd_cache[ 1 ][0]= mx - mpx;
5876 mvd_cache[ 1 ][1]= my - mpy;
5877 }else if(IS_SUB_4X8(sub_mb_type)){
5878 mv_cache[ 8 ][0]= mx;
5879 mv_cache[ 8 ][1]= my;
5881 mvd_cache[ 8 ][0]= mx - mpx;
5882 mvd_cache[ 8 ][1]= my - mpy;
5884 mv_cache[ 0 ][0]= mx;
5885 mv_cache[ 0 ][1]= my;
5887 mvd_cache[ 0 ][0]= mx - mpx;
5888 mvd_cache[ 0 ][1]= my - mpy;
/* Partition not predicted from this list: zero the 2x2 mv/mvd cells. */
5891 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5892 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5893 p[0] = p[1] = p[8] = p[9] = 0;
5894 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5898 } else if( IS_DIRECT(mb_type) ) {
5899 pred_direct_motion(h, &mb_type);
5900 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5901 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5902 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter, whole-mb partitions: 16x16 / 16x8 / 8x16 --- */
5904 int list, mx, my, i, mpx, mpy;
5905 if(IS_16X16(mb_type)){
5906 for(list=0; list<h->list_count; list++){
5907 if(IS_DIR(mb_type, 0, list)){
5908 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5909 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5911 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5913 for(list=0; list<h->list_count; list++){
5914 if(IS_DIR(mb_type, 0, list)){
5915 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5917 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5918 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5919 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5921 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5922 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5924 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5927 else if(IS_16X8(mb_type)){
5928 for(list=0; list<h->list_count; list++){
5930 if(IS_DIR(mb_type, i, list)){
5931 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5932 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5934 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5937 for(list=0; list<h->list_count; list++){
5939 if(IS_DIR(mb_type, i, list)){
5940 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5941 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5942 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5943 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5945 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5946 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5948 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5949 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5954 assert(IS_8X16(mb_type));
5955 for(list=0; list<h->list_count; list++){
5957 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5958 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5959 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5961 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5964 for(list=0; list<h->list_count; list++){
5966 if(IS_DIR(mb_type, i, list)){
5967 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5968 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5969 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5971 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5972 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5973 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5975 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5976 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5983 if( IS_INTER( mb_type ) ) {
5984 h->chroma_pred_mode_table[mb_xy] = 0;
5985 write_back_motion( h, mb_type );
/* --- CBP, optional 8x8 transform flag, qp delta and residual blocks --- */
5988 if( !IS_INTRA16x16( mb_type ) ) {
5989 cbp = decode_cabac_mb_cbp_luma( h );
5990 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5993 h->cbp_table[mb_xy] = h->cbp = cbp;
5995 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5996 if( decode_cabac_mb_transform_size( h ) )
5997 mb_type |= MB_TYPE_8x8DCT;
5999 s->current_picture.mb_type[mb_xy]= mb_type;
6001 if( cbp || IS_INTRA16x16( mb_type ) ) {
6002 const uint8_t *scan, *scan8x8, *dc_scan;
6003 const uint32_t *qmul;
6006 if(IS_INTERLACED(mb_type)){
6007 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6008 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6009 dc_scan= luma_dc_field_scan;
6011 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6012 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6013 dc_scan= luma_dc_zigzag_scan;
6016 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6017 if( dqp == INT_MIN ){
6018 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's mb_qp_delta arithmetic. */
6022 if(((unsigned)s->qscale) > 51){
6023 if(s->qscale<0) s->qscale+= 52;
6024 else s->qscale-= 52;
6026 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6027 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6029 if( IS_INTRA16x16( mb_type ) ) {
6031 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6032 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6035 qmul = h->dequant4_coeff[0][s->qscale];
6036 for( i = 0; i < 16; i++ ) {
6037 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6038 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6041 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6045 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6046 if( cbp & (1<<i8x8) ) {
6047 if( IS_8x8DCT(mb_type) ) {
6048 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6049 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6051 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6052 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6053 const int index = 4*i8x8 + i4x4;
6054 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6056 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6057 //STOP_TIMER("decode_residual")
6061 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6062 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6069 for( c = 0; c < 2; c++ ) {
6070 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6071 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6077 for( c = 0; c < 2; c++ ) {
6078 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6079 for( i = 0; i < 4; i++ ) {
6080 const int index = 16 + 4 * c + i;
6081 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6082 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
/* No chroma AC coded: clear the chroma nnz cache entries. */
6086 uint8_t * const nnz= &h->non_zero_count_cache[0];
6087 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6088 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp==0 and not intra16x16: clear all nnz and reset qp delta. */
6091 uint8_t * const nnz= &h->non_zero_count_cache[0];
6092 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6093 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6094 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6095 h->last_qscale_diff = 0;
6098 s->current_picture.qscale_table[mb_xy]= s->qscale;
6099 write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling from above. */
6102 h->ref_count[0] >>= 1;
6103 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (16 pixels tall). For bS<4 it defers to
 * the dsp normal filter with per-4-pixel tc0 clipping; for bS==4 (intra
 * edge, all four bS equal) it applies the spec's strong filter per pixel
 * row, choosing the 3-tap vs longer taps based on the alpha/beta tests.
 * NOTE(review): some lines (loop frame, pix advance, bS dispatch) are
 * elided in this excerpt. */
6110 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6112 const int index_a = qp + h->slice_alpha_c0_offset;
6113 const int alpha = (alpha_table+52)[index_a]; /* tables are biased by 52 for negative qp offsets */
6114 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6119 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1; /* -1 = skip this 4-pixel group */
6120 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6122 /* 16px edge length, because bS=4 is triggered by being at
6123 * the edge of an intra MB, so all 4 bS are the same */
6124 for( d = 0; d < 16; d++ ) {
6125 const int p0 = pix[-1];
6126 const int p1 = pix[-2];
6127 const int p2 = pix[-3];
6129 const int q0 = pix[0];
6130 const int q1 = pix[1];
6131 const int q2 = pix[2];
6133 if( FFABS( p0 - q0 ) < alpha &&
6134 FFABS( p1 - p0 ) < beta &&
6135 FFABS( q1 - q0 ) < beta ) {
6137 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ /* strong-filter eligibility */
6138 if( FFABS( p2 - p0 ) < beta)
6140 const int p3 = pix[-4];
6142 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6143 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6144 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6147 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6149 if( FFABS( q2 - q0 ) < beta)
6151 const int q3 = pix[3];
6153 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6154 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6155 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6158 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6162 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6163 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6165 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge: bS<4 uses the dsp clipped filter
 * (chroma tc = tc0+1, 0 meaning skip); bS==4 uses the intra strong
 * chroma filter. */
6171 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6173 const int index_a = qp + h->slice_alpha_c0_offset;
6174 const int alpha = (alpha_table+52)[index_a];
6175 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6180 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6181 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6183 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks a vertical luma edge in MBAFF mode, one pixel row at a time,
 * because bS (8 entries) and qp (2 entries, selected per row parity or
 * field half) can differ row by row. bS<4: clipped normal filter;
 * bS==4: strong filter, same math as filter_mb_edgev.
 * NOTE(review): some lines (bS_index field adjustment, continue, tc
 * setup) are elided in this excerpt. */
6187 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6189 for( i = 0; i < 16; i++, pix += stride) {
6195 int bS_index = (i >> 1);
6198 bS_index |= (i & 1);
6201 if( bS[bS_index] == 0 ) {
6205 qp_index = MB_FIELD ? (i >> 3) : (i & 1); /* field: qp per half; frame: qp per row parity */
6206 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6207 alpha = (alpha_table+52)[index_a];
6208 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6210 if( bS[bS_index] < 4 ) {
6211 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6212 const int p0 = pix[-1];
6213 const int p1 = pix[-2];
6214 const int p2 = pix[-3];
6215 const int q0 = pix[0];
6216 const int q1 = pix[1];
6217 const int q2 = pix[2];
6219 if( FFABS( p0 - q0 ) < alpha &&
6220 FFABS( p1 - p0 ) < beta &&
6221 FFABS( q1 - q0 ) < beta ) {
6225 if( FFABS( p2 - p0 ) < beta ) {
6226 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6229 if( FFABS( q2 - q0 ) < beta ) {
6230 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6234 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6235 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6236 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6237 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6240 const int p0 = pix[-1];
6241 const int p1 = pix[-2];
6242 const int p2 = pix[-3];
6244 const int q0 = pix[0];
6245 const int q1 = pix[1];
6246 const int q2 = pix[2];
6248 if( FFABS( p0 - q0 ) < alpha &&
6249 FFABS( p1 - p0 ) < beta &&
6250 FFABS( q1 - q0 ) < beta ) {
6252 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6253 if( FFABS( p2 - p0 ) < beta)
6255 const int p3 = pix[-4];
6257 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6258 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6259 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6262 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6264 if( FFABS( q2 - q0 ) < beta)
6266 const int q3 = pix[3];
6268 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6269 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6270 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6273 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6277 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6278 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6280 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks a vertical chroma edge in MBAFF mode, per pixel row (8 rows),
 * with per-row bS/qp selection; bS<4 uses the clipped chroma filter,
 * bS==4 the strong (intra) chroma filter.
 * NOTE(review): the bS_index computation and continue lines are elided
 * in this excerpt. */
6285 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6287 for( i = 0; i < 8; i++, pix += stride) {
6295 if( bS[bS_index] == 0 ) {
6299 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6300 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6301 alpha = (alpha_table+52)[index_a];
6302 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6304 if( bS[bS_index] < 4 ) {
6305 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1; /* chroma uses tc0+1 */
6306 const int p0 = pix[-1];
6307 const int p1 = pix[-2];
6308 const int q0 = pix[0];
6309 const int q1 = pix[1];
6311 if( FFABS( p0 - q0 ) < alpha &&
6312 FFABS( p1 - p0 ) < beta &&
6313 FFABS( q1 - q0 ) < beta ) {
6314 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6316 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6317 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6318 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6321 const int p0 = pix[-1];
6322 const int p1 = pix[-2];
6323 const int q0 = pix[0];
6324 const int q1 = pix[1];
6326 if( FFABS( p0 - q0 ) < alpha &&
6327 FFABS( p1 - p0 ) < beta &&
6328 FFABS( q1 - q0 ) < beta ) {
6330 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6331 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6332 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge — same logic as filter_mb_edgev but
 * addressing vertically via pix_next (== stride): bS<4 goes to the dsp
 * clipped filter, bS==4 applies the strong per-column filter.
 * NOTE(review): some lines (loop frame, column advance, bS dispatch) are
 * elided in this excerpt. */
6338 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6340 const int index_a = qp + h->slice_alpha_c0_offset;
6341 const int alpha = (alpha_table+52)[index_a];
6342 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6343 const int pix_next = stride;
6348 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6349 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6351 /* 16px edge length, see filter_mb_edgev */
6352 for( d = 0; d < 16; d++ ) {
6353 const int p0 = pix[-1*pix_next];
6354 const int p1 = pix[-2*pix_next];
6355 const int p2 = pix[-3*pix_next];
6356 const int q0 = pix[0];
6357 const int q1 = pix[1*pix_next];
6358 const int q2 = pix[2*pix_next];
6360 if( FFABS( p0 - q0 ) < alpha &&
6361 FFABS( p1 - p0 ) < beta &&
6362 FFABS( q1 - q0 ) < beta ) {
6364 const int p3 = pix[-4*pix_next];
6365 const int q3 = pix[ 3*pix_next];
6367 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6368 if( FFABS( p2 - p0 ) < beta) {
6370 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6371 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6372 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6375 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6377 if( FFABS( q2 - q0 ) < beta) {
6379 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6380 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6381 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6384 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6388 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6389 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6391 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge.  Chroma filtering only ever touches
 * p0/q0, so both bS<4 and intra cases are delegated to DSP routines.
 * NOTE(review): interior lines are missing from this copy (original line
 * numbers jump 6402 -> 6407); the tc-building loop header and the branch
 * between the two DSP calls are not visible. */
6398 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6400 const int index_a = qp + h->slice_alpha_c0_offset;
6401 const int alpha = (alpha_table+52)[index_a];
6402 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma tc is the luma tc0 + 1 (per the spec's chroma clipping rule) */
6407 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6408 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6410 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock.  Handles the common cases
 * inline (intra MBs with fixed bS patterns, inter MBs via the
 * h264_loop_filter_strength DSP helper) and bails out to the general
 * filter_mb() for picture borders, per-MB chroma QP differences, and
 * slice-boundary deblocking mode 2.
 * NOTE(review): this copy is decimated (embedded line numbers jump); in
 * particular the function's tail after the "else if( IS_8x8DCT..." line
 * (orig. 6508-6526) is absent, so the FILTER macro invocations and the
 * closing braces are not visible here. */
6414 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6415 MpegEncContext * const s = &h->s;
/* in a bottom-field picture the first row of MBs is row 1, not row 0 */
6416 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6418 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* cases the fast path cannot handle -> full filter_mb() */
6422 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6423 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6424 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6425 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6428 assert(!FRAME_MBAFF);
6430 mb_type = s->current_picture.mb_type[mb_xy];
6431 qp = s->current_picture.qscale_table[mb_xy];
6432 qp0 = s->current_picture.qscale_table[mb_xy-1];
6433 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6434 qpc = get_chroma_qp( h, 0, qp );
6435 qpc0 = get_chroma_qp( h, 0, qp0 );
6436 qpc1 = get_chroma_qp( h, 0, qp1 );
/* edge QPs are the average of the two adjacent MBs' QPs */
6437 qp0 = (qp + qp0 + 1) >> 1;
6438 qp1 = (qp + qp1 + 1) >> 1;
6439 qpc0 = (qpc + qpc0 + 1) >> 1;
6440 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP the filter provably changes nothing -> skip entirely */
6441 qp_thresh = 15 - h->slice_alpha_c0_offset;
6442 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6443 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs use fixed strengths: 4 on MB edges (3 horizontally in field
 * pictures), 3 on internal edges */
6446 if( IS_INTRA(mb_type) ) {
6447 int16_t bS4[4] = {4,4,4,4};
6448 int16_t bS3[4] = {3,3,3,3};
6449 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6450 if( IS_8x8DCT(mb_type) ) {
6451 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6452 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6453 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6454 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6456 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6457 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6458 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6459 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6460 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6461 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6462 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6463 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6465 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6466 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6467 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6468 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6469 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6470 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6471 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6472 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS for all edges in one shot via the DSP helper,
 * viewing bS as 2 (dir) x 4 (edge) vectors of four int16 strengths */
6475 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6476 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6478 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6480 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6482 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6483 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6484 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6485 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6487 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6488 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6489 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6490 (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS 4 (3 horizontally in field pictures) */
6492 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6493 bSv[0][0] = 0x0004000400040004ULL;
6494 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6495 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6497 #define FILTER(hv,dir,edge)\
6498 if(bSv[dir][edge]) {\
6499 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6501 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6502 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6508 } else if( IS_8x8DCT(mb_type) ) {
/* General deblocking of one macroblock: computes the boundary strength bS
 * for every vertical and horizontal edge (including the MBAFF and
 * mixed frame/field special cases) and applies the per-edge filters.
 * This is the slow, fully general path; filter_mb_fast() calls into it
 * when its preconditions do not hold.
 * NOTE(review): this copy is decimated (embedded line numbers jump), so
 * some else-branches and closing braces of the logic below are missing
 * from view; the surviving lines are reproduced unchanged. */
6527 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6528 MpegEncContext * const s = &h->s;
6529 const int mb_xy= mb_x + mb_y*s->mb_stride;
6530 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs compare vertical MVs with a tighter threshold (2 vs 4) */
6531 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6532 int first_vertical_edge_done = 0;
6534 /* FIXME: A given frame may occupy more than one position in
6535 * the reference list. So ref2frm should be populated with
6536 * frame numbers, not indexes. */
6537 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6538 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6540 //for sufficiently low qp, filtering wouldn't do anything
6541 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6543 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6544 int qp = s->current_picture.qscale_table[mb_xy];
6546 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6547 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: left neighbour pair has the opposite frame/field
 * coding, so the first vertical edge needs 8 bS values and 2 QPs */
6553 // left mb is in picture
6554 && h->slice_table[mb_xy-1] != 255
6555 // and current and left pair do not have the same interlaced type
6556 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6557 // and left mb is in the same slice if deblocking_filter == 2
6558 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6559 /* First vertical edge is different in MBAFF frames
6560 * There are 8 different bS to compute and 2 different Qp
6562 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6563 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6568 int mb_qp, mbn0_qp, mbn1_qp;
6570 first_vertical_edge_done = 1;
6572 if( IS_INTRA(mb_type) )
6573 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6575 for( i = 0; i < 8; i++ ) {
6576 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6578 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6580 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6581 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6582 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* per-edge luma and chroma (Cb/Cr separately) QPs, averaged with each of
 * the two left-pair neighbours */
6589 mb_qp = s->current_picture.qscale_table[mb_xy];
6590 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6591 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6592 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6593 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6594 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6595 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6596 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6597 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6598 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6599 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6600 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6601 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6604 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6605 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6606 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6607 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6608 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6610 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6611 for( dir = 0; dir < 2; dir++ )
6614 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6615 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table 255 marks "no MB there" -> skip the outer edge */
6616 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6618 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6619 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6620 // how often to recheck mv-based bS when iterating between edges
6621 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6622 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6623 // how often to recheck mv-based bS when iterating along each edge
6624 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6626 if (first_vertical_edge_done) {
6628 first_vertical_edge_done = 0;
6631 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6634 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6635 && !IS_INTERLACED(mb_type)
6636 && IS_INTERLACED(mbm_type)
6638 // This is a special case in the norm where the filtering must
6639 // be done twice (one each of the field) even if we are in a
6640 // frame macroblock.
6642 static const int nnz_idx[4] = {4,5,6,3};
6643 unsigned int tmp_linesize = 2 * linesize;
6644 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6645 int mbn_xy = mb_xy - 2 * s->mb_stride;
6650 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6651 if( IS_INTRA(mb_type) ||
6652 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6653 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6655 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6656 for( i = 0; i < 4; i++ ) {
6657 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6658 mbn_nnz[nnz_idx[i]] != 0 )
6664 // Do not use s->qscale as luma quantizer because it has not the same
6665 // value in IPCM macroblocks.
6666 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6667 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6668 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6669 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6670 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6671 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6672 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6673 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main edge loop: edge 0 borders the neighbour MB, edges 1..3 are internal */
6680 for( edge = start; edge < edges; edge++ ) {
6681 /* mbn_xy: neighbor macroblock */
6682 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6683 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with 8x8 transform, odd internal edges do not exist */
6687 if( (edge&1) && IS_8x8DCT(mb_type) )
6690 if( IS_INTRA(mb_type) ||
6691 IS_INTRA(mbn_type) ) {
6694 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6695 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6704 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inter bS: 0 if rechecking is masked off for this edge, 1 for mixed
 * frame/field neighbours, else derived from refs/MVs and coefficients */
6709 if( edge & mask_edge ) {
6710 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6713 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6714 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6717 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6718 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6719 int bn_idx= b_idx - (dir ? 8:1);
6721 for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6722 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6723 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6724 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6726 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4 bS: 2 if either side has coded coefficients, else ref/MV test */
6732 for( i = 0; i < 4; i++ ) {
6733 int x = dir == 0 ? edge : i;
6734 int y = dir == 0 ? i : edge;
6735 int b_idx= 8 + 4 + x + 8*y;
6736 int bn_idx= b_idx - (dir ? 8:1);
6738 if( h->non_zero_count_cache[b_idx] != 0 ||
6739 h->non_zero_count_cache[bn_idx] != 0 ) {
6745 for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6746 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6747 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6748 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6756 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6761 // Do not use s->qscale as luma quantizer because it has not the same
6762 // value in IPCM macroblocks.
6763 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6764 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6765 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6766 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* chroma edges exist only every other luma edge (4:2:0 subsampling) */
6768 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6769 if( (edge&1) == 0 ) {
6770 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6771 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6772 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6773 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6776 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6777 if( (edge&1) == 0 ) {
6778 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6779 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6780 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6781 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode one slice: drives the per-macroblock decode loop for the three
 * entropy modes visible below — CABAC, CAVLC, and a third (partitioned)
 * path — reporting decoded regions to the error concealment via
 * ff_er_add_slice() and emitting rows with ff_draw_horiz_band().
 * Returns 0 on success / -1 on error (per the visible return paths).
 * NOTE(review): this copy is decimated (embedded line numbers jump), so
 * loop headers, some else-branches and returns are missing from view. */
6788 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6789 MpegEncContext * const s = &h->s;
6790 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6794 if( h->pps.cabac ) {
/* CABAC decoding starts on a byte boundary */
6798 align_get_bits( &s->gb );
6801 ff_init_cabac_states( &h->cabac);
6802 ff_init_cabac_decoder( &h->cabac,
6803 s->gb.buffer + get_bits_count(&s->gb)/8,
6804 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6805 /* calculate pre-state */
/* initialize all 460 CABAC contexts from the QP-dependent init tables */
6806 for( i= 0; i < 460; i++ ) {
6808 if( h->slice_type == FF_I_TYPE )
6809 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6811 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: 2*state + mps */
6814 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6816 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6821 int ret = decode_mb_cabac(h);
6823 //STOP_TIMER("decode_mb_cabac")
6825 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well */
6827 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6830 if(ret>=0) ret = decode_mb_cabac(h);
6832 if(ret>=0) hl_decode_mb(h);
6835 eos = get_cabac_terminate( &h->cabac );
/* a small overread past the end is tolerated (bytestream_end + 2) */
6837 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6838 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6839 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6843 if( ++s->mb_x >= s->mb_width ) {
6845 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6847 if(FIELD_OR_MBAFF_PICTURE) {
6852 if( eos || s->mb_y >= s->mb_height ) {
6853 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6854 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6861 int ret = decode_mb_cavlc(h);
6863 if(ret>=0) hl_decode_mb(h);
6865 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6867 ret = decode_mb_cavlc(h);
6869 if(ret>=0) hl_decode_mb(h);
6874 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6875 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6880 if(++s->mb_x >= s->mb_width){
6882 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6884 if(FIELD_OR_MBAFF_PICTURE) {
6887 if(s->mb_y >= s->mb_height){
6888 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact end of bitstream == clean slice end; otherwise it's an error */
6890 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6891 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6895 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6902 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6903 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6904 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6905 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6909 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6918 for(;s->mb_y < s->mb_height; s->mb_y++){
6919 for(;s->mb_x < s->mb_width; s->mb_x++){
6920 int ret= decode_mb(h);
6925 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6926 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6931 if(++s->mb_x >= s->mb_width){
6933 if(++s->mb_y >= s->mb_height){
6934 if(get_bits_count(&s->gb) == s->gb.size_in_bits){
6935 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6939 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6946 if(get_bits_count(&s->gb) >= s->gb.size_in_bits){
6947 if(get_bits_count(&s->gb) == s->gb.size_in_bits){
/* tail of decode_slice(): final error-concealment reporting for the
 * third decode path, last row emission, and the unreachable fallthrough */
6948 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6952 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6959 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6962 return -1; //not reached
/* Parse an SEI "unregistered user data" payload of the given byte size.
 * Reads up to sizeof(user_data)-1 bytes, sniffs for an x264 version
 * banner (the build number feeds bug workarounds via h->x264_build),
 * and skips whatever remains of the payload.
 * NOTE(review): interior lines are missing from this copy (embedded
 * line numbers jump), e.g. the NUL-termination of user_data and the
 * skip loop header are not visible here. */
6965 static int decode_unregistered_user_data(H264Context *h, int size){
6966 MpegEncContext * const s = &h->s;
/* 16-byte UUID prefix + up to 256 bytes of payload text */
6967 uint8_t user_data[16+256];
6973 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6974 user_data[i]= get_bits(&s->gb, 8);
/* text starts after the 16-byte UUID */
6978 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6979 if(e==1 && build>=0)
6980 h->x264_build= build;
6982 if(s->avctx->debug & FF_DEBUG_BUGS)
6983 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond the local buffer */
6986 skip_bits(&s->gb, 8);
/* Parse the SEI NAL unit: iterate over (type, size) pairs — both coded
 * as runs of 0xFF bytes plus a final byte, per the H.264 SEI syntax —
 * dispatching known payloads and skipping the rest.
 * NOTE(review): decimated copy; the type/size accumulator declarations,
 * the switch on type, and the final return are not visible here. */
6991 static int decode_sei(H264Context *h){
6992 MpegEncContext * const s = &h->s;
6994 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payload type: sum 255 for each 0xFF byte, then add the last byte */
6999 type+= show_bits(&s->gb, 8);
7000 }while(get_bits(&s->gb, 8) == 255);
/* payload size, same escape coding */
7004 size+= show_bits(&s->gb, 8);
7005 }while(get_bits(&s->gb, 8) == 255);
7009 if(decode_unregistered_user_data(h, size) < 0)
/* unknown payload type: skip it wholesale */
7013 skip_bits(&s->gb, 8*size);
7016 //FIXME check bits here
7017 align_get_bits(&s->gb);
/* Parse (and discard) the HRD parameters of the VUI — bit-rate/CPB-size
 * scales, per-CPB values, and the four delay/offset field lengths.
 * Nothing is stored; the fields are consumed only to keep the bitstream
 * position correct. */
7023 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7024 MpegEncContext * const s = &h->s;
7026 cpb_count = get_ue_golomb(&s->gb) + 1;
7027 get_bits(&s->gb, 4); /* bit_rate_scale */
7028 get_bits(&s->gb, 4); /* cpb_size_scale */
7029 for(i=0; i<cpb_count; i++){
7030 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7031 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7032 get_bits1(&s->gb); /* cbr_flag */
7034 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7035 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7036 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7037 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the SPS VUI parameters.  Stores what the decoder uses (sample
 * aspect ratio, timing info, bitstream restriction / num_reorder_frames)
 * and reads-and-discards the rest to keep bit alignment.
 * NOTE(review): decimated copy — some closing braces, the sar defaulting
 * branch, and the final return are not visible here. */
7040 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7041 MpegEncContext * const s = &h->s;
7042 int aspect_ratio_info_present_flag;
7043 unsigned int aspect_ratio_idc;
7044 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7046 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7048 if( aspect_ratio_info_present_flag ) {
7049 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den; other idcs index a table */
7050 if( aspect_ratio_idc == EXTENDED_SAR ) {
7051 sps->sar.num= get_bits(&s->gb, 16);
7052 sps->sar.den= get_bits(&s->gb, 16);
7053 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7054 sps->sar= pixel_aspect[aspect_ratio_idc];
7056 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7063 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7065 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7066 get_bits1(&s->gb); /* overscan_appropriate_flag */
7069 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7070 get_bits(&s->gb, 3); /* video_format */
7071 get_bits1(&s->gb); /* video_full_range_flag */
7072 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7073 get_bits(&s->gb, 8); /* colour_primaries */
7074 get_bits(&s->gb, 8); /* transfer_characteristics */
7075 get_bits(&s->gb, 8); /* matrix_coefficients */
7079 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7080 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7081 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7084 sps->timing_info_present_flag = get_bits1(&s->gb);
7085 if(sps->timing_info_present_flag){
7086 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7087 sps->time_scale = get_bits_long(&s->gb, 32);
7088 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7091 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7092 if(nal_hrd_parameters_present_flag)
7093 decode_hrd_parameters(h, sps);
7094 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7095 if(vcl_hrd_parameters_present_flag)
7096 decode_hrd_parameters(h, sps);
7097 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7098 get_bits1(&s->gb); /* low_delay_hrd_flag */
7099 get_bits1(&s->gb); /* pic_struct_present_flag */
7101 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7102 if(sps->bitstream_restriction_flag){
7103 unsigned int num_reorder_frames;
7104 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7105 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7106 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7107 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7108 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7109 num_reorder_frames= get_ue_golomb(&s->gb);
7110 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* bound num_reorder_frames before storing — it sizes the delayed-output
 * picture handling */
7112 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7113 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7117 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list (16 or 64 entries) into factors[].
 * If the in-bitstream flag is 0 the predicted fallback_list is copied;
 * if the first delta yields next==0 the JVT default list is used;
 * otherwise deltas are decoded in zigzag order, with next==0 meaning
 * "repeat the last value" for the remaining entries.
 * NOTE(review): decimated copy — the else for the copy branch and the
 * early-break after the jvt_list memcpy are not visible here. */
7123 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7124 const uint8_t *jvt_list, const uint8_t *fallback_list){
7125 MpegEncContext * const s = &h->s;
7126 int i, last = 8, next = 8;
7127 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7128 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7129 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7131 for(i=0;i<size;i++){
7133 next = (last + get_se_golomb(&s->gb)) & 0xff;
7134 if(!i && !next){ /* matrix not written, we use the preset one */
7135 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7138 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices (six 4x4 + up to two 8x8).
 * is_sps selects SPS vs PPS context; when parsing a PPS, lists fall back
 * to the SPS matrices if the SPS carried any, otherwise to the JVT
 * defaults.  Intra chroma lists fall back to the previous intra list,
 * inter chroma to the previous inter list, per the spec's prediction
 * chain. */
7142 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7143 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7144 MpegEncContext * const s = &h->s;
7145 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7146 const uint8_t *fallback[4] = {
7147 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7148 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7149 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7150 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7152 if(get_bits1(&s->gb)){
7153 sps->scaling_matrix_present |= is_sps;
7154 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7155 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7156 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7157 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7158 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7159 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 transform */
7160 if(is_sps || pps->transform_8x8_mode){
7161 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7162 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7164 } else if(fallback_sps) {
/* no per-PPS matrices: inherit the SPS ones wholesale */
7165 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7166 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* NOTE(review): the return-type line of this definition is missing from
 * this copy, as are the id-range check, the existing-entry test, and the
 * return statements — only the fragments below survive.  Presumably it
 * range-checks id against max, allocates vec[id] if absent, and returns
 * the entry (NULL on failure) — TODO confirm against a full source. */
7171 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7174 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7175 const size_t size, const char *name)
7178 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7183 vec[id] = av_mallocz(size);
7185 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL into h->sps_buffers[sps_id],
 * validating ranges (POC cycle length, reference frame count, picture
 * dimensions, cropping) as it goes.
 * NOTE(review): decimated copy (embedded line numbers jump) — error
 * returns after the av_log calls, several else-branches, and the final
 * return are not visible here. */
7190 static inline int decode_seq_parameter_set(H264Context *h){
7191 MpegEncContext * const s = &h->s;
7192 int profile_idc, level_idc;
7193 unsigned int sps_id, tmp, mb_width, mb_height;
7197 profile_idc= get_bits(&s->gb, 8);
7198 get_bits1(&s->gb); //constraint_set0_flag
7199 get_bits1(&s->gb); //constraint_set1_flag
7200 get_bits1(&s->gb); //constraint_set2_flag
7201 get_bits1(&s->gb); //constraint_set3_flag
7202 get_bits(&s->gb, 4); // reserved
7203 level_idc= get_bits(&s->gb, 8);
7204 sps_id= get_ue_golomb(&s->gb);
7206 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7210 sps->profile_idc= profile_idc;
7211 sps->level_idc= level_idc;
/* High-profile extras: chroma format, bit depths, transform bypass,
 * SPS-level scaling matrices */
7213 if(sps->profile_idc >= 100){ //high profile
7214 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7215 get_bits1(&s->gb); //residual_color_transform_flag
7216 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7217 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7218 sps->transform_bypass = get_bits1(&s->gb);
7219 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7221 sps->scaling_matrix_present = 0;
7223 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7224 sps->poc_type= get_ue_golomb(&s->gb);
7226 if(sps->poc_type == 0){ //FIXME #define
7227 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7228 } else if(sps->poc_type == 1){//FIXME #define
7229 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7230 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7231 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7232 tmp= get_ue_golomb(&s->gb);
/* bound the cycle length by the fixed offset_for_ref_frame array size */
7234 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7235 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7238 sps->poc_cycle_length= tmp;
7240 for(i=0; i<sps->poc_cycle_length; i++)
7241 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7242 }else if(sps->poc_type != 2){
7243 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7247 tmp= get_ue_golomb(&s->gb);
7248 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7249 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7252 sps->ref_frame_count= tmp;
7253 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7254 mb_width= get_ue_golomb(&s->gb) + 1;
7255 mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb arithmetic */
7256 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7257 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7258 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7261 sps->mb_width = mb_width;
7262 sps->mb_height= mb_height;
7264 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7265 if(!sps->frame_mbs_only_flag)
7266 sps->mb_aff= get_bits1(&s->gb);
7270 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7272 #ifndef ALLOW_INTERLACE
7274 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7276 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7277 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7279 sps->crop= get_bits1(&s->gb);
7281 sps->crop_left = get_ue_golomb(&s->gb);
7282 sps->crop_right = get_ue_golomb(&s->gb);
7283 sps->crop_top = get_ue_golomb(&s->gb);
7284 sps->crop_bottom= get_ue_golomb(&s->gb);
7285 if(sps->crop_left || sps->crop_top){
7286 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7288 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7289 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7295 sps->crop_bottom= 0;
7298 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7299 if( sps->vui_parameters_present_flag )
7300 decode_vui_parameters(h, sps);
7302 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7303 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7304 sps_id, sps->profile_idc, sps->level_idc,
7306 sps->ref_frame_count,
7307 sps->mb_width, sps->mb_height,
7308 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7309 sps->direct_8x8_inference_flag ? "8B8" : "",
7310 sps->crop_left, sps->crop_right,
7311 sps->crop_top, sps->crop_bottom,
7312 sps->vui_parameters_present_flag ? "VUI" : ""
/* Fill one chroma-QP lookup table: chroma_qp_table[t][qp] maps a luma QP
 * (offset by the PPS chroma_qp_index_offset 'index') to the chroma QP.
 * NOTE(review): the return-type line of this definition is missing from
 * this copy.  Also, the loop bound 'i < 255' never writes table entry
 * 255 (i & 0xff covers 0..254 only) — looks like an off-by-one; the
 * signature being absent, it is flagged here rather than fixed. */
7319 build_qp_table(PPS *pps, int t, int index)
7322 for(i = 0; i < 255; i++)
7323 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a Picture Parameter Set (PPS) NAL unit from the bitstream reader
 * in h->s.gb into a PPS allocated through alloc_parameter_set().
 *
 * Parses: referenced sps_id, entropy coding mode (CABAC/CAVLC), slice
 * groups (FMO — logged as unsupported), default reference counts, weighted
 * prediction flags, QP offsets, deblocking/constrained-intra/redundant-pic
 * flags, optional 8x8 transform mode and scaling matrices, then builds the
 * chroma QP tables and logs a summary under FF_DEBUG_PICT_INFO.
 *
 * @param h          decoder context (bitstream reader positioned at the PPS RBSP)
 * @param bit_length length of the RBSP in bits, used to detect the optional
 *                   trailing fields (transform_8x8_mode etc.)
 *
 * NOTE(review): this chunk is an excerpt — many original lines (error
 * returns, braces, switch bodies) are missing, so the cleanup/failure
 * paths are not fully visible here.
 */
7326 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7327 MpegEncContext * const s = &h->s;
7328 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7331 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* Validate the SPS this PPS refers to: it must be in range and already decoded. */
7335 tmp= get_ue_golomb(&s->gb);
7336 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7337 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7342 pps->cabac= get_bits1(&s->gb);
7343 pps->pic_order_present= get_bits1(&s->gb);
7344 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7345 if(pps->slice_group_count > 1 ){
7346 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
/* Flexible Macroblock Ordering is not implemented; the syntax below is
 * quoted from the H.264 spec tables for reference only. */
7347 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7348 switch(pps->mb_slice_group_map_type){
7351 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7352 | run_length[ i ] |1 |ue(v) |
7357 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7359 | top_left_mb[ i ] |1 |ue(v) |
7360 | bottom_right_mb[ i ] |1 |ue(v) |
7368 | slice_group_change_direction_flag |1 |u(1) |
7369 | slice_group_change_rate_minus1 |1 |ue(v) |
7374 | slice_group_id_cnt_minus1 |1 |ue(v) |
7375 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7377 | slice_group_id[ i ] |1 |u(v) |
/* Default active reference counts (num_ref_idx_l{0,1}_active_minus1 + 1);
 * H.264 caps each list at 32 entries — reset to 1 on overflow. */
7382 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7383 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7384 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7385 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7386 pps->ref_count[0]= pps->ref_count[1]= 1;
7390 pps->weighted_pred= get_bits1(&s->gb);
7391 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7392 pps->init_qp= get_se_golomb(&s->gb) + 26;
7393 pps->init_qs= get_se_golomb(&s->gb) + 26;
7394 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7395 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7396 pps->constrained_intra_pred= get_bits1(&s->gb);
7397 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7399 pps->transform_8x8_mode= 0;
7400 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Flat (value 16) default scaling matrices; may be overwritten below. */
7401 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7402 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Optional trailing PPS fields are present only if bits remain in the RBSP. */
7404 if(get_bits_count(&s->gb) < bit_length){
7405 pps->transform_8x8_mode= get_bits1(&s->gb);
7406 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7407 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7409 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
/* Build per-plane chroma QP tables; duplicate table 0 when both offsets match. */
7412 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7413 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7414 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7415 h->pps.chroma_qp_diff= 1;
7417 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7419 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7420 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7421 pps_id, pps->sps_id,
7422 pps->cabac ? "CABAC" : "CAVLC",
7423 pps->slice_group_count,
7424 pps->ref_count[0], pps->ref_count[1],
7425 pps->weighted_pred ? "weighted" : "",
7426 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7427 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7428 pps->constrained_intra_pred ? "CONSTR" : "",
7429 pps->redundant_pic_cnt_present ? "REDU" : "",
7430 pps->transform_8x8_mode ? "8x8DCT" : ""
7438 * Call decode_slice() for each context.
7440 * @param h h264 master context
7441 * @param context_count number of contexts to execute
7443 static void execute_decode_slices(H264Context *h, int context_count){
7444 MpegEncContext * const s = &h->s;
7445 AVCodecContext * const avctx= s->avctx;
/* Single-context fast path: decode directly on the master context. */
7449 if(context_count == 1) {
7450 decode_slice(avctx, h);
/* Multi-context path: propagate error-resilience settings and reset the
 * per-context error counters before running the contexts via avctx->execute. */
7452 for(i = 1; i < context_count; i++) {
7453 hx = h->thread_context[i];
7454 hx->s.error_resilience = avctx->error_resilience;
7455 hx->s.error_count = 0;
7458 avctx->execute(avctx, (void *)decode_slice,
7459 (void **)h->thread_context, NULL, context_count);
7461 /* pull back stuff from slices to master context */
7462 hx = h->thread_context[context_count - 1];
7463 s->mb_x = hx->s.mb_x;
7464 s->mb_y = hx->s.mb_y;
7465 s->dropable = hx->s.dropable;
7466 s->picture_structure = hx->s.picture_structure;
/* Accumulate the error counts of all slave contexts into the master. */
7467 for(i = 1; i < context_count; i++)
7468 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each to the
 * appropriate parser (slice header/data, SPS, PPS, SEI, ...), batching
 * slices across thread contexts and flushing them with
 * execute_decode_slices() when a batch is full.
 *
 * Handles both Annex-B streams (start-code-prefixed) and AVC/mp4 streams
 * (length-prefixed, h->is_avc with h->nal_length_size bytes of size).
 *
 * @param h        master decoder context
 * @param buf      input bitstream buffer
 * @param buf_size size of @p buf in bytes
 *
 * NOTE(review): excerpt — loop header, several case labels, error returns
 * and braces are missing between the visible lines.
 */
7473 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7474 MpegEncContext * const s = &h->s;
7475 AVCodecContext * const avctx= s->avctx;
7477 H264Context *hx; ///< thread context
7478 int context_count = 0;
7480 h->max_contexts = avctx->thread_count;
/* Debug dump of the first 50 input bytes (presumably under a debug guard
 * removed from this excerpt — confirm against the full source). */
7483 for(i=0; i<50; i++){
7484 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Without CODEC_FLAG2_CHUNKS each call starts a fresh access unit. */
7487 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7488 h->current_slice = 0;
7489 if (!s->first_field)
7490 s->current_picture_ptr= NULL;
7502 if(buf_index >= buf_size) break;
/* AVC format: read the big-endian NAL size prefix (nal_length_size bytes). */
7504 for(i = 0; i < h->nal_length_size; i++)
7505 nalsize = (nalsize << 8) | buf[buf_index++];
7506 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7511 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7516 // start code prefix search
7517 for(; buf_index + 3 < buf_size; buf_index++){
7518 // This should always succeed in the first iteration.
7519 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7523 if(buf_index+3 >= buf_size) break;
/* Parse this NAL on the next free thread context. */
7528 hx = h->thread_context[context_count];
7530 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7531 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then compute the RBSP size in bits excluding
 * the rbsp_trailing_bits. */
7534 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7536 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7538 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7539 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7542 if (h->is_avc && (nalsize != consumed)){
7543 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7547 buf_index += consumed;
/* Skip non-reference NALs when the caller asked to hurry or discard. */
7549 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7550 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7555 switch(hx->nal_unit_type){
/* IDR slice: a non-IDR slice in the same access unit is invalid. */
7557 if (h->nal_unit_type != NAL_IDR_SLICE) {
7558 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7561 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular (non-partitioned) slice: single bitstream reader for the data. */
7563 init_get_bits(&hx->s.gb, ptr, bit_length);
7565 hx->inter_gb_ptr= &hx->s.gb;
7566 hx->s.data_partitioning = 0;
7568 if((err = decode_slice_header(hx, h)))
7571 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Only count the slice for decoding if it passes every skip filter. */
7572 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7573 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7574 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7575 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7576 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partitioning: DPA carries the slice header ... */
7580 init_get_bits(&hx->s.gb, ptr, bit_length);
7582 hx->inter_gb_ptr= NULL;
7583 hx->s.data_partitioning = 1;
7585 err = decode_slice_header(hx, h);
/* ... DPB the intra residuals ... */
7588 init_get_bits(&hx->intra_gb, ptr, bit_length);
7589 hx->intra_gb_ptr= &hx->intra_gb;
/* ... and DPC the inter residuals. */
7592 init_get_bits(&hx->inter_gb, ptr, bit_length);
7593 hx->inter_gb_ptr= &hx->inter_gb;
7595 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7596 && s->context_initialized
7598 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7599 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7600 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7601 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI (presumably — case label missing from this excerpt). */
7605 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set. */
7609 init_get_bits(&s->gb, ptr, bit_length);
7610 decode_seq_parameter_set(h);
7612 if(s->flags& CODEC_FLAG_LOW_DELAY)
7615 if(avctx->has_b_frames < 2)
7616 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set. */
7619 init_get_bits(&s->gb, ptr, bit_length);
7621 decode_picture_parameter_set(h, bit_length);
7625 case NAL_END_SEQUENCE:
7626 case NAL_END_STREAM:
7627 case NAL_FILLER_DATA:
7629 case NAL_AUXILIARY_SLICE:
7632 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush a full batch of slice contexts. */
7635 if(context_count == h->max_contexts) {
7636 execute_decode_slices(h, context_count);
7641 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7643 /* Slice could not be decoded in parallel mode, copy down
7644 * NAL unit stuff to context 0 and restart. Note that
7645 * rbsp_buffer is not transfered, but since we no longer
7646 * run in parallel mode this should not be an issue. */
7647 h->nal_unit_type = hx->nal_unit_type;
7648 h->nal_ref_idc = hx->nal_ref_idc;
/* Flush any slices still pending at end of input. */
7654 execute_decode_slices(h, context_count);
7659 * returns the number of bytes consumed for building the current frame
7661 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* With CODEC_FLAG_TRUNCATED the parser may hold back bytes; rebase pos
 * on the bytes actually consumed this call. */
7662 if(s->flags&CODEC_FLAG_TRUNCATED){
7663 pos -= s->parse_context.last_index;
7664 if(pos<0) pos=0; // FIXME remove (unneeded?)
7668 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7669 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback: decode one input buffer into at most
 * one output frame.
 *
 * Flow: optional frame-boundary combining (CODEC_FLAG_TRUNCATED), flushing
 * of delayed pictures on empty input, one-time avcC extradata parsing for
 * AVC/mp4 streams, decode_nal_units() on the payload, then B-frame
 * reordering of the delayed-picture list to emit frames in display order.
 *
 * @param avctx     codec context
 * @param data      output AVFrame (written on success)
 * @param data_size set to sizeof(AVFrame) when a frame is returned, else 0
 * @param buf       input buffer
 * @param buf_size  input size in bytes; 0 means flush
 * @return number of bytes consumed (via get_consumed_bytes)
 *
 * NOTE(review): excerpt — numerous lines (declarations, braces, returns)
 * are missing between the visible statements.
 */
7675 static int decode_frame(AVCodecContext *avctx,
7676 void *data, int *data_size,
7677 const uint8_t *buf, int buf_size)
7679 H264Context *h = avctx->priv_data;
7680 MpegEncContext *s = &h->s;
7681 AVFrame *pict = data;
7684 s->flags= avctx->flags;
7685 s->flags2= avctx->flags2;
/* Truncated mode: accumulate input until a complete frame is available. */
7687 if(s->flags&CODEC_FLAG_TRUNCATED){
7688 const int next= ff_h264_find_frame_end(h, buf, buf_size);
7689 assert((buf_size > 0) || (next == END_NOT_FOUND));
7691 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7693 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7696 /* no supplementary picture */
7697 if (buf_size == 0) {
7701 //FIXME factorize this with the output code below
/* Flush: output the delayed picture with the smallest POC. */
7702 out = h->delayed_pic[0];
7704 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7705 if(h->delayed_pic[i]->poc < out->poc){
7706 out = h->delayed_pic[i];
7710 for(i=out_idx; h->delayed_pic[i]; i++)
7711 h->delayed_pic[i] = h->delayed_pic[i+1];
7714 *data_size = sizeof(AVFrame);
7715 *pict= *(AVFrame*)out;
/* One-time parse of the avcC extradata box for AVC/mp4 streams. */
7721 if(h->is_avc && !h->got_avcC) {
7722 int i, cnt, nalsize;
7723 unsigned char *p = avctx->extradata;
7724 if(avctx->extradata_size < 7) {
7725 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7729 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7732 /* sps and pps in the avcC always have length coded with 2 bytes,
7733 so put a fake nal_length_size = 2 while parsing them */
7734 h->nal_length_size = 2;
7735 // Decode sps from avcC
7736 cnt = *(p+5) & 0x1f; // Number of sps
7738 for (i = 0; i < cnt; i++) {
7739 nalsize = AV_RB16(p) + 2;
7740 if(decode_nal_units(h, p, nalsize) < 0) {
7741 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7746 // Decode pps from avcC
7747 cnt = *(p++); // Number of pps
7748 for (i = 0; i < cnt; i++) {
7749 nalsize = AV_RB16(p) + 2;
7750 if(decode_nal_units(h, p, nalsize) != nalsize) {
7751 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7756 // Now store right nal length size, that will be use to parse all other nals
7757 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7758 // Do not reparse avcC
/* Annex-B extradata (e.g. from a container) is decoded once up front. */
7762 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7763 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7767 buf_index=decode_nal_units(h, buf, buf_size);
7771 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7772 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7773 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Picture complete (or end of chunked picture): finish and maybe output. */
7777 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7778 Picture *out = s->current_picture_ptr;
7779 Picture *cur = s->current_picture_ptr;
7780 int i, pics, cross_idr, out_of_order, out_idx;
7784 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7785 s->current_picture_ptr->pict_type= s->pict_type;
/* Roll POC/frame_num state forward and apply reference picture marking. */
7787 h->prev_frame_num_offset= h->frame_num_offset;
7788 h->prev_frame_num= h->frame_num;
7790 h->prev_poc_msb= h->poc_msb;
7791 h->prev_poc_lsb= h->poc_lsb;
7792 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7796 * FIXME: Error handling code does not seem to support interlaced
7797 * when slices span multiple rows
7798 * The ff_er_add_slice calls don't work right for bottom
7799 * fields; they cause massive erroneous error concealing
7800 * Error marking covers both fields (top and bottom).
7801 * This causes a mismatched s->error_count
7802 * and a bad error table. Further, the error count goes to
7803 * INT_MAX when called for bottom field, because mb_y is
7804 * past end by one (callers fault) and resync_mb_y != 0
7805 * causes problems for the first MB line, too.
7812 if (s->first_field) {
7813 /* Wait for second field. */
7817 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7818 /* Derive top_field_first from field pocs. */
7819 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7821 //FIXME do something with unavailable reference frames
7823 #if 0 //decode order
7824 *data_size = sizeof(AVFrame);
7826 /* Sort B-frames into display order */
7828 if(h->sps.bitstream_restriction_flag
7829 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7830 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7835 while(h->delayed_pic[pics]) pics++;
7837 assert(pics <= MAX_DELAYED_PIC_COUNT);
7839 h->delayed_pic[pics++] = cur;
/* Keep the current picture alive while it sits in the delayed list. */
7840 if(cur->reference == 0)
7841 cur->reference = DELAYED_PIC_REF;
7844 for(i=0; h->delayed_pic[i]; i++)
7845 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the delayed picture with the smallest POC as output candidate. */
7848 out = h->delayed_pic[0];
7850 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7851 if(h->delayed_pic[i]->poc < out->poc){
7852 out = h->delayed_pic[i];
7856 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames lazily when the stream shows more reordering depth. */
7858 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7860 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7862 ((!cross_idr && out && out->poc > h->outputed_poc + 2)
7863 || cur->pict_type == FF_B_TYPE)))
7866 s->avctx->has_b_frames++;
7869 if(pics <= s->avctx->has_b_frames || out_of_order)
/* Remove the output picture from the delayed list and emit it. */
7872 if(out_of_order || pics > s->avctx->has_b_frames){
7873 for(i=out_idx; h->delayed_pic[i]; i++)
7874 h->delayed_pic[i] = h->delayed_pic[i+1];
7878 *data_size = sizeof(AVFrame);
7880 out->reference &= ~DELAYED_PIC_REF;
7881 h->outputed_poc = out->poc;
7886 *pict= *(AVFrame*)out;
7888 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7892 assert(pict->data[0] || !*data_size);
7893 ff_print_debug_info(s, pict);
7894 //printf("out %d\n", (int)pict->data[0]);
7897 /* Return the Picture timestamp as the frame number */
7898 /* we subtract 1 because it is added on utils.c */
7899 avctx->frame_number = s->picture_number - 1;
7901 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the macroblocks neighbouring
 * the current one: a neighbour is available only if it lies inside the
 * picture and belongs to the same slice (slice_table entry == slice_num).
 * Indices: 0 = top-left, 1 = top, 2 = top-right, 3 = left; 4/5 are
 * hardcoded (see FIXMEs).
 * NOTE(review): excerpt — lines between 7911 and 7917 (presumably the
 * first-row handling) are missing from this view.
 */
7904 static inline void fill_mb_avail(H264Context *h){
7905 MpegEncContext * const s = &h->s;
7906 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7909 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7910 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7911 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7917 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7918 h->mb_avail[4]= 1; //FIXME move out
7919 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self-test fragments (in the full source this sits inside a
 * test-only main() — the function opening is missing from this excerpt).
 * Exercises: unsigned/signed Exp-Golomb round-trips, the 4x4 (I)DCT,
 * the quantizer, and NAL escaping/unescaping round-trips.
 */
7927 #define SIZE (COUNT*40)
7933 // int int_temp[10000];
7935 AVCodecContext avctx;
7937 dsputil_init(&dsp, &avctx);
/* --- ue(v) round-trip: write 0..COUNT-1, read back and compare. --- */
7939 init_put_bits(&pb, temp, SIZE);
7940 printf("testing unsigned exp golomb\n");
7941 for(i=0; i<COUNT; i++){
7943 set_ue_golomb(&pb, i);
7944 STOP_TIMER("set_ue_golomb");
7946 flush_put_bits(&pb);
7948 init_get_bits(&gb, temp, 8*SIZE);
7949 for(i=0; i<COUNT; i++){
7952 s= show_bits(&gb, 24);
7955 j= get_ue_golomb(&gb);
7957 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7960 STOP_TIMER("get_ue_golomb");
/* --- se(v) round-trip: write centred values, read back and compare. --- */
7964 init_put_bits(&pb, temp, SIZE);
7965 printf("testing signed exp golomb\n");
7966 for(i=0; i<COUNT; i++){
7968 set_se_golomb(&pb, i - COUNT/2);
7969 STOP_TIMER("set_se_golomb");
7971 flush_put_bits(&pb);
7973 init_get_bits(&gb, temp, 8*SIZE);
7974 for(i=0; i<COUNT; i++){
7977 s= show_bits(&gb, 24);
7980 j= get_se_golomb(&gb);
7981 if(j != i - COUNT/2){
7982 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7985 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round-trip on random blocks, accumulating error. --- */
7989 printf("testing 4x4 (I)DCT\n");
7992 uint8_t src[16], ref[16];
7993 uint64_t error= 0, max_error=0;
7995 for(i=0; i<COUNT; i++){
7997 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7998 for(j=0; j<16; j++){
7999 ref[j]= random()%255;
8000 src[j]= random()%255;
8003 h264_diff_dct_c(block, src, ref, 4);
/* Scale coefficients to roughly undo the DCT's norm before the IDCT. */
8006 for(j=0; j<16; j++){
8007 // printf("%d ", block[j]);
8008 block[j]= block[j]*4;
8009 if(j&1) block[j]= (block[j]*4 + 2)/5;
8010 if(j&4) block[j]= (block[j]*4 + 2)/5;
8014 s->dsp.h264_idct_add(ref, block, 4);
8015 /* for(j=0; j<16; j++){
8016 printf("%d ", ref[j]);
8020 for(j=0; j<16; j++){
8021 int diff= FFABS(src[j] - ref[j]);
8024 max_error= FFMAX(max_error, diff);
8027 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8028 printf("testing quantizer\n");
8029 for(qp=0; qp<52; qp++){
8031 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escaping: encode a random bitstream, decode, compare. --- */
8034 printf("Testing NAL layer\n");
8036 uint8_t bitstream[COUNT];
8037 uint8_t nal[COUNT*2];
8039 memset(&h, 0, sizeof(H264Context));
8041 for(i=0; i<COUNT; i++){
8049 for(j=0; j<COUNT; j++){
8050 bitstream[j]= (random() % 255) + 1;
/* Sprinkle zero bytes to exercise emulation-prevention escaping. */
8053 for(j=0; j<zeros; j++){
8054 int pos= random() % COUNT;
8055 while(bitstream[pos] == 0){
8064 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8066 printf("encoding failed\n");
8070 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8074 if(out_length != COUNT){
8075 printf("incorrect length %d %d\n", out_length, COUNT);
8079 if(consumed != nal_length){
8080 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8084 if(memcmp(bitstream, out, COUNT)){
8085 printf("mismatch\n");
8091 printf("Testing RBSP\n");
/**
 * AVCodec close callback: free the per-context RBSP buffers and the
 * decoder's lookup tables.
 * NOTE(review): excerpt — the MPV/common cleanup and return statement
 * are missing from this view.
 */
8099 static av_cold int decode_end(AVCodecContext *avctx)
8101 H264Context *h = avctx->priv_data;
8102 MpegEncContext *s = &h->s;
8104 av_freep(&h->rbsp_buffer[0]);
8105 av_freep(&h->rbsp_buffer[1]);
8106 free_tables(h); //FIXME cleanup init stuff perhaps
8109 // memset(h, 0, sizeof(H264Context));
/* Public AVCodec registration for the H.264 decoder (excerpt — name,
 * type, id and the callback fields are missing from this view). */
8115 AVCodec h264_decoder = {
8119 sizeof(H264Context),
8124 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8126 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),