2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* Static CAVLC code tables for residual decoding (coeff_token, total_zeros,
 * run_before and their chroma-DC variants).
 * NOTE(review): the code that initializes these tables is not visible in this
 * excerpt — presumably done once at decoder init; confirm against the file. */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations: SVQ3 dequant/IDCT helpers and the per-macroblock
 * deblocking filters (full and fast variants). Their definitions are
 * elsewhere in this file and not visible in this excerpt. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs the low 16 bits of a and b into one 32-bit word, choosing the
 * halfword order by host endianness so that a occupies the halfword stored
 * first in memory on either byte order.
 * NOTE(review): the #else/#endif closing the WORDS_BIGENDIAN branch are
 * elided from this excerpt.
 */
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
/* Lookup table: ff_rem6[qp] == qp % 6 for QP 0..51 (values repeat 0..5). */
73 const uint8_t ff_rem6[52]={
74 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table: ff_div6[qp] == qp / 6 for QP 0..51 (each value repeats 6 times). */
77 const uint8_t ff_div6[52]={
78 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/**
 * Fills the per-macroblock neighbour caches from the current picture:
 * intra 4x4 prediction modes, non-zero-count (nnz), intra sample
 * availability masks, CBP of neighbours, and per-list mv/ref/mvd and
 * direct-mode caches. With for_deblock set, a reduced fill is done for the
 * in-loop deblocking filter. Handles MBAFF frame/field neighbour remapping.
 * NOTE(review): many original lines are elided from this excerpt
 * (non-contiguous numbering); comments below describe only visible code.
 */
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
88 int topleft_partition= -1;
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour indices: top row, top-left, top-right, left. */
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbours are chosen per frame/field MB pair; the *_mb_frame_flag
 * values record whether each neighbouring pair is frame- or field-coded, and
 * the neighbour indices are shifted by a stride when the coding mode of the
 * current MB differs from that neighbour's. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
/* for_deblock path: neighbour types are taken whenever the slice_table entry
 * is valid (< 255), i.e. across slice boundaries too. */
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF inter deblock fill: unpack the packed luma nnz bitmask (stored at
 * bytes 14..15 of non_zero_count) and copy this MB's own mv/ref data into
 * the caches. */
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decode path: neighbour MBs in a different slice are treated as
 * unavailable (type 0). */
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction sample availability, kept as bitmasks; bits are cleared
 * for each unavailable (or, with constrained_intra_pred, inter) neighbour. */
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
/* Intra 4x4 prediction mode cache: top row comes from the bottom row of the
 * top neighbour, left column from the left neighbour(s); unavailable
 * neighbours fall back to a default (pred). */
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
/* nnz cache: top row / left column taken from neighbour MBs, including the
 * chroma entries; missing neighbours get 0 (CABAC inter) or 64 otherwise. */
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour CBP, used for CABAC context selection. */
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector and reference index caches for each active reference list:
 * top row, left column, and (conditionally) top-left / top-right corners.
 * Unavailable neighbours get zero mv and LIST_NOT_USED / PART_NOT_AVAILABLE. */
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* topleft_partition selects the middle partition of the top-left MB in the
 * one MBAFF corner case flagged above; otherwise the bottom-right is used. */
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flags per 8x8 block, pulled from neighbours'
 * direct_table entries. */
484 if(h->slice_type_nos == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame/field remapping of cached neighbour mv/ref/mvd: MAP_F2F is
 * applied to each cached neighbour position; the two definitions below
 * halve or double the vertical mv component (and shift the ref index)
 * depending on whether the neighbour's coding mode differs from the
 * current MB's. */
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for 8x8 transform flag contexts. */
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the intra 4x4 prediction modes from the cache (bottom row and
 * right column of the scan8 layout) back to the per-MB array, presumably
 * so subsequent MBs can read them as top/left neighbours — confirm against
 * the readers in fill_caches.
 */
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Remaps each cached intra4x4 mode through the top[]/left[] tables when the
 * corresponding neighbour samples are unavailable; logs and (per the elided
 * lines, presumably) returns an error for modes with no valid remapping. */
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
573 if(!(h->top_samples_available&0x8000)){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
585 if(!(h->left_samples_available&0x8000)){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for a whole-MB (16x16/chroma 8x8)
 * prediction mode passed in as `mode`; rejects out-of-range modes.
 * NOTE(review): the remapping/return statements are elided from this excerpt. */
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if(!(h->left_samples_available&0x8000)){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
/* Predictor is the minimum of the left and top cached modes; a negative
 * value means an unavailable neighbour, in which case DC_PRED is used.
 * (The final `return min;` is elided from this excerpt.) */
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/**
 * Writes non-zero-coefficient counts from the cache back to the per-MB
 * array (luma bottom row / right column plus the chroma entries), and packs
 * a 16-bit per-4x4-block luma nnz bitmask into bytes 14..15 for the
 * deblocking filter (see the matching unpack in fill_caches).
 */
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
667 // store all luma nnzs, for deblocking
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
/* NOTE(review): the lines combining `left` and `top` into `i` are elided
 * from this excerpt; visibly, values below 64 are halved with rounding up. */
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "diagonal" (top-right, falling back to top-left) neighbour
 * motion vector for MV prediction. Sets *C to the mv and returns the
 * corresponding reference index. In MBAFF frames, special cases read the
 * neighbour directly from the picture's motion_val and rescale the vertical
 * component between frame and field coordinates (SET_DIAG_MV), writing the
 * result into a scratch cache slot at scan8[0]-2.
 */
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF default: use top-right if available, otherwise top-left. */
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
/* Median motion vector prediction: component-wise median of left (A),
 * top (B) and diagonal (C) neighbours, with the single-match and
 * left-only special cases visible below. */
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
781 }else if(match_count==1){
785 }else if(top_ref==ref){
/* Only the left neighbour available: use A directly (per the elided lines);
 * otherwise fall through to the median. */
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
/* 16x8 partitions: top partition prefers the top neighbour, bottom
 * partition prefers the left neighbour (when its ref matches, per the
 * elided condition lines); otherwise falls back to median prediction. */
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
837 pred_motion(h, n, 4, list, ref, mx, my);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
/* 8x16 partitions: left partition prefers the left neighbour, right
 * partition prefers the diagonal neighbour (when its ref matches);
 * otherwise falls back to median prediction. */
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
874 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * P-Skip motion prediction: the skip MV is zero when either the top or left
 * neighbour is unavailable, or when either has reference 0 with a zero MV
 * (the zero-assignment lines are elided here); otherwise the normal median
 * prediction with ref 0 is used.
 */
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
891 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factor for every list-0
 * reference: tx = (16384 + |td|/2) / td, factor = clip((tb*tx + 32) >> 6),
 * with td/tb being POC distances clipped to [-128,127]; td == 0 yields the
 * neutral factor 256. Also mirrors the factors into the field array
 * (same value for both parities).
 */
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
904 h->dist_scale_factor[i] = 256;
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Records the current picture's reference counts and POCs, then builds
 * map_col_to_list0: for each reference of the co-located picture (list 1,
 * index 0), the list-0 index of the current frame's reference with matching
 * POC (0 when no match is found). Also fills the field-pair variant
 * (2*j / 2*j+1). Skipped for non-B pictures and spatial direct mode.
 */
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1004 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1009 if(ref[0] < 0 && ref[1] < 0){
1010 ref[0] = ref[1] = 0;
1011 mv[0][0] = mv[0][1] =
1012 mv[1][0] = mv[1][1] = 0;
1014 for(list=0; list<2; list++){
1016 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1018 mv[list][0] = mv[list][1] = 0;
1024 *mb_type &= ~MB_TYPE_L1;
1025 sub_mb_type &= ~MB_TYPE_L1;
1026 }else if(ref[0] < 0){
1028 *mb_type &= ~MB_TYPE_L0;
1029 sub_mb_type &= ~MB_TYPE_L0;
1032 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1033 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1034 int mb_types_col[2];
1035 int b8_stride = h->b8_stride;
1036 int b4_stride = h->b_stride;
1038 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1040 if(IS_INTERLACED(*mb_type)){
1041 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1042 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1044 l1ref0 -= 2*b8_stride;
1045 l1ref1 -= 2*b8_stride;
1046 l1mv0 -= 4*b4_stride;
1047 l1mv1 -= 4*b4_stride;
1052 int cur_poc = s->current_picture_ptr->poc;
1053 int *col_poc = h->ref_list[1]->field_poc;
1054 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1055 int dy = 2*col_parity - (s->mb_y&1);
1057 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1058 l1ref0 += dy*b8_stride;
1059 l1ref1 += dy*b8_stride;
1060 l1mv0 += 2*dy*b4_stride;
1061 l1mv1 += 2*dy*b4_stride;
1065 for(i8=0; i8<4; i8++){
1068 int xy8 = x8+y8*b8_stride;
1069 int xy4 = 3*x8+y8*b4_stride;
1072 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1074 h->sub_mb_type[i8] = sub_mb_type;
1076 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1077 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1078 if(!IS_INTRA(mb_types_col[y8])
1079 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1080 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1082 a= pack16to32(mv[0][0],mv[0][1]);
1084 b= pack16to32(mv[1][0],mv[1][1]);
1086 a= pack16to32(mv[0][0],mv[0][1]);
1087 b= pack16to32(mv[1][0],mv[1][1]);
1089 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1090 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1092 }else if(IS_16X16(*mb_type)){
1095 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1096 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1097 if(!IS_INTRA(mb_type_col)
1098 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1099 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1100 && (h->x264_build>33 || !h->x264_build)))){
1102 a= pack16to32(mv[0][0],mv[0][1]);
1104 b= pack16to32(mv[1][0],mv[1][1]);
1106 a= pack16to32(mv[0][0],mv[0][1]);
1107 b= pack16to32(mv[1][0],mv[1][1]);
1109 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1110 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1112 for(i8=0; i8<4; i8++){
1113 const int x8 = i8&1;
1114 const int y8 = i8>>1;
1116 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1118 h->sub_mb_type[i8] = sub_mb_type;
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1122 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1123 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1126 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1127 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1128 && (h->x264_build>33 || !h->x264_build)))){
1129 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1130 if(IS_SUB_8X8(sub_mb_type)){
1131 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1132 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1134 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1139 for(i4=0; i4<4; i4++){
1140 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1141 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1143 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1145 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1151 }else{ /* direct temporal mv pred */
1152 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1153 const int *dist_scale_factor = h->dist_scale_factor;
1156 if(IS_INTERLACED(*mb_type)){
1157 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1158 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1159 dist_scale_factor = h->dist_scale_factor_field;
1161 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1162 /* FIXME assumes direct_8x8_inference == 1 */
1163 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1164 int mb_types_col[2];
1167 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1168 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1169 | (*mb_type & MB_TYPE_INTERLACED);
1170 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1172 if(IS_INTERLACED(*mb_type)){
1173 /* frame to field scaling */
1174 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1175 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1177 l1ref0 -= 2*h->b8_stride;
1178 l1ref1 -= 2*h->b8_stride;
1179 l1mv0 -= 4*h->b_stride;
1180 l1mv1 -= 4*h->b_stride;
1184 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1185 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1187 *mb_type |= MB_TYPE_16x8;
1189 *mb_type |= MB_TYPE_8x8;
1191 /* field to frame scaling */
1192 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1193 * but in MBAFF, top and bottom POC are equal */
1194 int dy = (s->mb_y&1) ? 1 : 2;
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1197 l1ref0 += dy*h->b8_stride;
1198 l1ref1 += dy*h->b8_stride;
1199 l1mv0 += 2*dy*h->b_stride;
1200 l1mv1 += 2*dy*h->b_stride;
1203 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1205 *mb_type |= MB_TYPE_16x16;
1207 *mb_type |= MB_TYPE_8x8;
1210 for(i8=0; i8<4; i8++){
1211 const int x8 = i8&1;
1212 const int y8 = i8>>1;
1214 const int16_t (*l1mv)[2]= l1mv0;
1216 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 h->sub_mb_type[i8] = sub_mb_type;
1220 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1221 if(IS_INTRA(mb_types_col[y8])){
1222 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1223 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1224 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1230 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1232 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1235 scale = dist_scale_factor[ref0];
1236 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1239 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1240 int my_col = (mv_col[1]<<y_shift)/2;
1241 int mx = (scale * mv_col[0] + 128) >> 8;
1242 int my = (scale * my_col + 128) >> 8;
1243 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1244 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col)){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1261 : map_col_to_list0[1][l1ref1[0]];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col)){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*h->b8_stride];
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (mv_cache/ref_cache, and for CABAC the
 * mvd_cache and direct flags) back into the frame-wide tables of
 * s->current_picture for use by later MBs and the loop filter.
 * NOTE(review): elided excerpt — some loop headers/branches are not shown.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
/* destination offsets in 4x4 (b_xy) and 8x8 (b8_xy) block units */
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
/* copy each 4x4 row of MVs as two 64bit stores (4 int16 pairs per row) */
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
/* skipped MBs carry no mv deltas; zero the mvd table instead */
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block, taken from the cache corners */
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B slices with CABAC also record which 8x8 partitions used direct mode */
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Unescapes one NAL unit: parses nal_ref_idc/nal_unit_type from the first
 * byte and removes 0x000003 emulation-prevention sequences, returning a
 * pointer to the RBSP payload (src+1 itself when no escapes are present).
 * NOTE(review): elided excerpt — some branches are not shown.
 */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
/* scan two bytes at a time looking for a 00 00 0x pattern */
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
/* fast path: no emulation bytes found, return the input untouched */
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit that terminates the RBSP.
 * NOTE(review): body is mostly elided in this excerpt. */
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * idct transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequantization of the 16 luma DC
 * coefficients, writing the results back to the scattered DC positions
 * inside the 16x16 block array.
 * NOTE(review): elided excerpt — the horizontal pass between the two
 * visible loops is not fully shown.
 */
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
/* offsets of the DC terms of each 4x4 sub-block within the MB layout */
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
/* vertical butterfly pass into temp[] */
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* horizontal butterfly pass, then dequantize with (x*qmul + 128) >> 8 */
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * dct transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
 * mirrors h264_luma_dc_dequant_idct_c but halves instead of dequantizing.
 * NOTE(review): elided excerpt — loop headers are not shown.
 */
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* vertical butterfly pass into temp[] */
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* horizontal butterfly pass; >>1 applies the transform's normalization */
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse transform + dequantization of the 4 chroma DC coefficients,
 * stored at the corners of a 2x2 grid with the given strides.
 * NOTE(review): elided excerpt — the lines computing 'e' from a/b are not
 * shown here.
 */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
/* 2x2 butterfly outputs, dequantized with (x*qmul) >> 7 */
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the 4 chroma DC values (encoder side); same
 * layout as chroma_dc_dequant_idct_c but without dequantization.
 * NOTE(review): elided excerpt — the lines computing 'e' are not shown.
 */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
/* Maps a luma qscale to the chroma QP of component t via the PPS-derived
 * lookup table; the &0xff guards the table index. */
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale & 0xff];
1590 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1591 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block of coefficients in scan order (encoder side): rounds
 * with an intra/inter-dependent bias and returns the index of the last
 * non-zero coefficient. separate_dc selects special DC handling with a
 * different shift.
 * NOTE(review): elided excerpt — loop headers and several branches are
 * not shown.
 */
1592 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1594 const int * const quant_table= quant_coeff[qscale];
/* intra rounds with 1/3, inter with 1/6 of the quantization step */
1595 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* thresholds allow a single unsigned compare for the |level|>deadzone test */
1596 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597 const unsigned int threshold2= (threshold1<<1);
/* DC path, lower precision variant (QUANT_SHIFT-2) */
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607 int level= block[0]*quant_coeff[qscale+18][0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1613 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1616 // last_non_zero = i;
/* DC path, higher precision variant (QUANT_SHIFT+1) */
1621 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623 const unsigned int dc_threshold2= (dc_threshold1<<1);
1625 int level= block[0]*quant_table[0];
1626 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1628 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1631 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1634 // last_non_zero = i;
/* AC coefficients in scan order */
1647 const int j= scantable[i];
1648 int level= block[j]*quant_table[j];
1650 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1651 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652 if(((unsigned)(level+threshold1))>threshold2){
1654 level= (bias + level)>>QUANT_SHIFT;
1657 level= (bias - level)>>QUANT_SHIFT;
1666 return last_non_zero;
/**
 * Single-direction motion compensation of one partition: fetches the
 * quarter-pel luma and eighth-pel chroma prediction from 'pic' into
 * dest_y/cb/cr, spilling to the edge-emulation buffer when the reference
 * area leaves the picture.
 * NOTE(review): elided excerpt — some guards/else branches are not shown.
 */
1669 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671 int src_x_offset, int src_y_offset,
1672 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673 MpegEncContext * const s = &h->s;
/* mv_cache holds quarter-pel vectors; offsets are in quarter-pel too */
1674 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1675 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1676 const int luma_xy= (mx&3) + ((my&3)<<2);
1677 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678 uint8_t * src_cb, * src_cr;
1679 int extra_width= h->emu_edge_width;
1680 int extra_height= h->emu_edge_height;
1682 const int full_mx= mx>>2;
1683 const int full_my= my>>2;
1684 const int pic_width = 16*s->mb_width;
1685 const int pic_height = 16*s->mb_height >> MB_FIELD;
1687 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
/* sub-pel filters read 2 extra pixels on each side; shrink the margin */
1690 if(mx&7) extra_width -= 3;
1691 if(my&7) extra_height -= 3;
1693 if( full_mx < 0-extra_width
1694 || full_my < 0-extra_height
1695 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1696 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1697 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1702 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1704 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1707 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1710 // chroma offset when predicting from a field of opposite parity
1711 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1712 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1714 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1719 src_cb= s->edge_emu_buffer;
1721 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1724 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1725 src_cr= s->edge_emu_buffer;
1727 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted prediction of one partition: list0 uses the 'put' operators;
 * if list1 is also used, the operators are presumably switched to the
 * 'avg' variants in between (switch lines elided) so the second direction
 * averages with the first.
 */
1730 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1731 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732 int x_offset, int y_offset,
1733 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735 int list0, int list1){
1736 MpegEncContext * const s = &h->s;
1737 qpel_mc_func *qpix_op= qpix_put;
1738 h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to this partition, then convert the offsets into
 * picture coordinates for the source fetch */
1740 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1741 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1742 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1743 x_offset += 8*s->mb_x;
1744 y_offset += 8*(s->mb_y >> MB_FIELD);
1747 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1748 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750 qpix_op, chroma_op);
1753 chroma_op= chroma_avg;
1757 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1758 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760 qpix_op, chroma_op);
/**
 * Weighted prediction of one partition. Bi-directional: predicts list1
 * into a scratch buffer and blends with implicit (use_weight==2) or
 * explicit weights. Uni-directional: predicts in place then applies the
 * explicit luma/chroma weights and offsets.
 * NOTE(review): elided excerpt — some branch lines are not shown.
 */
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1773 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1774 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1775 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1776 x_offset += 8*s->mb_x;
1777 y_offset += 8*(s->mb_y >> MB_FIELD);
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 directly into dest, list1 into the scratchpad for blending */
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
1795 if(h->use_weight == 2){
/* implicit weights: POC-derived, summing to 64 (log2 denom 5) */
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
1798 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict, then weight in place */
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to the weighted or the standard MC path.
 * The weighted path is taken for explicit weighting (use_weight==1) or
 * for implicit bi-prediction whose weight differs from the plain 32/32
 * average; otherwise the cheaper unweighted path is used.
 */
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1849 static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
/* full-pel position of the MB's first MV, shifted ahead in x */
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous, so one prefetch covers cb and cr */
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs all inter prediction for one macroblock: walks the partition
 * tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and calls mc_part
 * for each piece with the matching qpel/chroma operators and weights.
 * NOTE(review): elided excerpt — the 8x8 loop header and some braces are
 * not shown.
 */
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869 MpegEncContext * const s = &h->s;
1870 const int mb_xy= h->mb_xy;
1871 const int mb_type= s->current_picture.mb_type[mb_xy];
1873 assert(IS_INTER(mb_type));
/* warm the cache for list0 before, list1 after the main work */
1875 prefetch_motion(h, 0);
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880 &weight_op[0], &weight_avg[0],
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1882 }else if(IS_16X8(mb_type)){
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885 &weight_op[1], &weight_avg[1],
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1891 }else if(IS_8X16(mb_type)){
1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894 &weight_op[2], &weight_avg[2],
1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1903 assert(IS_8X8(mb_type));
/* 8x8 partitions: each may be split further per its sub_mb_type */
1906 const int sub_mb_type= h->sub_mb_type[i];
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914 &weight_op[3], &weight_avg[3],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919 &weight_op[4], &weight_avg[4],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
1926 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928 &weight_op[5], &weight_avg[5],
1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936 assert(IS_SUB_4X4(sub_mb_type));
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset + (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942 &weight_op[6], &weight_avg[6],
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949 prefetch_motion(h, 1);
/**
 * One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants) from the static length/bits
 * arrays; 'done' guards against repeated initialization.
 */
1952 static av_cold void decode_init_vlc(void){
1953 static int done = 0;
1959 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1960 &chroma_dc_coeff_token_len [0], 1, 1,
1961 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1964 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1965 &coeff_token_len [i][0], 1, 1,
1966 &coeff_token_bits[i][0], 1, 1, 1);
1970 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1971 &chroma_dc_total_zeros_len [i][0], 1, 1,
1972 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1974 for(i=0; i<15; i++){
1975 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1976 &total_zeros_len [i][0], 1, 1,
1977 &total_zeros_bits[i][0], 1, 1, 1);
1981 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1982 &run_len [i][0], 1, 1,
1983 &run_bits[i][0], 1, 1, 1);
/* runs longer than 6 use a separate 16-entry table */
1985 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1986 &run_len [6][0], 1, 1,
1987 &run_bits[6][0], 1, 1, 1);
/**
 * Frees all per-context tables (prediction/cbp/mvd/direct tables, slice
 * table, mb2b mappings, SPS/PPS buffers) and the per-thread scratch
 * buffers of every slice-thread context.
 */
1991 static void free_tables(H264Context *h){
1994 av_freep(&h->intra4x4_pred_mode);
1995 av_freep(&h->chroma_pred_mode_table);
1996 av_freep(&h->cbp_table);
1997 av_freep(&h->mvd_table[0]);
1998 av_freep(&h->mvd_table[1]);
1999 av_freep(&h->direct_table);
2000 av_freep(&h->non_zero_count);
2001 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear the alias too */
2002 h->slice_table= NULL;
2004 av_freep(&h->mb2b_xy);
2005 av_freep(&h->mb2b8_xy);
2007 for(i = 0; i < MAX_SPS_COUNT; i++)
2008 av_freep(h->sps_buffers + i);
2010 for(i = 0; i < MAX_PPS_COUNT; i++)
2011 av_freep(h->pps_buffers + i);
2013 for(i = 0; i < h->s.avctx->thread_count; i++) {
2014 hx = h->thread_context[i];
2016 av_freep(&hx->top_borders[1]);
2017 av_freep(&hx->top_borders[0]);
2018 av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute 8x8 dequantization coefficients for every QP (0..51),
 * combining the standard dequant factors with the PPS 8x8 scaling
 * matrices. If both scaling matrices are identical, list 1 aliases
 * list 0's buffer to save the recomputation.
 * If the platform IDCT is not the C reference one, coefficients are
 * stored transposed so they match that IDCT's scan order.
 */
2022 static void init_dequant8_coeff_table(H264Context *h){
2024     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2025     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2026     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2028     for(i=0; i<2; i++ ){
/* identical intra/inter scaling matrices -> share buffer 0 */
2029         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2030             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2034         for(q=0; q<52; q++){
/* q = 6*shift + idx: dequant factor repeats every 6 QP steps, doubled */
2035             int shift = ff_div6[q];
2036             int idx = ff_rem6[q];
2038                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2039                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2040                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute 4x4 dequantization coefficients for all 6 scaling-matrix
 * lists and every QP (0..51). Lists with identical scaling matrices
 * share one buffer (the inner memcmp/alias scan). Coefficients are
 * stored transposed when the platform IDCT differs from the C one.
 */
2045 static void init_dequant4_coeff_table(H264Context *h){
2047     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2048     for(i=0; i<6; i++ ){
2049         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier list's buffer if the matrices match */
2051             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2052                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2059         for(q=0; q<52; q++){
/* +2 keeps extra precision; factor repeats/doubles every 6 QP steps */
2060             int shift = ff_div6[q] + 2;
2061             int idx = ff_rem6[q];
2063                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2064                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2065                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS/SPS: always the
 * 4x4 tables, the 8x8 tables only when 8x8 transform is enabled.
 * With lossless transform bypass, QP 0 entries are forced to the
 * neutral factor 1<<6 so dequantization becomes an identity.
 */
2070 static void init_dequant_tables(H264Context *h){
2072     init_dequant4_coeff_table(h);
2073     if(h->pps.transform_8x8_mode)
2074         init_dequant8_coeff_table(h);
2075     if(h->sps.transform_bypass){
2078                 h->dequant4_coeff[i][0][x] = 1<<6;
2079         if(h->pps.transform_8x8_mode)
2082                     h->dequant8_coeff[i][0][x] = 1<<6;
/**
 * Allocate the per-context decoding tables that depend on the frame
 * dimensions (mb_stride/mb_height must already be set).
 * needs width/height
 * Buffers are sized for big_mb_num = mb_stride*(mb_height+1), one
 * extra row to absorb out-of-frame neighbour accesses.
 * Returns 0 on success; CHECKED_ALLOCZ jumps to the (not visible
 * here) fail path on OOM, where free_tables() cleans up.
 */
2091 static int alloc_tables(H264Context *h){
2092     MpegEncContext * const s = &h->s;
2093     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2096     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2098     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2099     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2100     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2102     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2103     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2104     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2105     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table points past the guard border row */
2107     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2108     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock index -> 4x4-block / 8x8-block index lookup tables */
2110     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2111     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2112     for(y=0; y<s->mb_height; y++){
2113         for(x=0; x<s->mb_width; x++){
2114             const int mb_xy= x + y*s->mb_stride;
2115             const int b_xy = 4*x + 4*y*h->b_stride;
2116             const int b8_xy= 2*x + 2*y*h->b8_stride;
2118             h->mb2b_xy [mb_xy]= b_xy;
2119             h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is linesize-dependent; allocated later in frame_start() */
2123     s->obmc_scratchpad = NULL;
2125     if(!h->dequant4_coeff[0])
2126         init_dequant_tables(h);
/**
 * Mimic alloc_tables(), but for every context thread.
 * Slice-thread contexts share (not copy) the big tables owned by the
 * master context; only the linesize-dependent scratchpad stays
 * per-thread and is allocated lazily in frame_start().
 */
2137 static void clone_tables(H264Context *dst, H264Context *src){
2138     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2139     dst->non_zero_count           = src->non_zero_count;
2140     dst->slice_table              = src->slice_table;
2141     dst->cbp_table                = src->cbp_table;
2142     dst->mb2b_xy                  = src->mb2b_xy;
2143     dst->mb2b8_xy                 = src->mb2b8_xy;
2144     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2145     dst->mvd_table[0]             = src->mvd_table[0];
2146     dst->mvd_table[1]             = src->mvd_table[1];
2147     dst->direct_table             = src->direct_table;
2149     dst->s.obmc_scratchpad = NULL;
/* prediction function pointers depend only on codec id, re-init here */
2150     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
/**
 * Allocate buffers which are not shared amongst multiple threads.
 * Per-thread top border rows: 16 luma + 8+8 chroma bytes per mb column.
 * Returns 0 on success, -1 on allocation failure.
 */
2157 static int context_init(H264Context *h){
2158     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2159     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163     return -1; // free_tables will clean up for us
/**
 * One-time initialization shared by decoder (and encoder) setup:
 * copies dimensions/codec id from the AVCodecContext, initializes the
 * intra prediction DSP table, and seeds flat (all-16) scaling matrices
 * as the default until a PPS/SPS provides real ones.
 */
2166 static av_cold void common_init(H264Context *h){
2167     MpegEncContext * const s = &h->s;
2169     s->width = s->avctx->width;
2170     s->height = s->avctx->height;
2171     s->codec_id= s->avctx->codec->id;
2173     ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = dequant tables not yet built for any PPS */
2175     h->dequant_coeff_pps= -1;
2176     s->unrestricted_mv=1;
2177     s->decode=1; //FIXME
/* default scaling matrices: flat 16 (no scaling) until headers arrive */
2179     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2180     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up MpegEncContext defaults, H.264 common
 * state, output pixel format, and detect AVC ("avcC") style extradata
 * (first extradata byte == 1) vs. Annex-B.
 */
2183 static av_cold int decode_init(AVCodecContext *avctx){
2184     H264Context *h= avctx->priv_data;
2185     MpegEncContext * const s = &h->s;
2187     MPV_decode_defaults(s);
2192     s->out_format = FMT_H264;
2193     s->workaround_bugs= avctx->workaround_bugs;
2196 //    s->decode_mb= ff_h263_decode_mb;
2197     s->quarter_sample = 1;
/* SVQ3 uses full-range (JPEG) chroma, plain H.264 uses MPEG range */
2200     if(avctx->codec_id == CODEC_ID_SVQ3)
2201         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2203         avctx->pix_fmt= PIX_FMT_YUV420P;
/* avcC extradata starts with configurationVersion == 1 */
2207     if(avctx->extradata_size > 0 && avctx->extradata &&
2208        *(char *)avctx->extradata == 1){
/* slice-thread context 0 is the master context itself */
2215     h->thread_context[0] = h;
/**
 * Per-frame setup: start the MPV frame and error resilience, reset the
 * key_frame flag (IDR-driven for H.264, not pict_type-driven), build
 * the block_offset tables from the now-known linesizes, and lazily
 * allocate the per-thread bipred scratchpads.
 */
2219 static int frame_start(H264Context *h){
2220     MpegEncContext * const s = &h->s;
2223     if(MPV_frame_start(s, s->avctx) < 0)
2225     ff_er_frame_start(s);
2227      * MPV_frame_start uses pict_type to derive key_frame.
2228      * This is incorrect for H.264; IDR markings must be used.
2229      * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2230      * See decode_nal_units().
2232     s->current_picture_ptr->key_frame= 0;
2234     assert(s->linesize && s->uvlinesize);
/* block_offset: pixel offset of each 4x4 block inside the macroblock;
 * entries 0..23 are frame offsets, 24..47 the field (doubled-stride)
 * variants */
2236     for(i=0; i<16; i++){
2237         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2238         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2241         h->block_offset[16+i]=
2242         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243         h->block_offset[24+16+i]=
2244         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247     /* can't be in alloc_tables because linesize isn't known there.
2248      * FIXME: redo bipred weight to not require extra buffer? */
2249     for(i = 0; i < s->avctx->thread_count; i++)
2250         if(!h->thread_context[i]->s.obmc_scratchpad)
2251             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2253     /* some macroblocks will be accessed before they're available */
2254     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2255         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2257 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2259     // We mark the current picture as non reference after allocating it, so
2260     // that if we break out due to an error it can be released automatically
2261     // in the next MPV_frame_start().
2262     // SVQ3 as well as most other codecs have only last/next/current and thus
2263     // get released even with set reference, besides SVQ3 and others do not
2264     // mark frames as reference later "naturally".
2265     if(s->codec_id != CODEC_ID_SVQ3)
2266         s->current_picture_ptr->reference= 0;
/**
 * Save the right column (left_border for the next mb) and the bottom
 * row (top_borders[0] for the mb below) of the just-decoded
 * macroblock, before the deblocking filter overwrites them.
 * Chroma is skipped in gray-only decoding unless 'simple'.
 */
2270 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2271     MpegEncContext * const s = &h->s;
2275     src_cb -= uvlinesize;
2276     src_cr -= uvlinesize;
2278     // There are two lines saved, the line above the top macroblock of a pair,
2279     // and the line above the bottom macroblock
/* element 0 keeps the old top-left corner pixel */
2280     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2281     for(i=1; i<17; i++){
2282         h->left_border[i]= src_y[15+i*  linesize];
/* save the bottom luma row in two 8-byte chunks */
2285     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2286     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2288     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2289         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
2290         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2292             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
2293             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2295         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2296         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved left/top border pixels
 * with the picture data around the current macroblock, so intra
 * prediction sees unfiltered neighbours while deblocking is active.
 * deblocking_filter==2 restricts swapping to same-slice neighbours.
 */
2300 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2301     MpegEncContext * const s = &h->s;
2308     if(h->deblocking_filter == 2) {
/* filter across slice boundaries disabled: only touch same-slice edges */
2310         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2311         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2313         deblock_left = (s->mb_x > 0);
2314         deblock_top =  (s->mb_y > 0);
/* step back to the row/column just outside the macroblock */
2317     src_y  -=   linesize + 1;
2318     src_cb -= uvlinesize + 1;
2319     src_cr -= uvlinesize + 1;
2321 #define XCHG(a,b,t,xchg)\
2328         for(i = !deblock_top; i<17; i++){
2329             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2334         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2335         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right neighbour pixels of the next mb column */
2336         if(s->mb_x+1 < s->mb_width){
2337             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2341     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2343             for(i = !deblock_top; i<9; i++){
2344                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
2345                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2349             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2350             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock pair (32 luma rows, two bottom lines into
 * top_borders[0]/[1]) before deblocking modifies them.
 */
2355 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2356     MpegEncContext * const s = &h->s;
2359     src_y  -= 2 *   linesize;
2360     src_cb -= 2 * uvlinesize;
2361     src_cr -= 2 * uvlinesize;
2363     // There are two lines saved, the line above the top macroblock of a pair,
2364     // and the line above the bottom macroblock
2365     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2366     h->left_border[1]= h->top_borders[1][s->mb_x][15];
2367     for(i=2; i<34; i++){
2368         h->left_border[i]= src_y[15+i*  linesize];
2371     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
2372     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2373     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
2374     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2376     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2377         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
2378         h->left_border[34+   1]= h->top_borders[1][s->mb_x][16+7];
2379         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
2380         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2381         for(i=2; i<18; i++){
2382             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
2383             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2385         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2386         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2387         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2388         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap/restore the saved borders
 * around a whole macroblock pair so intra prediction uses unfiltered
 * neighbour pixels while the in-loop filter runs on the pair.
 */
2392 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2393     MpegEncContext * const s = &h->s;
2396     int deblock_left = (s->mb_x > 0);
/* > 1: need a full pair above, not just a single mb row */
2397     int deblock_top  = (s->mb_y > 1);
2399     tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2401     src_y  -= 2 *   linesize + 1;
2402     src_cb -= 2 * uvlinesize + 1;
2403     src_cr -= 2 * uvlinesize + 1;
2405 #define XCHG(a,b,t,xchg)\
2412         for(i = (!deblock_top)<<1; i<34; i++){
2413             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2418         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2419         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2420         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2421         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2422         if(s->mb_x+1 < s->mb_width){
2423             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2424             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2428     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2430             for(i = (!deblock_top) << 1; i<18; i++){
2431                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
2432                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2436             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2437             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2438             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2439             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Decode one macroblock to pixels: intra prediction or motion
 * compensation, residual IDCT for luma and chroma, then in-loop
 * deblocking. 'simple' (compile-time constant via the wrapper
 * functions) skips the expensive uncommon paths: MBAFF, intra PCM,
 * gray-only decoding, SVQ3 and encoding.
 */
2444 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2445     MpegEncContext * const s = &h->s;
2446     const int mb_x= s->mb_x;
2447     const int mb_y= s->mb_y;
2448     const int mb_xy= h->mb_xy;
2449     const int mb_type= s->current_picture.mb_type[mb_xy];
2450     uint8_t  *dest_y, *dest_cb, *dest_cr;
2451     int linesize, uvlinesize /*dct_offset*/;
2453     int *block_offset = &h->block_offset[0];
2454     const unsigned int bottom = mb_y & 1;
2455     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2456     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2457     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers for this mb inside the current picture planes */
2459     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2460     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2461     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2463     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2464     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use the field block_offset set,
 * and for the bottom field start one line down (dest adjusted back) */
2466     if (!simple && MB_FIELD) {
2467         linesize   = h->mb_linesize = s->linesize * 2;
2468         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2469         block_offset = &h->block_offset[24];
2470         if(mb_y&1){ //FIXME move out of this func?
2471             dest_y -= s->linesize*15;
2472             dest_cb-= s->uvlinesize*7;
2473             dest_cr-= s->uvlinesize*7;
/* re-encode ref cache so deblocking can compare field references */
2477         for(list=0; list<h->list_count; list++){
2478             if(!USES_LIST(mb_type, list))
2480             if(IS_16X16(mb_type)){
2481                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2482                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2484                 for(i=0; i<16; i+=4){
2485                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2486                     int ref = h->ref_cache[list][scan8[i]];
2488                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2494         linesize   = h->mb_linesize = s->linesize;
2495         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2496 //        dct_offset = s->linesize * 16;
/* pick IDCT add functions: bypass -> plain pixel add, else 8x8 or 4x4 */
2499     if(transform_bypass){
2501         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2502     }else if(IS_8x8DCT(mb_type)){
2503         idct_dc_add = s->dsp.h264_idct8_dc_add;
2504         idct_add = s->dsp.h264_idct8_add;
2506         idct_dc_add = s->dsp.h264_idct_dc_add;
2507         idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: swap in unfiltered borders of the pair above */
2510     if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2511        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2512         int mbt_y = mb_y&~1;
2513         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
2514         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2515         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2516         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2519     if (!simple && IS_INTRA_PCM(mb_type)) {
2522         // The pixels are stored in h->mb array in the same order as levels,
2523         // copy them in output in the correct order.
2524         for(i=0; i<16; i++) {
2525             for (y=0; y<4; y++) {
2526                 for (x=0; x<4; x++) {
2527                     *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2531         for(i=16; i<16+4; i++) {
2532             for (y=0; y<4; y++) {
2533                 for (x=0; x<4; x++) {
2534                     *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2538         for(i=20; i<20+4; i++) {
2539             for (y=0; y<4; y++) {
2540                 for (x=0; x<4; x++) {
2541                     *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2546         if(IS_INTRA(mb_type)){
/* hide filtered pixels from intra prediction */
2547             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2548                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2550             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2551                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2552                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2555             if(IS_INTRA4x4(mb_type)){
2556                 if(simple || !s->encoding){
2557                     if(IS_8x8DCT(mb_type)){
2558                         for(i=0; i<16; i+=4){
2559                             uint8_t * const ptr= dest_y + block_offset[i];
2560                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2561                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
2562                             h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2563                                                    (h->topright_samples_available<<i)&0x4000, linesize);
/* DC-only blocks take the cheap idct_dc_add path */
2565                                 if(nnz == 1 && h->mb[i*16])
2566                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2568                                     idct_add(ptr, h->mb + i*16, linesize);
2572                     for(i=0; i<16; i++){
2573                         uint8_t * const ptr= dest_y + block_offset[i];
2575                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* these modes need top-right samples; fabricate them from the last
 * available top pixel when the real ones are unavailable */
2578                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2579                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2580                             assert(mb_y || linesize <= block_offset[i]);
2581                             if(!topright_avail){
2582                                 tr= ptr[3 - linesize]*0x01010101;
2583                                 topright= (uint8_t*) &tr;
2585                                 topright= ptr + 4 - linesize;
2589                         h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2590                         nnz = h->non_zero_count_cache[ scan8[i] ];
2593                                 if(nnz == 1 && h->mb[i*16])
2594                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2596                                     idct_add(ptr, h->mb + i*16, linesize);
2598                                 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2603                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
/* intra 16x16: dequant+IDCT the luma DC plane (Hadamard) */
2605                     if(!transform_bypass)
2606                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2608                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2610             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2611                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation into the destination planes */
2613             hl_motion(h, dest_y, dest_cb, dest_cr,
2614                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2615                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2616                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals (intra4x4 already added them above) */
2620         if(!IS_INTRA4x4(mb_type)){
2622                 if(IS_INTRA16x16(mb_type)){
2623                     for(i=0; i<16; i++){
2624                         if(h->non_zero_count_cache[ scan8[i] ])
2625                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2626                         else if(h->mb[i*16])
2627                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2630                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2631                     for(i=0; i<16; i+=di){
2632                         int nnz = h->non_zero_count_cache[ scan8[i] ];
2634                             if(nnz==1 && h->mb[i*16])
2635                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2637                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2642                 for(i=0; i<16; i++){
2643                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2644                         uint8_t * const ptr= dest_y + block_offset[i];
2645                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals: DC dequant+IDCT first, then per-block adds */
2651         if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2652             uint8_t *dest[2] = {dest_cb, dest_cr};
2653             if(transform_bypass){
2654                 idct_add = idct_dc_add = s->dsp.add_pixels4;
2656                 idct_add = s->dsp.h264_idct_add;
2657                 idct_dc_add = s->dsp.h264_idct_dc_add;
2658                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2659                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2662                 for(i=16; i<16+8; i++){
2663                     if(h->non_zero_count_cache[ scan8[i] ])
2664                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2665                     else if(h->mb[i*16])
2666                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2669                 for(i=16; i<16+8; i++){
2670                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2671                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2672                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* in-loop deblocking; MBAFF filters both mbs of a pair at once,
 * deferred until the bottom mb of the pair has been decoded */
2678     if(h->deblocking_filter) {
2679         if (!simple && FRAME_MBAFF) {
2680             //FIXME try deblocking one mb at a time?
2681             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2682             const int mb_y = s->mb_y - 1;
2683             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2684             const int mb_xy= mb_x + mb_y*s->mb_stride;
2685             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
2686             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2687             if (!bottom) return;
2688             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2689             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2690             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2692             if(IS_INTRA(mb_type_top | mb_type_bottom))
2693                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2695             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
/* temporarily rewind to the top mb of the pair to filter it */
2698             s->mb_y--; h->mb_xy -= s->mb_stride;
2699             tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2700             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2701             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2702             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2703             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2705             s->mb_y++; h->mb_xy += s->mb_stride;
2706             tprintf(h->s.avctx, "call mbaff filter_mb\n");
2707             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2708             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2709             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2710             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2712             tprintf(h->s.avctx, "call filter_mb\n");
2713             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2714             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2715             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2716             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2717             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/**
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * The constant simple=1 lets the always-inlined internal function drop
 * the MBAFF/PCM/gray/SVQ3 branches at compile time.
 */
2725 static void hl_decode_mb_simple(H264Context *h){
2726     hl_decode_mb_internal(h, 1);
/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 * av_noinline keeps this rarely-taken full path out of the hot caller.
 */
2732 static void av_noinline hl_decode_mb_complex(H264Context *h){
2733     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the fast simple path or the
 * complex path, based on features that require the full handling
 * (MBAFF, field mbs, PCM, non-H.264 codec, gray decoding, encoding,
 * or size-optimized builds).
 */
2736 static void hl_decode_mb(H264Context *h){
2737     MpegEncContext * const s = &h->s;
2738     const int mb_xy= h->mb_xy;
2739     const int mb_type= s->current_picture.mb_type[mb_xy];
2740     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2741                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
/* encoder reconstruction may be disabled entirely */
2743     if(ENABLE_H264_ENCODER && !s->decode)
2747         hl_decode_mb_complex(h);
2748     else hl_decode_mb_simple(h);
/**
 * Turn a frame Picture (modified in place) into a single-field view:
 * double the line strides, offset the data pointers by one line for
 * the bottom field, and set reference to the field parity.
 */
2751 static void pic_as_field(Picture *pic, const int parity){
2753     for (i = 0; i < 4; ++i) {
2754         if (parity == PICT_BOTTOM_FIELD)
2755             pic->data[i] += pic->linesize[i];
2756         pic->reference = parity;
2757         pic->linesize[i] *= 2;
/**
 * If 'src' contains a reference field of the requested parity, copy it
 * to 'dest' as a field picture (via pic_as_field) and adjust pic_id by
 * id_add. Returns 1 when a field was emitted, 0 otherwise.
 */
2761 static int split_field_copy(Picture *dest, Picture *src,
2762                             int parity, int id_add){
2763     int match = !!(src->reference & parity);
2767         pic_as_field(dest, parity);
2769         dest->pic_id += id_add;
/**
 * Split one reference list into field parts, interleaving by parity
 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
 * set to look at the actual start of data for that field.
 *
 * @param dest     output list
 * @param dest_len maximum number of fields to put in dest
 * @param src      the source reference list containing fields and/or field pairs
 *                 (aka short_ref/long_ref, or
 *                 refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
 * @param src_len  number of Picture's in source (pairs and unmatched fields)
 * @param parity   the parity of the picture being decoded/needing
 *                 these ref pics (PICT_{TOP,BOTTOM}_FIELD)
 * @return number of fields placed in dest
 */
2790 static int split_field_half_ref_list(Picture *dest, int dest_len,
2791                                      Picture *src,  int src_len,  int parity){
2792     int same_parity   = 1;
/* alternate: take the next same-parity field, then the next
 * opposite-parity field, as long as either cursor has candidates */
2798     for (out_i = 0; out_i < dest_len; out_i += field_output) {
2799         if (same_parity && same_i < src_len) {
2800             field_output = split_field_copy(dest + out_i, src + same_i,
2802             same_parity = !field_output;
2805         } else if (opp_i < src_len) {
2806             field_output = split_field_copy(dest + out_i, src + opp_i,
2807                                             PICT_FRAME - parity, 0);
2808             same_parity = field_output;
/**
 * Split the reference frame list into a reference field list.
 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
 * The input list contains both reference field pairs and
 * unmatched reference fields; it is ordered as spec describes
 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
 * unmatched field pairs are also present. Conceptually this is equivalent
 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
 *
 * @param dest    output reference list where ordered fields are to be placed
 * @param dest_len max number of fields to place at dest
 * @param src     source reference list, as described above
 * @param src_len number of pictures (pairs and unmatched fields) in src
 * @param parity  parity of field being currently decoded
 *                (one of PICT_{TOP,BOTTOM}_FIELD)
 * @param long_i  index into src array that holds first long reference picture,
 *                or src_len if no long refs present.
 */
2837 static int split_field_ref_list(Picture *dest, int dest_len,
2838                                 Picture *src,  int src_len,
2839                                 int parity,    int long_i){
/* short-term refs first, long-term refs appended after */
2841     int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2845     i += split_field_half_ref_list(dest, dest_len, src + long_i,
2846                                    src_len - long_i, parity);
/**
 * fills the default_ref_list.
 * Builds the initial (pre-reordering) reference lists: for B slices,
 * short-term refs are POC-sorted around the current picture and L1 is
 * the reverse walk of L0; for P slices, short-term refs come in frame_num
 * order followed by long-term refs. For field pictures the frame lists
 * are then split into field lists via split_field_ref_list().
 */
2853 static int fill_default_ref_list(H264Context *h){
2854     MpegEncContext * const s = &h->s;
2856     int smallest_poc_greater_than_current = -1;
2858     Picture sorted_short_ref[32];
2859     Picture field_entry_list[2][32];
2860     Picture *frame_list[2];
/* fields: build frame lists in a temp buffer, split afterwards */
2862     if (FIELD_PICTURE) {
2863         structure_sel = PICT_FRAME;
2864         frame_list[0] = field_entry_list[0];
2865         frame_list[1] = field_entry_list[1];
2868         frame_list[0] = h->default_ref_list[0];
2869         frame_list[1] = h->default_ref_list[1];
2872     if(h->slice_type_nos==FF_B_TYPE){
2879         /* sort frame according to poc in B slice */
2880         for(out_i=0; out_i<h->short_ref_count; out_i++){
2882             int best_poc=INT_MAX;
/* selection sort: next-larger POC above 'limit' each pass */
2884             for(i=0; i<h->short_ref_count; i++){
2885                 const int poc= h->short_ref[i]->poc;
2886                 if(poc > limit && poc < best_poc){
2892             assert(best_i != INT_MIN);
2895             sorted_short_ref[out_i]= *h->short_ref[best_i];
2896             tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
2897             if (-1 == smallest_poc_greater_than_current) {
2898                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2899                     smallest_poc_greater_than_current = out_i;
2904         tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2906         // find the largest poc
2907         for(list=0; list<2; list++){
/* L0 walks toward smaller POC, L1 toward larger (mirror directions) */
2910             int step= list ? -1 : 1;
2912             for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2914                 while(j<0 || j>= h->short_ref_count){
2915                     if(j != -99 && step == (list ? -1 : 1))
2918                     j= smallest_poc_greater_than_current + (step>>1);
2920                 sel = sorted_short_ref[j].reference | structure_sel;
2921                 if(sel != PICT_FRAME) continue;
2922                 frame_list[list][index  ]= sorted_short_ref[j];
2923                 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2925             short_len[list] = index;
/* append long-term refs; pic_id is the long-term index */
2927             for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2929                 if(h->long_ref[i] == NULL) continue;
2930                 sel = h->long_ref[i]->reference | structure_sel;
2931                 if(sel != PICT_FRAME) continue;
2933                 frame_list[ list ][index  ]= *h->long_ref[i];
2934                 frame_list[ list ][index++].pic_id= i;
2939         for(list=0; list<2; list++){
2941                 len[list] = split_field_ref_list(h->default_ref_list[list],
2945                                                  s->picture_structure,
2948             // swap the two first elements of L1 when L0 and L1 are identical
2949             if(list && len[0] > 1 && len[0] == len[1])
2950                 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2952                         FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* zero unused tail entries so stale pointers never leak through */
2956             if(len[list] < h->ref_count[ list ])
2957                 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* P/SP slices: short-term refs in stored order, then long-term refs */
2964         for(i=0; i<h->short_ref_count; i++){
2966             sel = h->short_ref[i]->reference | structure_sel;
2967             if(sel != PICT_FRAME) continue;
2968             frame_list[0][index  ]= *h->short_ref[i];
2969             frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2972         for(i = 0; i < 16; i++){
2974             if(h->long_ref[i] == NULL) continue;
2975             sel = h->long_ref[i]->reference | structure_sel;
2976             if(sel != PICT_FRAME) continue;
2977             frame_list[0][index  ]= *h->long_ref[i];
2978             frame_list[0][index++].pic_id= i;
2982             index = split_field_ref_list(h->default_ref_list[0],
2983                                          h->ref_count[0], frame_list[0],
2984                                          index, s->picture_structure,
2987         if(index < h->ref_count[0])
2988             memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2991     for (i=0; i<h->ref_count[0]; i++) {
2992         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2994     if(h->slice_type_nos==FF_B_TYPE){
2995         for (i=0; i<h->ref_count[1]; i++) {
2996             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3003 static void print_short_term(H264Context *h);
3004 static void print_long_term(H264Context *h);
3007 * Extract structure information about the picture described by pic_num in
3008 * the current decoding context (frame or field). Note that pic_num is
3009 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3010 * @param pic_num picture number for which to extract structure information
3011 * @param structure one of PICT_XXX describing structure of picture
3013 * @return frame number (short term) or long term index of picture
3014 * described by pic_num
3016 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3017 MpegEncContext * const s = &h->s;
/* default: the extracted picture has the same structure as the picture
 * currently being decoded */
3019 *structure = s->picture_structure;
3022 /* opposite field */
/* NOTE(review): the guarding condition is elided in this view; presumably
 * taken when pic_num designates the opposite parity field — confirm against
 * the full source. Flipping PICT_FRAME toggles top/bottom field. */
3023 *structure ^= PICT_FRAME;
/**
 * Parses the ref_pic_list_reordering() slice-header syntax and applies the
 * requested reordering to h->ref_list[0]/[1], starting from the default lists.
 * @return 0 on success (error paths are elided in this view).
 */
3030 static int decode_ref_pic_list_reordering(H264Context *h){
3031 MpegEncContext * const s = &h->s;
3032 int list, index, pic_structure;
3034 print_short_term(h);
3036 if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before func
3038 for(list=0; list<h->list_count; list++){
/* start from the default reference list; reordering permutes it in place */
3039 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3041 if(get_bits1(&s->gb)){
3042 int pred= h->curr_pic_num;
3044 for(index=0; ; index++){
3045 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3046 unsigned int pic_id;
3048 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
3050 if(reordering_of_pic_nums_idc==3)
3053 if(index >= h->ref_count[list]){
3054 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3058 if(reordering_of_pic_nums_idc<3){
3059 if(reordering_of_pic_nums_idc<2){
/* idc 0/1: short-term picture addressed by a signed pic_num delta */
3060 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3063 if(abs_diff_pic_num > h->max_pic_num){
3064 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts, idc 1 adds; result wraps modulo max_pic_num */
3068 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3069 else pred+= abs_diff_pic_num;
3070 pred &= h->max_pic_num - 1;
3072 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search the short-term list, newest entries last */
3074 for(i= h->short_ref_count-1; i>=0; i--){
3075 ref = h->short_ref[i];
3076 assert(ref->reference);
3077 assert(!ref->long_ref);
3078 if(ref->data[0] != NULL &&
3079 ref->frame_num == frame_num &&
3080 (ref->reference & pic_structure) &&
3081 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc 2: long-term picture addressed directly by index */
3088 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3090 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3093 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3096 ref = h->long_ref[long_idx];
3097 assert(!(ref && !ref->reference));
3098 if(ref && (ref->reference & pic_structure)){
3099 ref->pic_id= pic_id;
3100 assert(ref->long_ref);
3108 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3109 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift duplicates down and insert the selected picture at 'index' */
3111 for(i=index; i+1<h->ref_count[list]; i++){
3112 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3115 for(; i > index; i--){
3116 h->ref_list[list][i]= h->ref_list[list][i-1];
3118 h->ref_list[list][index]= *ref;
/* NOTE(review): for field decoding the inserted entry is narrowed to the
 * requested field; the guarding condition is elided here. */
3120 pic_as_field(&h->ref_list[list][index], pic_structure);
3124 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* replace any still-empty slot with the current picture to avoid
 * dereferencing a NULL frame later */
3130 for(list=0; list<h->list_count; list++){
3131 for(index= 0; index < h->ref_count[list]; index++){
3132 if(!h->ref_list[list][index].data[0])
3133 h->ref_list[list][index]= s->current_picture;
/* temporal direct mode needs the distance scale factors */
3137 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3138 direct_dist_scale_factor(h);
3139 direct_ref_list_init(h);
/**
 * Builds per-field reference entries for MBAFF decoding: for every frame in
 * ref_list[list][i], entries [16+2*i] (top field) and [16+2*i+1] (bottom
 * field) are synthesized, and the weighted-prediction tables are duplicated
 * to match.
 */
3143 static void fill_mbaff_ref_list(H264Context *h){
3145 for(list=0; list<2; list++){ //FIXME try list_count
3146 for(i=0; i<h->ref_count[list]; i++){
3147 Picture *frame = &h->ref_list[list][i];
3148 Picture *field = &h->ref_list[list][16+2*i];
/* a field has twice the line stride of the frame it belongs to */
3151 field[0].linesize[j] <<= 1;
3152 field[0].reference = PICT_TOP_FIELD;
/* bottom field: same as top but data pointers advanced by one frame line */
3153 field[1] = field[0];
3155 field[1].data[j] += frame->linesize[j];
3156 field[1].reference = PICT_BOTTOM_FIELD;
/* both fields inherit the frame's explicit weights/offsets */
3158 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3159 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3161 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3162 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are duplicated along both axes of the [ref0][ref1] table */
3166 for(j=0; j<h->ref_count[1]; j++){
3167 for(i=0; i<h->ref_count[0]; i++)
3168 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3169 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3170 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the pred_weight_table() slice-header syntax (explicit weighted
 * prediction): per-list, per-reference luma and chroma weights and offsets.
 * References without a transmitted weight get the default weight
 * (1 << log2_denom) and offset 0.
 */
3174 static int pred_weight_table(H264Context *h){
3175 MpegEncContext * const s = &h->s;
3177 int luma_def, chroma_def;
3180 h->use_weight_chroma= 0;
3181 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3182 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3183 luma_def = 1<<h->luma_log2_weight_denom;
3184 chroma_def = 1<<h->chroma_log2_weight_denom;
3186 for(list=0; list<2; list++){
3187 for(i=0; i<h->ref_count[list]; i++){
3188 int luma_weight_flag, chroma_weight_flag;
3190 luma_weight_flag= get_bits1(&s->gb);
3191 if(luma_weight_flag){
3192 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3193 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only a non-default weight/offset actually enables weighting */
3194 if( h->luma_weight[list][i] != luma_def
3195 || h->luma_offset[list][i] != 0)
3198 h->luma_weight[list][i]= luma_def;
3199 h->luma_offset[list][i]= 0;
3202 chroma_weight_flag= get_bits1(&s->gb);
3203 if(chroma_weight_flag){
/* NOTE(review): a loop over the two chroma planes is elided here */
3206 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3207 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3208 if( h->chroma_weight[list][i][j] != chroma_def
3209 || h->chroma_offset[list][i][j] != 0)
3210 h->use_weight_chroma= 1;
3215 h->chroma_weight[list][i][j]= chroma_def;
3216 h->chroma_offset[list][i][j]= 0;
/* only B slices carry a second (list 1) weight table */
3220 if(h->slice_type_nos != FF_B_TYPE) break;
3222 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Initializes the implicit bi-prediction weight table (weighted_bipred_idc==2)
 * from the POC distances between the current picture and each (ref0, ref1)
 * pair, per H.264 8.4.2.3.2.
 */
3226 static void implicit_weight_table(H264Context *h){
3227 MpegEncContext * const s = &h->s;
3229 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric reference pair: weighting is a no-op, skip the table */
3231 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3232 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3234 h->use_weight_chroma= 0;
3239 h->use_weight_chroma= 2;
3240 h->luma_log2_weight_denom= 5;
3241 h->chroma_log2_weight_denom= 5;
3243 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3244 int poc0 = h->ref_list[0][ref0].poc;
3245 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3246 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb are the clipped POC distances; tx approximates 16384/td */
3247 int td = av_clip(poc1 - poc0, -128, 127);
3249 int tb = av_clip(cur_poc - poc0, -128, 127);
3250 int tx = (16384 + (FFABS(td) >> 1)) / td;
3251 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factors fall back to equal weighting (32/32) */
3252 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3253 h->implicit_weight[ref0][ref1] = 32;
3255 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3257 h->implicit_weight[ref0][ref1] = 32;
3263 * Mark a picture as no longer needed for reference. The refmask
3264 * argument allows unreferencing of individual fields or the whole frame.
3265 * If the picture becomes entirely unreferenced, but is being held for
3266 * display purposes, it is marked as such.
3267 * @param refmask mask of fields to unreference; the mask is bitwise
3268 * anded with the reference marking of pic
3269 * @return non-zero if pic becomes entirely unreferenced (except possibly
3270 * for display purposes), zero if one of the fields remains in
3273 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* keep only the fields allowed by refmask; non-zero means some field is
 * still referenced and the picture must be kept as a reference */
3275 if (pic->reference &= refmask) {
/* fully unreferenced: if the picture is still queued for display, tag it
 * DELAYED_PIC_REF so its buffer is not recycled before being output */
3278 for(i = 0; h->delayed_pic[i]; i++)
3279 if(pic == h->delayed_pic[i]){
3280 pic->reference=DELAYED_PIC_REF;
3288 * instantaneous decoder refresh.
/* IDR: drop every long-term and short-term reference picture, per the
 * "no_output_of_prior_pics"-independent part of 8.2.5 */
3290 static void idr(H264Context *h){
3293 for(i=0; i<16; i++){
3294 if (h->long_ref[i] != NULL) {
/* refmask 0 unreferences the whole picture (both fields) */
3295 unreference_pic(h, h->long_ref[i], 0);
3296 h->long_ref[i]= NULL;
3299 h->long_ref_count=0;
3301 for(i=0; i<h->short_ref_count; i++){
3302 unreference_pic(h, h->short_ref[i], 0);
3303 h->short_ref[i]= NULL;
3305 h->short_ref_count=0;
3308 /* forget old pics after a seek */
3309 static void flush_dpb(AVCodecContext *avctx){
3310 H264Context *h= avctx->priv_data;
/* release all pictures waiting in the delayed-output queue */
3312 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3313 if(h->delayed_pic[i])
3314 h->delayed_pic[i]->reference= 0;
3315 h->delayed_pic[i]= NULL;
/* reset output ordering so the next picture is always emittable */
3317 h->outputed_poc= INT_MIN;
3319 if(h->s.current_picture_ptr)
3320 h->s.current_picture_ptr->reference= 0;
3321 h->s.first_field= 0;
/* let the generic MPEG layer drop its own buffers too */
3322 ff_mpeg_flush(avctx);
3326 * Find a Picture in the short term reference list by frame number.
3327 * @param frame_num frame number to search for
3328 * @param idx the index into h->short_ref where returned picture is found
3329 * undefined if no picture found.
3330 * @return pointer to the found picture, or NULL if no pic with the provided
3331 * frame number is found
3333 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3334 MpegEncContext * const s = &h->s;
/* linear scan of the short-term list; it holds at most 16 entries */
3337 for(i=0; i<h->short_ref_count; i++){
3338 Picture *pic= h->short_ref[i];
3339 if(s->avctx->debug&FF_DEBUG_MMCO)
3340 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
/* NOTE(review): the match path (storing i via *idx and returning pic) is
 * elided in this view */
3341 if(pic->frame_num == frame_num) {
3350 * Remove a picture from the short term reference list by its index in
3351 * that list. This does no checking on the provided index; it is assumed
3352 * to be valid. Other list entries are shifted down.
3353 * @param i index into h->short_ref of picture to remove.
3355 static void remove_short_at_index(H264Context *h, int i){
3356 assert(i >= 0 && i < h->short_ref_count);
3357 h->short_ref[i]= NULL;
/* compact the list: shift the remaining pointers down over slot i */
3358 if (--h->short_ref_count)
3359 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3364 * @return the removed picture or NULL if an error occurs
/* Removes the short-term reference with the given frame_num from the list
 * (the picture itself is not unreferenced here). */
3366 static Picture * remove_short(H264Context *h, int frame_num){
3367 MpegEncContext * const s = &h->s;
3371 if(s->avctx->debug&FF_DEBUG_MMCO)
3372 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3374 pic = find_short(h, frame_num, &i);
/* NOTE(review): guarded on pic != NULL; the condition is elided here */
3376 remove_short_at_index(h, i);
3382 * Remove a picture from the long term reference list by its index in
3383 * that list. This does no checking on the provided index; it is assumed
3384 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3385 * @param i index into h->long_ref of picture to remove.
3387 static void remove_long_at_index(H264Context *h, int i){
/* long_ref is a sparse 16-slot array: clear the slot, no compaction */
3388 h->long_ref[i]= NULL;
3389 h->long_ref_count--;
3394 * @return the removed picture or NULL if an error occurs
/* Removes and returns the long-term reference at index i (may be NULL). */
3396 static Picture * remove_long(H264Context *h, int i){
3399 pic= h->long_ref[i];
/* NOTE(review): guarded on pic != NULL; the condition is elided here */
3401 remove_long_at_index(h, i);
3407 * print short term list
3409 static void print_short_term(H264Context *h) {
/* debug-only dump, gated on -debug mmco */
3411 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3412 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3413 for(i=0; i<h->short_ref_count; i++){
3414 Picture *pic= h->short_ref[i];
3415 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3421 * print long term list
3423 static void print_long_term(H264Context *h) {
/* debug-only dump; long_ref is sparse, so all 16 slots are scanned */
3425 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3426 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3427 for(i = 0; i < 16; i++){
3428 Picture *pic= h->long_ref[i];
3430 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3437 * Executes the reference picture marking (memory management control operations).
/**
 * Applies the decoded MMCO commands (H.264 8.2.5) and then performs the
 * default sliding-window marking of the current picture, with extra handling
 * for field pairs and over-full reference lists.
 */
3439 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3440 MpegEncContext * const s = &h->s;
3442 int current_ref_assigned=0;
3445 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3446 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3448 for(i=0; i<mmco_count; i++){
3449 int structure, frame_num, unref_pic;
3450 if(s->avctx->debug&FF_DEBUG_MMCO)
3451 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3453 switch(mmco[i].opcode){
/* MMCO 1: mark a short-term picture (or one of its fields) unused */
3454 case MMCO_SHORT2UNUSED:
3455 if(s->avctx->debug&FF_DEBUG_MMCO)
3456 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3457 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3458 pic = find_short(h, frame_num, &j);
/* structure ^ PICT_FRAME keeps the *other* field referenced */
3460 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3461 remove_short_at_index(h, j);
3462 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3463 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* MMCO 3: move a short-term picture to a long-term index */
3465 case MMCO_SHORT2LONG:
3466 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3467 h->long_ref[mmco[i].long_arg]->frame_num ==
3468 mmco[i].short_pic_num / 2) {
3469 /* do nothing, we've already moved this field pair. */
3471 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* evict any picture currently holding that long-term slot */
3473 pic= remove_long(h, mmco[i].long_arg);
3474 if(pic) unreference_pic(h, pic, 0);
3476 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3477 if (h->long_ref[ mmco[i].long_arg ]){
3478 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3479 h->long_ref_count++;
/* MMCO 2: mark a long-term picture (or one of its fields) unused */
3483 case MMCO_LONG2UNUSED:
3484 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3485 pic = h->long_ref[j];
3487 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3488 remove_long_at_index(h, j);
3489 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3490 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* NOTE(review): case label elided here — this is the MMCO_LONG branch,
 * assigning the current picture a long-term index */
3494 if (FIELD_PICTURE && !s->first_field) {
3495 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3496 /* Just mark second field as referenced */
3498 } else if (s->current_picture_ptr->reference) {
3499 /* First field in pair is in short term list or
3500 * at a different long term index.
3501 * This is not allowed; see 7.4.3, notes 2 and 3.
3502 * Report the problem and keep the pair where it is,
3503 * and mark this field valid.
3505 av_log(h->s.avctx, AV_LOG_ERROR,
3506 "illegal long term reference assignment for second "
3507 "field in complementary field pair (first field is "
3508 "short term or has non-matching long index)\n");
3514 pic= remove_long(h, mmco[i].long_arg);
3515 if(pic) unreference_pic(h, pic, 0);
3517 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3518 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3519 h->long_ref_count++;
3522 s->current_picture_ptr->reference |= s->picture_structure;
3523 current_ref_assigned=1;
/* MMCO 4: shrink the allowed long-term index range */
3525 case MMCO_SET_MAX_LONG:
3526 assert(mmco[i].long_arg <= 16);
3527 // just remove the long term which index is greater than new max
3528 for(j = mmco[i].long_arg; j<16; j++){
3529 pic = remove_long(h, j);
3530 if (pic) unreference_pic(h, pic, 0);
/* NOTE(review): case label elided — MMCO_RESET: clear all references */
3534 while(h->short_ref_count){
3535 pic= remove_short(h, h->short_ref[0]->frame_num);
3536 if(pic) unreference_pic(h, pic, 0);
3538 for(j = 0; j < 16; j++) {
3539 pic= remove_long(h, j);
3540 if(pic) unreference_pic(h, pic, 0);
/* no MMCO took care of the current picture: handle the second field of a
 * complementary pair whose first field is already referenced */
3547 if (!current_ref_assigned && FIELD_PICTURE &&
3548 !s->first_field && s->current_picture_ptr->reference) {
3550 /* Second field of complementary field pair; the first field of
3551 * which is already referenced. If short referenced, it
3552 * should be first entry in short_ref. If not, it must exist
3553 * in long_ref; trying to put it on the short list here is an
3554 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3556 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3557 /* Just mark the second field valid */
3558 s->current_picture_ptr->reference = PICT_FRAME;
3559 } else if (s->current_picture_ptr->long_ref) {
3560 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3561 "assignment for second field "
3562 "in complementary field pair "
3563 "(first field is long term)\n");
3566 * First field in reference, but not in any sensible place on our
3567 * reference lists. This shouldn't happen unless reference
3568 * handling somewhere else is wrong.
3572 current_ref_assigned = 1;
/* default sliding window: prepend the current picture to short_ref */
3575 if(!current_ref_assigned){
3576 pic= remove_short(h, s->current_picture_ptr->frame_num);
3578 unreference_pic(h, pic, 0);
3579 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3582 if(h->short_ref_count)
3583 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3585 h->short_ref[0]= s->current_picture_ptr;
3586 h->short_ref[0]->long_ref=0;
3587 h->short_ref_count++;
3588 s->current_picture_ptr->reference |= s->picture_structure;
/* too many references (corrupt stream): discard one to avoid overrunning
 * short_ref/long_ref */
3591 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3593 /* We have too many reference frames, probably due to corrupted
3594 * stream. Need to discard one frame. Prevents overrun of the
3595 * short_ref and long_ref buffers.
3597 av_log(h->s.avctx, AV_LOG_ERROR,
3598 "number of reference frames exceeds max (probably "
3599 "corrupt input), discarding one\n");
3601 if (h->long_ref_count) {
/* NOTE(review): the search for an occupied long_ref slot is elided */
3602 for (i = 0; i < 16; ++i)
3607 pic = h->long_ref[i];
3608 remove_long_at_index(h, i);
/* otherwise drop the oldest short-term reference */
3610 pic = h->short_ref[h->short_ref_count - 1];
3611 remove_short_at_index(h, h->short_ref_count - 1);
3613 unreference_pic(h, pic, 0);
3616 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[], and
 * synthesizes a sliding-window MMCO_SHORT2UNUSED when the reference buffer
 * is full and no adaptive marking was transmitted.
 */
3621 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3622 MpegEncContext * const s = &h->s;
3625 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, folded into broken_link */
3626 s->broken_link= get_bits1(gb) -1;
3627 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3628 if(h->mmco[0].long_arg == -1)
/* long_term_reference_flag set: keep the IDR picture as long-term idx 0 */
3631 h->mmco[0].opcode= MMCO_LONG;
3635 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3636 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3637 MMCOOpcode opcode= get_ue_golomb(gb);
3639 h->mmco[i].opcode= opcode;
3640 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1, wrapped to a pic_num */
3641 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3642 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3643 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3647 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3648 unsigned int long_arg= get_ue_golomb(gb);
/* field pic_nums for LONG2UNUSED may legally reach 31; indices max 15 */
3649 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3650 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3653 h->mmco[i].long_arg= long_arg;
3656 if(opcode > (unsigned)MMCO_LONG){
3657 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3660 if(opcode == MMCO_END)
/* sliding window mode: buffer full means the oldest short ref must go */
3665 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3667 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3668 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3669 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3670 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* in field mode both fields of the oldest frame are unreferenced */
3672 if (FIELD_PICTURE) {
3673 h->mmco[0].short_pic_num *= 2;
3674 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3675 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count of the current picture for all three POC
 * types (H.264 8.2.1) and stores the per-field and overall POC in
 * s->current_picture_ptr.
 */
3686 static int init_poc(H264Context *h){
3687 MpegEncContext * const s = &h->s;
3688 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3691 if(h->nal_unit_type == NAL_IDR_SLICE){
3692 h->frame_num_offset= 0;
/* frame_num wrapped: advance the offset by one wrap period */
3694 if(h->frame_num < h->prev_frame_num)
3695 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3697 h->frame_num_offset= h->prev_frame_num_offset;
/* POC type 0: explicit poc_lsb with msb wrap detection */
3700 if(h->sps.poc_type==0){
3701 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3703 if(h->nal_unit_type == NAL_IDR_SLICE){
3708 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3709 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3710 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3711 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3713 h->poc_msb = h->prev_poc_msb;
3714 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3716 field_poc[1] = h->poc_msb + h->poc_lsb;
3717 if(s->picture_structure == PICT_FRAME)
3718 field_poc[1] += h->delta_poc_bottom;
/* POC type 1: POC derived from frame_num and SPS-signalled offsets */
3719 }else if(h->sps.poc_type==1){
3720 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3723 if(h->sps.poc_cycle_length != 0)
3724 abs_frame_num = h->frame_num_offset + h->frame_num;
3728 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3731 expected_delta_per_poc_cycle = 0;
3732 for(i=0; i < h->sps.poc_cycle_length; i++)
3733 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3735 if(abs_frame_num > 0){
3736 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3737 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3739 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3740 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3741 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3745 if(h->nal_ref_idc == 0)
3746 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3748 field_poc[0] = expectedpoc + h->delta_poc[0];
3749 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3751 if(s->picture_structure == PICT_FRAME)
3752 field_poc[1] += h->delta_poc[1];
/* POC type 2: POC follows decoding order; non-ref pictures sit between */
3755 if(h->nal_unit_type == NAL_IDR_SLICE){
3758 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3759 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* store the computed POCs on the current picture, per decoded field */
3765 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3766 s->current_picture_ptr->field_poc[0]= field_poc[0];
3767 s->current_picture_ptr->poc = field_poc[0];
3769 if(s->picture_structure != PICT_TOP_FIELD) {
3770 s->current_picture_ptr->field_poc[1]= field_poc[1];
3771 s->current_picture_ptr->poc = field_poc[1];
/* a complete frame/pair takes the smaller of its two field POCs */
3773 if(!FIELD_PICTURE || !s->first_field) {
3774 Picture *cur = s->current_picture_ptr;
3775 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3783 * initialize scan tables
/**
 * Builds the zigzag/field scan tables used for coefficient decoding. When a
 * permuted (SIMD) IDCT is in use, the scan orders are permuted to match its
 * coefficient layout; transform-bypass gets unpermuted *_q0 tables.
 */
3785 static void init_scan_tables(H264Context *h){
3786 MpegEncContext * const s = &h->s;
/* default C IDCT: plain scan order can be copied verbatim */
3788 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3789 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3790 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3792 for(i=0; i<16; i++){
/* swap the two 2-bit halves of the 4-bit index (IDCT permutation) */
3793 #define T(x) (x>>2) | ((x<<2) & 0xF)
3794 h->zigzag_scan[i] = T(zigzag_scan[i]);
3795 h-> field_scan[i] = T( field_scan[i]);
3799 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3800 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3801 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3802 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3803 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3805 for(i=0; i<64; i++){
/* swap row/column halves of the 6-bit index for the permuted 8x8 IDCT */
3806 #define T(x) (x>>3) | ((x&7)<<3)
3807 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3808 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3809 h->field_scan8x8[i] = T(field_scan8x8[i]);
3810 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* qp==0 (lossless bypass) must use the unpermuted scans */
3814 if(h->sps.transform_bypass){ //FIXME same ugly
3815 h->zigzag_scan_q0 = zigzag_scan;
3816 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3817 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3818 h->field_scan_q0 = field_scan;
3819 h->field_scan8x8_q0 = field_scan8x8;
3820 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3822 h->zigzag_scan_q0 = h->zigzag_scan;
3823 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3824 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3825 h->field_scan_q0 = h->field_scan;
3826 h->field_scan8x8_q0 = h->field_scan8x8;
3827 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3832 * Replicates H264 "master" context to thread contexts.
3834 static void clone_slice(H264Context *dst, H264Context *src)
/* shallow-copy the per-picture decoding state a slice thread needs;
 * pointers (current_picture_ptr, ref lists) are shared, not duplicated */
3836 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3837 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3838 dst->s.current_picture = src->s.current_picture;
3839 dst->s.linesize = src->s.linesize;
3840 dst->s.uvlinesize = src->s.uvlinesize;
3841 dst->s.first_field = src->s.first_field;
/* POC/frame_num bookkeeping so each thread derives the same POCs */
3843 dst->prev_poc_msb = src->prev_poc_msb;
3844 dst->prev_poc_lsb = src->prev_poc_lsb;
3845 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3846 dst->prev_frame_num = src->prev_frame_num;
3847 dst->short_ref_count = src->short_ref_count;
3849 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3850 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3851 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3852 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
/* dequant tables are small enough to copy rather than share */
3854 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3855 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3859 * decodes a slice header.
3860 * This will also call MPV_common_init() and frame_start() as needed.
3862 * @param h h264context
3863 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3865 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3867 static int decode_slice_header(H264Context *h, H264Context *h0){
3868 MpegEncContext * const s = &h->s;
3869 MpegEncContext * const s0 = &h0->s;
3870 unsigned int first_mb_in_slice;
3871 unsigned int pps_id;
3872 int num_ref_idx_active_override_flag;
3873 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3874 unsigned int slice_type, tmp, i, j;
3875 int default_ref_list_done = 0;
3876 int last_pic_structure;
3878 s->dropable= h->nal_ref_idc == 0;
3880 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3881 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3882 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3884 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3885 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3888 first_mb_in_slice= get_ue_golomb(&s->gb);
3890 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3891 h0->current_slice = 0;
3892 if (!s0->first_field)
3893 s->current_picture_ptr= NULL;
3896 slice_type= get_ue_golomb(&s->gb);
3898 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3903 h->slice_type_fixed=1;
3905 h->slice_type_fixed=0;
3907 slice_type= slice_type_map[ slice_type ];
3908 if (slice_type == FF_I_TYPE
3909 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3910 default_ref_list_done = 1;
3912 h->slice_type= slice_type;
3913 h->slice_type_nos= slice_type & 3;
3915 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3916 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3917 av_log(h->s.avctx, AV_LOG_ERROR,
3918 "B picture before any references, skipping\n");
3922 pps_id= get_ue_golomb(&s->gb);
3923 if(pps_id>=MAX_PPS_COUNT){
3924 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3927 if(!h0->pps_buffers[pps_id]) {
3928 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3931 h->pps= *h0->pps_buffers[pps_id];
3933 if(!h0->sps_buffers[h->pps.sps_id]) {
3934 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3937 h->sps = *h0->sps_buffers[h->pps.sps_id];
3939 if(h == h0 && h->dequant_coeff_pps != pps_id){
3940 h->dequant_coeff_pps = pps_id;
3941 init_dequant_tables(h);
3944 s->mb_width= h->sps.mb_width;
3945 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3947 h->b_stride= s->mb_width*4;
3948 h->b8_stride= s->mb_width*2;
3950 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3951 if(h->sps.frame_mbs_only_flag)
3952 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3954 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3956 if (s->context_initialized
3957 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3959 return -1; // width / height changed during parallelized decoding
3963 if (!s->context_initialized) {
3965 return -1; // we cant (re-)initialize context during parallel decoding
3966 if (MPV_common_init(s) < 0)
3970 init_scan_tables(h);
3973 for(i = 1; i < s->avctx->thread_count; i++) {
3975 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3976 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3977 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3980 init_scan_tables(c);
3984 for(i = 0; i < s->avctx->thread_count; i++)
3985 if(context_init(h->thread_context[i]) < 0)
3988 s->avctx->width = s->width;
3989 s->avctx->height = s->height;
3990 s->avctx->sample_aspect_ratio= h->sps.sar;
3991 if(!s->avctx->sample_aspect_ratio.den)
3992 s->avctx->sample_aspect_ratio.den = 1;
3994 if(h->sps.timing_info_present_flag){
3995 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3996 if(h->x264_build > 0 && h->x264_build < 44)
3997 s->avctx->time_base.den *= 2;
3998 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3999 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4003 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4006 h->mb_aff_frame = 0;
4007 last_pic_structure = s0->picture_structure;
4008 if(h->sps.frame_mbs_only_flag){
4009 s->picture_structure= PICT_FRAME;
4011 if(get_bits1(&s->gb)) { //field_pic_flag
4012 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4014 s->picture_structure= PICT_FRAME;
4015 h->mb_aff_frame = h->sps.mb_aff;
4019 if(h0->current_slice == 0){
4020 /* See if we have a decoded first field looking for a pair... */
4021 if (s0->first_field) {
4022 assert(s0->current_picture_ptr);
4023 assert(s0->current_picture_ptr->data[0]);
4024 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4026 /* figure out if we have a complementary field pair */
4027 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4029 * Previous field is unmatched. Don't display it, but let it
4030 * remain for reference if marked as such.
4032 s0->current_picture_ptr = NULL;
4033 s0->first_field = FIELD_PICTURE;
4036 if (h->nal_ref_idc &&
4037 s0->current_picture_ptr->reference &&
4038 s0->current_picture_ptr->frame_num != h->frame_num) {
4040 * This and previous field were reference, but had
4041 * different frame_nums. Consider this field first in
4042 * pair. Throw away previous field except for reference
4045 s0->first_field = 1;
4046 s0->current_picture_ptr = NULL;
4049 /* Second field in complementary pair */
4050 s0->first_field = 0;
4055 /* Frame or first field in a potentially complementary pair */
4056 assert(!s0->current_picture_ptr);
4057 s0->first_field = FIELD_PICTURE;
4060 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4061 s0->first_field = 0;
4068 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4070 assert(s->mb_num == s->mb_width * s->mb_height);
4071 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4072 first_mb_in_slice >= s->mb_num){
4073 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4076 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4077 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4078 if (s->picture_structure == PICT_BOTTOM_FIELD)
4079 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4080 assert(s->mb_y < s->mb_height);
4082 if(s->picture_structure==PICT_FRAME){
4083 h->curr_pic_num= h->frame_num;
4084 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4086 h->curr_pic_num= 2*h->frame_num + 1;
4087 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4090 if(h->nal_unit_type == NAL_IDR_SLICE){
4091 get_ue_golomb(&s->gb); /* idr_pic_id */
4094 if(h->sps.poc_type==0){
4095 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4097 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4098 h->delta_poc_bottom= get_se_golomb(&s->gb);
4102 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4103 h->delta_poc[0]= get_se_golomb(&s->gb);
4105 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4106 h->delta_poc[1]= get_se_golomb(&s->gb);
4111 if(h->pps.redundant_pic_cnt_present){
4112 h->redundant_pic_count= get_ue_golomb(&s->gb);
4115 //set defaults, might be overridden a few lines later
4116 h->ref_count[0]= h->pps.ref_count[0];
4117 h->ref_count[1]= h->pps.ref_count[1];
4119 if(h->slice_type_nos != FF_I_TYPE){
4120 if(h->slice_type_nos == FF_B_TYPE){
4121 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4123 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4125 if(num_ref_idx_active_override_flag){
4126 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4127 if(h->slice_type_nos==FF_B_TYPE)
4128 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4130 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4131 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4132 h->ref_count[0]= h->ref_count[1]= 1;
4136 if(h->slice_type_nos == FF_B_TYPE)
4143 if(!default_ref_list_done){
4144 fill_default_ref_list(h);
4147 if(decode_ref_pic_list_reordering(h) < 0)
4150 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4151 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4152 pred_weight_table(h);
4153 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4154 implicit_weight_table(h);
4159 decode_ref_pic_marking(h0, &s->gb);
4162 fill_mbaff_ref_list(h);
4164 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4165 tmp = get_ue_golomb(&s->gb);
4167 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4170 h->cabac_init_idc= tmp;
4173 h->last_qscale_diff = 0;
4174 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4176 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4180 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4181 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4182 //FIXME qscale / qp ... stuff
4183 if(h->slice_type == FF_SP_TYPE){
4184 get_bits1(&s->gb); /* sp_for_switch_flag */
4186 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4187 get_se_golomb(&s->gb); /* slice_qs_delta */
4190 h->deblocking_filter = 1;
4191 h->slice_alpha_c0_offset = 0;
4192 h->slice_beta_offset = 0;
4193 if( h->pps.deblocking_filter_parameters_present ) {
4194 tmp= get_ue_golomb(&s->gb);
4196 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4199 h->deblocking_filter= tmp;
4200 if(h->deblocking_filter < 2)
4201 h->deblocking_filter^= 1; // 1<->0
4203 if( h->deblocking_filter ) {
4204 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4205 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4209 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4210 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4211 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4212 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4213 h->deblocking_filter= 0;
4215 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4216 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4217 /* Cheat slightly for speed:
4218 Do not bother to deblock across slices. */
4219 h->deblocking_filter = 2;
4221 h0->max_contexts = 1;
4222 if(!h0->single_decode_warning) {
4223 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4224 h0->single_decode_warning = 1;
4227 return 1; // deblocking switched inside frame
4232 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4233 slice_group_change_cycle= get_bits(&s->gb, ?);
4236 h0->last_slice_type = slice_type;
4237 h->slice_num = ++h0->current_slice;
4240 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4244 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4245 +(h->ref_list[j][i].reference&3);
4248 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4249 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4251 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4252 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4254 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4256 av_get_pict_type_char(h->slice_type),
4257 pps_id, h->frame_num,
4258 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4259 h->ref_count[0], h->ref_count[1],
4261 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4263 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4264 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: the number of leading zero bits before the
 * first 1 bit in the bitstream (used while decoding residual levels).
 * NOTE(review): the local declarations and the final return statement are
 * not visible in this excerpt — verify against the complete file.
 */
4274 static inline int get_level_prefix(GetBitContext *gb){
4278 OPEN_READER(re, gb);
4279 UPDATE_CACHE(re, gb);
4280 buf=GET_CACHE(re, gb);
/* 32 - position of the highest set bit == leading-zero count + 1 */
4282 log= 32 - av_log2(buf);
4284 print_bin(buf>>(32-log), log);
4285 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating 1 bit */
4288 LAST_SKIP_BITS(re, gb, log);
4289 CLOSE_READER(re, gb);
/**
 * Decides whether the 8x8 transform may be used for the current macroblock,
 * based on its sub-macroblock types (8x8 DCT requires all partitions to be
 * SUB_8X8, or direct with direct_8x8_inference_flag set).
 * NOTE(review): the surrounding loop and return are missing from this
 * excerpt — compare with the full file before modifying.
 */
4294 static inline int get_dct8x8_allowed(H264Context *h){
4297 if(!IS_SUB_8X8(h->sub_mb_type[i])
4298 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4305 * decodes a residual block.
4306 * @param n block index
4307 * @param scantable scantable
4308 * @param max_coeff number of coefficients in the block
4309 * @return <0 if an error occurred
/**
 * Decodes one CAVLC residual block: coeff_token (total_coeff + trailing
 * ones), level values, total_zeros and run_before, then scatters the
 * (optionally dequantized) coefficients into block[] via scantable.
 * @param n        block index (CHROMA_DC / LUMA_DC / luma-AC / chroma-AC)
 * @param scantable scan order for this block
 * @param qmul     dequant multipliers; the DC path visible below stores
 *                 levels without qmul
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred
 * NOTE(review): several interior lines (else branches, closing braces,
 * the level[] declaration) are missing from this excerpt — verify against
 * the complete file before editing.
 */
4311 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4312 MpegEncContext * const s = &h->s;
/* maps the predicted nnz (0..16) to one of the 4 coeff_token VLC tables */
4313 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4315 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4317 //FIXME put trailing_ones into the context
/* ---- coeff_token: table choice depends on the block kind / predicted nnz ---- */
4319 if(n == CHROMA_DC_BLOCK_INDEX){
4320 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4321 total_coeff= coeff_token>>2;
4323 if(n == LUMA_DC_BLOCK_INDEX){
4324 total_coeff= pred_non_zero_count(h, 0);
4325 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4326 total_coeff= coeff_token>>2;
4328 total_coeff= pred_non_zero_count(h, n);
4329 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4330 total_coeff= coeff_token>>2;
4331 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4335 //FIXME set last_non_zero?
4339 if(total_coeff > (unsigned)max_coeff) {
4340 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low 2 bits of coeff_token carry the trailing-ones count */
4344 trailing_ones= coeff_token&3;
4345 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4346 assert(total_coeff<=16);
/* ---- levels: trailing ones are +-1 encoded as single sign bits ---- */
4348 for(i=0; i<trailing_ones; i++){
4349 level[i]= 1 - 2*get_bits1(gb);
4353 int level_code, mask;
4354 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4355 int prefix= get_level_prefix(gb);
4357 //first coefficient has suffix_length equal to 0 or 1
4358 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4360 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4362 level_code= (prefix<<suffix_length); //part
4363 }else if(prefix==14){
4365 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4367 level_code= prefix + get_bits(gb, 4); //part
4369 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4370 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4372 level_code += (1<<(prefix-3))-4096;
4375 if(trailing_ones < 3) level_code += 2;
/* map the unsigned level_code to a signed level: even -> +, odd -> - */
4380 mask= -(level_code&1);
4381 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4384 //remaining coefficients have suffix_length > 0
4385 for(;i<total_coeff;i++) {
/* thresholds at which suffix_length grows (adaptive Golomb) */
4386 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4387 prefix = get_level_prefix(gb);
4389 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4391 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4393 level_code += (1<<(prefix-3))-4096;
4395 mask= -(level_code&1);
4396 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4397 if(level_code > suffix_limit[suffix_length])
/* ---- total_zeros: skipped when the block is already full ---- */
4402 if(total_coeff == max_coeff)
4405 if(n == CHROMA_DC_BLOCK_INDEX)
4406 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4408 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* ---- scatter coefficients back-to-front using run_before ---- */
4411 coeff_num = zeros_left + total_coeff - 1;
4412 j = scantable[coeff_num];
/* path without dequantization (DC blocks: qmul unused) */
4414 block[j] = level[0];
4415 for(i=1;i<total_coeff;i++) {
4418 else if(zeros_left < 7){
4419 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4421 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4423 zeros_left -= run_before;
4424 coeff_num -= 1 + run_before;
4425 j= scantable[ coeff_num ];
/* path with dequantization: (level * qmul + 32) >> 6 */
4430 block[j] = (level[0] * qmul[j] + 32)>>6;
4431 for(i=1;i<total_coeff;i++) {
4434 else if(zeros_left < 7){
4435 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4437 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4439 zeros_left -= run_before;
4440 coeff_num -= 1 + run_before;
4441 j= scantable[ coeff_num ];
4443 block[j]= (level[i] * qmul[j] + 32)>>6;
/* a negative zeros_left means the bitstream was inconsistent */
4448 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair from
 * the left neighbour (if in the same slice), otherwise the top neighbour.
 * NOTE(review): the fallback value for "no neighbour available" (the final
 * ':' arm) is missing from this excerpt — verify against the full file.
 */
4455 static void predict_field_decoding_flag(H264Context *h){
4456 MpegEncContext * const s = &h->s;
4457 const int mb_xy= h->mb_xy;
4458 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4459 ? s->current_picture.mb_type[mb_xy-1]
4460 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4461 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4463 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4467 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: clears coefficient counts,
 * derives the motion (direct prediction for B, pskip prediction for P),
 * then writes back motion/type/qscale and marks prev_mb_skipped.
 * NOTE(review): some interior lines (branch closers, local declarations
 * such as mb_type/mx/my) are absent from this excerpt.
 */
4469 static void decode_mb_skip(H264Context *h){
4470 MpegEncContext * const s = &h->s;
4471 const int mb_xy= h->mb_xy;
/* skipped MBs carry no residual: zero all non-zero-count state */
4474 memset(h->non_zero_count[mb_xy], 0, 16);
4475 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4478 mb_type|= MB_TYPE_INTERLACED;
/* ---- B_SKIP: temporal/spatial direct motion ---- */
4480 if( h->slice_type_nos == FF_B_TYPE )
4482 // just for fill_caches. pred_direct_motion will set the real mb_type
4483 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4485 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4486 pred_direct_motion(h, &mb_type);
4487 mb_type|= MB_TYPE_SKIP;
/* ---- P_SKIP: predicted 16x16 L0 motion with ref 0 ---- */
4492 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4494 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4495 pred_pskip_motion(h, &mx, &my);
4496 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4497 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4500 write_back_motion(h, mb_type);
4501 s->current_picture.mb_type[mb_xy]= mb_type;
4502 s->current_picture.qscale_table[mb_xy]= s->qscale;
4503 h->slice_table[ mb_xy ]= h->slice_num;
4504 h->prev_mb_skipped= 1;
4508 * decodes a macroblock
4509 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one macroblock in CAVLC entropy mode: skip-run handling,
 * mb_type, intra prediction modes or inter motion data, CBP, dquant and
 * the residual blocks.
 * @return 0 if OK, negative error codes on failure (per the comment above
 *         the function in the full file).
 * NOTE(review): many interior lines (else branches, closing braces, local
 * declarations, error returns) are missing from this excerpt — compare
 * against the complete file before changing anything here.
 */
4511 static int decode_mb_cavlc(H264Context *h){
4512 MpegEncContext * const s = &h->s;
4514 int partition_count;
4515 unsigned int mb_type, cbp;
4516 int dct8x8_allowed= h->pps.transform_8x8_mode;
4518 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4520 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4522 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4523 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* ---- mb_skip_run: P/B slices encode runs of skipped macroblocks ---- */
4525 if(h->slice_type_nos != FF_I_TYPE){
4526 if(s->mb_skip_run==-1)
4527 s->mb_skip_run= get_ue_golomb(&s->gb);
4529 if (s->mb_skip_run--) {
4530 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4531 if(s->mb_skip_run==0)
4532 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4534 predict_field_decoding_flag(h);
4541 if( (s->mb_y&1) == 0 )
4542 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4544 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4546 h->prev_mb_skipped= 0;
/* ---- mb_type: meaning of the ue(v) code depends on the slice type ---- */
4548 mb_type= get_ue_golomb(&s->gb);
4549 if(h->slice_type_nos == FF_B_TYPE){
4551 partition_count= b_mb_type_info[mb_type].partition_count;
4552 mb_type= b_mb_type_info[mb_type].type;
4555 goto decode_intra_mb;
4557 }else if(h->slice_type_nos == FF_P_TYPE){
4559 partition_count= p_mb_type_info[mb_type].partition_count;
4560 mb_type= p_mb_type_info[mb_type].type;
4563 goto decode_intra_mb;
4566 assert(h->slice_type_nos == FF_I_TYPE);
4567 if(h->slice_type == FF_SI_TYPE && mb_type)
4571 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4575 cbp= i_mb_type_info[mb_type].cbp;
4576 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4577 mb_type= i_mb_type_info[mb_type].type;
4581 mb_type |= MB_TYPE_INTERLACED;
4583 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- I_PCM: raw byte-aligned samples, no prediction or transform ---- */
4585 if(IS_INTRA_PCM(mb_type)){
4588 // We assume these blocks are very rare so we do not optimize it.
4589 align_get_bits(&s->gb);
4591 // The pixels are stored in the same order as levels in h->mb array.
4592 for(y=0; y<16; y++){
4593 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4594 for(x=0; x<16; x++){
4595 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4596 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4600 const int index= 256 + 4*(y&3) + 32*(y>>2);
4602 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4603 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4607 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4609 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4610 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4614 // In deblocking, the quantizer is 0
4615 s->current_picture.qscale_table[mb_xy]= 0;
4616 // All coeffs are present
4617 memset(h->non_zero_count[mb_xy], 16, 16);
4619 s->current_picture.mb_type[mb_xy]= mb_type;
/* presumably MBAFF field pairs double the effective ref counts here —
 * the guarding condition is not visible in this excerpt */
4624 h->ref_count[0] <<= 1;
4625 h->ref_count[1] <<= 1;
4628 fill_caches(h, mb_type, 0);
/* ---- intra: 4x4 or 16x16 luma prediction + chroma prediction mode ---- */
4631 if(IS_INTRA(mb_type)){
4633 // init_top_left_availability(h);
4634 if(IS_INTRA4x4(mb_type)){
4637 if(dct8x8_allowed && get_bits1(&s->gb)){
4638 mb_type |= MB_TYPE_8x8DCT;
4642 // fill_intra4x4_pred_table(h);
4643 for(i=0; i<16; i+=di){
4644 int mode= pred_intra_mode(h, i);
4646 if(!get_bits1(&s->gb)){
4647 const int rem_mode= get_bits(&s->gb, 3);
/* rem_mode skips over the predicted mode */
4648 mode = rem_mode + (rem_mode >= mode);
4652 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4654 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4656 write_back_intra_pred_mode(h);
4657 if( check_intra4x4_pred_mode(h) < 0)
4660 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4661 if(h->intra16x16_pred_mode < 0)
4665 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4668 h->chroma_pred_mode= pred_mode;
/* ---- 8x8 partitions: sub_mb_type + per-sub-block refs and MVs ---- */
4669 }else if(partition_count==4){
4670 int i, j, sub_partition_count[4], list, ref[2][4];
4672 if(h->slice_type_nos == FF_B_TYPE){
4674 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4675 if(h->sub_mb_type[i] >=13){
4676 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4679 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4680 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4682 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4683 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4684 pred_direct_motion(h, &mb_type);
4685 h->ref_cache[0][scan8[4]] =
4686 h->ref_cache[1][scan8[4]] =
4687 h->ref_cache[0][scan8[12]] =
4688 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4691 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4693 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4694 if(h->sub_mb_type[i] >=4){
4695 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4698 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4699 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per list, per 8x8 partition */
4703 for(list=0; list<h->list_count; list++){
4704 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4706 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4707 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4708 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4710 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4722 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences per sub-partition */
4724 for(list=0; list<h->list_count; list++){
4726 if(IS_DIRECT(h->sub_mb_type[i])) {
4727 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4730 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4731 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4733 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4734 const int sub_mb_type= h->sub_mb_type[i];
4735 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4736 for(j=0; j<sub_partition_count[i]; j++){
4738 const int index= 4*i + block_width*j;
4739 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4740 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4741 mx += get_se_golomb(&s->gb);
4742 my += get_se_golomb(&s->gb);
4743 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV across the cells the sub-partition covers */
4745 if(IS_SUB_8X8(sub_mb_type)){
4747 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4749 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4750 }else if(IS_SUB_8X4(sub_mb_type)){
4751 mv_cache[ 1 ][0]= mx;
4752 mv_cache[ 1 ][1]= my;
4753 }else if(IS_SUB_4X8(sub_mb_type)){
4754 mv_cache[ 8 ][0]= mx;
4755 mv_cache[ 8 ][1]= my;
4757 mv_cache[ 0 ][0]= mx;
4758 mv_cache[ 0 ][1]= my;
4761 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* ---- direct 16x16 ---- */
4767 }else if(IS_DIRECT(mb_type)){
4768 pred_direct_motion(h, &mb_type);
4769 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16 / 16x8 / 8x16 inter: refs then MVDs per partition ---- */
4771 int list, mx, my, i;
4772 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4773 if(IS_16X16(mb_type)){
4774 for(list=0; list<h->list_count; list++){
4776 if(IS_DIR(mb_type, 0, list)){
4777 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4778 if(val >= h->ref_count[list]){
4779 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4783 val= LIST_NOT_USED&0xFF;
4784 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4786 for(list=0; list<h->list_count; list++){
4788 if(IS_DIR(mb_type, 0, list)){
4789 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4790 mx += get_se_golomb(&s->gb);
4791 my += get_se_golomb(&s->gb);
4792 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4794 val= pack16to32(mx,my);
4797 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4800 else if(IS_16X8(mb_type)){
4801 for(list=0; list<h->list_count; list++){
4804 if(IS_DIR(mb_type, i, list)){
4805 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4806 if(val >= h->ref_count[list]){
4807 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4811 val= LIST_NOT_USED&0xFF;
4812 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4815 for(list=0; list<h->list_count; list++){
4818 if(IS_DIR(mb_type, i, list)){
4819 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4820 mx += get_se_golomb(&s->gb);
4821 my += get_se_golomb(&s->gb);
4822 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4824 val= pack16to32(mx,my);
4827 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4831 assert(IS_8X16(mb_type));
4832 for(list=0; list<h->list_count; list++){
4835 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4836 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4837 if(val >= h->ref_count[list]){
4838 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4842 val= LIST_NOT_USED&0xFF;
4843 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4846 for(list=0; list<h->list_count; list++){
4849 if(IS_DIR(mb_type, i, list)){
4850 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4851 mx += get_se_golomb(&s->gb);
4852 my += get_se_golomb(&s->gb);
4853 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4855 val= pack16to32(mx,my);
4858 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4864 if(IS_INTER(mb_type))
4865 write_back_motion(h, mb_type);
/* ---- coded_block_pattern (not signalled for Intra16x16) ---- */
4867 if(!IS_INTRA16x16(mb_type)){
4868 cbp= get_ue_golomb(&s->gb);
4870 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4874 if(IS_INTRA4x4(mb_type))
4875 cbp= golomb_to_intra4x4_cbp[cbp];
4877 cbp= golomb_to_inter_cbp[cbp];
4881 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4882 if(get_bits1(&s->gb))
4883 mb_type |= MB_TYPE_8x8DCT;
4885 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residuals: luma DC / AC then chroma DC / AC ---- */
4887 if(cbp || IS_INTRA16x16(mb_type)){
4888 int i8x8, i4x4, chroma_idx;
4890 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4891 const uint8_t *scan, *scan8x8, *dc_scan;
4893 // fill_non_zero_count_cache(h);
4895 if(IS_INTERLACED(mb_type)){
4896 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4897 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4898 dc_scan= luma_dc_field_scan;
4900 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4901 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4902 dc_scan= luma_dc_zigzag_scan;
4905 dquant= get_se_golomb(&s->gb);
4907 if( dquant > 25 || dquant < -26 ){
4908 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 */
4912 s->qscale += dquant;
4913 if(((unsigned)s->qscale) > 51){
4914 if(s->qscale<0) s->qscale+= 52;
4915 else s->qscale-= 52;
4918 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4919 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4920 if(IS_INTRA16x16(mb_type)){
4921 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4922 return -1; //FIXME continue if partitioned and other return -1 too
4925 assert((cbp&15) == 0 || (cbp&15) == 15);
4928 for(i8x8=0; i8x8<4; i8x8++){
4929 for(i4x4=0; i4x4<4; i4x4++){
4930 const int index= i4x4 + 4*i8x8;
4931 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4937 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4940 for(i8x8=0; i8x8<4; i8x8++){
4941 if(cbp & (1<<i8x8)){
4942 if(IS_8x8DCT(mb_type)){
4943 DCTELEM *buf = &h->mb[64*i8x8];
4945 for(i4x4=0; i4x4<4; i4x4++){
4946 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4947 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4950 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4951 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4953 for(i4x4=0; i4x4<4; i4x4++){
4954 const int index= i4x4 + 4*i8x8;
4956 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4962 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4963 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4969 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4970 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4976 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4977 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4978 for(i4x4=0; i4x4<4; i4x4++){
4979 const int index= 16 + 4*chroma_idx + i4x4;
4980 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4986 uint8_t * const nnz= &h->non_zero_count_cache[0];
4987 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4988 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4991 uint8_t * const nnz= &h->non_zero_count_cache[0];
4992 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4993 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4994 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4996 s->current_picture.qscale_table[mb_xy]= s->qscale;
4997 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling done earlier */
5000 h->ref_count[0] >>= 1;
5001 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag with CABAC, using a context (state 70+ctx)
 * derived from whether the left and top macroblock pairs in the same slice
 * are interlaced.
 * NOTE(review): the ctx increments inside the two if bodies are not visible
 * in this excerpt.
 */
5007 static int decode_cabac_field_decoding_flag(H264Context *h) {
5008 MpegEncContext * const s = &h->s;
5009 const int mb_x = s->mb_x;
/* operate on the top MB of the current MB pair */
5010 const int mb_y = s->mb_y & ~1;
5011 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5012 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5014 unsigned int ctx = 0;
5016 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5019 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5023 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC: 0 for I4x4, 25 for I_PCM, otherwise
 * an I16x16 type built from the cbp_luma/cbp_chroma/pred-mode bins.
 * @param ctx_base    base offset into cabac_state for this syntax element
 * @param intra_slice nonzero in I slices (selects the neighbour-context path)
 */
5026 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5027 uint8_t *state= &h->cabac_state[ctx_base];
5031 MpegEncContext * const s = &h->s;
5032 const int mba_xy = h->left_mb_xy[0];
5033 const int mbb_xy = h->top_mb_xy;
/* context from neighbours that are not I4x4 (ctx increments not visible) */
5035 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5037 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5039 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5040 return 0; /* I4x4 */
5043 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5044 return 0; /* I4x4 */
5047 if( get_cabac_terminate( &h->cabac ) )
5048 return 25; /* PCM */
5050 mb_type = 1; /* I16x16 */
5051 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5052 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5053 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5054 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5055 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type: intra path for
 * I slices, P-tree for P slices, and the B-slice prefix tree with a
 * direct-neighbour-derived context.
 */
5059 static int decode_cabac_mb_type( H264Context *h ) {
5060 MpegEncContext * const s = &h->s;
5062 if( h->slice_type_nos == FF_I_TYPE ) {
5063 return decode_cabac_intra_mb_type(h, 3, 1);
5064 } else if( h->slice_type_nos == FF_P_TYPE ) {
5065 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5067 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5068 /* P_L0_D16x16, P_8x8 */
5069 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5071 /* P_L0_D8x16, P_L0_D16x8 */
5072 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5075 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5077 } else if( h->slice_type_nos == FF_B_TYPE ) {
5078 const int mba_xy = h->left_mb_xy[0];
5079 const int mbb_xy = h->top_mb_xy;
/* context: number of non-direct neighbours in the same slice
 * (ctx increments not visible in this excerpt) */
5083 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5085 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5088 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5089 return 0; /* B_Direct_16x16 */
5091 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5092 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining B types */
5095 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5096 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5097 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5098 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5100 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5101 else if( bits == 13 ) {
5102 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5103 } else if( bits == 14 )
5104 return 11; /* B_L1_L0_8x16 */
5105 else if( bits == 15 )
5106 return 22; /* B_8x8 */
5108 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5109 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5111 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC. The context (11+ctx, plus an offset for
 * B slices) counts non-skipped left/top neighbours in the same slice; the
 * MBAFF path adjusts neighbour addresses for field/frame MB pairs.
 * NOTE(review): several lines (mba_xy initialisation, branch closers) are
 * missing from this excerpt.
 */
5116 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5117 MpegEncContext * const s = &h->s;
5121 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5122 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5125 && h->slice_table[mba_xy] == h->slice_num
5126 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5127 mba_xy += s->mb_stride;
5129 mbb_xy = mb_xy - s->mb_stride;
5131 && h->slice_table[mbb_xy] == h->slice_num
5132 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5133 mbb_xy -= s->mb_stride;
5135 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5137 int mb_xy = h->mb_xy;
5139 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5142 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5144 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5147 if( h->slice_type_nos == FF_B_TYPE )
5149 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC: state 68 signals
 * "use predicted mode"; otherwise a 3-bit rem mode (state 69) is read and
 * adjusted to skip over the predicted mode.
 * NOTE(review): the early return for the predicted mode and the final
 * return are not visible in this excerpt.
 */
5152 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5155 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5158 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5159 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5160 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5162 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC (truncated unary, contexts
 * 64..67). The first bin's context counts neighbours with a nonzero
 * chroma prediction mode.
 * NOTE(review): ctx increments and intermediate returns are missing from
 * this excerpt.
 */
5168 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5169 const int mba_xy = h->left_mb_xy[0];
5170 const int mbb_xy = h->top_mb_xy;
5174 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5175 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5178 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5181 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5184 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5186 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC (contexts 73..76).
 * Each 8x8 block's context is derived from the already-decoded bits of the
 * current MB and the left/top neighbours' cbp (-1 when outside the slice).
 * NOTE(review): the final return and an intermediate line appear to be
 * missing from this excerpt.
 */
5192 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5193 int cbp_b, cbp_a, ctx, cbp = 0;
5195 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5196 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5198 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5199 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5200 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5201 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5202 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5203 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5204 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5205 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern with CABAC (contexts 77..84):
 * returns 0 (no chroma coeffs), 1 (DC only) or 2 (DC+AC), conditioned on
 * the neighbours' chroma cbp.
 */
5208 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
/* chroma cbp lives in bits 4-5 of the stored cbp */
5212 cbp_a = (h->left_cbp>>4)&0x03;
5213 cbp_b = (h-> top_cbp>>4)&0x03;
5216 if( cbp_a > 0 ) ctx++;
5217 if( cbp_b > 0 ) ctx += 2;
5218 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5222 if( cbp_a == 2 ) ctx++;
5223 if( cbp_b == 2 ) ctx += 2;
5224 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC (unary, contexts 60..63), mapping the
 * unary value back to a signed delta. The first bin's context depends on
 * whether the previous MB had a nonzero delta.
 * NOTE(review): the val increment / ctx update inside the loop and the
 * positive-value return are missing from this excerpt.
 */
5226 static int decode_cabac_mb_dqp( H264Context *h) {
5230 if( h->last_qscale_diff != 0 )
5233 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5239 if(val > 102) //prevent infinite loop
/* odd unary values map to negative deltas */
5246 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type with CABAC (contexts 21..23), selecting
 * among P_L0_8x8 / 8x4 / 4x8 / 4x4.
 * NOTE(review): the return statements between the bins are missing from
 * this excerpt.
 */
5248 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5249 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5251 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5253 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type with CABAC (contexts 36..39): direct,
 * single-list 8x8, then the tree of bi/partitioned sub types.
 * NOTE(review): the base `type` assignments between the branches are not
 * visible in this excerpt.
 */
5257 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5259 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5260 return 0; /* B_Direct_8x8 */
5261 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5262 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5264 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5265 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5266 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5269 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5270 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC (context states 399..401).
 * The context offset is the number of neighbouring macroblocks that
 * already use the 8x8 transform (h->neighbor_transform_size, 0..2).
 */
5274 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5275 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx for block n of the given reference list with CABAC
 * (context states 54..59, unary binarisation).
 * The initial context depends on whether the left/top neighbouring
 * blocks use a non-zero reference; for B slices, neighbours coded as
 * direct do not count (their ref was inferred, not coded).
 * Returns the decoded index, clamped with an error log on overflow.
 * NOTE(review): partial listing — ctx derivation lines and the normal
 * return are elided from this excerpt.
 */
5278 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5279 int refa = h->ref_cache[list][scan8[n] - 1];
5280 int refb = h->ref_cache[list][scan8[n] - 8];
5284 if( h->slice_type_nos == FF_B_TYPE) {
/* direct-coded neighbours must not raise the context */
5285 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5287 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
/* unary part: one bin per increment */
5296 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5302 if(ref >= 32 /*h->ref_list[list]*/){
5303 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5304 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component (l: 0=x, 1=y) for
 * block n with CABAC (UEG3 binarisation: unary prefix, k-th order
 * exp-Golomb suffix decoded in bypass mode, then a bypass sign bit).
 * The initial context depends on the summed |mvd| of the left/top
 * neighbours (amvd): 0..2 -> ctx 0, 3..32 -> ctx 1, >32 -> ctx 2.
 * NOTE(review): partial listing — several bookkeeping lines (mvd
 * accumulation, overflow clamp value) are elided from this excerpt.
 */
5310 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5311 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5312 abs( h->mvd_cache[list][scan8[n] - 8][l] );
/* x and y components use disjoint context banks (40.. vs 47..) */
5313 int ctxbase = (l == 0) ? 40 : 47;
5318 else if( amvd > 32 )
5323 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, at most 9 context-coded bins */
5328 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-Golomb suffix in bypass mode */
5336 while( get_cabac_bypass( &h->cabac ) ) {
5340 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5345 if( get_cabac_bypass( &h->cabac ) )
/* bypass-coded sign: returns +mvd or -mvd */
5349 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the context index for the coded_block_flag of block idx in
 * category cat.  The context is ctx + 4*cat where ctx (0..3) encodes
 * whether the left (nza) and top (nzb) neighbouring blocks of the same
 * category have non-zero coefficients.  DC categories read the
 * neighbour state from the left_cbp/top_cbp bit fields; AC/4x4
 * categories read it from non_zero_count_cache.
 * NOTE(review): partial listing — the cat dispatch (if/else) and the
 * ctx computation from nza/nzb are elided from this excerpt.
 */
5352 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* luma DC flag lives in bit 8 of the stored cbp */
5358 nza = h->left_cbp&0x100;
5359 nzb = h-> top_cbp&0x100;
/* chroma DC flags live in bits 6..7 of the stored cbp */
5361 nza = (h->left_cbp>>(6+idx))&0x01;
5362 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC: neighbours from the nnz cache (scan8 offsets -1 / -8) */
5366 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5367 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
/* luma AC (cat 1) or Luma4x4 (cat 2) */
5369 assert(cat == 1 || cat == 2);
5370 nza = h->non_zero_count_cache[scan8[idx] - 1];
5371 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5381 return ctx + 4 * cat;
/**
 * Context-increment table for last_significant_coeff_flag in 8x8
 * blocks: maps scan position 0..62 to a context offset 0..8.
 * Shared with the x86 asm significance decoder, hence DECLARE_ASM_CONST.
 */
5384 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5385 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5386 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5387 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5388 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one residual block with CABAC: coded_block_flag,
 * significance map, and coefficient levels (with optional dequant via
 * qmul), writing levels into block[] at scantable positions and
 * updating the non_zero_count / cbp bookkeeping.
 * is_dc selects the DC variants of the context tables; cat is the
 * block category (see the table in the body).  Designed to be inlined
 * into is_dc-specialised wrappers below.
 * NOTE(review): partial listing — various declarations, closing braces
 * and #else/#endif lines are elided from this excerpt.
 */
5391 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* base context offsets, indexed [MB_FIELD][cat] (frame vs field coding) */
5392 static const int significant_coeff_flag_offset[2][6] = {
5393 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5394 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5396 static const int last_coeff_flag_offset[2][6] = {
5397 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5398 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5400 static const int coeff_abs_level_m1_offset[6] = {
5401 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position ctx increments for the 8x8 significance map,
 * [0]=frame scan, [1]=field scan */
5403 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5404 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5405 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5406 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5407 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5408 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5409 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5410 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5411 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5413 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5414 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5415 * map node ctx => cabac ctx for level=1 */
5416 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5417 /* map node ctx => cabac ctx for level>1 */
5418 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5419 static const uint8_t coeff_abs_level_transition[2][8] = {
5420 /* update node ctx after decoding a level=1 */
5421 { 1, 2, 3, 3, 4, 5, 6, 7 },
5422 /* update node ctx after decoding a level>1 */
5423 { 4, 4, 4, 4, 5, 6, 7, 7 }
5429 int coeff_count = 0;
5432 uint8_t *significant_coeff_ctx_base;
5433 uint8_t *last_coeff_ctx_base;
5434 uint8_t *abs_level_m1_ctx_base;
/* keep a local copy of the CABAC state so the compiler can hold it in
 * registers; written back to h->cabac before every return */
5437 #define CABAC_ON_STACK
5439 #ifdef CABAC_ON_STACK
5442 cc.range = h->cabac.range;
5443 cc.low = h->cabac.low;
5444 cc.bytestream= h->cabac.bytestream;
5446 #define CC &h->cabac
5450 /* cat: 0-> DC 16x16 n = 0
5451 * 1-> AC 16x16 n = luma4x4idx
5452 * 2-> Luma4x4 n = luma4x4idx
5453 * 3-> DC Chroma n = iCbCr
5454 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5455 * 5-> Luma8x8 n = 4 * luma8x8idx
5458 /* read coded block flag */
5459 if( is_dc || cat != 5 ) {
5460 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* no coefficients: clear the nnz cache entry and bail out */
5463 h->non_zero_count_cache[scan8[16+n]] = 0;
5465 h->non_zero_count_cache[scan8[n]] = 0;
5468 #ifdef CABAC_ON_STACK
5469 h->cabac.range = cc.range ;
5470 h->cabac.low = cc.low ;
5471 h->cabac.bytestream= cc.bytestream;
5477 significant_coeff_ctx_base = h->cabac_state
5478 + significant_coeff_flag_offset[MB_FIELD][cat];
5479 last_coeff_ctx_base = h->cabac_state
5480 + last_coeff_flag_offset[MB_FIELD][cat];
5481 abs_level_m1_ctx_base = h->cabac_state
5482 + coeff_abs_level_m1_offset[cat];
5484 if( !is_dc && cat == 5 ) {
/* significance map: records the scan positions of non-zero coeffs in
 * index[]; the last position is implicit when the loop runs out */
5485 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5486 for(last= 0; last < coefs; last++) { \
5487 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5488 if( get_cabac( CC, sig_ctx )) { \
5489 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5490 index[coeff_count++] = last; \
5491 if( get_cabac( CC, last_ctx ) ) { \
5497 if( last == max_coeff -1 ) {\
5498 index[coeff_count++] = last;\
5500 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths; the C macro below is the portable fallback */
5501 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5502 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5504 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5506 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5508 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5511 assert(coeff_count > 0);
/* record "has coefficients" in the per-MB cbp/nnz bookkeeping */
5515 h->cbp_table[h->mb_xy] |= 0x100;
5517 h->cbp_table[h->mb_xy] |= 0x40 << n;
5520 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5522 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5524 assert( cat == 1 || cat == 2 );
5525 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode levels from the highest-frequency coefficient downwards */
5529 while( coeff_count-- ) {
5530 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5532 int j= scantable[index[coeff_count]];
5534 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: sign in bypass mode, dequant unless DC */
5535 node_ctx = coeff_abs_level_transition[0][node_ctx];
5537 block[j] = get_cabac_bypass_sign( CC, -1);
5539 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5543 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5544 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* |level| > 1: unary up to 15, then exp-Golomb suffix in bypass */
5546 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5550 if( coeff_abs >= 15 ) {
5552 while( get_cabac_bypass( CC ) ) {
5558 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5564 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5566 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
/* flush the register-local CABAC state back to the context */
5570 #ifdef CABAC_ON_STACK
5571 h->cabac.range = cc.range ;
5572 h->cabac.low = cc.low ;
5573 h->cabac.bytestream= cc.bytestream;
/* Wrappers around decode_cabac_residual_internal().  With CONFIG_SMALL
 * unset, the is_dc parameter is specialised at compile time into two
 * av_always_inline instantiations (DC vs non-DC) to remove the runtime
 * branches; with CONFIG_SMALL set, a single shared body is used and
 * is_dc is computed from cat (0 = luma DC, 3 = chroma DC).
 * NOTE(review): partial listing — the closing braces and the
 * #else/#endif of the CONFIG_SMALL split are elided from this excerpt. */
5578 #ifndef CONFIG_SMALL
5579 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5580 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5583 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5584 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5588 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5590 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5592 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5593 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The simple frame case is mb_xy - mb_stride / mb_xy - 1; with MBAFF the
 * neighbours are adjusted according to the field/frame coding of the
 * current MB pair versus the neighbouring pairs.
 * NOTE(review): partial listing — the MBAFF branch structure (if/else
 * lines) is partially elided from this excerpt.
 */
5597 static inline void compute_mb_neighbors(H264Context *h)
5599 MpegEncContext * const s = &h->s;
5600 const int mb_xy = h->mb_xy;
5601 h->top_mb_xy = mb_xy - s->mb_stride;
5602 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: work in units of MB pairs */
5604 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5605 const int top_pair_xy = pair_xy - s->mb_stride;
5606 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5607 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5608 const int curr_mb_frame_flag = !MB_FIELD;
5609 const int bottom = (s->mb_y & 1);
5611 ? !curr_mb_frame_flag // bottom macroblock
5612 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5614 h->top_mb_xy -= s->mb_stride;
/* left neighbour with mismatched field/frame coding comes from the pair */
5616 if (left_mb_frame_flag != curr_mb_frame_flag) {
5617 h->left_mb_xy[0] = pair_xy - 1;
5619 } else if (FIELD_PICTURE) {
5620 h->top_mb_xy -= s->mb_stride;
5626 * decodes a macroblock
5627 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Top-level CABAC macroblock decoder: skip flags, mb_type, intra pred
 * modes or inter motion data, cbp, qp delta, and all residual blocks.
 * NOTE(review): partial listing — many original lines (declarations,
 * braces, returns) are elided from this excerpt. */
5629 static int decode_mb_cabac(H264Context *h) {
5630 MpegEncContext * const s = &h->s;
5632 int mb_type, partition_count, cbp = 0;
5633 int dct8x8_allowed= h->pps.transform_8x8_mode;
5635 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5637 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5639 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (P/B slices only; I slices cannot be skipped) --- */
5640 if( h->slice_type_nos != FF_I_TYPE ) {
5642 /* a skipped mb needs the aff flag from the following mb */
5643 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5644 predict_field_decoding_flag(h);
5645 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5646 skip = h->next_mb_skipped;
5648 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5649 /* read skip flags */
5651 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5652 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5653 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5654 if(h->next_mb_skipped)
5655 predict_field_decoding_flag(h);
5657 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset per-MB state and return early (elided here) */
5662 h->cbp_table[mb_xy] = 0;
5663 h->chroma_pred_mode_table[mb_xy] = 0;
5664 h->last_qscale_diff = 0;
/* --- field decoding flag for the non-skip path --- */
5671 if( (s->mb_y&1) == 0 )
5673 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5675 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5677 h->prev_mb_skipped = 0;
/* --- mb_type --- */
5679 compute_mb_neighbors(h);
5680 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5681 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* map the raw mb_type index through the per-slice-type info tables */
5685 if( h->slice_type_nos == FF_B_TYPE ) {
5687 partition_count= b_mb_type_info[mb_type].partition_count;
5688 mb_type= b_mb_type_info[mb_type].type;
5691 goto decode_intra_mb;
5693 } else if( h->slice_type_nos == FF_P_TYPE ) {
5695 partition_count= p_mb_type_info[mb_type].partition_count;
5696 mb_type= p_mb_type_info[mb_type].type;
5699 goto decode_intra_mb;
5702 if(h->slice_type == FF_SI_TYPE && mb_type)
5704 assert(h->slice_type_nos == FF_I_TYPE);
5706 partition_count = 0;
5707 cbp= i_mb_type_info[mb_type].cbp;
5708 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5709 mb_type= i_mb_type_info[mb_type].type;
5712 mb_type |= MB_TYPE_INTERLACED;
5714 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow; bypass the arithmetic coder --- */
5716 if(IS_INTRA_PCM(mb_type)) {
5720 // We assume these blocks are very rare so we do not optimize it.
5721 // FIXME The two following lines get the bitstream position in the cabac
5722 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5723 ptr= h->cabac.bytestream;
5724 if(h->cabac.low&0x1) ptr--;
5726 if(h->cabac.low&0x1FF) ptr--;
5729 // The pixels are stored in the same order as levels in h->mb array.
5730 for(y=0; y<16; y++){
5731 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5732 for(x=0; x<16; x++){
5733 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5734 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5738 const int index= 256 + 4*(y&3) + 32*(y>>2);
5740 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5741 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5745 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5747 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5748 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the arithmetic decoder after the raw PCM bytes */
5752 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5754 // All blocks are present
5755 h->cbp_table[mb_xy] = 0x1ef;
5756 h->chroma_pred_mode_table[mb_xy] = 0;
5757 // In deblocking, the quantizer is 0
5758 s->current_picture.qscale_table[mb_xy]= 0;
5759 // All coeffs are present
5760 memset(h->non_zero_count[mb_xy], 16, 16);
5761 s->current_picture.mb_type[mb_xy]= mb_type;
5762 h->last_qscale_diff = 0;
/* MBAFF pairs use doubled ref counts while decoding; undone at the end */
5767 h->ref_count[0] <<= 1;
5768 h->ref_count[1] <<= 1;
5771 fill_caches(h, mb_type, 0);
/* --- intra macroblocks: prediction modes --- */
5773 if( IS_INTRA( mb_type ) ) {
5775 if( IS_INTRA4x4( mb_type ) ) {
5776 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5777 mb_type |= MB_TYPE_8x8DCT;
5778 for( i = 0; i < 16; i+=4 ) {
5779 int pred = pred_intra_mode( h, i );
5780 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5781 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5784 for( i = 0; i < 16; i++ ) {
5785 int pred = pred_intra_mode( h, i );
5786 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5788 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5791 write_back_intra_pred_mode(h);
5792 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5794 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5795 if( h->intra16x16_pred_mode < 0 ) return -1;
5797 h->chroma_pred_mode_table[mb_xy] =
5798 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5800 pred_mode= check_intra_pred_mode( h, pred_mode );
5801 if( pred_mode < 0 ) return -1;
5802 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions: sub types, refs, then mvds per sub-block --- */
5803 } else if( partition_count == 4 ) {
5804 int i, j, sub_partition_count[4], list, ref[2][4];
5806 if( h->slice_type_nos == FF_B_TYPE ) {
5807 for( i = 0; i < 4; i++ ) {
5808 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5809 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5810 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5812 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5813 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5814 pred_direct_motion(h, &mb_type);
5815 h->ref_cache[0][scan8[4]] =
5816 h->ref_cache[1][scan8[4]] =
5817 h->ref_cache[0][scan8[12]] =
5818 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5819 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5820 for( i = 0; i < 4; i++ )
5821 if( IS_DIRECT(h->sub_mb_type[i]) )
5822 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5826 for( i = 0; i < 4; i++ ) {
5827 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5828 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5829 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices first, for all four 8x8 sub-blocks */
5833 for( list = 0; list < h->list_count; list++ ) {
5834 for( i = 0; i < 4; i++ ) {
5835 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5836 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5837 if( h->ref_count[list] > 1 )
5838 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5844 h->ref_cache[list][ scan8[4*i]+1 ]=
5845 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5850 dct8x8_allowed = get_dct8x8_allowed(h);
/* then motion vector differences, per sub-partition */
5852 for(list=0; list<h->list_count; list++){
5854 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5855 if(IS_DIRECT(h->sub_mb_type[i])){
5856 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5860 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5861 const int sub_mb_type= h->sub_mb_type[i];
5862 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5863 for(j=0; j<sub_partition_count[i]; j++){
5866 const int index= 4*i + block_width*j;
5867 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5868 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5869 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5871 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5872 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5873 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the mv/mvd into every 4x4 cell of the sub-partition */
5875 if(IS_SUB_8X8(sub_mb_type)){
5877 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5879 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5882 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5884 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5885 }else if(IS_SUB_8X4(sub_mb_type)){
5886 mv_cache[ 1 ][0]= mx;
5887 mv_cache[ 1 ][1]= my;
5889 mvd_cache[ 1 ][0]= mx - mpx;
5890 mvd_cache[ 1 ][1]= my - mpy;
5891 }else if(IS_SUB_4X8(sub_mb_type)){
5892 mv_cache[ 8 ][0]= mx;
5893 mv_cache[ 8 ][1]= my;
5895 mvd_cache[ 8 ][0]= mx - mpx;
5896 mvd_cache[ 8 ][1]= my - mpy;
5898 mv_cache[ 0 ][0]= mx;
5899 mv_cache[ 0 ][1]= my;
5901 mvd_cache[ 0 ][0]= mx - mpx;
5902 mvd_cache[ 0 ][1]= my - mpy;
/* sub-block not predicted from this list: zero its mv/mvd */
5905 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5906 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5907 p[0] = p[1] = p[8] = p[9] = 0;
5908 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B_Direct_16x16: motion inferred, no mvds coded --- */
5912 } else if( IS_DIRECT(mb_type) ) {
5913 pred_direct_motion(h, &mb_type);
5914 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5915 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5916 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter 16x16 / 16x8 / 8x16 partitions --- */
5918 int list, mx, my, i, mpx, mpy;
5919 if(IS_16X16(mb_type)){
5920 for(list=0; list<h->list_count; list++){
5921 if(IS_DIR(mb_type, 0, list)){
5922 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5923 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5925 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5927 for(list=0; list<h->list_count; list++){
5928 if(IS_DIR(mb_type, 0, list)){
5929 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5931 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5932 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5933 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5935 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5936 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5938 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5941 else if(IS_16X8(mb_type)){
5942 for(list=0; list<h->list_count; list++){
5944 if(IS_DIR(mb_type, i, list)){
5945 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5946 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5948 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5951 for(list=0; list<h->list_count; list++){
5953 if(IS_DIR(mb_type, i, list)){
5954 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5955 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5956 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5957 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5959 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5960 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5962 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5963 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5968 assert(IS_8X16(mb_type));
5969 for(list=0; list<h->list_count; list++){
5971 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5972 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5973 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5975 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5978 for(list=0; list<h->list_count; list++){
5980 if(IS_DIR(mb_type, i, list)){
5981 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5982 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5983 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5985 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5986 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5987 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5989 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5990 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* --- write back motion, decode cbp --- */
5997 if( IS_INTER( mb_type ) ) {
5998 h->chroma_pred_mode_table[mb_xy] = 0;
5999 write_back_motion( h, mb_type );
6002 if( !IS_INTRA16x16( mb_type ) ) {
6003 cbp = decode_cabac_mb_cbp_luma( h );
6004 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6007 h->cbp_table[mb_xy] = h->cbp = cbp;
/* 8x8 transform flag for inter MBs with any coded luma block */
6009 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6010 if( decode_cabac_mb_transform_size( h ) )
6011 mb_type |= MB_TYPE_8x8DCT;
6013 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: qp delta then coefficient blocks --- */
6015 if( cbp || IS_INTRA16x16( mb_type ) ) {
6016 const uint8_t *scan, *scan8x8, *dc_scan;
6017 const uint32_t *qmul;
6020 if(IS_INTERLACED(mb_type)){
6021 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6022 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6023 dc_scan= luma_dc_field_scan;
6025 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6026 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6027 dc_scan= luma_dc_zigzag_scan;
6030 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6031 if( dqp == INT_MIN ){
6032 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into 0..51 as the spec's modular arithmetic requires */
6036 if(((unsigned)s->qscale) > 51){
6037 if(s->qscale<0) s->qscale+= 52;
6038 else s->qscale-= 52;
6040 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6041 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6043 if( IS_INTRA16x16( mb_type ) ) {
6045 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6046 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6049 qmul = h->dequant4_coeff[0][s->qscale];
6050 for( i = 0; i < 16; i++ ) {
6051 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6052 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6055 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* non-intra16x16 luma: one residual per coded 8x8 (or its four 4x4s) */
6059 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6060 if( cbp & (1<<i8x8) ) {
6061 if( IS_8x8DCT(mb_type) ) {
6062 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6063 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6065 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6066 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6067 const int index = 4*i8x8 + i4x4;
6068 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6070 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6071 //STOP_TIMER("decode_residual")
6075 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6076 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (cbp bit 4 or 5 set) */
6083 for( c = 0; c < 2; c++ ) {
6084 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6085 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* chroma AC (cbp chroma == 2) */
6091 for( c = 0; c < 2; c++ ) {
6092 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6093 for( i = 0; i < 4; i++ ) {
6094 const int index = 16 + 4 * c + i;
6095 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6096 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6100 uint8_t * const nnz= &h->non_zero_count_cache[0];
6101 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6102 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp == 0 and not intra16x16: clear the whole nnz cache */
6105 uint8_t * const nnz= &h->non_zero_count_cache[0];
6106 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6107 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6108 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6109 h->last_qscale_diff = 0;
6112 s->current_picture.qscale_table[mb_xy]= s->qscale;
6113 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling from above */
6116 h->ref_count[0] >>= 1;
6117 h->ref_count[1] >>= 1;
/**
 * Deblocks one vertical luma edge (16 pixels tall).
 * bS < 4 edges are handed to the DSP normal-filter with per-4-pixel tc0
 * values; bS == 4 (intra edge) uses the strong filter implemented
 * inline below.  alpha/beta thresholds come from the spec tables
 * indexed by qp plus the slice offsets.
 * NOTE(review): partial listing — loop/brace lines are elided.
 */
6124 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6126 const int index_a = qp + h->slice_alpha_c0_offset;
6127 const int alpha = (alpha_table+52)[index_a];
6128 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* bS < 4 path: tc0 per group of 4 rows, -1 disables filtering */
6133 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6134 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6136 /* 16px edge length, because bS=4 is triggered by being at
6137 * the edge of an intra MB, so all 4 bS are the same */
6138 for( d = 0; d < 16; d++ ) {
6139 const int p0 = pix[-1];
6140 const int p1 = pix[-2];
6141 const int p2 = pix[-3];
6143 const int q0 = pix[0];
6144 const int q1 = pix[1];
6145 const int q2 = pix[2];
6147 if( FFABS( p0 - q0 ) < alpha &&
6148 FFABS( p1 - p0 ) < beta &&
6149 FFABS( q1 - q0 ) < beta ) {
/* strong filter: full smoothing when the edge gradient is small */
6151 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6152 if( FFABS( p2 - p0 ) < beta)
6154 const int p3 = pix[-4];
6156 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6157 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6158 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6161 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6163 if( FFABS( q2 - q0 ) < beta)
6165 const int q3 = pix[3];
6167 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6168 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6169 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6172 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 adjusted */
6176 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6177 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6179 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge.  Chroma always uses the DSP
 * filters: the normal filter with tc0+1 for bS < 4, the intra (strong)
 * filter for bS == 4.  Note the chroma tc convention: 0 disables
 * filtering (vs -1 for luma).
 * NOTE(review): partial listing — loop/brace lines are elided.
 */
6185 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6187 const int index_a = qp + h->slice_alpha_c0_offset;
6188 const int alpha = (alpha_table+52)[index_a];
6189 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6194 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6195 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6197 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks one vertical luma edge between MBAFF macroblock pairs.
 * Runs scalar per-row because bS and qp can change per row: bS[8]
 * carries 8 strengths and qp[2] the two quantizers (selection depends
 * on MB_FIELD).  The per-row filter is the same normal/strong pair as
 * filter_mb_edgev.
 * NOTE(review): partial listing — brace/continue lines are elided.
 */
6201 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6203 for( i = 0; i < 16; i++, pix += stride) {
6209 int bS_index = (i >> 1);
6212 bS_index |= (i & 1);
6215 if( bS[bS_index] == 0 ) {
/* pick the qp (and thus alpha/beta/tc0) for this row */
6219 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6220 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6221 alpha = (alpha_table+52)[index_a];
6222 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filter (bS 1..3): clip the delta to +-tc */
6224 if( bS[bS_index] < 4 ) {
6225 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6226 const int p0 = pix[-1];
6227 const int p1 = pix[-2];
6228 const int p2 = pix[-3];
6229 const int q0 = pix[0];
6230 const int q1 = pix[1];
6231 const int q2 = pix[2];
6233 if( FFABS( p0 - q0 ) < alpha &&
6234 FFABS( p1 - p0 ) < beta &&
6235 FFABS( q1 - q0 ) < beta ) {
6239 if( FFABS( p2 - p0 ) < beta ) {
6240 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6243 if( FFABS( q2 - q0 ) < beta ) {
6244 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6248 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6249 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6250 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6251 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter (bS == 4): same formulas as filter_mb_edgev */
6254 const int p0 = pix[-1];
6255 const int p1 = pix[-2];
6256 const int p2 = pix[-3];
6258 const int q0 = pix[0];
6259 const int q1 = pix[1];
6260 const int q2 = pix[2];
6262 if( FFABS( p0 - q0 ) < alpha &&
6263 FFABS( p1 - p0 ) < beta &&
6264 FFABS( q1 - q0 ) < beta ) {
6266 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6267 if( FFABS( p2 - p0 ) < beta)
6269 const int p3 = pix[-4];
6271 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6272 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6273 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6276 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6278 if( FFABS( q2 - q0 ) < beta)
6280 const int q3 = pix[3];
6282 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6283 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6284 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6287 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6291 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6292 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6294 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one vertical chroma edge between MBAFF macroblock pairs
 * (8 rows, per-row bS/qp selection like the luma variant).  Chroma only
 * touches p1..q1: the bS < 4 path clips the p0/q0 delta to +-tc
 * (tc0 + 1), the bS == 4 path applies the 2-tap strong filter.
 * NOTE(review): partial listing — brace/continue lines are elided.
 */
6299 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6301 for( i = 0; i < 8; i++, pix += stride) {
6309 if( bS[bS_index] == 0 ) {
6313 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6314 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6315 alpha = (alpha_table+52)[index_a];
6316 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6318 if( bS[bS_index] < 4 ) {
/* chroma normal filter uses tc0 + 1 */
6319 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6320 const int p0 = pix[-1];
6321 const int p1 = pix[-2];
6322 const int q0 = pix[0];
6323 const int q1 = pix[1];
6325 if( FFABS( p0 - q0 ) < alpha &&
6326 FFABS( p1 - p0 ) < beta &&
6327 FFABS( q1 - q0 ) < beta ) {
6328 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6330 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6331 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6332 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (intra) chroma filter */
6335 const int p0 = pix[-1];
6336 const int p1 = pix[-2];
6337 const int q0 = pix[0];
6338 const int q1 = pix[1];
6340 if( FFABS( p0 - q0 ) < alpha &&
6341 FFABS( p1 - p0 ) < beta &&
6342 FFABS( q1 - q0 ) < beta ) {
6344 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6345 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6346 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge: the vertical-direction counterpart
 * of filter_mb_edgev, addressing samples at multiples of the stride
 * (pix_next) instead of +-1.  bS < 4 goes to the DSP normal filter,
 * bS == 4 uses the inline strong filter.
 * NOTE(review): partial listing — loop/brace lines are elided.
 */
6352 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6354 const int index_a = qp + h->slice_alpha_c0_offset;
6355 const int alpha = (alpha_table+52)[index_a];
6356 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6357 const int pix_next = stride;
6362 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6363 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6365 /* 16px edge length, see filter_mb_edgev */
6366 for( d = 0; d < 16; d++ ) {
6367 const int p0 = pix[-1*pix_next];
6368 const int p1 = pix[-2*pix_next];
6369 const int p2 = pix[-3*pix_next];
6370 const int q0 = pix[0];
6371 const int q1 = pix[1*pix_next];
6372 const int q2 = pix[2*pix_next];
6374 if( FFABS( p0 - q0 ) < alpha &&
6375 FFABS( p1 - p0 ) < beta &&
6376 FFABS( q1 - q0 ) < beta ) {
6378 const int p3 = pix[-4*pix_next];
6379 const int q3 = pix[ 3*pix_next];
/* strong filter, same formulas as the vertical edge */
6381 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6382 if( FFABS( p2 - p0 ) < beta) {
6384 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6385 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6386 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6389 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6391 if( FFABS( q2 - q0 ) < beta) {
6393 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6394 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6395 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6398 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6402 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6403 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6405 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge. Unlike the luma C fallback above,
 * both the normal and intra cases delegate to dsputil routines.
 * NOTE(review): elided lines (tc[] declaration, the bS<4 vs intra branch)
 * are missing from this copy; code kept byte-identical. */
6412 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6414 const int index_a = qp + h->slice_alpha_c0_offset;
6415 const int alpha = (alpha_table+52)[index_a];
6416 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma uses tc0+1 (spec 8.7); tc==0 means "do not filter this group". */
6421 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6422 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6424 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for the common case (no MBAFF, uniform chroma QP,
 * SIMD strength computation available). Falls back to the full filter_mb()
 * for picture borders and the other cases checked below.
 * NOTE(review): large portions of this function were elided in this copy
 * (return statements, branch closers, the trailing 8x8DCT branch body);
 * code kept byte-identical. */
6428 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6429 MpegEncContext * const s = &h->s;
6430 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6432 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Conditions the fast path cannot handle: picture edge, no SIMD strength
 * function, differing cb/cr QP offsets, or slice-boundary-aware filtering
 * (deblocking_filter==2) crossing a slice edge. */
6436 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6438 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6439 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6440 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6443 assert(!FRAME_MBAFF);
6445 mb_type = s->current_picture.mb_type[mb_xy];
6446 qp = s->current_picture.qscale_table[mb_xy];
6447 qp0 = s->current_picture.qscale_table[mb_xy-1];
6448 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6449 qpc = get_chroma_qp( h, 0, qp );
6450 qpc0 = get_chroma_qp( h, 0, qp0 );
6451 qpc1 = get_chroma_qp( h, 0, qp1 );
/* Edge QPs are the rounded average of the two neighbouring macroblock QPs. */
6452 qp0 = (qp + qp0 + 1) >> 1;
6453 qp1 = (qp + qp1 + 1) >> 1;
6454 qpc0 = (qpc + qpc0 + 1) >> 1;
6455 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this QP the alpha/beta thresholds are all zero, so filtering is a
 * no-op and can be skipped entirely. */
6456 qp_thresh = 15 - h->slice_alpha_c0_offset;
6457 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6458 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra macroblock: fixed boundary strengths (4 on the MB edge, 3 inside;
 * field pictures use 3 on horizontal MB edges as well). */
6461 if( IS_INTRA(mb_type) ) {
6462 int16_t bS4[4] = {4,4,4,4};
6463 int16_t bS3[4] = {3,3,3,3};
6464 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6465 if( IS_8x8DCT(mb_type) ) {
6466 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6467 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6468 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6469 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6471 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6472 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6473 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6474 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6475 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6476 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6477 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6478 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6480 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6481 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6482 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6483 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6484 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6485 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6486 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6487 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter macroblock: boundary strengths are computed in bulk; bSv aliases
 * the int16_t bS rows as uint64_t so a whole row can be set/tested at once. */
6490 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6491 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6493 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6495 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6497 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6498 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6499 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6500 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6502 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6503 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6504 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6505 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Intra neighbours force the maximum strength on the shared MB edge. */
6507 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6508 bSv[0][0] = 0x0004000400040004ULL;
6509 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6510 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* dir==0 filters vertical edges, dir==1 horizontal; edge 0 uses the
 * cross-MB averaged QP (qp0/qp1), inner edges the MB's own QP. */
6512 #define FILTER(hv,dir,edge)\
6513 if(bSv[dir][edge]) {\
6514 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6516 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6517 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6523 } else if( IS_8x8DCT(mb_type) ) {
/* Full (slow-path) deblocking of one macroblock, handling every case the
 * fast path cannot: MBAFF, mixed frame/field neighbours, per-slice filter
 * control and per-plane chroma QP offsets.
 * NOTE(review): many interior lines (declarations, else branches, closers)
 * were elided in this copy; code kept byte-identical. */
6542 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6543 MpegEncContext * const s = &h->s;
6544 const int mb_xy= mb_x + mb_y*s->mb_stride;
6545 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Field MBs compare vertical MVs at half resolution, hence the limit 2. */
6546 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6547 int first_vertical_edge_done = 0;
6550 //for sufficiently low qp, filtering wouldn't do anything
6551 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6553 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6554 int qp = s->current_picture.qscale_table[mb_xy];
6556 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6557 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: the left edge of a frame/field-mismatched MB pair
 * needs 8 boundary strengths and 2 QPs instead of 4 and 1. */
6563 // left mb is in picture
6564 && h->slice_table[mb_xy-1] != 255
6565 // and current and left pair do not have the same interlaced type
6566 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6567 // and left mb is in the same slice if deblocking_filter == 2
6568 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6569 /* First vertical edge is different in MBAFF frames
6570 * There are 8 different bS to compute and 2 different Qp
6572 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6573 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6578 int mb_qp, mbn0_qp, mbn1_qp;
6580 first_vertical_edge_done = 1;
6582 if( IS_INTRA(mb_type) )
6583 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6585 for( i = 0; i < 8; i++ ) {
/* Map each of the 8 edge segments to the correct left neighbour of the pair
 * (top/bottom selection differs between field and frame coding). */
6586 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6588 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6590 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6591 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6592 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* One luma and two chroma QPs per left-pair neighbour, averaged with the
 * current MB as required for edge filtering. */
6599 mb_qp = s->current_picture.qscale_table[mb_xy];
6600 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6601 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6602 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6603 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6604 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6605 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6606 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6607 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6608 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6609 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6610 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6611 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6614 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6615 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6616 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6617 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6618 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6620 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6621 for( dir = 0; dir < 2; dir++ )
6624 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6625 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers so references can be
 * compared across slices (each slice has its own reorder). */
6626 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6627 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
/* Neighbour outside the picture (slice_table==255): skip edge 0. */
6628 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6630 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6631 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6632 // how often to recheck mv-based bS when iterating between edges
6633 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6634 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6635 // how often to recheck mv-based bS when iterating along each edge
6636 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6638 if (first_vertical_edge_done) {
6640 first_vertical_edge_done = 0;
6643 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6646 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6647 && !IS_INTERLACED(mb_type)
6648 && IS_INTERLACED(mbm_type)
6650 // This is a special case in the norm where the filtering must
6651 // be done twice (one each of the field) even if we are in a
6652 // frame macroblock.
6654 static const int nnz_idx[4] = {4,5,6,3};
6655 unsigned int tmp_linesize = 2 * linesize;
6656 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6657 int mbn_xy = mb_xy - 2 * s->mb_stride;
6662 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6663 if( IS_INTRA(mb_type) ||
6664 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6665 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6667 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6668 for( i = 0; i < 4; i++ ) {
6669 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6670 mbn_nnz[nnz_idx[i]] != 0 )
6676 // Do not use s->qscale as luma quantizer because it has not the same
6677 // value in IPCM macroblocks.
6678 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6679 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6680 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6681 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6682 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6683 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6684 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6685 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Regular per-edge loop: edge 0 borders the neighbour MB, 1..3 are internal. */
6692 for( edge = start; edge < edges; edge++ ) {
6693 /* mbn_xy: neighbor macroblock */
6694 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6695 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6696 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* With the 8x8 transform, odd internal edges are not filtered. */
6700 if( (edge&1) && IS_8x8DCT(mb_type) )
6703 if( IS_INTRA(mb_type) ||
6704 IS_INTRA(mbn_type) ) {
6707 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6708 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6717 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Skip MV re-derivation on edges fully inside one partition. */
6722 if( edge & mask_edge ) {
6723 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6726 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6727 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole edge shares one partition pair: compute bS once for all 4 groups. */
6730 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6731 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6732 int bn_idx= b_idx - (dir ? 8:1);
6734 int xn= h->slice_type_nos == FF_B_TYPE && ref2frm[0][h->ref_cache[0][b_idx]+2] != ref2frmn[0][h->ref_cache[0][bn_idx]+2];
6736 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6738 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6739 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6740 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6742 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: derive bS separately for each 4-pixel group. */
6748 for( i = 0; i < 4; i++ ) {
6749 int x = dir == 0 ? edge : i;
6750 int y = dir == 0 ? i : edge;
6751 int b_idx= 8 + 4 + x + 8*y;
6752 int bn_idx= b_idx - (dir ? 8:1);
6754 if( h->non_zero_count_cache[b_idx] != 0 ||
6755 h->non_zero_count_cache[bn_idx] != 0 ) {
6760 int xn= h->slice_type_nos == FF_B_TYPE && ref2frm[0][h->ref_cache[0][b_idx]+2] != ref2frmn[0][h->ref_cache[0][bn_idx]+2];
6762 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6764 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6765 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6766 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6774 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6779 // Do not use s->qscale as luma quantizer because it has not the same
6780 // value in IPCM macroblocks.
6781 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6782 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6783 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6784 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Chroma is subsampled, so only even luma edges have a chroma counterpart. */
6786 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6787 if( (edge&1) == 0 ) {
6788 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6789 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6790 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6791 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6794 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6795 if( (edge&1) == 0 ) {
6796 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6797 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6798 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6799 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of one slice, dispatching to the CABAC or CAVLC
 * macroblock decoder according to the PPS, and register the decoded region
 * with the error concealment layer (ff_er_add_slice).
 * Returns 0 on normal slice end, -1 on error.
 * NOTE(review): loop headers, else branches and return statements were
 * elided in this copy; code kept byte-identical. */
6806 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6807 MpegEncContext * const s = &h->s;
6808 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6812 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header. */
6816 align_get_bits( &s->gb );
6819 ff_init_cabac_states( &h->cabac);
6820 ff_init_cabac_decoder( &h->cabac,
6821 s->gb.buffer + get_bits_count(&s->gb)/8,
6822 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6823 /* calculate pre-state */
6824 for( i= 0; i < 460; i++ ) {
6826 if( h->slice_type_nos == FF_I_TYPE )
6827 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6829 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte: states <=63 keep MPS 0, others MPS 1. */
6832 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6834 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6839 int ret = decode_mb_cabac(h);
6841 //STOP_TIMER("decode_mb_cabac")
6843 if(ret>=0) hl_decode_mb(h);
/* MBAFF codes MB pairs; decode the bottom MB of the pair right away. */
6845 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6848 if(ret>=0) ret = decode_mb_cabac(h);
6850 if(ret>=0) hl_decode_mb(h);
6853 eos = get_cabac_terminate( &h->cabac );
/* A bytestream overrun of more than 2 bytes indicates corrupt input. */
6855 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6856 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6857 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6861 if( ++s->mb_x >= s->mb_width ) {
6863 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6865 if(FIELD_OR_MBAFF_PICTURE) {
6870 if( eos || s->mb_y >= s->mb_height ) {
6871 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6872 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path mirrors the CABAC loop above. */
6879 int ret = decode_mb_cavlc(h);
6881 if(ret>=0) hl_decode_mb(h);
6883 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6885 ret = decode_mb_cavlc(h);
6887 if(ret>=0) hl_decode_mb(h);
6892 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6893 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6898 if(++s->mb_x >= s->mb_width){
6900 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6902 if(FIELD_OR_MBAFF_PICTURE) {
6905 if(s->mb_y >= s->mb_height){
6906 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Slice must end exactly at the last bit, otherwise mark it as damaged. */
6908 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6909 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6913 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6920 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6921 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6922 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6923 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6927 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below appears to be an unreachable legacy path
 * (see "return -1; //not reached" at the end); it also passes s->gb by
 * value where the paths above use &s->gb — verify before touching. */
6936 for(;s->mb_y < s->mb_height; s->mb_y++){
6937 for(;s->mb_x < s->mb_width; s->mb_x++){
6938 int ret= decode_mb(h);
6943 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d", s->mb_x, s->mb_y);
6944 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6949 if(++s->mb_x >= s->mb_width){
6951 if(++s->mb_y >= s->mb_height){
6952 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6953 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6957 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): the next line is corrupted in this copy ("s->?gb", "gb?.");
 * by analogy with the line after it, it should read:
 *     if(get_bits_count(s->gb) >= s->gb.size_in_bits){
 * — fix upstream; left byte-identical here. */
6964 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6965 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6966 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6970 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6977 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6980 return -1; //not reached
/* Parse an SEI "user data unregistered" payload. Its only use here is to
 * detect x264's version string and record the encoder build number, which
 * other code consults for bug workarounds.
 * NOTE(review): declarations (e, build, i) and closing braces were elided
 * in this copy; code kept byte-identical. */
6983 static int decode_unregistered_user_data(H264Context *h, int size){
6984 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 bytes of payload text. */
6985 uint8_t user_data[16+256];
6991 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6992 user_data[i]= get_bits(&s->gb, 8);
/* Payload text starts after the 16-byte UUID. */
6996 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6997 if(e==1 && build>=0)
6998 h->x264_build= build;
7000 if(s->avctx->debug & FF_DEBUG_BUGS)
7001 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip whatever part of the payload did not fit in the local buffer. */
7004 skip_bits(&s->gb, 8);
/* Parse the SEI NAL unit: each message carries a type and a size, both
 * coded as runs of 0xFF bytes plus a final byte (spec 7.3.2.3.1).
 * Unhandled payload types are skipped.
 * NOTE(review): do/while openers, switch skeleton and return were elided
 * in this copy; code kept byte-identical. */
7009 static int decode_sei(H264Context *h){
7010 MpegEncContext * const s = &h->s;
7012 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* type/size accumulate 255 per 0xFF escape byte, then add the final byte. */
7017 type+= show_bits(&s->gb, 8);
7018 }while(get_bits(&s->gb, 8) == 255);
7022 size+= show_bits(&s->gb, 8);
7023 }while(get_bits(&s->gb, 8) == 255);
7027 if(decode_unregistered_user_data(h, size) < 0)
/* Default: skip unrecognized payloads entirely. */
7031 skip_bits(&s->gb, 8*size);
7034 //FIXME check bits here
7035 align_get_bits(&s->gb);
7041 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7042 MpegEncContext * const s = &h->s;
7044 cpb_count = get_ue_golomb(&s->gb) + 1;
7045 get_bits(&s->gb, 4); /* bit_rate_scale */
7046 get_bits(&s->gb, 4); /* cpb_size_scale */
7047 for(i=0; i<cpb_count; i++){
7048 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7049 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7050 get_bits1(&s->gb); /* cbr_flag */
7052 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7053 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7054 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7055 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the VUI (video usability information) appendix of an SPS,
 * H.264 Annex E.1.1: sample aspect ratio, video signal description,
 * timing info, HRD parameters and bitstream restrictions.
 * NOTE(review): several branch closers, the error return and the final
 * return were elided in this copy; code kept byte-identical. */
7058 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7059 MpegEncContext * const s = &h->s;
7060 int aspect_ratio_info_present_flag;
7061 unsigned int aspect_ratio_idc;
7062 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7064 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7066 if( aspect_ratio_info_present_flag ) {
7067 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den; other idcs index a fixed table. */
7068 if( aspect_ratio_idc == EXTENDED_SAR ) {
7069 sps->sar.num= get_bits(&s->gb, 16);
7070 sps->sar.den= get_bits(&s->gb, 16);
7071 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7072 sps->sar= pixel_aspect[aspect_ratio_idc];
7074 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7081 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
/* The following fields are parsed but (mostly) discarded. */
7083 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7084 get_bits1(&s->gb); /* overscan_appropriate_flag */
7087 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7088 get_bits(&s->gb, 3); /* video_format */
7089 get_bits1(&s->gb); /* video_full_range_flag */
7090 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7091 get_bits(&s->gb, 8); /* colour_primaries */
7092 get_bits(&s->gb, 8); /* transfer_characteristics */
7093 get_bits(&s->gb, 8); /* matrix_coefficients */
7097 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7098 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7099 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7102 sps->timing_info_present_flag = get_bits1(&s->gb);
7103 if(sps->timing_info_present_flag){
7104 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7105 sps->time_scale = get_bits_long(&s->gb, 32);
7106 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD blocks may appear twice (NAL and VCL variants); both are skipped. */
7109 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7110 if(nal_hrd_parameters_present_flag)
7111 decode_hrd_parameters(h, sps);
7112 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7113 if(vcl_hrd_parameters_present_flag)
7114 decode_hrd_parameters(h, sps);
7115 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7116 get_bits1(&s->gb); /* low_delay_hrd_flag */
7117 get_bits1(&s->gb); /* pic_struct_present_flag */
7119 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7120 if(sps->bitstream_restriction_flag){
7121 unsigned int num_reorder_frames;
7122 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7123 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7124 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7125 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7126 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7127 num_reorder_frames= get_ue_golomb(&s->gb);
7128 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* 16 is the DPB hard limit; larger values would overflow delayed_pic. */
7130 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7131 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7135 sps->num_reorder_frames= num_reorder_frames;
7141 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7142 const uint8_t *jvt_list, const uint8_t *fallback_list){
7143 MpegEncContext * const s = &h->s;
7144 int i, last = 8, next = 8;
7145 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7146 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7147 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7149 for(i=0;i<size;i++){
7151 next = (last + get_se_golomb(&s->gb)) & 0xff;
7152 if(!i && !next){ /* matrix not written, we use the preset one */
7153 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7156 last = factors[scan[i]] = next ? next : last;
/* Read the full set of scaling matrices from an SPS or PPS.
 * When a PPS omits its matrices, they fall back either to the SPS matrices
 * (if the SPS carried any) or to the flat/JVT defaults; within the list,
 * each chroma matrix falls back to the previously decoded one.
 * NOTE(review): closing braces were elided in this copy; code kept
 * byte-identical. */
7160 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7161 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7162 MpegEncContext * const s = &h->s;
/* A PPS may inherit from an SPS that actually transmitted matrices. */
7163 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7164 const uint8_t *fallback[4] = {
7165 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7166 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7167 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7168 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7170 if(get_bits1(&s->gb)){
7171 sps->scaling_matrix_present |= is_sps;
7172 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7173 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7174 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7175 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7176 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7177 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables the 8x8 DCT. */
7178 if(is_sps || pps->transform_8x8_mode){
7179 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7180 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7182 } else if(fallback_sps) {
/* Nothing transmitted: copy the SPS matrices wholesale. */
7183 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7184 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7189 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Shared helper for SPS/PPS storage: validates 'id' against 'max', lazily
 * allocates a zeroed entry of 'size' bytes, and logs using 'name'.
 * Returns NULL on range or allocation failure.
 * NOTE(review): the return-type line and the if-conditions around the
 * messages below were elided in this copy; code kept byte-identical. */
7192 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7193 const size_t size, const char *name)
7196 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7201 vec[id] = av_mallocz(size);
7203 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL unit (spec 7.3.2.1) into
 * h->sps_buffers[sps_id]. Returns 0 on success, negative on error.
 * NOTE(review): error returns, some closers and the final return were
 * elided in this copy; code kept byte-identical. */
7208 static inline int decode_seq_parameter_set(H264Context *h){
7209 MpegEncContext * const s = &h->s;
7210 int profile_idc, level_idc;
7211 unsigned int sps_id, tmp, mb_width, mb_height;
7215 profile_idc= get_bits(&s->gb, 8);
7216 get_bits1(&s->gb); //constraint_set0_flag
7217 get_bits1(&s->gb); //constraint_set1_flag
7218 get_bits1(&s->gb); //constraint_set2_flag
7219 get_bits1(&s->gb); //constraint_set3_flag
7220 get_bits(&s->gb, 4); // reserved
7221 level_idc= get_bits(&s->gb, 8);
7222 sps_id= get_ue_golomb(&s->gb);
7224 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7228 sps->profile_idc= profile_idc;
7229 sps->level_idc= level_idc;
/* High-profile extras: chroma format, bit depths, scaling matrices.
 * Only 4:2:0 / 8-bit is actually supported; extra fields are skipped. */
7231 if(sps->profile_idc >= 100){ //high profile
7232 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7233 get_bits1(&s->gb); //residual_color_transform_flag
7234 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7235 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7236 sps->transform_bypass = get_bits1(&s->gb);
7237 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7239 sps->scaling_matrix_present = 0;
7241 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7242 sps->poc_type= get_ue_golomb(&s->gb);
7244 if(sps->poc_type == 0){ //FIXME #define
7245 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7246 } else if(sps->poc_type == 1){//FIXME #define
7247 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7248 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7249 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7250 tmp= get_ue_golomb(&s->gb);
/* Bound the cycle length by the fixed-size offset_for_ref_frame array. */
7252 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7253 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7256 sps->poc_cycle_length= tmp;
7258 for(i=0; i<sps->poc_cycle_length; i++)
7259 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7260 }else if(sps->poc_type != 2){
7261 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7265 tmp= get_ue_golomb(&s->gb);
7266 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7267 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7270 sps->ref_frame_count= tmp;
7271 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7272 mb_width= get_ue_golomb(&s->gb) + 1;
7273 mb_height= get_ue_golomb(&s->gb) + 1;
7274 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7275 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7276 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7279 sps->mb_width = mb_width;
7280 sps->mb_height= mb_height;
7282 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7283 if(!sps->frame_mbs_only_flag)
7284 sps->mb_aff= get_bits1(&s->gb);
7288 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7290 #ifndef ALLOW_INTERLACE
7292 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7294 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7295 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7297 sps->crop= get_bits1(&s->gb);
7299 sps->crop_left = get_ue_golomb(&s->gb);
7300 sps->crop_right = get_ue_golomb(&s->gb);
7301 sps->crop_top = get_ue_golomb(&s->gb);
7302 sps->crop_bottom= get_ue_golomb(&s->gb);
7303 if(sps->crop_left || sps->crop_top){
7304 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* NOTE(review): this reads h->sps.frame_mbs_only_flag (the previously
 * active SPS) rather than the sps being parsed — looks like a bug; later
 * FFmpeg uses sps->frame_mbs_only_flag here. Verify before changing. */
7306 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7307 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7313 sps->crop_bottom= 0;
7316 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7317 if( sps->vui_parameters_present_flag )
7318 decode_vui_parameters(h, sps);
7320 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7321 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7322 sps_id, sps->profile_idc, sps->level_idc,
7324 sps->ref_frame_count,
7325 sps->mb_width, sps->mb_height,
7326 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7327 sps->direct_8x8_inference_flag ? "8B8" : "",
7328 sps->crop_left, sps->crop_right,
7329 sps->crop_top, sps->crop_bottom,
7330 sps->vui_parameters_present_flag ? "VUI" : ""
7337 build_qp_table(PPS *pps, int t, int index)
7340 for(i = 0; i < 255; i++)
7341 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/*
 * Decodes a picture parameter set (PPS) NAL unit from s->gb into a PPS slot.
 * NOTE(review): the numeric prefixes are line numbers baked into this paste,
 * and the numbering is discontinuous — error returns, braces and declarations
 * are missing from this excerpt; do not assume the visible control flow is
 * complete.
 */
7344 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7345 MpegEncContext * const s = &h->s;
7346 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
// Allocates (or reuses) the parameter-set slot for pps_id; the NULL check of
// the returned pointer is on an elided line.
7349 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
// seq_parameter_set_id: must reference an SPS that was already decoded.
7353 tmp= get_ue_golomb(&s->gb);
7354 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7355 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7360 pps->cabac= get_bits1(&s->gb);
7361 pps->pic_order_present= get_bits1(&s->gb);
7362 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// FMO (flexible macroblock ordering) is parsed far enough to skip but not
// actually supported by this decoder.
7363 if(pps->slice_group_count > 1 ){
7364 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7365 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7366 switch(pps->mb_slice_group_map_type){
// The "| ... |" lines below are the remnants of a block comment quoting the
// slice-group syntax table from the H.264 specification; the enclosing
// comment delimiters are on elided lines — they are not executable code.
7369 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7370 | run_length[ i ] |1 |ue(v) |
7375 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7377 | top_left_mb[ i ] |1 |ue(v) |
7378 | bottom_right_mb[ i ] |1 |ue(v) |
7386 | slice_group_change_direction_flag |1 |u(1) |
7387 | slice_group_change_rate_minus1 |1 |ue(v) |
7392 | slice_group_id_cnt_minus1 |1 |ue(v) |
7393 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7395 | slice_group_id[ i ] |1 |u(v) |
7400 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7401 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Cap both reference counts at 32; on overflow, fall back to 1 reference each
// rather than failing hard.
7402 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7403 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7404 pps->ref_count[0]= pps->ref_count[1]= 1;
7408 pps->weighted_pred= get_bits1(&s->gb);
7409 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7410 pps->init_qp= get_se_golomb(&s->gb) + 26;
7411 pps->init_qs= get_se_golomb(&s->gb) + 26;
7412 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7413 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7414 pps->constrained_intra_pred= get_bits1(&s->gb);
7415 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7417 pps->transform_8x8_mode= 0;
7418 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Default scaling matrices: flat 16 (no scaling) until/unless overridden below.
7419 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7420 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// Optional High-profile trailer: transform_8x8_mode_flag, scaling matrices
// and second_chroma_qp_index_offset are only present if bits remain.
7422 if(get_bits_count(&s->gb) < bit_length){
7423 pps->transform_8x8_mode= get_bits1(&s->gb);
7424 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7425 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// No trailer present: mirror the first chroma offset (the matching "else" is
// on an elided line).
7427 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7430 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7431 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7432 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
// NOTE(review): this writes h->pps (the currently *active* PPS copy) rather
// than the pps just decoded — looks suspicious; verify this is intentional.
7433 h->pps.chroma_qp_diff= 1;
7435 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7436 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7437 pps_id, pps->sps_id,
7438 pps->cabac ? "CABAC" : "CAVLC",
7439 pps->slice_group_count,
7440 pps->ref_count[0], pps->ref_count[1],
7441 pps->weighted_pred ? "weighted" : "",
7442 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7443 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7444 pps->constrained_intra_pred ? "CONSTR" : "",
7445 pps->redundant_pic_cnt_present ? "REDU" : "",
7446 pps->transform_8x8_mode ? "8x8DCT" : ""
7454 * Call decode_slice() for each context.
7456 * @param h h264 master context
7457 * @param context_count number of contexts to execute
/*
 * Runs decode_slice() on each queued slice context: directly when there is a
 * single context, otherwise via avctx->execute() for threaded decoding.
 * NOTE(review): several lines are elided from this paste (local declarations,
 * the single/multi-context "else" braces) — numbering is discontinuous.
 */
7459 static void execute_decode_slices(H264Context *h, int context_count){
7460 MpegEncContext * const s = &h->s;
7461 AVCodecContext * const avctx= s->avctx;
// Fast path: one context, decode inline on the calling thread.
7465 if(context_count == 1) {
7466 decode_slice(avctx, h);
// Multi-context path: prime each worker context before dispatch.
7468 for(i = 1; i < context_count; i++) {
7469 hx = h->thread_context[i];
7470 hx->s.error_resilience = avctx->error_resilience;
7471 hx->s.error_count = 0;
7474 avctx->execute(avctx, (void *)decode_slice,
7475 (void **)h->thread_context, NULL, context_count);
7477 /* pull back stuff from slices to master context */
7478 hx = h->thread_context[context_count - 1];
7479 s->mb_x = hx->s.mb_x;
7480 s->mb_y = hx->s.mb_y;
7481 s->dropable = hx->s.dropable;
7482 s->picture_structure = hx->s.picture_structure;
// Aggregate per-thread error counts into the master context.
7483 for(i = 1; i < context_count; i++)
7484 h->s.error_count += h->thread_context[i]->s.error_count;
/*
 * Splits buf into NAL units (Annex-B start codes or AVC length prefixes),
 * unescapes each one and dispatches on nal_unit_type (slices, SPS, PPS, SEI,
 * ...). Slices are queued into thread contexts and flushed through
 * execute_decode_slices().
 * NOTE(review): this paste is missing many source lines (loop header, error
 * returns, case labels, braces) — the numbering below is discontinuous.
 */
7489 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7490 MpegEncContext * const s = &h->s;
7491 AVCodecContext * const avctx= s->avctx;
7493 H264Context *hx; ///< thread context
7494 int context_count = 0;
7496 h->max_contexts = avctx->thread_count;
// NOTE(review): this hexdump reads buf[0..49] with no buf_size guard; it
// appears to sit inside debug-only code whose surrounding condition is on
// elided lines — confirm it cannot run on short buffers.
7499 for(i=0; i<50; i++){
7500 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7503 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7504 h->current_slice = 0;
7505 if (!s->first_field)
7506 s->current_picture_ptr= NULL;
7518 if(buf_index >= buf_size) break;
// AVC ("avcC") mode: each NAL is preceded by an nal_length_size-byte
// big-endian length instead of a start code.
7520 for(i = 0; i < h->nal_length_size; i++)
7521 nalsize = (nalsize << 8) | buf[buf_index++];
// NOTE(review): nalsize is a signed int built from up to 4 bytes; with a
// 4-byte length a corrupted stream can make nalsize negative or make
// nalsize+buf_index overflow — the check below would then misbehave.
7522 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7527 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7532 // start code prefix search
7533 for(; buf_index + 3 < buf_size; buf_index++){
7534 // This should always succeed in the first iteration.
7535 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7539 if(buf_index+3 >= buf_size) break;
7544 hx = h->thread_context[context_count];
7546 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7547 if (ptr==NULL || dst_length < 0){
// NOTE(review): BUG — the operands are in the wrong order: ptr[dst_length-1]
// is read *before* dst_length > 0 is tested, so dst_length == 0 causes an
// out-of-bounds read of ptr[-1]. Fix:
//     while(dst_length > 0 && ptr[dst_length - 1] == 0)
7550 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7552 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7554 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7555 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7558 if (h->is_avc && (nalsize != consumed)){
7559 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7563 buf_index += consumed;
// Discard non-reference NALs when hurrying or when the user asked to skip
// non-reference frames. FIXME: also drops SEI, as noted.
7565 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7566 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7571 switch(hx->nal_unit_type){
// IDR slice: a non-IDR NAL type mixed into an IDR access unit is invalid.
7573 if (h->nal_unit_type != NAL_IDR_SLICE) {
7574 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7577 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7579 init_get_bits(&hx->s.gb, ptr, bit_length);
7581 hx->inter_gb_ptr= &hx->s.gb;
7582 hx->s.data_partitioning = 0;
7584 if((err = decode_slice_header(hx, h)))
7587 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// Only queue the slice for decoding if it survives all skip filters.
7588 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7589 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7590 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7591 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7592 && avctx->skip_frame < AVDISCARD_ALL)
// Data-partitioned slices (DPA/DPB/DPC): separate bit readers per partition.
7596 init_get_bits(&hx->s.gb, ptr, bit_length);
7598 hx->inter_gb_ptr= NULL;
7599 hx->s.data_partitioning = 1;
7601 err = decode_slice_header(hx, h);
7604 init_get_bits(&hx->intra_gb, ptr, bit_length);
7605 hx->intra_gb_ptr= &hx->intra_gb;
7608 init_get_bits(&hx->inter_gb, ptr, bit_length);
7609 hx->inter_gb_ptr= &hx->inter_gb;
7611 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7612 && s->context_initialized
7614 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7615 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7616 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7617 && avctx->skip_frame < AVDISCARD_ALL)
7621 init_get_bits(&s->gb, ptr, bit_length);
7625 init_get_bits(&s->gb, ptr, bit_length);
7626 decode_seq_parameter_set(h);
7628 if(s->flags& CODEC_FLAG_LOW_DELAY)
7631 if(avctx->has_b_frames < 2)
7632 avctx->has_b_frames= !s->low_delay;
7635 init_get_bits(&s->gb, ptr, bit_length);
7637 decode_picture_parameter_set(h, bit_length);
7641 case NAL_END_SEQUENCE:
7642 case NAL_END_STREAM:
7643 case NAL_FILLER_DATA:
7645 case NAL_AUXILIARY_SLICE:
7648 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush queued slices once every thread context holds one.
7651 if(context_count == h->max_contexts) {
7652 execute_decode_slices(h, context_count);
7657 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7659 /* Slice could not be decoded in parallel mode, copy down
7660 * NAL unit stuff to context 0 and restart. Note that
7661 * rbsp_buffer is not transfered, but since we no longer
7662 * run in parallel mode this should not be an issue. */
7663 h->nal_unit_type = hx->nal_unit_type;
7664 h->nal_ref_idc = hx->nal_ref_idc;
// Flush any slices still queued at end of buffer.
7670 execute_decode_slices(h, context_count);
7675 * returns the number of bytes consumed for building the current frame
/*
 * Maps the decoder's internal position to the number of input bytes consumed
 * for the current frame. In CODEC_FLAG_TRUNCATED mode the position is
 * rebased against the parser's last_index; otherwise pos is clamped to
 * sensible bounds. NOTE(review): the actual "return pos;" statements sit on
 * lines elided from this paste.
 */
7677 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7678 if(s->flags&CODEC_FLAG_TRUNCATED){
7679 pos -= s->parse_context.last_index;
7680 if(pos<0) pos=0; // FIXME remove (unneeded?)
7684 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7685 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Top-level frame decode entry point (AVCodec.decode): parses extradata
 * (avcC) on first use, feeds the buffer to decode_nal_units(), then reorders
 * delayed pictures into display order and emits at most one frame via *pict.
 * NOTE(review): many lines (declarations, returns, braces) are elided from
 * this paste — the numbering is discontinuous.
 */
7691 static int decode_frame(AVCodecContext *avctx,
7692 void *data, int *data_size,
7693 const uint8_t *buf, int buf_size)
7695 H264Context *h = avctx->priv_data;
7696 MpegEncContext *s = &h->s;
7697 AVFrame *pict = data;
7700 s->flags= avctx->flags;
7701 s->flags2= avctx->flags2;
// Truncated-input mode: let the parser assemble a complete frame first.
7703 if(s->flags&CODEC_FLAG_TRUNCATED){
7704 const int next= ff_h264_find_frame_end(h, buf, buf_size);
7705 assert((buf_size > 0) || (next == END_NOT_FOUND));
7707 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7709 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7712 /* no supplementary picture */
// Flush path: an empty buffer at end of stream drains the delayed-picture
// queue, emitting the lowest-POC picture up to the next keyframe.
7713 if (buf_size == 0) {
7717 //FIXME factorize this with the output code below
7718 out = h->delayed_pic[0];
7720 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7721 if(h->delayed_pic[i]->poc < out->poc){
7722 out = h->delayed_pic[i];
7726 for(i=out_idx; h->delayed_pic[i]; i++)
7727 h->delayed_pic[i] = h->delayed_pic[i+1];
7730 *data_size = sizeof(AVFrame);
7731 *pict= *(AVFrame*)out;
// First call in AVC mode: parse the avcC extradata (SPS/PPS records).
7737 if(h->is_avc && !h->got_avcC) {
7738 int i, cnt, nalsize;
7739 unsigned char *p = avctx->extradata;
7740 if(avctx->extradata_size < 7) {
7741 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7745 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7748 /* sps and pps in the avcC always have length coded with 2 bytes,
7749 so put a fake nal_length_size = 2 while parsing them */
7750 h->nal_length_size = 2;
7751 // Decode sps from avcC
7752 cnt = *(p+5) & 0x1f; // Number of sps
7754 for (i = 0; i < cnt; i++) {
7755 nalsize = AV_RB16(p) + 2;
// NOTE(review): inconsistent with the PPS loop below, which requires the
// return value to equal nalsize — here any non-negative return passes.
7756 if(decode_nal_units(h, p, nalsize) < 0) {
7757 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7762 // Decode pps from avcC
7763 cnt = *(p++); // Number of pps
7764 for (i = 0; i < cnt; i++) {
7765 nalsize = AV_RB16(p) + 2;
7766 if(decode_nal_units(h, p, nalsize) != nalsize) {
7767 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7772 // Now store right nal length size, that will be use to parse all other nals
7773 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7774 // Do not reparse avcC
// Annex-B mode: extradata (if any) holds raw SPS/PPS NALs — decode once.
7778 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7779 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7783 buf_index=decode_nal_units(h, buf, buf_size);
7787 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7788 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7789 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture complete (or non-chunked mode): finish it and run reordering.
7793 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7794 Picture *out = s->current_picture_ptr;
7795 Picture *cur = s->current_picture_ptr;
7796 int i, pics, cross_idr, out_of_order, out_idx;
7800 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7801 s->current_picture_ptr->pict_type= s->pict_type;
// Roll POC/frame_num state forward and apply reference picture marking.
7803 h->prev_frame_num_offset= h->frame_num_offset;
7804 h->prev_frame_num= h->frame_num;
7806 h->prev_poc_msb= h->poc_msb;
7807 h->prev_poc_lsb= h->poc_lsb;
7808 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7812 * FIXME: Error handling code does not seem to support interlaced
7813 * when slices span multiple rows
7814 * The ff_er_add_slice calls don't work right for bottom
7815 * fields; they cause massive erroneous error concealing
7816 * Error marking covers both fields (top and bottom).
7817 * This causes a mismatched s->error_count
7818 * and a bad error table. Further, the error count goes to
7819 * INT_MAX when called for bottom field, because mb_y is
7820 * past end by one (callers fault) and resync_mb_y != 0
7821 * causes problems for the first MB line, too.
7828 if (s->first_field) {
7829 /* Wait for second field. */
7833 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7834 /* Derive top_field_first from field pocs. */
7835 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7837 //FIXME do something with unavailable reference frames
7839 /* Sort B-frames into display order */
7841 if(h->sps.bitstream_restriction_flag
7842 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7843 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Without bitstream restrictions we must assume worst-case reordering depth.
7847 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7848 && !h->sps.bitstream_restriction_flag){
7849 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7854 while(h->delayed_pic[pics]) pics++;
7856 assert(pics <= MAX_DELAYED_PIC_COUNT);
// Queue the current picture; pin it so it survives until output.
7858 h->delayed_pic[pics++] = cur;
7859 if(cur->reference == 0)
7860 cur->reference = DELAYED_PIC_REF;
7863 for(i=0; h->delayed_pic[i]; i++)
7864 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// Choose the lowest-POC delayed picture up to the next keyframe for output.
7867 out = h->delayed_pic[0];
7869 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7870 if(h->delayed_pic[i]->poc < out->poc){
7871 out = h->delayed_pic[i];
7875 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames adaptively when out-of-order output is detected.
7877 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7879 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7881 ((!cross_idr && out->poc > h->outputed_poc + 2)
7882 || cur->pict_type == FF_B_TYPE)))
7885 s->avctx->has_b_frames++;
7888 if(out_of_order || pics > s->avctx->has_b_frames){
7889 out->reference &= ~DELAYED_PIC_REF;
7890 for(i=out_idx; h->delayed_pic[i]; i++)
7891 h->delayed_pic[i] = h->delayed_pic[i+1];
7893 if(!out_of_order && pics > s->avctx->has_b_frames){
7894 *data_size = sizeof(AVFrame);
7896 h->outputed_poc = out->poc;
7897 *pict= *(AVFrame*)out;
7899 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7904 assert(pict->data[0] || !*data_size);
7905 ff_print_debug_info(s, pict);
7906 //printf("out %d\n", (int)pict->data[0]);
7909 /* Return the Picture timestamp as the frame number */
7910 /* we subtract 1 because it is added on utils.c */
7911 avctx->frame_number = s->picture_number - 1;
7913 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fills h->mb_avail[] with availability flags for the macroblocks
 * neighbouring the current one (same-slice neighbours only):
 * [0]=top-left, [1]=top, [2]=top-right, [3]=left, [4]/[5] fixed.
 * NOTE(review): the lines guarding the top-row accesses (an "if(s->mb_y)"
 * with an else branch zeroing mb_avail[0..2]) are elided from this paste —
 * without that guard the first three reads would index before slice_table.
 */
7916 static inline void fill_mb_avail(H264Context *h){
7917 MpegEncContext * const s = &h->s;
7918 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7921 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7922 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7923 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7929 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7930 h->mb_avail[4]= 1; //FIXME move out
7931 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self-test code (compiled only under an elided #ifdef/main wrapper;
 * the enclosing function header is missing from this paste). It exercises:
 * unsigned/signed exp-Golomb round-trips, the 4x4 (I)DCT, the quantizer, and
 * the NAL escape/unescape layer. Numbering is discontinuous — loop bodies,
 * braces and error returns are elided throughout.
 */
7939 #define SIZE (COUNT*40)
7945 // int int_temp[10000];
7947 AVCodecContext avctx;
7949 dsputil_init(&dsp, &avctx);
// --- exp-Golomb: write COUNT unsigned codes, read them back, compare ---
7951 init_put_bits(&pb, temp, SIZE);
7952 printf("testing unsigned exp golomb\n");
7953 for(i=0; i<COUNT; i++){
7955 set_ue_golomb(&pb, i);
7956 STOP_TIMER("set_ue_golomb");
7958 flush_put_bits(&pb);
7960 init_get_bits(&gb, temp, 8*SIZE);
7961 for(i=0; i<COUNT; i++){
7964 s= show_bits(&gb, 24);
7967 j= get_ue_golomb(&gb);
7969 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7972 STOP_TIMER("get_ue_golomb");
// --- exp-Golomb: same round-trip for signed codes, centered on 0 ---
7976 init_put_bits(&pb, temp, SIZE);
7977 printf("testing signed exp golomb\n");
7978 for(i=0; i<COUNT; i++){
7980 set_se_golomb(&pb, i - COUNT/2);
7981 STOP_TIMER("set_se_golomb");
7983 flush_put_bits(&pb);
7985 init_get_bits(&gb, temp, 8*SIZE);
7986 for(i=0; i<COUNT; i++){
7989 s= show_bits(&gb, 24);
7992 j= get_se_golomb(&gb);
7993 if(j != i - COUNT/2){
7994 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7997 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT: random blocks, forward+inverse, accumulate error ---
8001 printf("testing 4x4 (I)DCT\n");
8004 uint8_t src[16], ref[16];
8005 uint64_t error= 0, max_error=0;
8007 for(i=0; i<COUNT; i++){
8009 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8010 for(j=0; j<16; j++){
8011 ref[j]= random()%255;
8012 src[j]= random()%255;
8015 h264_diff_dct_c(block, src, ref, 4);
// Rescale coefficients (the *4 / /5 pattern compensates the transform norms).
8018 for(j=0; j<16; j++){
8019 // printf("%d ", block[j]);
8020 block[j]= block[j]*4;
8021 if(j&1) block[j]= (block[j]*4 + 2)/5;
8022 if(j&4) block[j]= (block[j]*4 + 2)/5;
8026 s->dsp.h264_idct_add(ref, block, 4);
8027 /* for(j=0; j<16; j++){
8028 printf("%d ", ref[j]);
8032 for(j=0; j<16; j++){
8033 int diff= FFABS(src[j] - ref[j]);
8036 max_error= FFMAX(max_error, diff);
8039 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8040 printf("testing quantizer\n");
8041 for(qp=0; qp<52; qp++){
8043 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: encode a random bitstream, decode it, verify round-trip ---
8046 printf("Testing NAL layer\n");
8048 uint8_t bitstream[COUNT];
8049 uint8_t nal[COUNT*2];
8051 memset(&h, 0, sizeof(H264Context));
8053 for(i=0; i<COUNT; i++){
8061 for(j=0; j<COUNT; j++){
8062 bitstream[j]= (random() % 255) + 1;
// Sprinkle zero bytes so the emulation-prevention escaping gets exercised.
8065 for(j=0; j<zeros; j++){
8066 int pos= random() % COUNT;
8067 while(bitstream[pos] == 0){
8076 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8078 printf("encoding failed\n");
8082 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8086 if(out_length != COUNT){
8087 printf("incorrect length %d %d\n", out_length, COUNT);
8091 if(consumed != nal_length){
8092 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8096 if(memcmp(bitstream, out, COUNT)){
8097 printf("mismatch\n");
8103 printf("Testing RBSP\n");
/*
 * Codec close callback: frees per-context RBSP unescape buffers and the
 * decoder tables. NOTE(review): lines are elided from this paste (numbering
 * jumps); the common MPEG-context teardown and the "return 0;" sit on
 * missing lines.
 */
8111 static av_cold int decode_end(AVCodecContext *avctx)
8113 H264Context *h = avctx->priv_data;
8114 MpegEncContext *s = &h->s;
8116 av_freep(&h->rbsp_buffer[0]);
8117 av_freep(&h->rbsp_buffer[1]);
8118 free_tables(h); //FIXME cleanup init stuff perhaps
8121 // memset(h, 0, sizeof(H264Context));
/*
 * Codec registration entry for the H.264 decoder.
 * NOTE(review): most initializer fields (name, type, id, init/close/decode
 * callbacks, flush) are on lines elided from this paste.
 */
8127 AVCodec h264_decoder = {
8131 sizeof(H264Context),
8136 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8138 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),