2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
73 const uint8_t ff_rem6[52]={
74 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
77 const uint8_t ff_div6[52]={
78 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
88 int topleft_partition= -1;
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
484 if(h->slice_type_nos == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
573 if(!(h->top_samples_available&0x8000)){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
585 if(!(h->left_samples_available&0x8000)){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if(!(h->left_samples_available&0x8000)){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
667 // store all luma nnzs, for deblocking
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
781 }else if(match_count==1){
785 }else if(top_ref==ref){
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
837 pred_motion(h, n, 4, list, ref, mx, my);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
874 pred_motion(h, n, 2, list, ref, mx, my);
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
891 pred_motion(h, 0, 4, 0, 0, mx, my);
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
904 h->dist_scale_factor[i] = 256;
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives motion vectors and reference indices for a B_DIRECT macroblock
 * and writes them into h->mv_cache / h->ref_cache, choosing between
 * spatial direct (h->direct_spatial_mv_pred) and temporal direct prediction.
 * Also rewrites *mb_type / h->sub_mb_type[] to reflect the derived partition.
 * NOTE(review): this extract is missing intermediate source lines (closing
 * braces, else-branches, some declarations); comments describe only the
 * logic visible here.
 */
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// Co-located macroblock data from the first list-1 reference picture.
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// Pick the direct sub-partition size from the co-located mb type and the
// SPS direct_8x8_inference_flag.
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---- Spatial direct prediction ----
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
// Per list, take the minimum valid neighbor reference (A, B, C; C falls
// back to D). The unsigned casts make negative (unavailable) refs compare
// as large, so FFMIN3 skips them.
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1004 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
// No valid neighbor refs at all: zero refs and zero MVs.
1009 if(ref[0] < 0 && ref[1] < 0){
1010 ref[0] = ref[1] = 0;
1011 mv[0][0] = mv[0][1] =
1012 mv[1][0] = mv[1][1] = 0;
1014 for(list=0; list<2; list++){
1016 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1018 mv[list][0] = mv[list][1] = 0;
// Drop the unused prediction list when only one ref is valid.
1024 *mb_type &= ~MB_TYPE_L1;
1025 sub_mb_type &= ~MB_TYPE_L1;
1026 }else if(ref[0] < 0){
1028 *mb_type &= ~MB_TYPE_L0;
1029 sub_mb_type &= ~MB_TYPE_L0;
// Field/frame mismatch with the co-located mb: re-locate the co-located
// data by adjusting the l1 pointers by whole-mb strides.
1032 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1033 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1034 int mb_types_col[2];
1035 int b8_stride = h->b8_stride;
1036 int b4_stride = h->b_stride;
1038 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1040 if(IS_INTERLACED(*mb_type)){
1041 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1042 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1044 l1ref0 -= 2*b8_stride;
1045 l1ref1 -= 2*b8_stride;
1046 l1mv0 -= 4*b4_stride;
1047 l1mv1 -= 4*b4_stride;
// Field-to-frame: choose the co-located field whose POC is closer.
1052 int cur_poc = s->current_picture_ptr->poc;
1053 int *col_poc = h->ref_list[1]->field_poc;
1054 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1055 int dy = 2*col_parity - (s->mb_y&1);
1057 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1058 l1ref0 += dy*b8_stride;
1059 l1ref1 += dy*b8_stride;
1060 l1mv0 += 2*dy*b4_stride;
1061 l1mv1 += 2*dy*b4_stride;
// Per-8x8 spatial direct: MVs forced to 0 where the co-located block is
// short-term ref 0 with a near-zero MV (|mv| <= 1 in quarter-pel units).
1065 for(i8=0; i8<4; i8++){
1068 int xy8 = x8+y8*b8_stride;
1069 int xy4 = 3*x8+y8*b4_stride;
1072 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1074 h->sub_mb_type[i8] = sub_mb_type;
1076 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1077 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1078 if(!IS_INTRA(mb_types_col[y8])
1079 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1080 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1082 a= pack16to32(mv[0][0],mv[0][1]);
1084 b= pack16to32(mv[1][0],mv[1][1]);
1086 a= pack16to32(mv[0][0],mv[0][1]);
1087 b= pack16to32(mv[1][0],mv[1][1]);
1089 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1090 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// 16x16 spatial direct (no field/frame mismatch).
1092 }else if(IS_16X16(*mb_type)){
1095 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1096 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1097 if(!IS_INTRA(mb_type_col)
1098 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1099 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <= 33 had a different behavior here; gate on the detected
// encoder build number for bit-exact compatibility.
1100 && (h->x264_build>33 || !h->x264_build)))){
1102 a= pack16to32(mv[0][0],mv[0][1]);
1104 b= pack16to32(mv[1][0],mv[1][1]);
1106 a= pack16to32(mv[0][0],mv[0][1]);
1107 b= pack16to32(mv[1][0],mv[1][1]);
1109 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1110 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// 8x8 spatial direct, same-parity co-located data.
1112 for(i8=0; i8<4; i8++){
1113 const int x8 = i8&1;
1114 const int y8 = i8>>1;
1116 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1118 h->sub_mb_type[i8] = sub_mb_type;
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1122 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1123 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1126 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1127 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1128 && (h->x264_build>33 || !h->x264_build)))){
1129 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1130 if(IS_SUB_8X8(sub_mb_type)){
1131 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1132 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1134 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1139 for(i4=0; i4<4; i4++){
1140 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1141 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1143 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1145 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ---- Temporal direct prediction: scale co-located MVs by POC distance ----
1151 }else{ /* direct temporal mv pred */
1152 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1153 const int *dist_scale_factor = h->dist_scale_factor;
1156 if(IS_INTERLACED(*mb_type)){
1157 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1158 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1159 dist_scale_factor = h->dist_scale_factor_field;
1161 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1162 /* FIXME assumes direct_8x8_inference == 1 */
1163 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1164 int mb_types_col[2];
1167 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1168 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1169 | (*mb_type & MB_TYPE_INTERLACED);
1170 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1172 if(IS_INTERLACED(*mb_type)){
1173 /* frame to field scaling */
1174 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1175 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1177 l1ref0 -= 2*h->b8_stride;
1178 l1ref1 -= 2*h->b8_stride;
1179 l1mv0 -= 4*h->b_stride;
1180 l1mv1 -= 4*h->b_stride;
1184 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1185 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1187 *mb_type |= MB_TYPE_16x8;
1189 *mb_type |= MB_TYPE_8x8;
1191 /* field to frame scaling */
1192 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1193 * but in MBAFF, top and bottom POC are equal */
1194 int dy = (s->mb_y&1) ? 1 : 2;
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1197 l1ref0 += dy*h->b8_stride;
1198 l1ref1 += dy*h->b8_stride;
1199 l1mv0 += 2*dy*h->b_stride;
1200 l1mv1 += 2*dy*h->b_stride;
1203 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1205 *mb_type |= MB_TYPE_16x16;
1207 *mb_type |= MB_TYPE_8x8;
// Cross-parity temporal direct: y_shift compensates the vertical MV scale
// between field and frame coordinates (my_col = (mv<<y_shift)/2 below).
1210 for(i8=0; i8<4; i8++){
1211 const int x8 = i8&1;
1212 const int y8 = i8>>1;
1214 const int16_t (*l1mv)[2]= l1mv0;
1216 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 h->sub_mb_type[i8] = sub_mb_type;
1220 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1221 if(IS_INTRA(mb_types_col[y8])){
1222 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1223 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1224 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1230 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1232 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1235 scale = dist_scale_factor[ref0];
1236 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1239 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1240 int my_col = (mv_col[1]<<y_shift)/2;
// mv_l0 = (dist_scale_factor * mv_col + 128) >> 8; mv_l1 = mv_l0 - mv_col.
1241 int mx = (scale * mv_col[0] + 128) >> 8;
1242 int my = (scale * my_col + 128) >> 8;
1243 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1244 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col)){
// Map the co-located ref index into this slice's list 0, preferring l1ref0.
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1261 : map_col_to_list0[1][l1ref1[0]];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// Per-8x8 (and per-4x4) temporal direct, same parity.
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col)){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*h->b8_stride];
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data from the decode caches
 * (h->mv_cache / h->mvd_cache / h->ref_cache / h->sub_mb_type) back into the
 * frame-wide arrays of the current picture.
 * NOTE(review): some source lines are missing from this extract; loop
 * headers for y are not visible.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// Mark list-0 ref indices as unused if this mb does not use list 0.
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
// Copy 4x4-block MVs row by row, two MVs (one uint64_t) at a time.
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC also needs the motion vector differences; skipped MBs have none.
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// One ref index per 8x8 block, taken from the corners of the cache.
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B slices with CABAC keep a per-8x8 "direct" flag table.
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Strips the NAL header and emulation-prevention bytes (00 00 03 -> 00 00)
 * from a NAL unit. Returns a pointer to the unescaped payload, which may be
 * src+1 when no escape sequence occurs (zero-copy fast path).
 * NOTE(review): several lines (loop headers, returns) are missing from this
 * extract.
 */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
// First byte is the NAL header: 1 forbidden bit, 2 ref_idc bits, 5 type bits.
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
// Scan two bytes at a time for a 00 00 0x pattern (x <= 3).
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
// Fast path: no escaped zero found, payload can be used in place.
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
// NOTE(review): av_fast_realloc result is not NULL-checked in the visible
// lines — confirm the missing lines handle allocation failure.
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
// Locates the rbsp_stop_one_bit to find the exact bitstream end.
// NOTE(review): the body of this function is almost entirely missing from
// this extract; only the signature and a trace line are visible.
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * idct transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequantization of the 16 luma DC
 * coefficients (Intra16x16 mode). Works in place on the scattered DC
 * positions of the 16x16 block layout via x_offset/y_offset tables.
 * NOTE(review): the loop headers, the stride constant and the second
 * butterfly stage are missing from this extract.
 */
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
// Horizontal pass: butterflies over the 4 DC columns into temp[].
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Vertical pass: butterflies over temp[], then scale by qmul with rounding.
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * dct transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side of
 * h264_luma_dc_dequant_idct_c), in place, with a final >>1 normalization.
 * NOTE(review): loop headers and the stride constant are missing from this
 * extract.
 */
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// First pass of butterflies into temp[].
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Second pass; >>1 keeps the result in range (Hadamard gain of 4 halved).
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, in place. The 4 DC values live at stride/xStride spaced
 * positions inside the chroma block array.
 * NOTE(review): the intermediate butterfly assignments (computing e from
 * a,b,c,d) are missing from this extract.
 */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
// 2x2 butterfly output scaled by qmul, normalized by >>7.
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients, in place
 * (encoder-side counterpart of chroma_dc_dequant_idct_c, no scaling).
 * NOTE(review): the intermediate butterfly assignments are missing from
 * this extract.
 */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
// Unscaled 2x2 butterfly output.
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
/** Maps a luma qscale to the chroma QP via the PPS lookup table; t selects the Cb/Cr table. */
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
1590 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1591 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block of transform coefficients (encoder path).
 * Uses a dead-zone quantizer: intra gets bias 1/3, inter 1/6 of the step.
 * separate_dc selects a DC-specific shift (QUANT_SHIFT-2 for luma-DC via
 * the qscale+18 table, QUANT_SHIFT+1 otherwise).
 * @return index of the last non-zero coefficient.
 * NOTE(review): several control-flow lines (if/else, loop headers, the
 * negation of negative levels) are missing from this extract.
 */
1592 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1594 const int * const quant_table= quant_coeff[qscale];
1595 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
// threshold1/2 implement |level| <= dead-zone test with a single unsigned
// compare: values inside (-t1-1, t1] quantize to zero.
1596 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597 const unsigned int threshold2= (threshold1<<1);
// DC with separate table/shift (luma DC after the Hadamard transform).
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607 int level= block[0]*quant_coeff[qscale+18][0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1613 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1616 // last_non_zero = i;
// DC with the regular table but one extra shift.
1621 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623 const unsigned int dc_threshold2= (dc_threshold1<<1);
1625 int level= block[0]*quant_table[0];
1626 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1628 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1631 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1634 // last_non_zero = i;
// AC coefficients in scan order.
1647 const int j= scantable[i];
1648 int level= block[j]*quant_table[j];
1650 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1651 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652 if(((unsigned)(level+threshold1))>threshold2){
1654 level= (bias + level)>>QUANT_SHIFT;
1657 level= (bias - level)>>QUANT_SHIFT;
1666 return last_non_zero;
/**
 * Motion compensation for one partition in one direction (one list):
 * fetches the luma and chroma prediction from 'pic' at the cached MV and
 * writes/averages it into dest_* via the supplied qpel/chroma operators.
 * Falls back to ff_emulated_edge_mc when the reference area crosses the
 * picture border.
 * NOTE(review): some lines (emu flag setup, field handling else-branches)
 * are missing from this extract.
 */
1669 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671 int src_x_offset, int src_y_offset,
1672 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673 MpegEncContext * const s = &h->s;
// MVs are in quarter-pel units; offsets are in full pels (hence *8/ >>2).
1674 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1675 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1676 const int luma_xy= (mx&3) + ((my&3)<<2);
1677 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678 uint8_t * src_cb, * src_cr;
1679 int extra_width= h->emu_edge_width;
1680 int extra_height= h->emu_edge_height;
1682 const int full_mx= mx>>2;
1683 const int full_my= my>>2;
1684 const int pic_width = 16*s->mb_width;
1685 const int pic_height = 16*s->mb_height >> MB_FIELD;
1687 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
// Sub-pel interpolation reads 3 extra pixels on the low side.
1690 if(mx&7) extra_width -= 3;
1691 if(my&7) extra_height -= 3;
1693 if( full_mx < 0-extra_width
1694 || full_my < 0-extra_height
1695 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1696 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1697 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1702 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1704 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1707 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1710 // chroma offset when predicting from a field of opposite parity
1711 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1712 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
// Chroma is at 1/8-pel resolution relative to the stored samples (4:2:0).
1714 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1719 src_cb= s->edge_emu_buffer;
1721 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1724 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1725 src_cr= s->edge_emu_buffer;
1727 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Standard (unweighted) motion compensation for one partition: list-0
 * prediction with 'put' operators, then, for bi-prediction, list-1 blended
 * on top by switching to the 'avg' operators.
 * NOTE(review): the line switching qpix_op to qpix_avg is missing from this
 * extract (only the chroma_op switch is visible).
 */
1730 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1731 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732 int x_offset, int y_offset,
1733 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735 int list0, int list1){
1736 MpegEncContext * const s = &h->s;
1737 qpel_mc_func *qpix_op= qpix_put;
1738 h264_chroma_mc_func chroma_op= chroma_put;
// Convert partition offsets (in 8-pel units for chroma) to pixel addresses.
1740 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1741 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1742 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1743 x_offset += 8*s->mb_x;
1744 y_offset += 8*(s->mb_y >> MB_FIELD);
1747 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1748 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750 qpix_op, chroma_op);
// For bi-prediction the second pass averages into the first.
1753 chroma_op= chroma_avg;
1757 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1758 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bi-predicted blocks are
 * rendered separately (list 1 into the obmc scratchpad) and then combined
 * with explicit or implicit (use_weight==2) bi-weights; uni-predicted
 * blocks get a single weighted pass in place.
 * NOTE(review): some branch/closing lines are missing from this extract.
 */
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1773 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1774 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1775 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1776 x_offset += 8*s->mb_x;
1777 y_offset += 8*(s->mb_y >> MB_FIELD);
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
// Render both directions, list 1 into the scratch buffers.
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: weights derived from POC distances, summing to 64.
1795 if(h->use_weight == 2){
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
1798 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// Explicit weighting: per-list, per-ref weights/offsets from the slice hdr.
1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Uni-directional weighted prediction.
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation.
 * Weighted MC is used for explicit weighting (use_weight==1) and for
 * implicit weighting unless the implicit weight is the trivial 32/32 split
 * (which plain averaging already produces).
 */
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * 16x16 MV of this macroblock points to, offset ahead so the data is warm
 * roughly 4 macroblocks later.
 */
1849 static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Cb and Cr planes are assumed adjacent (src[2]-src[1] used as the stride).
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock: walks the
 * partition tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and calls
 * mc_part for each piece with the operator set matching its size.
 * NOTE(review): the 8x8 loop header and some closing braces are missing
 * from this extract.
 */
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869 MpegEncContext * const s = &h->s;
1870 const int mb_xy= h->mb_xy;
1871 const int mb_type= s->current_picture.mb_type[mb_xy];
1873 assert(IS_INTER(mb_type));
1875 prefetch_motion(h, 0);
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880 &weight_op[0], &weight_avg[0],
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
// 16x8: two halves, n=0 (top) and n=8 (bottom), delta=8 rows.
1882 }else if(IS_16X8(mb_type)){
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885 &weight_op[1], &weight_avg[1],
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x16: two halves side by side, delta expressed in bytes (8 lines).
1891 }else if(IS_8X16(mb_type)){
1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894 &weight_op[2], &weight_avg[2],
1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1903 assert(IS_8X8(mb_type));
// 8x8 mode: each of the 4 sub-blocks has its own sub_mb_type.
1906 const int sub_mb_type= h->sub_mb_type[i];
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914 &weight_op[3], &weight_avg[3],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919 &weight_op[4], &weight_avg[4],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
1926 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928 &weight_op[5], &weight_avg[5],
1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936 assert(IS_SUB_4X4(sub_mb_type));
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset + (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942 &weight_op[6], &weight_avg[6],
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949 prefetch_motion(h, 1);
/**
 * Builds all the CAVLC tables (coeff_token, total_zeros, run) once.
 * Guarded by a static 'done' flag so repeated decoder inits are no-ops.
 * NOTE(review): the 'done' check/set lines and loop headers are missing
 * from this extract.
 */
1952 static av_cold void decode_init_vlc(void){
1953 static int done = 0;
1959 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1960 &chroma_dc_coeff_token_len [0], 1, 1,
1961 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1964 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1965 &coeff_token_len [i][0], 1, 1,
1966 &coeff_token_bits[i][0], 1, 1, 1);
1970 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1971 &chroma_dc_total_zeros_len [i][0], 1, 1,
1972 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1974 for(i=0; i<15; i++){
1975 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1976 &total_zeros_len [i][0], 1, 1,
1977 &total_zeros_bits[i][0], 1, 1, 1);
// run_vlc covers runs for up to 6 zeros; run7_vlc handles the >6 case.
1981 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1982 &run_len [i][0], 1, 1,
1983 &run_bits[i][0], 1, 1, 1);
1985 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1986 &run_len [6][0], 1, 1,
1987 &run_bits[6][0], 1, 1, 1);
/**
 * Frees all per-context tables, the SPS/PPS buffers, and the per-thread
 * scratch buffers. av_freep NULLs each pointer, so this is safe to call on
 * a partially initialized context.
 * NOTE(review): loop headers / a NULL-check around hx may be missing from
 * this extract.
 */
1991 static void free_tables(H264Context *h){
1994 av_freep(&h->intra4x4_pred_mode);
1995 av_freep(&h->chroma_pred_mode_table);
1996 av_freep(&h->cbp_table);
1997 av_freep(&h->mvd_table[0]);
1998 av_freep(&h->mvd_table[1]);
1999 av_freep(&h->direct_table);
2000 av_freep(&h->non_zero_count);
2001 av_freep(&h->slice_table_base);
// slice_table points into slice_table_base; clear the alias, don't free it.
2002 h->slice_table= NULL;
2004 av_freep(&h->mb2b_xy);
2005 av_freep(&h->mb2b8_xy);
2007 for(i = 0; i < MAX_SPS_COUNT; i++)
2008 av_freep(h->sps_buffers + i);
2010 for(i = 0; i < MAX_PPS_COUNT; i++)
2011 av_freep(h->pps_buffers + i);
2013 for(i = 0; i < h->s.avctx->thread_count; i++) {
2014 hx = h->thread_context[i];
2016 av_freep(&hx->top_borders[1]);
2017 av_freep(&hx->top_borders[0]);
2018 av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values, applying
 * the PPS 8x8 scaling matrices. If both scaling matrices are identical,
 * table 1 aliases table 0 to save work.
 * When the platform IDCT is not the C reference one, coefficients are
 * stored transposed ((x>>3)|((x&7)<<3)) to match its input layout.
 * NOTE(review): the inner x loop header and some braces are elided in
 * this sampled view.
 */
2022 static void init_dequant8_coeff_table(H264Context *h){
2024 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2025 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2026 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2028 for(i=0; i<2; i++ ){
// identical intra/inter matrices -> share one buffer
2029 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2030 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2034 for(q=0; q<52; q++){
// q = qp: split into shift (qp/6) and table index (qp%6)
2035 int shift = ff_div6[q];
2036 int idx = ff_rem6[q];
2038 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2039 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2040 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables (6 matrices: intra/inter for
 * Y/Cb/Cr) for all 52 QP values, applying the PPS 4x4 scaling matrices.
 * Matrices with identical scaling lists alias an earlier buffer.
 * Transposed storage is used when the platform IDCT differs from the C
 * reference, mirroring init_dequant8_coeff_table().
 * NOTE(review): the j loop header, the x loop, and some braces are
 * elided in this sampled view.
 */
2045 static void init_dequant4_coeff_table(H264Context *h){
2047 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2048 for(i=0; i<6; i++ ){
2049 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier buffer when the scaling lists match (j loop elided)
2051 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2052 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2059 for(q=0; q<52; q++){
// 4x4 path adds +2 to the shift relative to the 8x8 table
2060 int shift = ff_div6[q] + 2;
2061 int idx = ff_rem6[q];
2063 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2064 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2065 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Build all dequant tables: always the 4x4 set, the 8x8 set only when the
 * PPS enables 8x8 transforms. With lossless transform bypass (SPS flag),
 * QP 0 entries are forced to the neutral value 1<<6 so dequantization is
 * an identity.
 * NOTE(review): the i/x loop headers around the bypass assignments are
 * elided in this sampled view.
 */
2070 static void init_dequant_tables(H264Context *h){
2072 init_dequant4_coeff_table(h);
2073 if(h->pps.transform_8x8_mode)
2074 init_dequant8_coeff_table(h);
2075 if(h->sps.transform_bypass){
// neutral scale: 1<<6 cancels the implicit >>6 in the IDCT path
2078 h->dequant4_coeff[i][0][x] = 1<<6;
2079 if(h->pps.transform_8x8_mode)
2082 h->dequant8_coeff[i][0][x] = 1<<6;
2089 * Allocate per-macroblock decoder tables; requires width/height (mb_stride/mb_height) to be set beforehand.
/**
 * Allocate the per-picture macroblock tables; requires s->mb_stride /
 * mb_height (i.e. width/height) to be set. CHECKED_ALLOCZ presumably
 * jumps to a fail label on OOM — the label and return are elided in this
 * sampled view, as are the declarations of x and y.
 * Also builds the mb_xy -> b_xy / b8_xy index maps and, on first call,
 * the dequant tables.
 */
2091 static int alloc_tables(H264Context *h){
2092 MpegEncContext * const s = &h->s;
// +1 row so edge macroblocks can address a row above/below safely
2093 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2096 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2098 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2099 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2102 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2104 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2105 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset into the padded base
2107 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2108 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2110 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2111 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2112 for(y=0; y<s->mb_height; y++){
2113 for(x=0; x<s->mb_width; x++){
2114 const int mb_xy= x + y*s->mb_stride;
// 4x4 motion grid index (b) and 8x8 grid index (b8) for this MB
2115 const int b_xy = 4*x + 4*y*h->b_stride;
2116 const int b8_xy= 2*x + 2*y*h->b8_stride;
2118 h->mb2b_xy [mb_xy]= b_xy;
2119 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated lazily in frame_start() once linesize is known
2123 s->obmc_scratchpad = NULL;
2125 if(!h->dequant4_coeff[0])
2126 init_dequant_tables(h);
2135 * Mimic alloc_tables(), but for every context thread.
/**
 * Mirror alloc_tables() for a slice-thread context: share (not copy) the
 * big per-picture tables of 'src' by pointer, since they are read/written
 * at disjoint macroblock positions per thread. Per-thread buffers
 * (obmc_scratchpad, and top_borders via context_init()) are NOT shared.
 */
2137 static void clone_tables(H264Context *dst, H264Context *src){
2138 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2139 dst->non_zero_count = src->non_zero_count;
2140 dst->slice_table = src->slice_table;
2141 dst->cbp_table = src->cbp_table;
2142 dst->mb2b_xy = src->mb2b_xy;
2143 dst->mb2b8_xy = src->mb2b8_xy;
2144 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2145 dst->mvd_table[0] = src->mvd_table[0];
2146 dst->mvd_table[1] = src->mvd_table[1];
2147 dst->direct_table = src->direct_table;
// per-thread scratch; allocated lazily in frame_start()
2149 dst->s.obmc_scratchpad = NULL;
2150 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2155 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate buffers that must be private to each thread context: one row
 * of top-border samples (16 luma + 8+8 chroma bytes per MB) for each of
 * the two MBAFF border lines.
 * NOTE(review): the success return and fail label are elided in this
 * sampled view; on failure -1 is returned and free_tables() cleans up.
 */
2157 static int context_init(H264Context *h){
2158 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2159 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163 return -1; // free_tables will clean up for us
/**
 * One-time initialization shared by decoder (and, historically, encoder)
 * paths: copy dimensions/codec id from the AVCodecContext, set up intra
 * prediction function pointers, and default the scaling matrices to flat
 * (all 16 = no scaling) until a PPS overrides them.
 */
2166 static av_cold void common_init(H264Context *h){
2167 MpegEncContext * const s = &h->s;
2169 s->width = s->avctx->width;
2170 s->height = s->avctx->height;
2171 s->codec_id= s->avctx->codec->id;
2173 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 = "no PPS applied yet" sentinel for dequant table rebuilds
2175 h->dequant_coeff_pps= -1;
2176 s->unrestricted_mv=1;
2177 s->decode=1; //FIXME
// flat default scaling lists (value 16 == unity in H.264 scaling)
2179 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2180 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback: set MpegEncContext defaults, choose the output
 * pixel format (full-range YUVJ420P for SVQ3, YUV420P otherwise), and
 * detect avcC-style ("MP4 format") extradata by its leading version
 * byte 1. The avcC handling body and the return are elided in this
 * sampled view.
 */
2183 static av_cold int decode_init(AVCodecContext *avctx){
2184 H264Context *h= avctx->priv_data;
2185 MpegEncContext * const s = &h->s;
2187 MPV_decode_defaults(s);
2192 s->out_format = FMT_H264;
2193 s->workaround_bugs= avctx->workaround_bugs;
2196 // s->decode_mb= ff_h263_decode_mb;
2197 s->quarter_sample = 1;
2200 if(avctx->codec_id == CODEC_ID_SVQ3)
2201 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2203 avctx->pix_fmt= PIX_FMT_YUV420P;
// avcC extradata starts with configurationVersion == 1
2207 if(avctx->extradata_size > 0 && avctx->extradata &&
2208 *(char *)avctx->extradata == 1){
// thread context 0 is the main context itself
2215 h->thread_context[0] = h;
/**
 * Per-frame setup: start the MPV frame and error resilience, reset
 * key_frame (IDR-driven for H.264), precompute block sample offsets for
 * frame and field (24+) layouts, lazily allocate per-thread bipred
 * scratchpads (needs linesize, unknown at alloc_tables() time), and
 * clear slice_table when MBs may be touched before being decoded.
 * NOTE(review): declarations of i and the error-return path after
 * MPV_frame_start are elided in this sampled view.
 */
2219 static int frame_start(H264Context *h){
2220 MpegEncContext * const s = &h->s;
2223 if(MPV_frame_start(s, s->avctx) < 0)
2225 ff_er_frame_start(s);
2227 * MPV_frame_start uses pict_type to derive key_frame.
2228 * This is incorrect for H.264; IDR markings must be used.
2229 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2230 * See decode_nal_units().
2232 s->current_picture_ptr->key_frame= 0;
2234 assert(s->linesize && s->uvlinesize);
// offsets [0..23]: frame layout; [24..47]: field layout (doubled stride)
2236 for(i=0; i<16; i++){
2237 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2238 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2241 h->block_offset[16+i]=
2242 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243 h->block_offset[24+16+i]=
2244 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247 /* can't be in alloc_tables because linesize isn't known there.
2248 * FIXME: redo bipred weight to not require extra buffer? */
2249 for(i = 0; i < s->avctx->thread_count; i++)
2250 if(!h->thread_context[i]->s.obmc_scratchpad)
2251 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2253 /* some macroblocks will be accessed before they're available */
2254 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2255 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2257 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2259 // We mark the current picture as non reference after allocating it, so
2260 // that if we break out due to an error it can be released automatically
2261 // in the next MPV_frame_start().
2262 // SVQ3 as well as most other codecs have only last/next/current and thus
2263 // get released even with set reference, besides SVQ3 and others do not
2264 // mark frames as reference later "naturally".
2265 if(s->codec_id != CODEC_ID_SVQ3)
2266 s->current_picture_ptr->reference= 0;
/**
 * Save the right column (left_border for the next MB) and bottom row
 * (top_borders[0] for the MB below) of the just-decoded macroblock, so
 * the deblocking filter can still read unfiltered neighbour samples.
 * Chroma is skipped in gray-only decoding unless 'simple'.
 * NOTE(review): the initial src_y adjustment line and some braces are
 * elided in this sampled view.
 */
2270 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2271 MpegEncContext * const s = &h->s;
2275 src_cb -= uvlinesize;
2276 src_cr -= uvlinesize;
2278 // There are two lines saved, the line above the top macroblock of a pair,
2279 // and the line above the bottom macroblock
// corner sample comes from the previously saved top border
2280 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2281 for(i=1; i<17; i++){
2282 h->left_border[i]= src_y[15+i* linesize];
// bottom luma row saved as two 64-bit stores
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2286 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2288 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2289 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2290 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2292 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2293 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2295 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2296 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved unfiltered border samples
 * with the picture edges around the current MB, so intra prediction sees
 * unfiltered neighbours while the deblocked picture keeps its own data.
 * deblock_left/top follow slice boundaries when deblocking_filter==2
 * (filter only inside slices), else simple picture-edge checks.
 * NOTE(review): the XCHG macro body, mb_xy setup and several braces are
 * elided in this sampled view.
 */
2300 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2301 MpegEncContext * const s = &h->s;
2308 if(h->deblocking_filter == 2) {
// mode 2: only filter across MBs of the same slice
2310 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2311 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2313 deblock_left = (s->mb_x > 0);
2314 deblock_top = (s->mb_y > 0);
// step back to the top-left neighbour sample
2317 src_y -= linesize + 1;
2318 src_cb -= uvlinesize + 1;
2319 src_cr -= uvlinesize + 1;
2321 #define XCHG(a,b,t,xchg)\
2328 for(i = !deblock_top; i<17; i++){
2329 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2334 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2335 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right samples of the next MB, if it exists
2336 if(s->mb_x+1 < s->mb_width){
2337 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2341 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2343 for(i = !deblock_top; i<9; i++){
2344 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2345 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2349 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2350 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): save borders for a whole
 * macroblock *pair* — 34 luma left samples and two bottom rows (one per
 * field line) into top_borders[0] and top_borders[1].
 * NOTE(review): some braces are elided in this sampled view.
 */
2355 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2356 MpegEncContext * const s = &h->s;
2359 src_y -= 2 * linesize;
2360 src_cb -= 2 * uvlinesize;
2361 src_cr -= 2 * uvlinesize;
2363 // There are two lines saved, the line above the top macroblock of a pair,
2364 // and the line above the bottom macroblock
2365 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2366 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2367 for(i=2; i<34; i++){
2368 h->left_border[i]= src_y[15+i* linesize];
// bottom two luma rows: one into each top_borders plane
2371 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2372 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2373 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2374 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2376 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2377 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2378 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2379 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2380 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2381 for(i=2; i<18; i++){
2382 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2383 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2385 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2386 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2387 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2388 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap/restore saved unfiltered
 * border samples for a macroblock pair (34 luma left samples, two
 * top-border rows). deblock_top uses mb_y > 1 because a pair spans two
 * MB rows.
 * NOTE(review): the XCHG macro body and several braces are elided in
 * this sampled view.
 */
2392 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2393 MpegEncContext * const s = &h->s;
2396 int deblock_left = (s->mb_x > 0);
2397 int deblock_top = (s->mb_y > 1);
2399 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2401 src_y -= 2 * linesize + 1;
2402 src_cb -= 2 * uvlinesize + 1;
2403 src_cr -= 2 * uvlinesize + 1;
2405 #define XCHG(a,b,t,xchg)\
// skip the first two rows when there is no deblockable top neighbour
2412 for(i = (!deblock_top)<<1; i<34; i++){
2413 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2418 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2419 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2420 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2421 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2422 if(s->mb_x+1 < s->mb_width){
2423 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2424 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2428 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2430 for(i = (!deblock_top) << 1; i<18; i++){
2431 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2432 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2437 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2439 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock into the current picture: intra prediction
 * or inter motion compensation, residual IDCT+add, and deblocking.
 * 'simple' (compile-time constant via the two wrappers below) removes
 * the uncommon paths: MBAFF/field handling, intra PCM, SVQ3, gray-only.
 * NOTE(review): this is a sampled excerpt — many loop headers, else
 * branches and braces are elided; do not edit code from this view alone.
 */
2444 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2445 MpegEncContext * const s = &h->s;
2446 const int mb_x= s->mb_x;
2447 const int mb_y= s->mb_y;
2448 const int mb_xy= h->mb_xy;
2449 const int mb_type= s->current_picture.mb_type[mb_xy];
2450 uint8_t *dest_y, *dest_cb, *dest_cr;
2451 int linesize, uvlinesize /*dct_offset*/;
2453 int *block_offset = &h->block_offset[0];
2454 const unsigned int bottom = mb_y & 1;
2455 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2456 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2457 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination pointers for this MB in the current picture planes
2459 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2460 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2461 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2463 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2464 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field MB: double strides, use the field block_offset set [24..]
2466 if (!simple && MB_FIELD) {
2467 linesize = h->mb_linesize = s->linesize * 2;
2468 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2469 block_offset = &h->block_offset[24];
2470 if(mb_y&1){ //FIXME move out of this func?
2471 dest_y -= s->linesize*15;
2472 dest_cb-= s->uvlinesize*7;
2473 dest_cr-= s->uvlinesize*7;
// rewrite ref cache so refs encode field parity for MBAFF filtering
2477 for(list=0; list<h->list_count; list++){
2478 if(!USES_LIST(mb_type, list))
2480 if(IS_16X16(mb_type)){
2481 int8_t *ref = &h->ref_cache[list][scan8[0]];
2482 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2484 for(i=0; i<16; i+=4){
2485 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2486 int ref = h->ref_cache[list][scan8[i]];
2488 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2494 linesize = h->mb_linesize = s->linesize;
2495 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2496 // dct_offset = s->linesize * 16;
// choose residual add functions per transform size / bypass
2499 if(transform_bypass){
2501 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2502 }else if(IS_8x8DCT(mb_type)){
2503 idct_dc_add = s->dsp.h264_idct8_dc_add;
2504 idct_add = s->dsp.h264_idct8_add;
2506 idct_dc_add = s->dsp.h264_idct_dc_add;
2507 idct_add = s->dsp.h264_idct_add;
// MBAFF intra: swap in unfiltered borders of the pair above
2510 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2511 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2512 int mbt_y = mb_y&~1;
2513 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2514 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2515 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2516 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2519 if (!simple && IS_INTRA_PCM(mb_type)) {
2522 // The pixels are stored in h->mb array in the same order as levels,
2523 // copy them in output in the correct order.
2524 for(i=0; i<16; i++) {
2525 for (y=0; y<4; y++) {
2526 for (x=0; x<4; x++) {
2527 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2531 for(i=16; i<16+4; i++) {
2532 for (y=0; y<4; y++) {
2533 for (x=0; x<4; x++) {
2534 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2538 for(i=20; i<20+4; i++) {
2539 for (y=0; y<4; y++) {
2540 for (x=0; x<4; x++) {
2541 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2546 if(IS_INTRA(mb_type)){
// give intra prediction unfiltered neighbour samples
2547 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2548 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2550 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2551 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2552 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2555 if(IS_INTRA4x4(mb_type)){
2556 if(simple || !s->encoding){
2557 if(IS_8x8DCT(mb_type)){
2558 for(i=0; i<16; i+=4){
2559 uint8_t * const ptr= dest_y + block_offset[i];
2560 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2561 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2562 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2563 (h->topright_samples_available<<i)&0x4000, linesize);
// DC-only blocks use the cheaper dc_add path
2565 if(nnz == 1 && h->mb[i*16])
2566 idct_dc_add(ptr, h->mb + i*16, linesize);
2568 idct_add(ptr, h->mb + i*16, linesize);
2572 for(i=0; i<16; i++){
2573 uint8_t * const ptr= dest_y + block_offset[i];
2575 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these modes need top-right samples; synthesize them if unavailable
2578 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2579 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2580 assert(mb_y || linesize <= block_offset[i]);
2581 if(!topright_avail){
2582 tr= ptr[3 - linesize]*0x01010101;
2583 topright= (uint8_t*) &tr;
2585 topright= ptr + 4 - linesize;
2589 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2590 nnz = h->non_zero_count_cache[ scan8[i] ];
2593 if(nnz == 1 && h->mb[i*16])
2594 idct_dc_add(ptr, h->mb + i*16, linesize);
2596 idct_add(ptr, h->mb + i*16, linesize);
2598 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra 16x16: full-plane prediction then luma DC transform
2603 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2605 if(!transform_bypass)
2606 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2608 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2610 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2611 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter MB: motion compensation with optional weighted prediction
2613 hl_motion(h, dest_y, dest_cb, dest_cr,
2614 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2615 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2616 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add luma residuals (intra4x4 already added them above)
2620 if(!IS_INTRA4x4(mb_type)){
2622 if(IS_INTRA16x16(mb_type)){
2623 for(i=0; i<16; i++){
2624 if(h->non_zero_count_cache[ scan8[i] ])
2625 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2626 else if(h->mb[i*16])
2627 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2630 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2631 for(i=0; i<16; i+=di){
2632 int nnz = h->non_zero_count_cache[ scan8[i] ];
2634 if(nnz==1 && h->mb[i*16])
2635 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2637 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2642 for(i=0; i<16; i++){
2643 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2644 uint8_t * const ptr= dest_y + block_offset[i];
2645 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residuals (blocks 16..23), with per-plane chroma QP
2651 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2652 uint8_t *dest[2] = {dest_cb, dest_cr};
2653 if(transform_bypass){
2654 idct_add = idct_dc_add = s->dsp.add_pixels4;
2656 idct_add = s->dsp.h264_idct_add;
2657 idct_dc_add = s->dsp.h264_idct_dc_add;
2658 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2659 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2662 for(i=16; i<16+8; i++){
2663 if(h->non_zero_count_cache[ scan8[i] ])
2664 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2665 else if(h->mb[i*16])
2666 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2669 for(i=16; i<16+8; i++){
2670 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2671 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2672 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2678 if(h->deblocking_filter) {
2679 if (!simple && FRAME_MBAFF) {
2680 //FIXME try deblocking one mb at a time?
2681 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
// MBAFF: defer filtering until the bottom MB of a pair is done,
// then filter both MBs of the pair together
2682 const int mb_y = s->mb_y - 1;
2683 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2684 const int mb_xy= mb_x + mb_y*s->mb_stride;
2685 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2686 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2687 if (!bottom) return;
2688 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2689 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2690 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2692 if(IS_INTRA(mb_type_top | mb_type_bottom))
2693 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2695 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// temporarily rewind to the top MB of the pair for filtering
2698 s->mb_y--; h->mb_xy -= s->mb_stride;
2699 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2700 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2701 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2702 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2703 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2705 s->mb_y++; h->mb_xy += s->mb_stride;
2706 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2707 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2708 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2709 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2710 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// non-MBAFF: save borders and filter this MB with the fast path
2712 tprintf(h->s.avctx, "call filter_mb\n");
2713 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2714 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2715 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2716 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2717 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2723 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast-path wrapper: instantiates hl_decode_mb_internal with simple=1 so
// the compiler drops the MBAFF/PCM/SVQ3/gray branches entirely.
2725 static void hl_decode_mb_simple(H264Context *h){
2726 hl_decode_mb_internal(h, 1);
2730 * Process a macroblock; this handles edge cases, such as interlacing.
// Full-path wrapper (simple=0): handles interlacing, PCM, SVQ3, gray.
// av_noinline keeps this cold path out of the hot caller.
2732 static void av_noinline hl_decode_mb_complex(H264Context *h){
2733 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex variant,
 * based on features of the current MB/stream that require the full path.
 * NOTE(review): the 'if' guarding the is_complex dispatch is elided in
 * this sampled view.
 */
2736 static void hl_decode_mb(H264Context *h){
2737 MpegEncContext * const s = &h->s;
2738 const int mb_xy= h->mb_xy;
2739 const int mb_type= s->current_picture.mb_type[mb_xy];
2740 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2741 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
// encoder-only contexts without decode requested do nothing here
2743 if(ENABLE_H264_ENCODER && !s->decode)
2747 hl_decode_mb_complex(h);
2748 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture into a single-field view in place: double the
 * line strides, offset the data pointers by one line for the bottom
 * field, and set reference to the field parity.
 * NOTE(review): declaration of i is elided in this sampled view.
 */
2751 static void pic_as_field(Picture *pic, const int parity){
2753 for (i = 0; i < 4; ++i) {
2754 if (parity == PICT_BOTTOM_FIELD)
2755 pic->data[i] += pic->linesize[i];
2756 pic->reference = parity;
2757 pic->linesize[i] *= 2;
/**
 * If 'src' has a field of the requested parity, copy it into 'dest' as a
 * field picture (via pic_as_field) and bump pic_id by id_add.
 * @return 1 when a field was produced, 0 otherwise.
 * NOTE(review): the dest assignment and return are elided in this
 * sampled view; 'match' reflects whether src->reference covers parity.
 */
2761 static int split_field_copy(Picture *dest, Picture *src,
2762 int parity, int id_add){
2763 int match = !!(src->reference & parity);
2767 pic_as_field(dest, parity);
2769 dest->pic_id += id_add;
2776 * Split one reference list into field parts, interleaving by parity
2777 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2778 * set to look at the actual start of data for that field.
2780 * @param dest output list
2781 * @param dest_len maximum number of fields to put in dest
2782 * @param src the source reference list containing fields and/or field pairs
2783 * (aka short_ref/long_ref, or
2784 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2785 * @param src_len number of Picture's in source (pairs and unmatched fields)
2786 * @param parity the parity of the picture being decoded/needing
2787 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2788 * @return number of fields placed in dest
/**
 * Interleave one half (short-term or long-term) of a reference list into
 * fields: alternate between same-parity and opposite-parity sources,
 * per H.264 8.2.4.2.5. See the doxygen block above for parameters.
 * NOTE(review): same_i/opp_i/field_output declarations, index stepping
 * and the return are elided in this sampled view.
 */
2790 static int split_field_half_ref_list(Picture *dest, int dest_len,
2791 Picture *src, int src_len, int parity){
2792 int same_parity = 1;
2798 for (out_i = 0; out_i < dest_len; out_i += field_output) {
// prefer a same-parity field; fall back to opposite parity
2799 if (same_parity && same_i < src_len) {
2800 field_output = split_field_copy(dest + out_i, src + same_i,
2802 same_parity = !field_output;
2805 } else if (opp_i < src_len) {
2806 field_output = split_field_copy(dest + out_i, src + opp_i,
2807 PICT_FRAME - parity, 0);
2808 same_parity = field_output;
2820 * Split the reference frame list into a reference field list.
2821 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2822 * The input list contains both reference field pairs and
2823 * unmatched reference fields; it is ordered as spec describes
2824 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2825 * unmatched field pairs are also present. Conceptually this is equivalent
2826 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2828 * @param dest output reference list where ordered fields are to be placed
2829 * @param dest_len max number of fields to place at dest
2830 * @param src source reference list, as described above
2831 * @param src_len number of pictures (pairs and unmatched fields) in src
2832 * @param parity parity of field being currently decoded
2833 * (one of PICT_{TOP,BOTTOM}_FIELD)
2834 * @param long_i index into src array that holds first long reference picture,
2835 * or src_len if no long refs present.
/**
 * Split a combined (short-term then long-term) reference frame list into
 * an interleaved field list: process the short-term prefix [0, long_i)
 * first, then the long-term tail. See the doxygen block above.
 * NOTE(review): the dest/dest_len adjustment between the two calls and
 * the return of i are elided in this sampled view.
 */
2837 static int split_field_ref_list(Picture *dest, int dest_len,
2838 Picture *src, int src_len,
2839 int parity, int long_i){
2841 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2845 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2846 src_len - long_i, parity);
2851 * fills the default_ref_list.
/**
 * Build the default reference lists (h->default_ref_list[0/1]) per
 * H.264 8.2.4.2. For B slices, short-term refs are sorted by POC around
 * the current picture's POC (L0 descending past, L1 ascending future);
 * for P slices they are taken in short_ref order. Long-term refs are
 * appended. For field pictures the frame lists are built first and then
 * split into fields via split_field_ref_list().
 * NOTE(review): this is a sampled excerpt — several declarations, else
 * branches and braces are elided; do not edit code from this view alone.
 */
2853 static int fill_default_ref_list(H264Context *h){
2854 MpegEncContext * const s = &h->s;
2856 int smallest_poc_greater_than_current = -1;
2858 Picture sorted_short_ref[32];
2859 Picture field_entry_list[2][32];
2860 Picture *frame_list[2];
// field decoding builds into scratch lists, then splits into fields
2862 if (FIELD_PICTURE) {
2863 structure_sel = PICT_FRAME;
2864 frame_list[0] = field_entry_list[0];
2865 frame_list[1] = field_entry_list[1];
2868 frame_list[0] = h->default_ref_list[0];
2869 frame_list[1] = h->default_ref_list[1];
2872 if(h->slice_type_nos==FF_B_TYPE){
2879 /* sort frame according to poc in B slice */
// selection sort of short_ref by ascending POC into sorted_short_ref
2880 for(out_i=0; out_i<h->short_ref_count; out_i++){
2882 int best_poc=INT_MAX;
2884 for(i=0; i<h->short_ref_count; i++){
2885 const int poc= h->short_ref[i]->poc;
2886 if(poc > limit && poc < best_poc){
2892 assert(best_i != INT_MIN);
2895 sorted_short_ref[out_i]= *h->short_ref[best_i];
2896 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where future (>= current POC) refs start in sorted order
2897 if (-1 == smallest_poc_greater_than_current) {
2898 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2899 smallest_poc_greater_than_current = out_i;
2904 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2906 // find the largest poc
// L0 walks past refs first (step +1), L1 walks future refs first (-1)
2907 for(list=0; list<2; list++){
2910 int step= list ? -1 : 1;
2912 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2914 while(j<0 || j>= h->short_ref_count){
2915 if(j != -99 && step == (list ? -1 : 1))
2918 j= smallest_poc_greater_than_current + (step>>1);
// skip refs whose structure doesn't match the selection mask
2920 sel = sorted_short_ref[j].reference | structure_sel;
2921 if(sel != PICT_FRAME) continue;
2922 frame_list[list][index ]= sorted_short_ref[j];
2923 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2925 short_len[list] = index;
// append long-term refs; pic_id is the long-term index
2927 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2929 if(h->long_ref[i] == NULL) continue;
2930 sel = h->long_ref[i]->reference | structure_sel;
2931 if(sel != PICT_FRAME) continue;
2933 frame_list[ list ][index ]= *h->long_ref[i];
2934 frame_list[ list ][index++].pic_id= i;
2939 for(list=0; list<2; list++){
2941 len[list] = split_field_ref_list(h->default_ref_list[list],
2945 s->picture_structure,
2948 // swap the two first elements of L1 when L0 and L1 are identical
2949 if(list && len[0] > 1 && len[0] == len[1])
2950 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2952 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
// zero the unused tail so stale entries are never referenced
2956 if(len[list] < h->ref_count[ list ])
2957 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// P/SP slice path: short-term refs in short_ref order, then long-term
2964 for(i=0; i<h->short_ref_count; i++){
2966 sel = h->short_ref[i]->reference | structure_sel;
2967 if(sel != PICT_FRAME) continue;
2968 frame_list[0][index ]= *h->short_ref[i];
2969 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2972 for(i = 0; i < 16; i++){
2974 if(h->long_ref[i] == NULL) continue;
2975 sel = h->long_ref[i]->reference | structure_sel;
2976 if(sel != PICT_FRAME) continue;
2977 frame_list[0][index ]= *h->long_ref[i];
2978 frame_list[0][index++].pic_id= i;
2982 index = split_field_ref_list(h->default_ref_list[0],
2983 h->ref_count[0], frame_list[0],
2984 index, s->picture_structure,
2987 if(index < h->ref_count[0])
2988 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
// debug dump of the final lists
2991 for (i=0; i<h->ref_count[0]; i++) {
2992 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2994 if(h->slice_type_nos==FF_B_TYPE){
2995 for (i=0; i<h->ref_count[1]; i++) {
2996 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3003 static void print_short_term(H264Context *h);
3004 static void print_long_term(H264Context *h);
3007 * Extract structure information about the picture described by pic_num in
3008 * the current decoding context (frame or field). Note that pic_num is
3009 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3010 * @param pic_num picture number for which to extract structure information
3011 * @param structure one of PICT_XXX describing structure of picture
3013 * @return frame number (short term) or long term index of picture
3014 * described by pic_num
/* Extract structure information for pic_num in the current decoding
 * context.  Visible lines default *structure to the current picture
 * structure and flip it (XOR PICT_FRAME) on the opposite-field branch.
 * NOTE(review): the condition guarding the flip and the final value
 * extraction/return are elided from this excerpt. */
3016 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3017     MpegEncContext * const s = &h->s;
3019     *structure = s->picture_structure;
3022         /* opposite field */
3023         *structure ^= PICT_FRAME;
/* Parse the ref_pic_list_reordering() syntax from the slice header and
 * reorder h->ref_list[] accordingly.  Starts from the default lists,
 * then, per reordering_of_pic_nums_idc, moves short-term (idc 0/1) or
 * long-term (idc 2) references to the front of the list.
 * Returns 0 on success; error paths log and (elided) return negative. */
3030 static int decode_ref_pic_list_reordering(H264Context *h){
3031     MpegEncContext * const s = &h->s;
3032     int list, index, pic_structure;
3034     print_short_term(h);
3036     if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before func
3038     for(list=0; list<h->list_count; list++){
         /* start from the default reference list for this list */
3039         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3041         if(get_bits1(&s->gb)){
3042             int pred= h->curr_pic_num;
3044             for(index=0; ; index++){
3045                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3046                 unsigned int pic_id;
3048                 Picture *ref = NULL;
             /* idc==3 terminates the reordering loop for this list */
3050                 if(reordering_of_pic_nums_idc==3)
3053                 if(index >= h->ref_count[list]){
3054                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3058                 if(reordering_of_pic_nums_idc<3){
3059                     if(reordering_of_pic_nums_idc<2){
                     /* short-term: idc 0 subtracts, idc 1 adds abs_diff_pic_num */
3060                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3063                         if(abs_diff_pic_num > h->max_pic_num){
3064                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3068                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3069                         else                                pred+= abs_diff_pic_num;
                     /* wrap into [0, max_pic_num); max_pic_num is a power of two */
3070                         pred &= h->max_pic_num - 1;
3072                         frame_num = pic_num_extract(h, pred, &pic_structure);
                     /* search the short-term list, newest first */
3074                         for(i= h->short_ref_count-1; i>=0; i--){
3075                             ref = h->short_ref[i];
3076                             assert(ref->reference);
3077                             assert(!ref->long_ref);
3078                             if(
3079                                    ref->frame_num == frame_num &&
3080                                    (ref->reference & pic_structure) &&
3081                                    ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
                     /* idc==2: long-term reference selected by long_term_pic_idx */
3088                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3090                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
3093                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3096                         ref = h->long_ref[long_idx];
3097                         assert(!(ref && !ref->reference));
3098                         if(ref && (ref->reference & pic_structure)){
3099                             ref->pic_id= pic_id;
3100                             assert(ref->long_ref);
3108                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3109                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                     /* shift list entries down and insert ref at 'index' */
3111                         for(i=index; i+1<h->ref_count[list]; i++){
3112                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3115                         for(; i > index; i--){
3116                             h->ref_list[list][i]= h->ref_list[list][i-1];
3118                         h->ref_list[list][index]= *ref;
3120                             pic_as_field(&h->ref_list[list][index], pic_structure);
3124                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
     /* fill any still-empty slots with the current picture (error concealment) */
3130     for(list=0; list<h->list_count; list++){
3131         for(index= 0; index < h->ref_count[list]; index++){
3132             if(!h->ref_list[list][index].data[0])
3133                 h->ref_list[list][index]= s->current_picture;
     /* temporal direct mode needs the distance scale factors */
3137     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3138         direct_dist_scale_factor(h);
3139     direct_ref_list_init(h);
/* Build the per-field reference entries used by MBAFF decoding: for each
 * frame in ref_list[list][i], entries [16+2*i] (top field) and [16+2*i+1]
 * (bottom field) are created with doubled linesize, and the per-frame
 * weights/offsets are replicated to both field entries. */
3143 static void fill_mbaff_ref_list(H264Context *h){
3145     for(list=0; list<2; list++){ //FIXME try list_count
3146         for(i=0; i<h->ref_count[list]; i++){
3147             Picture *frame = &h->ref_list[list][i];
3148             Picture *field = &h->ref_list[list][16+2*i];
             /* field pictures use every other line: double the stride */
3151                 field[0].linesize[j] <<= 1;
3152             field[0].reference = PICT_TOP_FIELD;
3153             field[1] = field[0];
             /* bottom field starts one (frame) line below the top field */
3155                 field[1].data[j] += frame->linesize[j];
3156             field[1].reference = PICT_BOTTOM_FIELD;
             /* replicate explicit weights/offsets to both field entries */
3158             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3159             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3161                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3162                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
     /* replicate the implicit bi-pred weight table to the field entries */
3166     for(j=0; j<h->ref_count[1]; j++){
3167         for(i=0; i<h->ref_count[0]; i++)
3168             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3169         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
3170         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse the pred_weight_table() slice-header syntax (explicit weighted
 * prediction).  Reads the log2 weight denominators, then per reference a
 * luma_weight_flag / chroma_weight_flag followed by signed weight and
 * offset values; absent entries fall back to the default weight
 * (1 << denom) and zero offset.  Sets h->use_weight / use_weight_chroma
 * whenever any non-default weight or offset is seen. */
3174 static int pred_weight_table(H264Context *h){
3175     MpegEncContext * const s = &h->s;
3177     int luma_def, chroma_def;
3180     h->use_weight_chroma= 0;
3181     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3182     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
     /* default (identity) weight is 1 << log2_denom */
3183     luma_def = 1<<h->luma_log2_weight_denom;
3184     chroma_def = 1<<h->chroma_log2_weight_denom;
3186     for(list=0; list<2; list++){
3187         for(i=0; i<h->ref_count[list]; i++){
3188             int luma_weight_flag, chroma_weight_flag;
3190             luma_weight_flag= get_bits1(&s->gb);
3191             if(luma_weight_flag){
3192                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3193                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3194                 if(   h->luma_weight[list][i] != luma_def
3195                    || h->luma_offset[list][i] != 0)
3198                 h->luma_weight[list][i]= luma_def;
3199                 h->luma_offset[list][i]= 0;
3202             chroma_weight_flag= get_bits1(&s->gb);
3203             if(chroma_weight_flag){
3206                     h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3207                     h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3208                     if(   h->chroma_weight[list][i][j] != chroma_def
3209                        || h->chroma_offset[list][i][j] != 0)
3210                         h->use_weight_chroma= 1;
3215                     h->chroma_weight[list][i][j]= chroma_def;
3216                     h->chroma_offset[list][i][j]= 0;
     /* L1 weights are only present for B slices */
3220         if(h->slice_type_nos != FF_B_TYPE) break;
3222     h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute the implicit bi-prediction weight table (weighted_bipred_idc==2)
 * from POC distances: weight = 64 - dist_scale_factor, clamped to 32/32
 * when td==0 or the scale factor leaves [-64,128].  The trivial case of a
 * single symmetric reference pair disables weighting entirely. */
3226 static void implicit_weight_table(H264Context *h){
3227     MpegEncContext * const s = &h->s;
3229     int cur_poc = s->current_picture_ptr->poc;
     /* single ref each side, symmetric around current POC: no weighting */
3231     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
3232        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3234         h->use_weight_chroma= 0;
     /* use_weight==2 selects the implicit-weight path in MC */
3239     h->use_weight_chroma= 2;
3240     h->luma_log2_weight_denom= 5;
3241     h->chroma_log2_weight_denom= 5;
3243     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3244         int poc0 = h->ref_list[0][ref0].poc;
3245         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3246             int poc1 = h->ref_list[1][ref1].poc;
             /* td/tb clipped to [-128,127] as per the spec's temporal distances */
3247             int td = av_clip(poc1 - poc0, -128, 127);
3249                 int tb = av_clip(cur_poc - poc0, -128, 127);
3250                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3251                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3252                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3253                     h->implicit_weight[ref0][ref1] = 32;
3255                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
         /* td==0 (elided branch): fall back to equal 32/32 weights */
3257                 h->implicit_weight[ref0][ref1] = 32;
3263 * Mark a picture as no longer needed for reference. The refmask
3264 * argument allows unreferencing of individual fields or the whole frame.
3265 * If the picture becomes entirely unreferenced, but is being held for
3266 * display purposes, it is marked as such.
3267 * @param refmask mask of fields to unreference; the mask is bitwise
3268 * anded with the reference marking of pic
3269 * @return non-zero if pic becomes entirely unreferenced (except possibly
3270 * for display purposes) zero if one of the fields remains in
/* Drop reference marking bits given by refmask from pic.  If some field
 * remains referenced the function stops there; otherwise, if the picture
 * is still queued for display, it is tagged DELAYED_PIC_REF so the
 * output code keeps it alive.  (Return statements elided in excerpt.) */
3273 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3275     if (pic->reference &= refmask) {
3278         for(i = 0; h->delayed_pic[i]; i++)
3279             if(pic == h->delayed_pic[i]){
3280                 pic->reference=DELAYED_PIC_REF;
3288 * instantaneous decoder refresh.
/* Handle an IDR (instantaneous decoder refresh): unreference and clear
 * every long-term and short-term reference, resetting both counts. */
3290 static void idr(H264Context *h){
     /* release all long-term references */
3293     for(i=0; i<16; i++){
3294         if (h->long_ref[i] != NULL) {
3295             unreference_pic(h, h->long_ref[i], 0);
3296             h->long_ref[i]= NULL;
3299     h->long_ref_count=0;
     /* release all short-term references */
3301     for(i=0; i<h->short_ref_count; i++){
3302         unreference_pic(h, h->short_ref[i], 0);
3303         h->short_ref[i]= NULL;
3305     h->short_ref_count=0;
3308 /* forget old pics after a seek */
/* forget old pics after a seek */
/* AVCodecContext.flush callback: drop all delayed (DPB) pictures, reset
 * the output POC tracker and field state, then flush the MPEG core. */
3309 static void flush_dpb(AVCodecContext *avctx){
3310     H264Context *h= avctx->priv_data;
3312     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3313         if(h->delayed_pic[i])
3314             h->delayed_pic[i]->reference= 0;
3315         h->delayed_pic[i]= NULL;
     /* INT_MIN == "nothing output yet" sentinel */
3317     h->outputed_poc= INT_MIN;
3319     if(h->s.current_picture_ptr)
3320         h->s.current_picture_ptr->reference= 0;
3321     h->s.first_field= 0;
3322     ff_mpeg_flush(avctx);
3326 * Find a Picture in the short term reference list by frame number.
3327 * @param frame_num frame number to search for
3328 * @param idx the index into h->short_ref where returned picture is found
3329 * undefined if no picture found.
3330 * @return pointer to the found picture, or NULL if no pic with the provided
3331 * frame number is found
/* Linear search of the short-term reference list for frame_num; on a hit
 * (elided lines) *idx receives the list position and the picture is
 * returned, otherwise NULL. */
3333 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3334     MpegEncContext * const s = &h->s;
3337     for(i=0; i<h->short_ref_count; i++){
3338         Picture *pic= h->short_ref[i];
3339         if(s->avctx->debug&FF_DEBUG_MMCO)
3340             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3341         if(pic->frame_num == frame_num) {
3350 * Remove a picture from the short term reference list by its index in
3351 * that list. This does no checking on the provided index; it is assumed
3352 * to be valid. Other list entries are shifted down.
3353 * @param i index into h->short_ref of picture to remove.
/* Remove entry i from the short-term list (no bounds checking beyond the
 * assert); remaining entries are shifted down to keep the list packed. */
3355 static void remove_short_at_index(H264Context *h, int i){
3356     assert(i >= 0 && i < h->short_ref_count);
3357     h->short_ref[i]= NULL;
     /* compact the tail of the list over the removed slot */
3358     if (--h->short_ref_count)
3359         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3364 * @return the removed picture or NULL if an error occurs
/* Find frame_num in the short-term list and, if present, remove it;
 * returns the removed picture or NULL (return elided in excerpt). */
3366 static Picture * remove_short(H264Context *h, int frame_num){
3367     MpegEncContext * const s = &h->s;
3371     if(s->avctx->debug&FF_DEBUG_MMCO)
3372         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3374     pic = find_short(h, frame_num, &i);
3376         remove_short_at_index(h, i);
3382 * Remove a picture from the long term reference list by its index in
3383 * that list. This does no checking on the provided index; it is assumed
3384 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3385 * @param i index into h->long_ref of picture to remove.
/* Clear slot i of the long-term list; unlike the short-term list, other
 * entries keep their positions (the list is indexed, not packed). */
3387 static void remove_long_at_index(H264Context *h, int i){
3388     h->long_ref[i]= NULL;
3389     h->long_ref_count--;
3394 * @return the removed picture or NULL if an error occurs
/* Fetch and clear long-term slot i; returns the removed picture or NULL
 * (guard/return lines elided in excerpt). */
3396 static Picture * remove_long(H264Context *h, int i){
3399     pic= h->long_ref[i];
3401         remove_long_at_index(h, i);
3407 * print short term list
/* Debug helper: dump the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3409 static void print_short_term(H264Context *h) {
3411     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3412         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3413         for(i=0; i<h->short_ref_count; i++){
3414             Picture *pic= h->short_ref[i];
3415             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3421 * print long term list
/* Debug helper: dump the 16-slot long-term reference list when
 * FF_DEBUG_MMCO is enabled (NULL-slot guard elided in excerpt). */
3423 static void print_long_term(H264Context *h) {
3425     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3426         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3427         for(i = 0; i < 16; i++){
3428             Picture *pic= h->long_ref[i];
3430                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3437 * Executes the reference picture marking (memory management control operations).
/* Execute the memory management control operations (MMCOs) parsed from
 * the slice header, updating the short-/long-term reference lists.  Also
 * handles the implicit sliding-window marking of the current picture,
 * second-field pairing rules (7.4.3 notes 2/3), and over-full DPB
 * recovery for corrupt streams. */
3439 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3440     MpegEncContext * const s = &h->s;
3442     int current_ref_assigned=0;
3445     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3446         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3448     for(i=0; i<mmco_count; i++){
3449         int structure, frame_num, unref_pic;
3450         if(s->avctx->debug&FF_DEBUG_MMCO)
3451             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3453         switch(mmco[i].opcode){
         /* mark a short-term picture as unused for reference */
3454         case MMCO_SHORT2UNUSED:
3455             if(s->avctx->debug&FF_DEBUG_MMCO)
3456                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3457             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3458             pic = find_short(h, frame_num, &j);
             /* only drop it from the list once both fields are unreferenced */
3460                 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3461                     remove_short_at_index(h, j);
3462             } else if(s->avctx->debug&FF_DEBUG_MMCO)
3463                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
         /* move a short-term picture to a long-term index */
3465         case MMCO_SHORT2LONG:
3466             if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3467                     h->long_ref[mmco[i].long_arg]->frame_num ==
3468                                               mmco[i].short_pic_num / 2) {
3469                 /* do nothing, we've already moved this field pair. */
3471                 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
3473                 pic= remove_long(h, mmco[i].long_arg);
3474                 if(pic) unreference_pic(h, pic, 0);
3476                 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3477                 if (h->long_ref[ mmco[i].long_arg ]){
3478                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3479                     h->long_ref_count++;
         /* mark a long-term picture as unused for reference */
3483         case MMCO_LONG2UNUSED:
3484             j = pic_num_extract(h, mmco[i].long_arg, &structure);
3485             pic = h->long_ref[j];
3487                 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3488                     remove_long_at_index(h, j);
3489             } else if(s->avctx->debug&FF_DEBUG_MMCO)
3490                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
         /* MMCO_LONG (elided case label): mark current picture long-term */
3494             if (FIELD_PICTURE && !s->first_field) {
3495                 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3496                     /* Just mark second field as referenced */
3498                 } else if (s->current_picture_ptr->reference) {
3499                     /* First field in pair is in short term list or
3500                      * at a different long term index.
3501                      * This is not allowed; see 7.4.3, notes 2 and 3.
3502                      * Report the problem and keep the pair where it is,
3503                      * and mark this field valid.
3505                     av_log(h->s.avctx, AV_LOG_ERROR,
3506                         "illegal long term reference assignment for second "
3507                         "field in complementary field pair (first field is "
3508                         "short term or has non-matching long index)\n");
3514                 pic= remove_long(h, mmco[i].long_arg);
3515                 if(pic) unreference_pic(h, pic, 0);
3517                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3518                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3519                 h->long_ref_count++;
3522             s->current_picture_ptr->reference |= s->picture_structure;
3523             current_ref_assigned=1;
         /* truncate long-term list to the new maximum index */
3525         case MMCO_SET_MAX_LONG:
3526             assert(mmco[i].long_arg <= 16);
3527             // just remove the long term which index is greater than new max
3528             for(j = mmco[i].long_arg; j<16; j++){
3529                 pic = remove_long(h, j);
3530                 if (pic) unreference_pic(h, pic, 0);
         /* MMCO_RESET (elided case label): drop all references, reset POC */
3534             while(h->short_ref_count){
3535                 pic= remove_short(h, h->short_ref[0]->frame_num);
3536                 if(pic) unreference_pic(h, pic, 0);
3538             for(j = 0; j < 16; j++) {
3539                 pic= remove_long(h, j);
3540                 if(pic) unreference_pic(h, pic, 0);
3542             s->current_picture_ptr->poc=
3543             s->current_picture_ptr->field_poc[0]=
3544             s->current_picture_ptr->field_poc[1]=
3548             s->current_picture_ptr->frame_num= 0;
     /* implicit marking of the current picture when no MMCO assigned it */
3554     if (!current_ref_assigned && FIELD_PICTURE &&
3555             !s->first_field && s->current_picture_ptr->reference) {
3557         /* Second field of complementary field pair; the first field of
3558          * which is already referenced. If short referenced, it
3559          * should be first entry in short_ref. If not, it must exist
3560          * in long_ref; trying to put it on the short list here is an
3561          * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3563         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3564             /* Just mark the second field valid */
3565             s->current_picture_ptr->reference = PICT_FRAME;
3566         } else if (s->current_picture_ptr->long_ref) {
3567             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3568                                              "assignment for second field "
3569                                              "in complementary field pair "
3570                                              "(first field is long term)\n");
3573              * First field in reference, but not in any sensible place on our
3574              * reference lists. This shouldn't happen unless reference
3575              * handling somewhere else is wrong.
3579         current_ref_assigned = 1;
     /* sliding-window: push the current picture onto the short-term list */
3582     if(!current_ref_assigned){
3583         pic= remove_short(h, s->current_picture_ptr->frame_num);
3585             unreference_pic(h, pic, 0);
3586             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3589         if(h->short_ref_count)
3590             memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3592         h->short_ref[0]= s->current_picture_ptr;
3593         h->short_ref[0]->long_ref=0;
3594         h->short_ref_count++;
3595         s->current_picture_ptr->reference |= s->picture_structure;
     /* DPB overflow recovery for corrupt streams: discard one reference */
3598     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3600         /* We have too many reference frames, probably due to corrupted
3601          * stream. Need to discard one frame. Prevents overrun of the
3602          * short_ref and long_ref buffers.
3604         av_log(h->s.avctx, AV_LOG_ERROR,
3605                "number of reference frames exceeds max (probably "
3606                "corrupt input), discarding one\n");
3608         if (h->long_ref_count && !h->short_ref_count) {
3609             for (i = 0; i < 16; ++i)
3614             pic = h->long_ref[i];
3615             remove_long_at_index(h, i);
3617             pic = h->short_ref[h->short_ref_count - 1];
3618             remove_short_at_index(h, h->short_ref_count - 1);
3620         unreference_pic(h, pic, 0);
3623     print_short_term(h);
/* Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices read no_output_of_prior_pics / long_term_reference flags;
 * otherwise either explicit MMCO opcodes are parsed (adaptive mode) or a
 * sliding-window MMCO_SHORT2UNUSED is synthesized when the reference
 * buffers are full. */
3628 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3629     MpegEncContext * const s = &h->s;
3632     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
         /* get_bits1 - 1 maps flag {0,1} to {-1,0} */
3633         s->broken_link= get_bits1(gb) -1;
3634         h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3635         if(h->mmco[0].long_arg == -1)
3638             h->mmco[0].opcode= MMCO_LONG;
3642         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3643             for(i= 0; i<MAX_MMCO_COUNT; i++) {
3644                 MMCOOpcode opcode= get_ue_golomb(gb);
3646                 h->mmco[i].opcode= opcode;
3647                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                 /* difference_of_pic_nums_minus1 -> absolute pic num, wrapped */
3648                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3649 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3650                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3654                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3655                     unsigned int long_arg= get_ue_golomb(gb);
                 /* field decoding allows long indices up to 31 for LONG2UNUSED */
3656                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3657                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3660                     h->mmco[i].long_arg= long_arg;
3663                 if(opcode > (unsigned)MMCO_LONG){
3664                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3667                 if(opcode == MMCO_END)
         /* sliding window mode: synthesize unref of oldest short-term pic */
3672             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3674             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3675                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3676                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3677                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
             /* in field mode both fields of the pair must be unreferenced */
3679                 if (FIELD_PICTURE) {
3680                     h->mmco[0].short_pic_num *= 2;
3681                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3682                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/* Compute the picture order count (POC) for the current picture per the
 * SPS poc_type: type 0 from poc_lsb/poc_msb wrap tracking, type 1 from
 * the expected-delta cycle table, type 2 directly from frame_num.
 * Fills field_poc[0/1] and current_picture_ptr->poc. */
3693 static int init_poc(H264Context *h){
3694     MpegEncContext * const s = &h->s;
3695     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3698     if(h->nal_unit_type == NAL_IDR_SLICE){
3699         h->frame_num_offset= 0;
     /* frame_num wrapped: advance the offset by one wrap period */
3701         if(h->frame_num < h->prev_frame_num)
3702             h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3704             h->frame_num_offset= h->prev_frame_num_offset;
3707     if(h->sps.poc_type==0){
3708         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3710         if(h->nal_unit_type == NAL_IDR_SLICE){
         /* track poc_msb across poc_lsb wrap-arounds (spec 8.2.1.1) */
3715         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3716             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3717         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3718             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3720             h->poc_msb = h->prev_poc_msb;
3721 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3723             field_poc[1] = h->poc_msb + h->poc_lsb;
3724         if(s->picture_structure == PICT_FRAME)
3725             field_poc[1] += h->delta_poc_bottom;
3726     }else if(h->sps.poc_type==1){
3727         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3730         if(h->sps.poc_cycle_length != 0)
3731             abs_frame_num = h->frame_num_offset + h->frame_num;
3735         if(h->nal_ref_idc==0 && abs_frame_num > 0)
3738         expected_delta_per_poc_cycle = 0;
3739         for(i=0; i < h->sps.poc_cycle_length; i++)
3740             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3742         if(abs_frame_num > 0){
3743             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3744             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3746             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3747             for(i = 0; i <= frame_num_in_poc_cycle; i++)
3748                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3752         if(h->nal_ref_idc == 0)
3753             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3755         field_poc[0] = expectedpoc + h->delta_poc[0];
3756         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3758         if(s->picture_structure == PICT_FRAME)
3759             field_poc[1] += h->delta_poc[1];
     /* poc_type==2 (elided else): POC derived directly from frame_num */
3762         if(h->nal_unit_type == NAL_IDR_SLICE){
3765         if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3766         else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
     /* publish the field POCs to the current picture */
3772     if(s->picture_structure != PICT_BOTTOM_FIELD) {
3773         s->current_picture_ptr->field_poc[0]= field_poc[0];
3774         s->current_picture_ptr->poc = field_poc[0];
3776     if(s->picture_structure != PICT_TOP_FIELD) {
3777         s->current_picture_ptr->field_poc[1]= field_poc[1];
3778         s->current_picture_ptr->poc = field_poc[1];
     /* a complete frame's POC is the smaller of its two field POCs */
3780     if(!FIELD_PICTURE || !s->first_field) {
3781         Picture *cur = s->current_picture_ptr;
3782         cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3790 * initialize scan tables
/* Build the per-context zigzag/field scan tables.  When the DSP uses the
 * C IDCT the reference tables are copied verbatim; otherwise each entry
 * is permuted (T macros) to match the optimized IDCT's coefficient
 * layout.  The *_q0 pointers always reference the unpermuted tables for
 * the transform-bypass (lossless) path. */
3792 static void init_scan_tables(H264Context *h){
3793     MpegEncContext * const s = &h->s;
3795     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3796         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3797         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
3799         for(i=0; i<16; i++){
         /* 4x4: swap the two bit-pairs of each scan position */
3800 #define T(x) (x>>2) | ((x<<2) & 0xF)
3801             h->zigzag_scan[i] = T(zigzag_scan[i]);
3802             h-> field_scan[i] = T( field_scan[i]);
3806     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3807         memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
3808         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3809         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
3810         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
3812         for(i=0; i<64; i++){
         /* 8x8: swap row and column triplets of each scan position */
3813 #define T(x) (x>>3) | ((x&7)<<3)
3814             h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
3815             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3816             h->field_scan8x8[i]        = T(field_scan8x8[i]);
3817             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
     /* lossless bypass always uses the raw (unpermuted) scan order */
3821     if(h->sps.transform_bypass){ //FIXME same ugly
3822         h->zigzag_scan_q0          = zigzag_scan;
3823         h->zigzag_scan8x8_q0       = zigzag_scan8x8;
3824         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3825         h->field_scan_q0           = field_scan;
3826         h->field_scan8x8_q0        = field_scan8x8;
3827         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
3829         h->zigzag_scan_q0          = h->zigzag_scan;
3830         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
3831         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3832         h->field_scan_q0           = h->field_scan;
3833         h->field_scan8x8_q0        = h->field_scan8x8;
3834         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
3839 * Replicates H264 "master" context to thread contexts.
/* Copy the per-picture decoding state from the master context to a
 * per-thread slice context so sliced multithreaded decoding sees the
 * same current picture, POC tracking, reference lists and dequant
 * tables.  Shallow copies only — pointers are shared, not duplicated. */
3841 static void clone_slice(H264Context *dst, H264Context *src)
3843     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
3844     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
3845     dst->s.current_picture      = src->s.current_picture;
3846     dst->s.linesize             = src->s.linesize;
3847     dst->s.uvlinesize           = src->s.uvlinesize;
3848     dst->s.first_field          = src->s.first_field;
     /* POC / frame_num tracking state */
3850     dst->prev_poc_msb           = src->prev_poc_msb;
3851     dst->prev_poc_lsb           = src->prev_poc_lsb;
3852     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
3853     dst->prev_frame_num         = src->prev_frame_num;
3854     dst->short_ref_count        = src->short_ref_count;
     /* reference lists (arrays of pointers/Pictures, copied wholesale) */
3856     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3857     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3858     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3859     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
3861     memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3862     memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3866 * decodes a slice header.
3867 * This will also call MPV_common_init() and frame_start() as needed.
3869 * @param h h264context
3870 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3872 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3874 static int decode_slice_header(H264Context *h, H264Context *h0){
3875 MpegEncContext * const s = &h->s;
3876 MpegEncContext * const s0 = &h0->s;
3877 unsigned int first_mb_in_slice;
3878 unsigned int pps_id;
3879 int num_ref_idx_active_override_flag;
3880 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3881 unsigned int slice_type, tmp, i, j;
3882 int default_ref_list_done = 0;
3883 int last_pic_structure;
3885 s->dropable= h->nal_ref_idc == 0;
3887 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3888 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3889 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3891 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3892 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3895 first_mb_in_slice= get_ue_golomb(&s->gb);
3897 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3898 h0->current_slice = 0;
3899 if (!s0->first_field)
3900 s->current_picture_ptr= NULL;
3903 slice_type= get_ue_golomb(&s->gb);
3905 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3910 h->slice_type_fixed=1;
3912 h->slice_type_fixed=0;
3914 slice_type= slice_type_map[ slice_type ];
3915 if (slice_type == FF_I_TYPE
3916 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3917 default_ref_list_done = 1;
3919 h->slice_type= slice_type;
3920 h->slice_type_nos= slice_type & 3;
3922 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3923 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3924 av_log(h->s.avctx, AV_LOG_ERROR,
3925 "B picture before any references, skipping\n");
3929 pps_id= get_ue_golomb(&s->gb);
3930 if(pps_id>=MAX_PPS_COUNT){
3931 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3934 if(!h0->pps_buffers[pps_id]) {
3935 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3938 h->pps= *h0->pps_buffers[pps_id];
3940 if(!h0->sps_buffers[h->pps.sps_id]) {
3941 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3944 h->sps = *h0->sps_buffers[h->pps.sps_id];
3946 if(h == h0 && h->dequant_coeff_pps != pps_id){
3947 h->dequant_coeff_pps = pps_id;
3948 init_dequant_tables(h);
3951 s->mb_width= h->sps.mb_width;
3952 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3954 h->b_stride= s->mb_width*4;
3955 h->b8_stride= s->mb_width*2;
3957 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3958 if(h->sps.frame_mbs_only_flag)
3959 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3961 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3963 if (s->context_initialized
3964 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3966 return -1; // width / height changed during parallelized decoding
3970 if (!s->context_initialized) {
3972 return -1; // we cant (re-)initialize context during parallel decoding
3973 if (MPV_common_init(s) < 0)
3977 init_scan_tables(h);
3980 for(i = 1; i < s->avctx->thread_count; i++) {
3982 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3983 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3984 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3987 init_scan_tables(c);
3991 for(i = 0; i < s->avctx->thread_count; i++)
3992 if(context_init(h->thread_context[i]) < 0)
3995 s->avctx->width = s->width;
3996 s->avctx->height = s->height;
3997 s->avctx->sample_aspect_ratio= h->sps.sar;
3998 if(!s->avctx->sample_aspect_ratio.den)
3999 s->avctx->sample_aspect_ratio.den = 1;
4001 if(h->sps.timing_info_present_flag){
4002 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4003 if(h->x264_build > 0 && h->x264_build < 44)
4004 s->avctx->time_base.den *= 2;
4005 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4006 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4010 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4013 h->mb_aff_frame = 0;
4014 last_pic_structure = s0->picture_structure;
4015 if(h->sps.frame_mbs_only_flag){
4016 s->picture_structure= PICT_FRAME;
4018 if(get_bits1(&s->gb)) { //field_pic_flag
4019 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4021 s->picture_structure= PICT_FRAME;
4022 h->mb_aff_frame = h->sps.mb_aff;
4026 if(h0->current_slice == 0){
4027 while(h->frame_num != h->prev_frame_num &&
4028 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
4029 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
4031 h->prev_frame_num++;
4032 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
4033 s->current_picture_ptr->frame_num= h->prev_frame_num;
4034 execute_ref_pic_marking(h, NULL, 0);
4037 /* See if we have a decoded first field looking for a pair... */
4038 if (s0->first_field) {
4039 assert(s0->current_picture_ptr);
4040 assert(s0->current_picture_ptr->data[0]);
4041 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4043 /* figure out if we have a complementary field pair */
4044 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4046 * Previous field is unmatched. Don't display it, but let it
4047 * remain for reference if marked as such.
4049 s0->current_picture_ptr = NULL;
4050 s0->first_field = FIELD_PICTURE;
4053 if (h->nal_ref_idc &&
4054 s0->current_picture_ptr->reference &&
4055 s0->current_picture_ptr->frame_num != h->frame_num) {
4057 * This and previous field were reference, but had
4058 * different frame_nums. Consider this field first in
4059 * pair. Throw away previous field except for reference
4062 s0->first_field = 1;
4063 s0->current_picture_ptr = NULL;
4066 /* Second field in complementary pair */
4067 s0->first_field = 0;
4072 /* Frame or first field in a potentially complementary pair */
4073 assert(!s0->current_picture_ptr);
4074 s0->first_field = FIELD_PICTURE;
4077 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4078 s0->first_field = 0;
4085 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4087 assert(s->mb_num == s->mb_width * s->mb_height);
4088 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4089 first_mb_in_slice >= s->mb_num){
4090 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4093 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4094 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4095 if (s->picture_structure == PICT_BOTTOM_FIELD)
4096 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4097 assert(s->mb_y < s->mb_height);
4099 if(s->picture_structure==PICT_FRAME){
4100 h->curr_pic_num= h->frame_num;
4101 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4103 h->curr_pic_num= 2*h->frame_num + 1;
4104 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4107 if(h->nal_unit_type == NAL_IDR_SLICE){
4108 get_ue_golomb(&s->gb); /* idr_pic_id */
4111 if(h->sps.poc_type==0){
4112 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4114 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4115 h->delta_poc_bottom= get_se_golomb(&s->gb);
4119 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4120 h->delta_poc[0]= get_se_golomb(&s->gb);
4122 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4123 h->delta_poc[1]= get_se_golomb(&s->gb);
4128 if(h->pps.redundant_pic_cnt_present){
4129 h->redundant_pic_count= get_ue_golomb(&s->gb);
4132 //set defaults, might be overriden a few line later
4133 h->ref_count[0]= h->pps.ref_count[0];
4134 h->ref_count[1]= h->pps.ref_count[1];
4136 if(h->slice_type_nos != FF_I_TYPE){
4137 if(h->slice_type_nos == FF_B_TYPE){
4138 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4140 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4142 if(num_ref_idx_active_override_flag){
4143 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4144 if(h->slice_type_nos==FF_B_TYPE)
4145 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4147 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4148 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4149 h->ref_count[0]= h->ref_count[1]= 1;
4153 if(h->slice_type_nos == FF_B_TYPE)
4160 if(!default_ref_list_done){
4161 fill_default_ref_list(h);
4164 if(decode_ref_pic_list_reordering(h) < 0)
4167 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4168 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4169 pred_weight_table(h);
4170 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4171 implicit_weight_table(h);
4176 decode_ref_pic_marking(h0, &s->gb);
4179 fill_mbaff_ref_list(h);
4181 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4182 tmp = get_ue_golomb(&s->gb);
4184 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4187 h->cabac_init_idc= tmp;
4190 h->last_qscale_diff = 0;
4191 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4193 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4197 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4198 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4199 //FIXME qscale / qp ... stuff
4200 if(h->slice_type == FF_SP_TYPE){
4201 get_bits1(&s->gb); /* sp_for_switch_flag */
4203 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4204 get_se_golomb(&s->gb); /* slice_qs_delta */
4207 h->deblocking_filter = 1;
4208 h->slice_alpha_c0_offset = 0;
4209 h->slice_beta_offset = 0;
4210 if( h->pps.deblocking_filter_parameters_present ) {
4211 tmp= get_ue_golomb(&s->gb);
4213 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4216 h->deblocking_filter= tmp;
4217 if(h->deblocking_filter < 2)
4218 h->deblocking_filter^= 1; // 1<->0
4220 if( h->deblocking_filter ) {
4221 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4222 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4226 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4227 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4228 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4229 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4230 h->deblocking_filter= 0;
4232 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4233 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4234 /* Cheat slightly for speed:
4235 Do not bother to deblock across slices. */
4236 h->deblocking_filter = 2;
4238 h0->max_contexts = 1;
4239 if(!h0->single_decode_warning) {
4240 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4241 h0->single_decode_warning = 1;
4244 return 1; // deblocking switched inside frame
4249 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4250 slice_group_change_cycle= get_bits(&s->gb, ?);
4253 h0->last_slice_type = slice_type;
4254 h->slice_num = ++h0->current_slice;
4257 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4261 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4262 +(h->ref_list[j][i].reference&3);
4265 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4266 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4268 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4269 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4271 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4273 av_get_pict_type_char(h->slice_type),
4274 pps_id, h->frame_num,
4275 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4276 h->ref_count[0], h->ref_count[1],
4278 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4280 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4281 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: the count of leading zero bits before the
 * first set bit in the bitstream cache (computed as 32 - av_log2(buf)).
 * NOTE(review): this listing has gaps (the declarations of buf/log and the
 * return statement are not visible) — confirm against the complete file.
 */
4291 static inline int get_level_prefix(GetBitContext *gb){
4295 OPEN_READER(re, gb);
4296 UPDATE_CACHE(re, gb);
4297 buf=GET_CACHE(re, gb);
4299 log= 32 - av_log2(buf);
4301 print_bin(buf>>(32-log), log);
4302 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the prefix bits (the zeros plus the terminating one) */
4305 LAST_SKIP_BITS(re, gb, log);
4306 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * it is disallowed if any sub-partition is smaller than 8x8, or if a DIRECT
 * sub-partition is present without direct_8x8_inference_flag.
 * NOTE(review): the surrounding loop header and return are missing from this
 * listing — verify the loop over i covers all four sub_mb_type entries.
 */
4311 static inline int get_dct8x8_allowed(H264Context *h){
4314 if(!IS_SUB_8X8(h->sub_mb_type[i])
4315 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4322 * decodes a residual block.
4323 * @param n block index
4324 * @param scantable scantable
4325 * @param max_coeff number of coefficients in the block
4326 * @return <0 if an error occurred
4328 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4329 MpegEncContext * const s = &h->s;
4330 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4332 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4334 //FIXME put trailing_onex into the context
4336 if(n == CHROMA_DC_BLOCK_INDEX){
4337 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4338 total_coeff= coeff_token>>2;
4340 if(n == LUMA_DC_BLOCK_INDEX){
4341 total_coeff= pred_non_zero_count(h, 0);
4342 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4343 total_coeff= coeff_token>>2;
4345 total_coeff= pred_non_zero_count(h, n);
4346 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4347 total_coeff= coeff_token>>2;
4348 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4352 //FIXME set last_non_zero?
4356 if(total_coeff > (unsigned)max_coeff) {
4357 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4361 trailing_ones= coeff_token&3;
4362 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4363 assert(total_coeff<=16);
4365 for(i=0; i<trailing_ones; i++){
4366 level[i]= 1 - 2*get_bits1(gb);
4370 int level_code, mask;
4371 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4372 int prefix= get_level_prefix(gb);
4374 //first coefficient has suffix_length equal to 0 or 1
4375 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4377 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4379 level_code= (prefix<<suffix_length); //part
4380 }else if(prefix==14){
4382 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4384 level_code= prefix + get_bits(gb, 4); //part
4386 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4387 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4389 level_code += (1<<(prefix-3))-4096;
4392 if(trailing_ones < 3) level_code += 2;
4397 mask= -(level_code&1);
4398 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4401 //remaining coefficients have suffix_length > 0
4402 for(;i<total_coeff;i++) {
4403 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4404 prefix = get_level_prefix(gb);
4406 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4408 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4410 level_code += (1<<(prefix-3))-4096;
4412 mask= -(level_code&1);
4413 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4414 if(level_code > suffix_limit[suffix_length])
4419 if(total_coeff == max_coeff)
4422 if(n == CHROMA_DC_BLOCK_INDEX)
4423 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4425 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4428 coeff_num = zeros_left + total_coeff - 1;
4429 j = scantable[coeff_num];
4431 block[j] = level[0];
4432 for(i=1;i<total_coeff;i++) {
4435 else if(zeros_left < 7){
4436 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4438 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4440 zeros_left -= run_before;
4441 coeff_num -= 1 + run_before;
4442 j= scantable[ coeff_num ];
4447 block[j] = (level[0] * qmul[j] + 32)>>6;
4448 for(i=1;i<total_coeff;i++) {
4451 else if(zeros_left < 7){
4452 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4454 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4456 zeros_left -= run_before;
4457 coeff_num -= 1 + run_before;
4458 j= scantable[ coeff_num ];
4460 block[j]= (level[i] * qmul[j] + 32)>>6;
4465 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts the MBAFF field decoding flag for a skipped macroblock pair:
 * copies the interlaced state from the left neighbor if it belongs to the
 * same slice, otherwise from the top neighbor, otherwise defaults to frame.
 */
4472 static void predict_field_decoding_flag(H264Context *h){
4473 MpegEncContext * const s = &h->s;
4474 const int mb_xy= h->mb_xy;
4475 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4476 ? s->current_picture.mb_type[mb_xy-1]
4477 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4478 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4480 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4484 * decodes a P_SKIP or B_SKIP macroblock
4486 static void decode_mb_skip(H264Context *h){
4487 MpegEncContext * const s = &h->s;
4488 const int mb_xy= h->mb_xy;
/* skipped MBs carry no residual: clear the non-zero-count tables */
4491 memset(h->non_zero_count[mb_xy], 0, 16);
4492 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4495 mb_type|= MB_TYPE_INTERLACED;
/* B_SKIP: derive motion with direct prediction */
4497 if( h->slice_type_nos == FF_B_TYPE )
4499 // just for fill_caches. pred_direct_motion will set the real mb_type
4500 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4502 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4503 pred_direct_motion(h, &mb_type);
4504 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: predict a single 16x16 L0 motion vector with ref index 0 */
4509 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4511 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4512 pred_pskip_motion(h, &mx, &my);
4513 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4514 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit per-MB state for the rest of the decoder/deblocker */
4517 write_back_motion(h, mb_type);
4518 s->current_picture.mb_type[mb_xy]= mb_type;
4519 s->current_picture.qscale_table[mb_xy]= s->qscale;
4520 h->slice_table[ mb_xy ]= h->slice_num;
4521 h->prev_mb_skipped= 1;
4525 * decodes a macroblock
4526 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* CAVLC macroblock-layer decoding: skip-run handling, mb_type, intra pred
 * modes or inter motion/refs, coded_block_pattern, dquant, and all residual
 * blocks (luma DC/AC, 8x8, chroma DC/AC).
 * NOTE(review): this listing has many elided lines (braces, else branches,
 * declarations such as mb_xy/level/dquant) — do not edit without the full
 * file. */
4528 static int decode_mb_cavlc(H264Context *h){
4529 MpegEncContext * const s = &h->s;
4531 int partition_count;
4532 unsigned int mb_type, cbp;
4533 int dct8x8_allowed= h->pps.transform_8x8_mode;
4535 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4537 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4539 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4540 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* ---- mb_skip_run handling (P/B slices only) ---- */
4542 if(h->slice_type_nos != FF_I_TYPE){
4543 if(s->mb_skip_run==-1)
4544 s->mb_skip_run= get_ue_golomb(&s->gb);
4546 if (s->mb_skip_run--) {
4547 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4548 if(s->mb_skip_run==0)
4549 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4551 predict_field_decoding_flag(h);
4558 if( (s->mb_y&1) == 0 )
4559 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4561 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4563 h->prev_mb_skipped= 0;
/* ---- mb_type: slice-type-specific tables; large values mean intra ---- */
4565 mb_type= get_ue_golomb(&s->gb);
4566 if(h->slice_type_nos == FF_B_TYPE){
4568 partition_count= b_mb_type_info[mb_type].partition_count;
4569 mb_type= b_mb_type_info[mb_type].type;
4572 goto decode_intra_mb;
4574 }else if(h->slice_type_nos == FF_P_TYPE){
4576 partition_count= p_mb_type_info[mb_type].partition_count;
4577 mb_type= p_mb_type_info[mb_type].type;
4580 goto decode_intra_mb;
4583 assert(h->slice_type_nos == FF_I_TYPE);
4584 if(h->slice_type == FF_SI_TYPE && mb_type)
4588 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4592 cbp= i_mb_type_info[mb_type].cbp;
4593 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4594 mb_type= i_mb_type_info[mb_type].type;
4598 mb_type |= MB_TYPE_INTERLACED;
4600 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- I_PCM: raw byte-aligned samples, no prediction or residual ---- */
4602 if(IS_INTRA_PCM(mb_type)){
4605 // We assume these blocks are very rare so we do not optimize it.
4606 align_get_bits(&s->gb);
4608 // The pixels are stored in the same order as levels in h->mb array.
4609 for(y=0; y<16; y++){
4610 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4611 for(x=0; x<16; x++){
4612 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4613 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4617 const int index= 256 + 4*(y&3) + 32*(y>>2);
4619 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4620 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4624 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4626 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4627 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4631 // In deblocking, the quantizer is 0
4632 s->current_picture.qscale_table[mb_xy]= 0;
4633 // All coeffs are present
4634 memset(h->non_zero_count[mb_xy], 16, 16);
4636 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF: ref counts are doubled while decoding a field MB pair
 * (restored by the >>= 1 at the end of this function) */
4641 h->ref_count[0] <<= 1;
4642 h->ref_count[1] <<= 1;
4645 fill_caches(h, mb_type, 0);
/* ---- intra prediction modes ---- */
4648 if(IS_INTRA(mb_type)){
4650 // init_top_left_availability(h);
4651 if(IS_INTRA4x4(mb_type)){
4654 if(dct8x8_allowed && get_bits1(&s->gb)){
4655 mb_type |= MB_TYPE_8x8DCT;
4659 // fill_intra4x4_pred_table(h);
4660 for(i=0; i<16; i+=di){
4661 int mode= pred_intra_mode(h, i);
4663 if(!get_bits1(&s->gb)){
4664 const int rem_mode= get_bits(&s->gb, 3);
4665 mode = rem_mode + (rem_mode >= mode);
4669 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4671 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4673 write_back_intra_pred_mode(h);
4674 if( check_intra4x4_pred_mode(h) < 0)
4677 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4678 if(h->intra16x16_pred_mode < 0)
4682 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4685 h->chroma_pred_mode= pred_mode;
/* ---- inter, 8x8 partitions: sub_mb_types, refs, then MVs ---- */
4686 }else if(partition_count==4){
4687 int i, j, sub_partition_count[4], list, ref[2][4];
4689 if(h->slice_type_nos == FF_B_TYPE){
4691 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4692 if(h->sub_mb_type[i] >=13){
4693 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4696 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4697 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4699 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4700 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4701 pred_direct_motion(h, &mb_type);
4702 h->ref_cache[0][scan8[4]] =
4703 h->ref_cache[1][scan8[4]] =
4704 h->ref_cache[0][scan8[12]] =
4705 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4708 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4710 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4711 if(h->sub_mb_type[i] >=4){
4712 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4715 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4716 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4720 for(list=0; list<h->list_count; list++){
4721 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4723 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4724 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4725 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4727 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4739 dct8x8_allowed = get_dct8x8_allowed(h);
4741 for(list=0; list<h->list_count; list++){
4743 if(IS_DIRECT(h->sub_mb_type[i])) {
4744 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4747 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4748 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4750 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4751 const int sub_mb_type= h->sub_mb_type[i];
4752 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4753 for(j=0; j<sub_partition_count[i]; j++){
4755 const int index= 4*i + block_width*j;
4756 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4757 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4758 mx += get_se_golomb(&s->gb);
4759 my += get_se_golomb(&s->gb);
4760 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV across the mv_cache cells the sub-partition covers */
4762 if(IS_SUB_8X8(sub_mb_type)){
4764 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4766 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4767 }else if(IS_SUB_8X4(sub_mb_type)){
4768 mv_cache[ 1 ][0]= mx;
4769 mv_cache[ 1 ][1]= my;
4770 }else if(IS_SUB_4X8(sub_mb_type)){
4771 mv_cache[ 8 ][0]= mx;
4772 mv_cache[ 8 ][1]= my;
4774 mv_cache[ 0 ][0]= mx;
4775 mv_cache[ 0 ][1]= my;
4778 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* ---- inter, direct 16x16 ---- */
4784 }else if(IS_DIRECT(mb_type)){
4785 pred_direct_motion(h, &mb_type);
4786 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- inter, 16x16 / 16x8 / 8x16 partitions ---- */
4788 int list, mx, my, i;
4789 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4790 if(IS_16X16(mb_type)){
4791 for(list=0; list<h->list_count; list++){
4793 if(IS_DIR(mb_type, 0, list)){
4794 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4795 if(val >= h->ref_count[list]){
4796 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4800 val= LIST_NOT_USED&0xFF;
4801 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4803 for(list=0; list<h->list_count; list++){
4805 if(IS_DIR(mb_type, 0, list)){
4806 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4807 mx += get_se_golomb(&s->gb);
4808 my += get_se_golomb(&s->gb);
4809 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4811 val= pack16to32(mx,my);
4814 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4817 else if(IS_16X8(mb_type)){
4818 for(list=0; list<h->list_count; list++){
4821 if(IS_DIR(mb_type, i, list)){
4822 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4823 if(val >= h->ref_count[list]){
4824 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4828 val= LIST_NOT_USED&0xFF;
4829 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4832 for(list=0; list<h->list_count; list++){
4835 if(IS_DIR(mb_type, i, list)){
4836 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4837 mx += get_se_golomb(&s->gb);
4838 my += get_se_golomb(&s->gb);
4839 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4841 val= pack16to32(mx,my);
4844 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4848 assert(IS_8X16(mb_type));
4849 for(list=0; list<h->list_count; list++){
4852 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4853 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4854 if(val >= h->ref_count[list]){
4855 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4859 val= LIST_NOT_USED&0xFF;
4860 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4863 for(list=0; list<h->list_count; list++){
4866 if(IS_DIR(mb_type, i, list)){
4867 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4868 mx += get_se_golomb(&s->gb);
4869 my += get_se_golomb(&s->gb);
4870 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4872 val= pack16to32(mx,my);
4875 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4881 if(IS_INTER(mb_type))
4882 write_back_motion(h, mb_type);
/* ---- coded_block_pattern (not signalled for I16x16: comes from table) */
4884 if(!IS_INTRA16x16(mb_type)){
4885 cbp= get_ue_golomb(&s->gb);
4887 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4891 if(IS_INTRA4x4(mb_type))
4892 cbp= golomb_to_intra4x4_cbp[cbp];
4894 cbp= golomb_to_inter_cbp[cbp];
4898 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4899 if(get_bits1(&s->gb))
4900 mb_type |= MB_TYPE_8x8DCT;
4902 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residuals: dquant then luma and chroma coefficient blocks ---- */
4904 if(cbp || IS_INTRA16x16(mb_type)){
4905 int i8x8, i4x4, chroma_idx;
4907 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4908 const uint8_t *scan, *scan8x8, *dc_scan;
4910 // fill_non_zero_count_cache(h);
4912 if(IS_INTERLACED(mb_type)){
4913 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4914 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4915 dc_scan= luma_dc_field_scan;
4917 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4918 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4919 dc_scan= luma_dc_zigzag_scan;
4922 dquant= get_se_golomb(&s->gb);
4924 if( dquant > 25 || dquant < -26 ){
4925 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the H.264 mb_qp_delta semantics */
4929 s->qscale += dquant;
4930 if(((unsigned)s->qscale) > 51){
4931 if(s->qscale<0) s->qscale+= 52;
4932 else s->qscale-= 52;
4935 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4936 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4937 if(IS_INTRA16x16(mb_type)){
4938 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4939 return -1; //FIXME continue if partitioned and other return -1 too
4942 assert((cbp&15) == 0 || (cbp&15) == 15);
4945 for(i8x8=0; i8x8<4; i8x8++){
4946 for(i4x4=0; i4x4<4; i4x4++){
4947 const int index= i4x4 + 4*i8x8;
4948 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4954 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4957 for(i8x8=0; i8x8<4; i8x8++){
4958 if(cbp & (1<<i8x8)){
4959 if(IS_8x8DCT(mb_type)){
4960 DCTELEM *buf = &h->mb[64*i8x8];
4962 for(i4x4=0; i4x4<4; i4x4++){
4963 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4964 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4967 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4968 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4970 for(i4x4=0; i4x4<4; i4x4++){
4971 const int index= i4x4 + 4*i8x8;
4973 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4979 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4980 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4986 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4987 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4993 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4994 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4995 for(i4x4=0; i4x4<4; i4x4++){
4996 const int index= 16 + 4*chroma_idx + i4x4;
4997 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
5003 uint8_t * const nnz= &h->non_zero_count_cache[0];
5004 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5005 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5008 uint8_t * const nnz= &h->non_zero_count_cache[0];
5009 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5010 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5011 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5013 s->current_picture.qscale_table[mb_xy]= s->qscale;
5014 write_back_non_zero_count(h);
/* restore MBAFF-doubled ref counts (see the <<= 1 above) */
5017 h->ref_count[0] >>= 1;
5018 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag (CABAC, ctx 70..72): the context index is
 * the number of available left/top neighbor MB pairs that are interlaced.
 */
5024 static int decode_cabac_field_decoding_flag(H264Context *h) {
5025 MpegEncContext * const s = &h->s;
5026 const int mb_x = s->mb_x;
5027 const int mb_y = s->mb_y & ~1;
5028 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5029 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5031 unsigned int ctx = 0;
5033 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5036 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5040 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @return 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I_16x16 variant
 *         (cbp_luma, cbp_chroma and prediction mode folded into the value).
 * In intra slices the first bin's context depends on whether the neighbors
 * are themselves non-I4x4; otherwise a fixed context is used.
 */
5043 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5044 uint8_t *state= &h->cabac_state[ctx_base];
5048 MpegEncContext * const s = &h->s;
5049 const int mba_xy = h->left_mb_xy[0];
5050 const int mbb_xy = h->top_mb_xy;
5052 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5054 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5056 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5057 return 0; /* I4x4 */
5060 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5061 return 0; /* I4x4 */
5064 if( get_cabac_terminate( &h->cabac ) )
5065 return 25; /* PCM */
5067 mb_type = 1; /* I16x16 */
5068 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5069 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5070 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5071 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5072 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on slice type.
 * I slices delegate to decode_cabac_intra_mb_type; P slices decode a small
 * tree (inter types 0..4, intra offset by 5); B slices use a 4-bit suffix
 * tree with neighbor-dependent context for the DIRECT bin.
 */
5076 static int decode_cabac_mb_type( H264Context *h ) {
5077 MpegEncContext * const s = &h->s;
5079 if( h->slice_type_nos == FF_I_TYPE ) {
5080 return decode_cabac_intra_mb_type(h, 3, 1);
5081 } else if( h->slice_type_nos == FF_P_TYPE ) {
5082 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5084 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5085 /* P_L0_D16x16, P_8x8 */
5086 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5088 /* P_L0_D8x16, P_L0_D16x8 */
5089 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5092 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5094 } else if( h->slice_type_nos == FF_B_TYPE ) {
5095 const int mba_xy = h->left_mb_xy[0];
5096 const int mbb_xy = h->top_mb_xy;
/* ctx counts same-slice neighbors that are not B_Direct */
5100 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5102 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5105 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5106 return 0; /* B_Direct_16x16 */
5108 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5109 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5112 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5113 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5114 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5115 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5117 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5118 else if( bits == 13 ) {
5119 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5120 } else if( bits == 14 )
5121 return 11; /* B_L1_L0_8x16 */
5122 else if( bits == 15 )
5123 return 22; /* B_8x8 */
5125 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5126 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5128 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC. The context (11..13 for P, offset by 13
 * for B) counts same-slice left/top neighbors that are not skipped; MBAFF
 * needs special neighbor addressing to pick the matching field/frame MB.
 */
5133 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5134 MpegEncContext * const s = &h->s;
5138 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5139 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5142 && h->slice_table[mba_xy] == h->slice_num
5143 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5144 mba_xy += s->mb_stride;
5146 mbb_xy = mb_xy - s->mb_stride;
5148 && h->slice_table[mbb_xy] == h->slice_num
5149 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5150 mbb_xy -= s->mb_stride;
5152 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5154 int mb_xy = h->mb_xy;
5156 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5159 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5161 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5164 if( h->slice_type_nos == FF_B_TYPE )
5166 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5169 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5172 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5175 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5176 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5177 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5179 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode (CABAC, ctx 64..67): a truncated-unary
 * code of at most 3 bins; the first bin's context counts neighbors with a
 * non-zero chroma prediction mode.
 */
5185 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5186 const int mba_xy = h->left_mb_xy[0];
5187 const int mbb_xy = h->top_mb_xy;
5191 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5192 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5195 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5198 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5201 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5203 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern (CABAC, ctx 73..76): one bin
 * per 8x8 block, each conditioned on whether the left/top neighbor 8x8
 * blocks (from this MB or the cached neighbor cbp) are coded.
 */
5209 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5210 int cbp_b, cbp_a, ctx, cbp = 0;
5212 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5213 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5215 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5216 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5217 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5218 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5219 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5220 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5221 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5222 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern (CABAC, ctx 77..80): 0 = no
 * chroma coefficients, 1 = DC only, 2 = DC + AC. Contexts depend on the
 * neighbors' chroma cbp (upper two bits of their cached cbp).
 */
5225 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5229 cbp_a = (h->left_cbp>>4)&0x03;
5230 cbp_b = (h-> top_cbp>>4)&0x03;
5233 if( cbp_a > 0 ) ctx++;
5234 if( cbp_b > 0 ) ctx += 2;
5235 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5239 if( cbp_a == 2 ) ctx++;
5240 if( cbp_b == 2 ) ctx += 2;
5241 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta (CABAC, ctx 60..): a unary-coded magnitude whose
 * first-bin context depends on whether the previous MB had a non-zero
 * delta; odd counts map to positive, even to negative values.
 * NOTE(review): the loop body incrementing 'val'/'ctx' and the positive
 * return path are elided in this listing.
 */
5243 static int decode_cabac_mb_dqp( H264Context *h) {
5247 if( h->last_qscale_diff != 0 )
5250 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5256 if(val > 102) //prevent infinite loop
5263 return -(val + 1)/2;
/**
 * Decode the sub_mb_type of one 8x8 partition in a P slice
 * (CABAC states 21..23, up to three bins).
 */
5265 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5266 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5268 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5270 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decode the sub_mb_type of one 8x8 partition in a B slice
 * (CABAC states 36..39). Returns an index into b_sub_mb_type_info:
 * 0 = B_Direct_8x8, then the two-list / bi-predictive variants.
 */
5274 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5276 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5277 return 0; /* B_Direct_8x8 */
5278 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5279 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5281 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5282 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5283 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types: two more bins decoded with state 39 */
5286 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5287 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decode the transform_size_8x8_flag; context 399..401 selected by the
 * number of neighbouring MBs already using the 8x8 transform.
 */
5291 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5292 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decode a reference index for block n of the given list (unary code,
 * CABAC states 54+ctx). Context derivation uses the cached left/top
 * reference indices; in B slices, neighbours coded as direct do not
 * contribute.
 */
5295 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5296 int refa = h->ref_cache[list][scan8[n] - 1];
5297 int refb = h->ref_cache[list][scan8[n] - 8];
5301 if( h->slice_type_nos == FF_B_TYPE) {
/* a direct-coded neighbour is treated as unavailable for the context */
5302 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5304 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5313 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity bound: a unary ref index of 32+ cannot be valid */
5319 if(ref >= 32 /*h->ref_list[list]*/){
5320 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5321 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decode one motion vector difference component (l==0: x, states 40+;
 * l==1: y, states 47+). Context from the sum of the neighbours' |mvd|;
 * magnitudes >= 9 continue with an exp-Golomb-style bypass suffix, and
 * the sign is a final bypass bin.
 */
5327 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5328 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5329 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5330 int ctxbase = (l == 0) ? 40 : 47;
5335 else if( amvd > 32 )
/* first bin: mvd == 0? */
5340 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, at most 8 context-coded bins */
5345 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exponential suffix for large magnitudes */
5353 while( get_cabac_bypass( &h->cabac ) ) {
5357 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5362 if( get_cabac_bypass( &h->cabac ) )
/* sign bit decoded in bypass mode */
5366 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Derive the context increment for the coded_block_flag of block idx in
 * category cat: nza/nzb are the "left/top neighbour has coefficients"
 * flags, taken from the cached cbp bits (DC / luma-DC-adjacent cases)
 * or from non_zero_count_cache. Returns ctx + 4*cat so each category
 * gets its own group of four contexts.
 */
5369 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* DC case: bit 8 of the cached cbp tracks the luma DC block */
5375 nza = h->left_cbp&0x100;
5376 nzb = h-> top_cbp&0x100;
/* chroma DC: per-component flag stored at cbp bits 6..7 */
5378 nza = (h->left_cbp>>(6+idx))&0x01;
5379 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC: neighbours read from the non-zero-count cache */
5383 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5384 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5386 assert(cat == 1 || cat == 2);
/* luma AC / 4x4: neighbours read from the non-zero-count cache */
5387 nza = h->non_zero_count_cache[scan8[idx] - 1];
5388 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5398 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset used for the
 * last_significant_coeff_flag during 8x8 residual decoding (used both
 * here and by the x86 assembly, hence DECLARE_ASM_CONST). */
5401 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5402 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5403 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5404 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5405 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decode one CABAC-coded residual block into block[]:
 *   1. coded_block_flag (skipped for 8x8 luma, cat 5),
 *   2. significance map + last_significant_coeff flags,
 *   3. coefficient levels (context-coded up to 14, bypass suffix above),
 *      sign, and optional dequantization via qmul.
 * is_dc selects the DC variants (no dequant scaling). The CABAC state
 * is optionally copied to a stack-local context (CABAC_ON_STACK) so the
 * hot loops work on locals, and written back before every return.
 * NOTE(review): interior source lines are elided in this chunk; comments
 * describe only the visible statements.
 */
5408 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* per-category context bases, indexed [MB_FIELD][cat] */
5409 static const int significant_coeff_flag_offset[2][6] = {
5410 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5411 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5413 static const int last_coeff_flag_offset[2][6] = {
5414 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5415 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5417 static const int coeff_abs_level_m1_offset[6] = {
5418 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position context offsets for the 8x8 significance map (frame/field) */
5420 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5421 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5422 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5423 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5424 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5425 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5426 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5427 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5428 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5430 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5431 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5432 * map node ctx => cabac ctx for level=1 */
5433 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5434 /* map node ctx => cabac ctx for level>1 */
5435 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5436 static const uint8_t coeff_abs_level_transition[2][8] = {
5437 /* update node ctx after decoding a level=1 */
5438 { 1, 2, 3, 3, 4, 5, 6, 7 },
5439 /* update node ctx after decoding a level>1 */
5440 { 4, 4, 4, 4, 5, 6, 7, 7 }
5446 int coeff_count = 0;
5449 uint8_t *significant_coeff_ctx_base;
5450 uint8_t *last_coeff_ctx_base;
5451 uint8_t *abs_level_m1_ctx_base;
5454 #define CABAC_ON_STACK
5456 #ifdef CABAC_ON_STACK
/* work on a local copy of the CABAC state for the duration of the call */
5459 cc.range = h->cabac.range;
5460 cc.low = h->cabac.low;
5461 cc.bytestream= h->cabac.bytestream;
5463 #define CC &h->cabac
5467 /* cat: 0-> DC 16x16 n = 0
5468 * 1-> AC 16x16 n = luma4x4idx
5469 * 2-> Luma4x4 n = luma4x4idx
5470 * 3-> DC Chroma n = iCbCr
5471 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5472 * 5-> Luma8x8 n = 4 * luma8x8idx
5475 /* read coded block flag */
5476 if( is_dc || cat != 5 ) {
5477 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* flag == 0: no coefficients — zero the nz count cache and bail out */
5480 h->non_zero_count_cache[scan8[16+n]] = 0;
5482 h->non_zero_count_cache[scan8[n]] = 0;
5485 #ifdef CABAC_ON_STACK
/* write the local CABAC state back before the early return */
5486 h->cabac.range = cc.range ;
5487 h->cabac.low = cc.low ;
5488 h->cabac.bytestream= cc.bytestream;
5494 significant_coeff_ctx_base = h->cabac_state
5495 + significant_coeff_flag_offset[MB_FIELD][cat];
5496 last_coeff_ctx_base = h->cabac_state
5497 + last_coeff_flag_offset[MB_FIELD][cat];
5498 abs_level_m1_ctx_base = h->cabac_state
5499 + coeff_abs_level_m1_offset[cat];
5501 if( !is_dc && cat == 5 ) {
/* significance map: records positions of non-zero coefficients in
 * index[], stopping early when the last_coeff flag fires */
5502 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5503 for(last= 0; last < coefs; last++) { \
5504 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5505 if( get_cabac( CC, sig_ctx )) { \
5506 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5507 index[coeff_count++] = last; \
5508 if( get_cabac( CC, last_ctx ) ) { \
5514 if( last == max_coeff -1 ) {\
5515 index[coeff_count++] = last;\
5517 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* hand-written x86 asm fast paths when available */
5518 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5519 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5521 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5523 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5525 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5528 assert(coeff_count > 0);
/* record "block has coefficients" in cbp_table / nz-count caches */
5532 h->cbp_table[h->mb_xy] |= 0x100;
5534 h->cbp_table[h->mb_xy] |= 0x40 << n;
5537 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5539 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5541 assert( cat == 1 || cat == 2 );
5542 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode levels in reverse scan order, tracking node_ctx per the
 * tables above; qmul == NULL path stores unscaled (DC) values */
5546 while( coeff_count-- ) {
5547 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5549 int j= scantable[index[coeff_count]];
5551 if( get_cabac( CC, ctx ) == 0 ) {
5552 node_ctx = coeff_abs_level_transition[0][node_ctx];
5554 block[j] = get_cabac_bypass_sign( CC, -1);
5556 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5560 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5561 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* magnitudes 2..14 context-coded; >= 15 get a bypass exp suffix */
5563 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5567 if( coeff_abs >= 15 ) {
5569 while( get_cabac_bypass( CC ) ) {
5575 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5581 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5583 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5587 #ifdef CABAC_ON_STACK
/* commit the local CABAC state back to the context */
5588 h->cabac.range = cc.range ;
5589 h->cabac.low = cc.low ;
5590 h->cabac.bytestream= cc.bytestream;
5595 #ifndef CONFIG_SMALL
/* Non-CONFIG_SMALL specialization: DC variant (is_dc=1), letting the
 * always-inline worker fold away the AC-only paths. */
5596 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5597 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-CONFIG_SMALL specialization: AC/4x4/8x8 variant (is_dc=0). */
5600 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5601 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Front-end for residual decoding: categories 0 (luma DC) and 3
 * (chroma DC) take the DC path. With CONFIG_SMALL one shared worker is
 * called; otherwise the two pre-specialized variants above are used.
 * NOTE(review): the #ifdef/#else lines are elided in this chunk. */
5605 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5607 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5609 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5610 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Compute h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * The simple frame case is mb_xy - stride / mb_xy - 1; with MBAFF the
 * neighbours are corrected for frame/field coding mismatches between
 * the current MB pair and its top/left pairs.
 */
5614 static inline void compute_mb_neighbors(H264Context *h)
5616 MpegEncContext * const s = &h->s;
5617 const int mb_xy = h->mb_xy;
5618 h->top_mb_xy = mb_xy - s->mb_stride;
5619 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: addresses are computed on MB pairs (y rounded down to even) */
5621 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5622 const int top_pair_xy = pair_xy - s->mb_stride;
5623 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5624 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5625 const int curr_mb_frame_flag = !MB_FIELD;
5626 const int bottom = (s->mb_y & 1);
5628 ? !curr_mb_frame_flag // bottom macroblock
5629 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5631 h->top_mb_xy -= s->mb_stride;
/* left pair coded in the other frame/field mode: use the pair address */
5633 if (left_mb_frame_flag != curr_mb_frame_flag) {
5634 h->left_mb_xy[0] = pair_xy - 1;
5636 } else if (FIELD_PICTURE) {
5637 h->top_mb_xy -= s->mb_stride;
5643 * decodes a macroblock
5644 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): interior source lines are elided throughout this
 * function in the present chunk; comments below annotate only the
 * visible statements. Overall flow: skip flags -> mb_type -> either
 * PCM passthrough or (intra prediction modes | inter refs+MVs) ->
 * cbp -> qp delta -> residuals. */
5646 static int decode_mb_cabac(H264Context *h) {
5647 MpegEncContext * const s = &h->s;
5649 int mb_type, partition_count, cbp = 0;
5650 int dct8x8_allowed= h->pps.transform_8x8_mode;
5652 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5654 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5656 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (P/B slices only) --- */
5657 if( h->slice_type_nos != FF_I_TYPE ) {
5659 /* a skipped mb needs the aff flag from the following mb */
5660 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5661 predict_field_decoding_flag(h);
5662 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5663 skip = h->next_mb_skipped;
5665 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5666 /* read skip flags */
5668 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5669 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5670 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5671 if(h->next_mb_skipped)
5672 predict_field_decoding_flag(h);
5674 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset per-MB side data */
5679 h->cbp_table[mb_xy] = 0;
5680 h->chroma_pred_mode_table[mb_xy] = 0;
5681 h->last_qscale_diff = 0;
/* field decoding flag for the top MB of an MBAFF pair */
5688 if( (s->mb_y&1) == 0 )
5690 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5692 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5694 h->prev_mb_skipped = 0;
/* --- macroblock type --- */
5696 compute_mb_neighbors(h);
5697 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5698 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5702 if( h->slice_type_nos == FF_B_TYPE ) {
5704 partition_count= b_mb_type_info[mb_type].partition_count;
5705 mb_type= b_mb_type_info[mb_type].type;
5708 goto decode_intra_mb;
5710 } else if( h->slice_type_nos == FF_P_TYPE ) {
5712 partition_count= p_mb_type_info[mb_type].partition_count;
5713 mb_type= p_mb_type_info[mb_type].type;
5716 goto decode_intra_mb;
5719 if(h->slice_type == FF_SI_TYPE && mb_type)
5721 assert(h->slice_type_nos == FF_I_TYPE);
/* intra MB: look up cbp / prediction mode / type from the table */
5723 partition_count = 0;
5724 cbp= i_mb_type_info[mb_type].cbp;
5725 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5726 mb_type= i_mb_type_info[mb_type].type;
5729 mb_type |= MB_TYPE_INTERLACED;
5731 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow in the bitstream --- */
5733 if(IS_INTRA_PCM(mb_type)) {
5737 // We assume these blocks are very rare so we do not optimize it.
5738 // FIXME The two following lines get the bitstream position in the cabac
5739 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5740 ptr= h->cabac.bytestream;
5741 if(h->cabac.low&0x1) ptr--;
5743 if(h->cabac.low&0x1FF) ptr--;
5746 // The pixels are stored in the same order as levels in h->mb array.
5747 for(y=0; y<16; y++){
5748 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5749 for(x=0; x<16; x++){
5750 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5751 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5755 const int index= 256 + 4*(y&3) + 32*(y>>2);
5757 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5758 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5762 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5764 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5765 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC engine after the raw bytes */
5769 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5771 // All blocks are present
5772 h->cbp_table[mb_xy] = 0x1ef;
5773 h->chroma_pred_mode_table[mb_xy] = 0;
5774 // In deblocking, the quantizer is 0
5775 s->current_picture.qscale_table[mb_xy]= 0;
5776 // All coeffs are present
5777 memset(h->non_zero_count[mb_xy], 16, 16);
5778 s->current_picture.mb_type[mb_xy]= mb_type;
5779 h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled while decoding a field MB */
5784 h->ref_count[0] <<= 1;
5785 h->ref_count[1] <<= 1;
5788 fill_caches(h, mb_type, 0);
/* --- intra MB: read prediction modes --- */
5790 if( IS_INTRA( mb_type ) ) {
5792 if( IS_INTRA4x4( mb_type ) ) {
5793 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5794 mb_type |= MB_TYPE_8x8DCT;
5795 for( i = 0; i < 16; i+=4 ) {
5796 int pred = pred_intra_mode( h, i );
5797 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5798 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5801 for( i = 0; i < 16; i++ ) {
5802 int pred = pred_intra_mode( h, i );
5803 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5805 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5808 write_back_intra_pred_mode(h);
5809 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5811 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5812 if( h->intra16x16_pred_mode < 0 ) return -1;
5814 h->chroma_pred_mode_table[mb_xy] =
5815 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5817 pred_mode= check_intra_pred_mode( h, pred_mode );
5818 if( pred_mode < 0 ) return -1;
5819 h->chroma_pred_mode= pred_mode;
/* --- inter MB with four 8x8 partitions (sub_mb_types) --- */
5820 } else if( partition_count == 4 ) {
5821 int i, j, sub_partition_count[4], list, ref[2][4];
5823 if( h->slice_type_nos == FF_B_TYPE ) {
5824 for( i = 0; i < 4; i++ ) {
5825 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5826 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5827 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5829 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5830 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5831 pred_direct_motion(h, &mb_type);
5832 h->ref_cache[0][scan8[4]] =
5833 h->ref_cache[1][scan8[4]] =
5834 h->ref_cache[0][scan8[12]] =
5835 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5836 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5837 for( i = 0; i < 4; i++ )
5838 if( IS_DIRECT(h->sub_mb_type[i]) )
5839 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5843 for( i = 0; i < 4; i++ ) {
5844 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5845 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5846 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each non-direct 8x8 partition */
5850 for( list = 0; list < h->list_count; list++ ) {
5851 for( i = 0; i < 4; i++ ) {
5852 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5853 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5854 if( h->ref_count[list] > 1 )
5855 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5861 h->ref_cache[list][ scan8[4*i]+1 ]=
5862 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5867 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors per sub-partition, filling mv/mvd caches */
5869 for(list=0; list<h->list_count; list++){
5871 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5872 if(IS_DIRECT(h->sub_mb_type[i])){
5873 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5877 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5878 const int sub_mb_type= h->sub_mb_type[i];
5879 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5880 for(j=0; j<sub_partition_count[i]; j++){
5883 const int index= 4*i + block_width*j;
5884 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5885 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5886 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5888 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5889 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5890 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate mv/mvd into the cache cells the sub-block covers */
5892 if(IS_SUB_8X8(sub_mb_type)){
5894 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5896 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5899 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5901 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5902 }else if(IS_SUB_8X4(sub_mb_type)){
5903 mv_cache[ 1 ][0]= mx;
5904 mv_cache[ 1 ][1]= my;
5906 mvd_cache[ 1 ][0]= mx - mpx;
5907 mvd_cache[ 1 ][1]= my - mpy;
5908 }else if(IS_SUB_4X8(sub_mb_type)){
5909 mv_cache[ 8 ][0]= mx;
5910 mv_cache[ 8 ][1]= my;
5912 mvd_cache[ 8 ][0]= mx - mpx;
5913 mvd_cache[ 8 ][1]= my - mpy;
5915 mv_cache[ 0 ][0]= mx;
5916 mv_cache[ 0 ][1]= my;
5918 mvd_cache[ 0 ][0]= mx - mpx;
5919 mvd_cache[ 0 ][1]= my - mpy;
/* list not used for this partition: zero the 2x2 cache block */
5922 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5923 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5924 p[0] = p[1] = p[8] = p[9] = 0;
5925 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B_Direct_16x16 --- */
5929 } else if( IS_DIRECT(mb_type) ) {
5930 pred_direct_motion(h, &mb_type);
5931 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5932 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5933 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5935 int list, mx, my, i, mpx, mpy;
5936 if(IS_16X16(mb_type)){
5937 for(list=0; list<h->list_count; list++){
5938 if(IS_DIR(mb_type, 0, list)){
5939 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5940 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5942 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5944 for(list=0; list<h->list_count; list++){
5945 if(IS_DIR(mb_type, 0, list)){
5946 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5948 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5949 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5950 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5952 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5953 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5955 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5958 else if(IS_16X8(mb_type)){
5959 for(list=0; list<h->list_count; list++){
5961 if(IS_DIR(mb_type, i, list)){
5962 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5963 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5965 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5968 for(list=0; list<h->list_count; list++){
5970 if(IS_DIR(mb_type, i, list)){
5971 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5972 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5973 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5974 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5976 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5977 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5979 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5980 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5985 assert(IS_8X16(mb_type));
5986 for(list=0; list<h->list_count; list++){
5988 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5989 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5990 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5992 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5995 for(list=0; list<h->list_count; list++){
5997 if(IS_DIR(mb_type, i, list)){
5998 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5999 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6000 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6002 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6003 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6004 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6006 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6007 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6014 if( IS_INTER( mb_type ) ) {
6015 h->chroma_pred_mode_table[mb_xy] = 0;
6016 write_back_motion( h, mb_type );
/* --- cbp (explicit unless implied by intra16x16 type) --- */
6019 if( !IS_INTRA16x16( mb_type ) ) {
6020 cbp = decode_cabac_mb_cbp_luma( h );
6021 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6024 h->cbp_table[mb_xy] = h->cbp = cbp;
6026 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6027 if( decode_cabac_mb_transform_size( h ) )
6028 mb_type |= MB_TYPE_8x8DCT;
6030 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual data --- */
6032 if( cbp || IS_INTRA16x16( mb_type ) ) {
6033 const uint8_t *scan, *scan8x8, *dc_scan;
6034 const uint32_t *qmul;
/* choose frame/field scan tables (qscale==0 uses the _q0 variants) */
6037 if(IS_INTERLACED(mb_type)){
6038 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6039 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6040 dc_scan= luma_dc_field_scan;
6042 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6043 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6044 dc_scan= luma_dc_zigzag_scan;
6047 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6048 if( dqp == INT_MIN ){
6049 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into the legal 0..51 range */
6053 if(((unsigned)s->qscale) > 51){
6054 if(s->qscale<0) s->qscale+= 52;
6055 else s->qscale-= 52;
6057 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6058 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* intra16x16: separate DC block (cat 0) + 15-coeff AC blocks (cat 1) */
6060 if( IS_INTRA16x16( mb_type ) ) {
6062 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6063 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6066 qmul = h->dequant4_coeff[0][s->qscale];
6067 for( i = 0; i < 16; i++ ) {
6068 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6069 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6072 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* otherwise: per-8x8 luma blocks, 8x8 DCT (cat 5) or four 4x4 (cat 2) */
6076 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6077 if( cbp & (1<<i8x8) ) {
6078 if( IS_8x8DCT(mb_type) ) {
6079 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6080 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6082 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6083 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6084 const int index = 4*i8x8 + i4x4;
6085 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6087 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6088 //STOP_TIMER("decode_residual")
6092 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6093 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC blocks (cat 3) for Cb and Cr */
6100 for( c = 0; c < 2; c++ ) {
6101 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6102 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* chroma AC blocks (cat 4) with per-component chroma QP dequant */
6108 for( c = 0; c < 2; c++ ) {
6109 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6110 for( i = 0; i < 4; i++ ) {
6111 const int index = 16 + 4 * c + i;
6112 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6113 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6117 uint8_t * const nnz= &h->non_zero_count_cache[0];
6118 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6119 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual at all: clear the whole non-zero-count cache */
6122 uint8_t * const nnz= &h->non_zero_count_cache[0];
6123 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6124 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6125 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6126 h->last_qscale_diff = 0;
6129 s->current_picture.qscale_table[mb_xy]= s->qscale;
6130 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling done above */
6133 h->ref_count[0] >>= 1;
6134 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge. bS < 4 edges go through the DSP
 * normal filter with per-line tc0 clipping; bS == 4 (intra edge) runs
 * the strong filter in C over all 16 rows.
 * NOTE(review): interior lines (loop headers/braces) are elided here.
 */
6141 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6143 const int index_a = qp + h->slice_alpha_c0_offset;
6144 const int alpha = (alpha_table+52)[index_a];
6145 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc0 per bS group; -1 flags "do not filter" to the DSP routine */
6150 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6151 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6153 /* 16px edge length, because bS=4 is triggered by being at
6154 * the edge of an intra MB, so all 4 bS are the same */
6155 for( d = 0; d < 16; d++ ) {
6156 const int p0 = pix[-1];
6157 const int p1 = pix[-2];
6158 const int p2 = pix[-3];
6160 const int q0 = pix[0];
6161 const int q1 = pix[1];
6162 const int q2 = pix[2];
/* edge activity thresholds: only filter across real block edges */
6164 if( FFABS( p0 - q0 ) < alpha &&
6165 FFABS( p1 - p0 ) < beta &&
6166 FFABS( q1 - q0 ) < beta ) {
6168 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* strong filtering on the p side when p2 is close to p0 */
6169 if( FFABS( p2 - p0 ) < beta)
6171 const int p3 = pix[-4];
6173 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6174 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6175 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6178 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* symmetric handling for the q side */
6180 if( FFABS( q2 - q0 ) < beta)
6182 const int q3 = pix[3];
6184 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6185 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6186 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6189 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6193 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6194 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6196 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblock one vertical chroma edge: normal (bS<4) and intra (bS==4)
 * cases both go through the chroma DSP loop filters.
 */
6202 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6204 const int index_a = qp + h->slice_alpha_c0_offset;
6205 const int alpha = (alpha_table+52)[index_a];
6206 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1; 0 marks "no filtering" for that group */
6211 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6212 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6214 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock a vertical luma edge between MBs of an MBAFF pair, one row at
 * a time: qp, alpha/beta and bS index are recomputed per row because
 * the two field/frame MBs may use different QPs and strengths.
 * NOTE(review): interior lines are elided in this chunk.
 */
6218 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6220 for( i = 0; i < 16; i++, pix += stride) {
6226 int bS_index = (i >> 1);
6229 bS_index |= (i & 1);
6232 if( bS[bS_index] == 0 ) {
/* select the QP of the MB this row belongs to */
6236 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6237 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6238 alpha = (alpha_table+52)[index_a];
6239 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filtering with tc0 clipping */
6241 if( bS[bS_index] < 4 ) {
6242 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6243 const int p0 = pix[-1];
6244 const int p1 = pix[-2];
6245 const int p2 = pix[-3];
6246 const int q0 = pix[0];
6247 const int q1 = pix[1];
6248 const int q2 = pix[2];
6250 if( FFABS( p0 - q0 ) < alpha &&
6251 FFABS( p1 - p0 ) < beta &&
6252 FFABS( q1 - q0 ) < beta ) {
6256 if( FFABS( p2 - p0 ) < beta ) {
6257 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6260 if( FFABS( q2 - q0 ) < beta ) {
6261 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6265 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6266 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6267 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6268 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filtering, same formulas as filter_mb_edgev */
6271 const int p0 = pix[-1];
6272 const int p1 = pix[-2];
6273 const int p2 = pix[-3];
6275 const int q0 = pix[0];
6276 const int q1 = pix[1];
6277 const int q2 = pix[2];
6279 if( FFABS( p0 - q0 ) < alpha &&
6280 FFABS( p1 - p0 ) < beta &&
6281 FFABS( q1 - q0 ) < beta ) {
6283 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6284 if( FFABS( p2 - p0 ) < beta)
6286 const int p3 = pix[-4];
6288 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6289 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6290 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6293 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6295 if( FFABS( q2 - q0 ) < beta)
6297 const int q3 = pix[3];
6299 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6300 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6301 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6304 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6308 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6309 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6311 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock a vertical chroma edge between MBs of an MBAFF pair, per row
 * (8 chroma rows); per-row qp/alpha/beta selection as in the luma
 * MBAFF filter, but only p1..q1 are read and p0/q0 (plus clipped
 * p1/q1 terms) written.
 * NOTE(review): interior lines are elided in this chunk.
 */
6316 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6318 for( i = 0; i < 8; i++, pix += stride) {
6326 if( bS[bS_index] == 0 ) {
/* pick the QP of the sub-MB this chroma row belongs to */
6330 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6331 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6332 alpha = (alpha_table+52)[index_a];
6333 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal chroma filtering: tc = tc0 + 1 */
6335 if( bS[bS_index] < 4 ) {
6336 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6337 const int p0 = pix[-1];
6338 const int p1 = pix[-2];
6339 const int q0 = pix[0];
6340 const int q1 = pix[1];
6342 if( FFABS( p0 - q0 ) < alpha &&
6343 FFABS( p1 - p0 ) < beta &&
6344 FFABS( q1 - q0 ) < beta ) {
6345 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6347 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6348 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6349 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: intra-strength chroma filter */
6352 const int p0 = pix[-1];
6353 const int p1 = pix[-2];
6354 const int q0 = pix[0];
6355 const int q1 = pix[1];
6357 if( FFABS( p0 - q0 ) < alpha &&
6358 FFABS( p1 - p0 ) < beta &&
6359 FFABS( q1 - q0 ) < beta ) {
6361 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6362 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6363 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 pixels wide). When a DSP routine is
 * available it is used with a tc0 array; otherwise the C fallback below
 * implements the normal (bS<4) and strong (bS==4) filters per H.264 8.7.
 * NOTE(review): sparse extraction — leading numbers are original line
 * numbers; several surrounding lines (branches, loop closes) are missing. */
6369 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6371 const int index_a = qp + h->slice_alpha_c0_offset;
6372 const int alpha = (alpha_table+52)[index_a];
6373 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* For a horizontal edge, neighboring samples are one stride apart. */
6374 const int pix_next = stride;
/* tc0 lookup per 4-pixel segment; -1 marks "no filtering" for the DSP routine. */
6379 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6380 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6382 /* 16px edge length, see filter_mb_edgev */
6383 for( d = 0; d < 16; d++ ) {
6384 const int p0 = pix[-1*pix_next];
6385 const int p1 = pix[-2*pix_next];
6386 const int p2 = pix[-3*pix_next];
6387 const int q0 = pix[0];
6388 const int q1 = pix[1*pix_next];
6389 const int q2 = pix[2*pix_next];
6391 if( FFABS( p0 - q0 ) < alpha &&
6392 FFABS( p1 - p0 ) < beta &&
6393 FFABS( q1 - q0 ) < beta ) {
/* Strong-filter path (bS==4): may rewrite up to 3 samples on each side. */
6395 const int p3 = pix[-4*pix_next];
6396 const int q3 = pix[ 3*pix_next];
6398 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6399 if( FFABS( p2 - p0 ) < beta) {
6401 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6402 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6403 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6406 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6408 if( FFABS( q2 - q0 ) < beta) {
6410 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6411 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6412 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6415 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: only p0/q0 are smoothed. */
6419 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6420 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6422 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge via the DSP loop-filter hooks.
 * Chroma tc is tc0 + 1 (0 == skip segment); intra edges use the
 * _intra variant, which needs no tc array.
 * NOTE(review): sparse extraction — the branch structure between the two
 * DSP calls is missing from this view. */
6429 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6431 const int index_a = qp + h->slice_alpha_c0_offset;
6432 const int alpha = (alpha_table+52)[index_a];
6433 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6438 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6439 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6441 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock. Falls back to the full filter_mb()
 * for the cases it cannot handle (picture edges, per-plane chroma QP diff,
 * slice-boundary-aware filtering), skips entirely when all averaged QPs are
 * below the no-op threshold, uses fixed bS for intra MBs, and otherwise
 * computes bS via the DSP h264_loop_filter_strength hook.
 * NOTE(review): sparse extraction — leading numbers are original line
 * numbers; returns, else branches and the FILTER macro invocations are
 * partly missing from this view. */
6445 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6446 MpegEncContext * const s = &h->s;
/* In a bottom-field picture the first MB row is row 1, not row 0. */
6447 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6449 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Unsupported cases -> full (slow) filter. */
6453 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6455 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6456 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6457 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6460 assert(!FRAME_MBAFF);
6462 mb_type = s->current_picture.mb_type[mb_xy];
/* Average this MB's QP with left (qp0) and top (qp1) neighbors, luma and chroma. */
6463 qp = s->current_picture.qscale_table[mb_xy];
6464 qp0 = s->current_picture.qscale_table[mb_xy-1];
6465 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6466 qpc = get_chroma_qp( h, 0, qp );
6467 qpc0 = get_chroma_qp( h, 0, qp0 );
6468 qpc1 = get_chroma_qp( h, 0, qp1 );
6469 qp0 = (qp + qp0 + 1) >> 1;
6470 qp1 = (qp + qp1 + 1) >> 1;
6471 qpc0 = (qpc + qpc0 + 1) >> 1;
6472 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this threshold the filter provably changes nothing -> early out. */
6473 qp_thresh = 15 - h->slice_alpha_c0_offset;
6474 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6475 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: constant bS (4 on MB edges, 3 inside; 3 on top edge in field pictures). */
6478 if( IS_INTRA(mb_type) ) {
6479 int16_t bS4[4] = {4,4,4,4};
6480 int16_t bS3[4] = {3,3,3,3};
6481 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every other internal edge is filtered. */
6482 if( IS_8x8DCT(mb_type) ) {
6483 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6484 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6485 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6486 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6488 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6489 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6490 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6491 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6492 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6493 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6494 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6495 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: only MB edge and the middle edge exist (8x8 plane). */
6497 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6498 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6499 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6500 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6501 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6502 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6503 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6504 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: bS computed in bulk; aliased as uint64 for whole-edge writes
 * (bS is 8-byte aligned via DECLARE_ALIGNED_8). */
6507 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6508 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6510 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6512 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6514 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6515 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6516 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6517 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6519 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6520 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6521 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6522 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Intra neighbors force maximum bS on the shared edge. */
6524 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6525 bSv[0][0] = 0x0004000400040004ULL;
6526 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6527 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* Filter one edge (dir 0 = vertical, 1 = horizontal); edge 0 uses the
 * neighbor-averaged QP, inner edges use this MB's QP. */
6529 #define FILTER(hv,dir,edge)\
6530 if(bSv[dir][edge]) {\
6531 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6533 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6534 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6540 } else if( IS_8x8DCT(mb_type) ) {
/* Full (reference) deblocking of one macroblock: computes boundary strengths
 * per edge from intra-ness, coded coefficients (nnz), reference frames and
 * motion-vector differences, then applies the edge filters to luma and both
 * chroma planes. Handles MBAFF left-edge and the twice-filtered top edge of
 * a frame MB over a field pair.
 * NOTE(review): sparse extraction — leading numbers are original line
 * numbers; many control-flow lines (braces, continue/return, else) are
 * missing from this view, so read the structure from upstream h264.c. */
6559 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6560 MpegEncContext * const s = &h->s;
6561 const int mb_xy= mb_x + mb_y*s->mb_stride;
6562 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Vertical MV threshold for bS: 2 (half-pel units per field line) when interlaced. */
6563 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6564 int first_vertical_edge_done = 0;
6567 //for sufficiently low qp, filtering wouldn't do anything
6568 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6570 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6571 int qp = s->current_picture.qscale_table[mb_xy];
6573 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6574 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6580 // left mb is in picture
6581 && h->slice_table[mb_xy-1] != 255
6582 // and current and left pair do not have the same interlaced type
6583 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6584 // and left mb is in the same slice if deblocking_filter == 2
6585 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6586 /* First vertical edge is different in MBAFF frames
6587 * There are 8 different bS to compute and 2 different Qp
6589 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6590 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6595 int mb_qp, mbn0_qp, mbn1_qp;
6597 first_vertical_edge_done = 1;
6599 if( IS_INTRA(mb_type) )
6600 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6602 for( i = 0; i < 8; i++ ) {
/* Field MB: 4 lines per left neighbor; frame MB: alternate neighbors per line. */
6603 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6605 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6607 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6608 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6609 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Edge QPs are the average of this MB's QP and each left neighbor's,
 * separately for luma (qp), Cb (bqp) and Cr (rqp). */
6616 mb_qp = s->current_picture.qscale_table[mb_xy];
6617 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6618 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6619 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6620 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6621 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6622 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6623 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6624 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6625 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6626 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6627 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6628 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6631 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6632 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6633 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6634 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6635 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6637 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6638 for( dir = 0; dir < 2; dir++ )
6641 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6642 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps per-slice reference indices to frame numbers so references
 * can be compared across the slice boundary. */
6643 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6644 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
/* No neighbor MB (slice_table == 255) -> skip edge 0. */
6645 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6647 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6648 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6649 // how often to recheck mv-based bS when iterating between edges
6650 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6651 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6652 // how often to recheck mv-based bS when iterating along each edge
6653 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6655 if (first_vertical_edge_done) {
6657 first_vertical_edge_done = 0;
6660 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6663 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6664 && !IS_INTERLACED(mb_type)
6665 && IS_INTERLACED(mbm_type)
6667 // This is a special case in the norm where the filtering must
6668 // be done twice (one each of the field) even if we are in a
6669 // frame macroblock.
6671 static const int nnz_idx[4] = {4,5,6,3};
6672 unsigned int tmp_linesize = 2 * linesize;
6673 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6674 int mbn_xy = mb_xy - 2 * s->mb_stride;
6679 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6680 if( IS_INTRA(mb_type) ||
6681 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6682 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6684 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6685 for( i = 0; i < 4; i++ ) {
6686 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6687 mbn_nnz[nnz_idx[i]] != 0 )
6693 // Do not use s->qscale as luma quantizer because it has not the same
6694 // value in IPCM macroblocks.
6695 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6696 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6697 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6698 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6699 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6700 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6701 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6702 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main edge loop: edge 0 borders the neighbor MB, edges 1..3 are internal. */
6709 for( edge = start; edge < edges; edge++ ) {
6710 /* mbn_xy: neighbor macroblock */
6711 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6712 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6713 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform: odd internal edges carry no transform boundary. */
6717 if( (edge&1) && IS_8x8DCT(mb_type) )
6720 if( IS_INTRA(mb_type) ||
6721 IS_INTRA(mbn_type) ) {
6724 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6725 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6734 bS[0] = bS[1] = bS[2] = bS[3] = value;
6739 if( edge & mask_edge ) {
6740 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Mixed frame/field pair across the edge: bS is fixed at 1. */
6743 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6744 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole-edge shortcut: partition layout allows one ref/MV comparison for all 4. */
6747 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6748 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6749 int bn_idx= b_idx - (dir ? 8:1);
6752 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6753 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6754 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6755 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the swapped list (L0 vs L1). */
6758 if(h->slice_type_nos == FF_B_TYPE && v){
6760 for( l = 0; !v && l < 2; l++ ) {
6762 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6763 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6764 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6768 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: per-4x4-block bS from nnz, refs and MVs. */
6774 for( i = 0; i < 4; i++ ) {
6775 int x = dir == 0 ? edge : i;
6776 int y = dir == 0 ? i : edge;
6777 int b_idx= 8 + 4 + x + 8*y;
6778 int bn_idx= b_idx - (dir ? 8:1);
6780 if( h->non_zero_count_cache[b_idx] != 0 ||
6781 h->non_zero_count_cache[bn_idx] != 0 ) {
6787 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6788 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6789 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6790 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6796 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6798 for( l = 0; l < 2; l++ ) {
6800 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6801 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6802 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All four bS zero -> nothing to filter on this edge. */
6811 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6816 // Do not use s->qscale as luma quantizer because it has not the same
6817 // value in IPCM macroblocks.
6818 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6819 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6820 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6821 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Apply: vertical edges below, horizontal further down; chroma only on even edges. */
6823 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6824 if( (edge&1) == 0 ) {
6825 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6826 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6827 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6828 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6831 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6832 if( (edge&1) == 0 ) {
6833 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6834 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6835 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6836 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode one slice: CABAC path (byte-aligns, initializes the arithmetic
 * decoder and the 460 context states from QP, then loops decode_mb_cabac
 * until the terminate bit), CAVLC path (loops decode_mb_cavlc), and a third
 * (partitioned/SVQ3-style) path looping decode_mb. Each path reports
 * decoded regions to the error-resilience layer via ff_er_add_slice.
 * NOTE(review): sparse extraction — leading numbers are original line
 * numbers; loop headers, braces and returns are partly missing. */
6843 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6844 MpegEncContext * const s = &h->s;
6845 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6849 if( h->pps.cabac ) {
/* CABAC payload starts on a byte boundary. */
6853 align_get_bits( &s->gb );
6856 ff_init_cabac_states( &h->cabac);
6857 ff_init_cabac_decoder( &h->cabac,
6858 s->gb.buffer + get_bits_count(&s->gb)/8,
6859 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6860 /* calculate pre-state */
6861 for( i= 0; i < 460; i++ ) {
6863 if( h->slice_type_nos == FF_I_TYPE )
6864 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6866 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Context state encodes (probability index, MPS bit) in one byte. */
6869 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6871 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6876 int ret = decode_mb_cabac(h);
6878 //STOP_TIMER("decode_mb_cabac")
6880 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well. */
6882 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6885 if(ret>=0) ret = decode_mb_cabac(h);
6887 if(ret>=0) hl_decode_mb(h);
6890 eos = get_cabac_terminate( &h->cabac );
/* Bytestream overrun beyond 2 bytes of padding counts as corruption. */
6892 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6893 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6894 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6898 if( ++s->mb_x >= s->mb_width ) {
6900 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6902 if(FIELD_OR_MBAFF_PICTURE) {
6907 if( eos || s->mb_y >= s->mb_height ) {
6908 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6909 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path. */
6916 int ret = decode_mb_cavlc(h);
6918 if(ret>=0) hl_decode_mb(h);
6920 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6922 ret = decode_mb_cavlc(h);
6924 if(ret>=0) hl_decode_mb(h);
6929 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6930 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6935 if(++s->mb_x >= s->mb_width){
6937 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6939 if(FIELD_OR_MBAFF_PICTURE) {
6942 if(s->mb_y >= s->mb_height){
6943 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exact end of bitstream == clean slice end; anything else is an error. */
6945 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6946 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6950 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6957 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6958 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6959 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6960 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6964 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Third decode loop (decode_mb-based path). */
6973 for(;s->mb_y < s->mb_height; s->mb_y++){
6974 for(;s->mb_x < s->mb_width; s->mb_x++){
6975 int ret= decode_mb(h);
6980 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6981 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6986 if(++s->mb_x >= s->mb_width){
6988 if(++s->mb_y >= s->mb_height){
6989 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6990 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6994 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): the next line contains stray '?' characters from a bad
 * extraction/encoding; it presumably read
 * "if(get_bits_count(s->gb) >= s->gb.size_in_bits){" like the line after
 * it — confirm against upstream FFmpeg h264.c before restoring. */
7001 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
7002 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7003 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7007 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7014 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7017 return -1; //not reached
/* Parse an SEI "user data unregistered" payload. Reads up to
 * sizeof(user_data)-1 bytes (16-byte UUID + message), scans the message for
 * an x264 version banner to record h->x264_build (used for bug workarounds),
 * and skips any remaining payload bytes.
 * NOTE(review): sparse extraction — termination/return lines are missing. */
7020 static int decode_unregistered_user_data(H264Context *h, int size){
7021 MpegEncContext * const s = &h->s;
/* First 16 bytes are the UUID; the rest is the free-form message. */
7022 uint8_t user_data[16+256];
7028 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7029 user_data[i]= get_bits(&s->gb, 8);
7033 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7034 if(e==1 && build>=0)
7035 h->x264_build= build;
7037 if(s->avctx->debug & FF_DEBUG_BUGS)
7038 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume payload bytes beyond the local buffer. */
7041 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: repeatedly read (type, size) pairs — each encoded
 * as a run of 0xFF bytes plus a final byte — dispatch known payload types
 * (user data unregistered) and skip the rest.
 * NOTE(review): sparse extraction — switch/return lines are missing. */
7046 static int decode_sei(H264Context *h){
7047 MpegEncContext * const s = &h->s;
7049 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* ff_...: 0xFF bytes accumulate 255 each; the first non-0xFF byte ends the value. */
7054 type+= show_bits(&s->gb, 8);
7055 }while(get_bits(&s->gb, 8) == 255);
7059 size+= show_bits(&s->gb, 8);
7060 }while(get_bits(&s->gb, 8) == 255);
7064 if(decode_unregistered_user_data(h, size) < 0)
/* Unknown payload type: skip size bytes. */
7068 skip_bits(&s->gb, 8*size);
7071 //FIXME check bits here
7072 align_get_bits(&s->gb);
/* Parse (and discard) hrd_parameters() from the VUI — the decoder reads the
 * fields only to keep bitstream position; nothing is stored in the SPS here.
 * Field names follow the H.264 spec, Annex E. */
7078 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7079 MpegEncContext * const s = &h->s;
7081 cpb_count = get_ue_golomb(&s->gb) + 1;
7082 get_bits(&s->gb, 4); /* bit_rate_scale */
7083 get_bits(&s->gb, 4); /* cpb_size_scale */
7084 for(i=0; i<cpb_count; i++){
7085 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7086 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7087 get_bits1(&s->gb); /* cbr_flag */
7089 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7090 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7091 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7092 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse vui_parameters() (H.264 Annex E): sample aspect ratio, timing info,
 * HRD parameters and bitstream restrictions. Stores what the decoder uses
 * (sar, timing, num_reorder_frames) and skips the rest.
 * NOTE(review): sparse extraction — some error-return lines are missing. */
7095 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7096 MpegEncContext * const s = &h->s;
7097 int aspect_ratio_info_present_flag;
7098 unsigned int aspect_ratio_idc;
7099 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7101 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7103 if( aspect_ratio_info_present_flag ) {
7104 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den; otherwise indexed table lookup. */
7105 if( aspect_ratio_idc == EXTENDED_SAR ) {
7106 sps->sar.num= get_bits(&s->gb, 16);
7107 sps->sar.den= get_bits(&s->gb, 16);
7108 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7109 sps->sar= pixel_aspect[aspect_ratio_idc];
7111 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7118 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7120 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7121 get_bits1(&s->gb); /* overscan_appropriate_flag */
7124 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7125 get_bits(&s->gb, 3); /* video_format */
7126 get_bits1(&s->gb); /* video_full_range_flag */
7127 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7128 get_bits(&s->gb, 8); /* colour_primaries */
7129 get_bits(&s->gb, 8); /* transfer_characteristics */
7130 get_bits(&s->gb, 8); /* matrix_coefficients */
7134 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7135 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7136 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7139 sps->timing_info_present_flag = get_bits1(&s->gb);
7140 if(sps->timing_info_present_flag){
7141 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7142 sps->time_scale = get_bits_long(&s->gb, 32);
7143 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7146 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7147 if(nal_hrd_parameters_present_flag)
7148 decode_hrd_parameters(h, sps);
7149 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7150 if(vcl_hrd_parameters_present_flag)
7151 decode_hrd_parameters(h, sps);
7152 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7153 get_bits1(&s->gb); /* low_delay_hrd_flag */
7154 get_bits1(&s->gb); /* pic_struct_present_flag */
7156 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7157 if(sps->bitstream_restriction_flag){
7158 unsigned int num_reorder_frames;
7159 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7160 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7161 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7162 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7163 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7164 num_reorder_frames= get_ue_golomb(&s->gb);
7165 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* 16 is the DPB size limit; larger values are invalid. */
7167 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7168 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7172 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list (16 or 64 entries). If the present flag is 0 the
 * fallback list is copied; a first delta yielding next==0 selects the JVT
 * default list; otherwise deltas are decoded in zigzag order, with a zero
 * delta repeating the previous value ("last"). */
7178 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7179 const uint8_t *jvt_list, const uint8_t *fallback_list){
7180 MpegEncContext * const s = &h->s;
7181 int i, last = 8, next = 8;
7182 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7183 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7184 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7186 for(i=0;i<size;i++){
/* Deltas are signed Exp-Golomb, accumulated mod 256. */
7188 next = (last + get_se_golomb(&s->gb)) & 0xff;
7189 if(!i && !next){ /* matrix not written, we use the preset one */
7190 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7193 last = factors[scan[i]] = next ? next : last;
/* Parse all scaling matrices for an SPS or PPS. Fallback rules per the spec:
 * the first intra/inter lists fall back to SPS lists (for a PPS) or the JVT
 * defaults (for an SPS); subsequent lists fall back to the previous list of
 * the same class. If the top-level flag is absent in a PPS, SPS matrices are
 * copied wholesale. */
7197 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7198 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7199 MpegEncContext * const s = &h->s;
/* A PPS may inherit from an SPS that actually carried matrices. */
7200 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7201 const uint8_t *fallback[4] = {
7202 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7203 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7204 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7205 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7207 if(get_bits1(&s->gb)){
7208 sps->scaling_matrix_present |= is_sps;
7209 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7210 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7211 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7212 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7213 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7214 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables the 8x8 transform. */
7215 if(is_sps || pps->transform_8x8_mode){
7216 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7217 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7219 } else if(fallback_sps) {
7220 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7221 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7226 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Validates id < max, allocates a zeroed entry on first use, and returns the
 * slot. Logs and fails on out-of-range id or allocation failure.
 * NOTE(review): sparse extraction — the range check, reuse branch and
 * return statements are missing from this view. */
7229 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7230 const size_t size, const char *name)
7233 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7238 vec[id] = av_mallocz(size);
7240 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL: profile/level, high-profile extras
 * (chroma format, bit depths, scaling matrices), POC parameters, reference
 * frame count, picture dimensions in MBs, frame/MBAFF flags, cropping and
 * optional VUI. Validated values are stored into the SPS slot obtained from
 * alloc_parameter_set.
 * NOTE(review): sparse extraction — leading numbers are original line
 * numbers; error returns and some closing braces are missing. */
7245 static inline int decode_seq_parameter_set(H264Context *h){
7246 MpegEncContext * const s = &h->s;
7247 int profile_idc, level_idc;
7248 unsigned int sps_id, tmp, mb_width, mb_height;
7252 profile_idc= get_bits(&s->gb, 8);
7253 get_bits1(&s->gb); //constraint_set0_flag
7254 get_bits1(&s->gb); //constraint_set1_flag
7255 get_bits1(&s->gb); //constraint_set2_flag
7256 get_bits1(&s->gb); //constraint_set3_flag
7257 get_bits(&s->gb, 4); // reserved
7258 level_idc= get_bits(&s->gb, 8);
7259 sps_id= get_ue_golomb(&s->gb);
7261 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7265 sps->profile_idc= profile_idc;
7266 sps->level_idc= level_idc;
7268 if(sps->profile_idc >= 100){ //high profile
7269 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7270 get_bits1(&s->gb); //residual_color_transform_flag
7271 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7272 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7273 sps->transform_bypass = get_bits1(&s->gb);
7274 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7276 sps->scaling_matrix_present = 0;
7278 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7279 sps->poc_type= get_ue_golomb(&s->gb);
7281 if(sps->poc_type == 0){ //FIXME #define
7282 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7283 } else if(sps->poc_type == 1){//FIXME #define
7284 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7285 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7286 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7287 tmp= get_ue_golomb(&s->gb);
/* Bound the cycle length by the storage actually available in the SPS. */
7289 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7290 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7293 sps->poc_cycle_length= tmp;
7295 for(i=0; i<sps->poc_cycle_length; i++)
7296 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7297 }else if(sps->poc_type != 2){
7298 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7302 tmp= get_ue_golomb(&s->gb);
7303 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7304 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7307 sps->ref_frame_count= tmp;
7308 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7309 mb_width= get_ue_golomb(&s->gb) + 1;
7310 mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb_* against overflow before the generic dimension check. */
7311 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7312 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7313 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7316 sps->mb_width = mb_width;
7317 sps->mb_height= mb_height;
7319 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7320 if(!sps->frame_mbs_only_flag)
7321 sps->mb_aff= get_bits1(&s->gb);
7325 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7327 #ifndef ALLOW_INTERLACE
7329 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7331 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7332 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7334 sps->crop= get_bits1(&s->gb);
7336 sps->crop_left = get_ue_golomb(&s->gb);
7337 sps->crop_right = get_ue_golomb(&s->gb);
7338 sps->crop_top = get_ue_golomb(&s->gb);
7339 sps->crop_bottom= get_ue_golomb(&s->gb);
7340 if(sps->crop_left || sps->crop_top){
7341 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7343 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7344 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7350 sps->crop_bottom= 0;
7353 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7354 if( sps->vui_parameters_present_flag )
7355 decode_vui_parameters(h, sps);
7357 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7358 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7359 sps_id, sps->profile_idc, sps->level_idc,
7361 sps->ref_frame_count,
7362 sps->mb_width, sps->mb_height,
7363 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7364 sps->direct_8x8_inference_flag ? "8B8" : "",
7365 sps->crop_left, sps->crop_right,
7366 sps->crop_top, sps->crop_bottom,
7367 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Precompute one row of the PPS chroma-QP lookup table.
 * For every luma QP 0..51, the chroma QP is chroma_qp[clip(qp + offset, 0, 51)].
 * @param pps   PPS whose chroma_qp_table row is filled
 * @param t     row index; callers pass 0 for chroma_qp_index_offset[0] and
 *              1 for chroma_qp_index_offset[1] (see decode_picture_parameter_set)
 * @param index chroma_qp_index_offset to add to the luma QP before the lookup
 * NOTE(review): this chunk is elided — the return-type line and the
 * declaration of the loop variable `i` are not visible here.
 */
7374 build_qp_table(PPS *pps, int t, int index)
7377 for(i = 0; i < 52; i++)
7378 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) NAL unit from s->gb.
 * Allocates/replaces the PPS slot selected by the parsed pps_id, parses all
 * PPS syntax elements, derives the two chroma QP tables, and optionally
 * parses trailing transform_8x8_mode / scaling matrices / second chroma QP
 * offset when bits remain before bit_length.
 * @param bit_length number of RBSP payload bits available for this PPS
 * NOTE(review): file chunk is elided — error-return statements after the
 * range checks and several closing braces are not visible here.
 */
7381 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7382 MpegEncContext * const s = &h->s;
7383 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7386 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* The SPS this PPS refers to must already have been decoded. */
7390 tmp= get_ue_golomb(&s->gb);
7391 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7392 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7397 pps->cabac= get_bits1(&s->gb);
7398 pps->pic_order_present= get_bits1(&s->gb);
7399 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* More than one slice group means FMO, which this decoder does not
 * implement — the syntax below is only quoted from the spec. */
7400 if(pps->slice_group_count > 1 ){
7401 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7402 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7403 switch(pps->mb_slice_group_map_type){
7406 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7407 | run_length[ i ] |1 |ue(v) |
7412 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7414 | top_left_mb[ i ] |1 |ue(v) |
7415 | bottom_right_mb[ i ] |1 |ue(v) |
7423 | slice_group_change_direction_flag |1 |u(1) |
7424 | slice_group_change_rate_minus1 |1 |ue(v) |
7429 | slice_group_id_cnt_minus1 |1 |ue(v) |
7430 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7432 | slice_group_id[ i ] |1 |u(v) |
/* Default L0/L1 reference counts; clamp to sane values on overflow
 * instead of failing hard. */
7437 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7438 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7439 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7440 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7441 pps->ref_count[0]= pps->ref_count[1]= 1;
7445 pps->weighted_pred= get_bits1(&s->gb);
7446 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7447 pps->init_qp= get_se_golomb(&s->gb) + 26;
7448 pps->init_qs= get_se_golomb(&s->gb) + 26;
7449 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7450 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7451 pps->constrained_intra_pred= get_bits1(&s->gb);
7452 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7454 pps->transform_8x8_mode= 0;
7455 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Flat (all-16) scaling matrices as default; overridden below if the
 * bitstream carries explicit ones. */
7456 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7457 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Optional trailing PPS extension fields (High profile). */
7459 if(get_bits_count(&s->gb) < bit_length){
7460 pps->transform_8x8_mode= get_bits1(&s->gb);
7461 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7462 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7464 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7467 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7468 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7469 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7470 h->pps.chroma_qp_diff= 1;
7472 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7473 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7474 pps_id, pps->sps_id,
7475 pps->cabac ? "CABAC" : "CAVLC",
7476 pps->slice_group_count,
7477 pps->ref_count[0], pps->ref_count[1],
7478 pps->weighted_pred ? "weighted" : "",
7479 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7480 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7481 pps->constrained_intra_pred ? "CONSTR" : "",
7482 pps->redundant_pic_cnt_present ? "REDU" : "",
7483 pps->transform_8x8_mode ? "8x8DCT" : ""
7491 * Call decode_slice() for each context.
7493 * @param h h264 master context
7494 * @param context_count number of contexts to execute
/**
 * Run decode_slice() for each queued slice context.
 * Single context: call decode_slice directly. Multiple contexts: seed each
 * thread context's error-resilience settings, dispatch them all via
 * avctx->execute(), then copy position/state from the last context back to
 * the master context and accumulate the per-thread error counts.
 * @param h             h264 master context
 * @param context_count number of contexts to execute
 * NOTE(review): chunk is elided — the `int i;`/`H264Context *hx;`
 * declarations and several braces are not visible here.
 */
7496 static void execute_decode_slices(H264Context *h, int context_count){
7497 MpegEncContext * const s = &h->s;
7498 AVCodecContext * const avctx= s->avctx;
7502 if(context_count == 1) {
7503 decode_slice(avctx, h);
7505 for(i = 1; i < context_count; i++) {
7506 hx = h->thread_context[i];
7507 hx->s.error_resilience = avctx->error_resilience;
7508 hx->s.error_count = 0;
7511 avctx->execute(avctx, (void *)decode_slice,
7512 (void **)h->thread_context, NULL, context_count);
7514 /* pull back stuff from slices to master context */
7515 hx = h->thread_context[context_count - 1];
7516 s->mb_x = hx->s.mb_x;
7517 s->mb_y = hx->s.mb_y;
7518 s->dropable = hx->s.dropable;
7519 s->picture_structure = hx->s.picture_structure;
7520 for(i = 1; i < context_count; i++)
7521 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both AVC (length-prefixed, h->is_avc) and Annex-B (start-code
 * delimited) framing, unescapes each NAL via decode_nal(), and dispatches on
 * nal_unit_type (slices, partitioned slices DPA/DPB/DPC, SEI, SPS, PPS, …).
 * Slice contexts are queued up to h->max_contexts and flushed through
 * execute_decode_slices().
 * NOTE(review): chunk is elided — loop header, many braces, `break`s and
 * error returns are not visible here.
 */
7526 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7527 MpegEncContext * const s = &h->s;
7528 AVCodecContext * const avctx= s->avctx;
7530 H264Context *hx; ///< thread context
7531 int context_count = 0;
7533 h->max_contexts = avctx->thread_count;
/* Debug dump of the first 50 input bytes (behind a debug condition not
 * visible in this elided chunk). */
7536 for(i=0; i<50; i++){
7537 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Without CHUNKS, each call starts a fresh picture (unless we are waiting
 * for the second field of a pair). */
7540 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7541 h->current_slice = 0;
7542 if (!s->first_field)
7543 s->current_picture_ptr= NULL;
7555 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL size prefix byte by byte. */
7557 for(i = 0; i < h->nal_length_size; i++)
7558 nalsize = (nalsize << 8) | buf[buf_index++];
7559 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7564 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7569 // start code prefix search
7570 for(; buf_index + 3 < buf_size; buf_index++){
7571 // This should always succeed in the first iteration.
7572 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7576 if(buf_index+3 >= buf_size) break;
7581 hx = h->thread_context[context_count];
7583 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7584 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes before locating the rbsp_stop_bit.
 * NOTE(review): ptr[dst_length - 1] is evaluated BEFORE the
 * dst_length > 0 guard — out-of-bounds read when dst_length == 0;
 * the two operands should be swapped. */
7587 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7589 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7591 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7592 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7595 if (h->is_avc && (nalsize != consumed)){
7596 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7600 buf_index += consumed;
/* Skip non-reference NALs when hurry_up / skip_frame asks for it. */
7602 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7603 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7608 switch(hx->nal_unit_type){
/* IDR slice: all slices of the access unit must be IDR. */
7610 if (h->nal_unit_type != NAL_IDR_SLICE) {
7611 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7614 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular (non-partitioned) slice. */
7616 init_get_bits(&hx->s.gb, ptr, bit_length);
7618 hx->inter_gb_ptr= &hx->s.gb;
7619 hx->s.data_partitioning = 0;
7621 if((err = decode_slice_header(hx, h)))
7624 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7625 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7626 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7627 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7628 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7629 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partitioning: DPA carries the slice header … */
7633 init_get_bits(&hx->s.gb, ptr, bit_length);
7635 hx->inter_gb_ptr= NULL;
7636 hx->s.data_partitioning = 1;
7638 err = decode_slice_header(hx, h);
/* … DPB the intra residual … */
7641 init_get_bits(&hx->intra_gb, ptr, bit_length);
7642 hx->intra_gb_ptr= &hx->intra_gb;
/* … and DPC the inter residual; only then can the slice be queued. */
7645 init_get_bits(&hx->inter_gb, ptr, bit_length);
7646 hx->inter_gb_ptr= &hx->inter_gb;
7648 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7649 && s->context_initialized
7651 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7652 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7653 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7654 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI NAL (handler call elided in this chunk). */
7658 init_get_bits(&s->gb, ptr, bit_length);
/* SPS NAL. */
7662 init_get_bits(&s->gb, ptr, bit_length);
7663 decode_seq_parameter_set(h);
7665 if(s->flags& CODEC_FLAG_LOW_DELAY)
7668 if(avctx->has_b_frames < 2)
7669 avctx->has_b_frames= !s->low_delay;
/* PPS NAL. */
7672 init_get_bits(&s->gb, ptr, bit_length);
7674 decode_picture_parameter_set(h, bit_length);
7678 case NAL_END_SEQUENCE:
7679 case NAL_END_STREAM:
7680 case NAL_FILLER_DATA:
7682 case NAL_AUXILIARY_SLICE:
7685 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush the queued slice contexts once the queue is full. */
7688 if(context_count == h->max_contexts) {
7689 execute_decode_slices(h, context_count);
7694 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7696 /* Slice could not be decoded in parallel mode, copy down
7697 * NAL unit stuff to context 0 and restart. Note that
7698 * rbsp_buffer is not transferred, but since we no longer
7699 * run in parallel mode this should not be an issue. */
7700 h->nal_unit_type = hx->nal_unit_type;
7701 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any remaining queued slices before returning. */
7707 execute_decode_slices(h, context_count);
7712 * returns the number of bytes consumed for building the current frame
/**
 * Map the internal parse position to the number of input bytes consumed for
 * the current frame, compensating for data buffered by the truncated-input
 * parser (CODEC_FLAG_TRUNCATED) and clamping to sane bounds.
 * NOTE(review): chunk is elided — the non-truncated branch and the final
 * return statement are not visible here.
 */
7714 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7715 if(s->flags&CODEC_FLAG_TRUNCATED){
7716 pos -= s->parse_context.last_index;
7717 if(pos<0) pos=0; // FIXME remove (unneeded?)
7721 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7722 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * AVCodec.decode entry point for H.264.
 * Optionally reassembles truncated input, parses avcC extradata on the first
 * AVC-framed call, decodes all NAL units in buf, and — once a picture is
 * complete — performs reference-picture marking and reorders decoded frames
 * into display (POC) order through the h->delayed_pic queue before returning
 * one AVFrame in *data.
 * @return number of input bytes consumed, or a negative value on error
 * NOTE(review): chunk is elided — numerous braces, early returns, and some
 * statements (e.g. pointer advances while walking the avcC) are not visible.
 */
7728 static int decode_frame(AVCodecContext *avctx,
7729 void *data, int *data_size,
7730 const uint8_t *buf, int buf_size)
7732 H264Context *h = avctx->priv_data;
7733 MpegEncContext *s = &h->s;
7734 AVFrame *pict = data;
7737 s->flags= avctx->flags;
7738 s->flags2= avctx->flags2;
/* CODEC_FLAG_TRUNCATED: buffer partial input until a frame boundary is
 * found, then decode the combined buffer. */
7740 if(s->flags&CODEC_FLAG_TRUNCATED){
7741 const int next= ff_h264_find_frame_end(h, buf, buf_size);
7742 assert((buf_size > 0) || (next == END_NOT_FOUND));
7744 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7746 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7749 /* no supplementary picture */
7750 if (buf_size == 0) {
/* Flush: on empty input, drain the lowest-POC picture from the
 * delayed-output queue. */
7754 //FIXME factorize this with the output code below
7755 out = h->delayed_pic[0];
7757 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7758 if(h->delayed_pic[i]->poc < out->poc){
7759 out = h->delayed_pic[i];
7763 for(i=out_idx; h->delayed_pic[i]; i++)
7764 h->delayed_pic[i] = h->delayed_pic[i+1];
7767 *data_size = sizeof(AVFrame);
7768 *pict= *(AVFrame*)out;
/* First call with AVC framing: parse SPS/PPS out of the avcC extradata. */
7774 if(h->is_avc && !h->got_avcC) {
7775 int i, cnt, nalsize;
7776 unsigned char *p = avctx->extradata;
7777 if(avctx->extradata_size < 7) {
7778 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7782 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7785 /* sps and pps in the avcC always have length coded with 2 bytes,
7786 so put a fake nal_length_size = 2 while parsing them */
7787 h->nal_length_size = 2;
7788 // Decode sps from avcC
7789 cnt = *(p+5) & 0x1f; // Number of sps
7791 for (i = 0; i < cnt; i++) {
7792 nalsize = AV_RB16(p) + 2;
7793 if(decode_nal_units(h, p, nalsize) < 0) {
7794 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7799 // Decode pps from avcC
7800 cnt = *(p++); // Number of pps
7801 for (i = 0; i < cnt; i++) {
7802 nalsize = AV_RB16(p) + 2;
7803 if(decode_nal_units(h, p, nalsize) != nalsize) {
7804 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7809 // Now store right nal length size, that will be use to parse all other nals
7810 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7811 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in extradata too — decode it once. */
7815 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7816 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7820 buf_index=decode_nal_units(h, buf, buf_size);
7824 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7825 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7826 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Picture complete (or CHUNKS mode reached the last MB row): finish it
 * and pick the next frame to output in POC order. */
7830 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7831 Picture *out = s->current_picture_ptr;
7832 Picture *cur = s->current_picture_ptr;
7833 int i, pics, cross_idr, out_of_order, out_idx;
7837 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7838 s->current_picture_ptr->pict_type= s->pict_type;
7841 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/* Save POC/frame_num state needed to decode the next picture. */
7842 h->prev_poc_msb= h->poc_msb;
7843 h->prev_poc_lsb= h->poc_lsb;
7845 h->prev_frame_num_offset= h->frame_num_offset;
7846 h->prev_frame_num= h->frame_num;
7849 * FIXME: Error handling code does not seem to support interlaced
7850 * when slices span multiple rows
7851 * The ff_er_add_slice calls don't work right for bottom
7852 * fields; they cause massive erroneous error concealing
7853 * Error marking covers both fields (top and bottom).
7854 * This causes a mismatched s->error_count
7855 * and a bad error table. Further, the error count goes to
7856 * INT_MAX when called for bottom field, because mb_y is
7857 * past end by one (callers fault) and resync_mb_y != 0
7858 * causes problems for the first MB line, too.
7865 if (s->first_field) {
7866 /* Wait for second field. */
7870 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7871 /* Derive top_field_first from field pocs. */
7872 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7874 //FIXME do something with unavailable reference frames
7876 /* Sort B-frames into display order */
7878 if(h->sps.bitstream_restriction_flag
7879 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7880 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Without bitstream restrictions we must assume the worst-case delay
 * in strict-compliance mode. */
7884 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7885 && !h->sps.bitstream_restriction_flag){
7886 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7891 while(h->delayed_pic[pics]) pics++;
7893 assert(pics <= MAX_DELAYED_PIC_COUNT);
7895 h->delayed_pic[pics++] = cur;
/* Keep the picture alive while it waits in the output queue. */
7896 if(cur->reference == 0)
7897 cur->reference = DELAYED_PIC_REF;
7900 for(i=0; h->delayed_pic[i]; i++)
7901 if(h->delayed_pic[i]->poc==0)
/* Candidate for output: the queued picture with the smallest POC. */
7904 out = h->delayed_pic[0];
7906 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7907 if(h->delayed_pic[i]->poc < out->poc){
7908 out = h->delayed_pic[i];
7912 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7914 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
/* Grow the assumed reorder delay when out-of-order output is detected. */
7916 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7918 ((!cross_idr && out->poc > h->outputed_poc + 2)
7919 || cur->pict_type == FF_B_TYPE)))
7922 s->avctx->has_b_frames++;
7925 if(out_of_order || pics > s->avctx->has_b_frames){
7926 out->reference &= ~DELAYED_PIC_REF;
7927 for(i=out_idx; h->delayed_pic[i]; i++)
7928 h->delayed_pic[i] = h->delayed_pic[i+1];
7930 if(!out_of_order && pics > s->avctx->has_b_frames){
7931 *data_size = sizeof(AVFrame);
7933 h->outputed_poc = out->poc;
7934 *pict= *(AVFrame*)out;
7936 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7941 assert(pict->data[0] || !*data_size);
7942 ff_print_debug_info(s, pict);
7943 //printf("out %d\n", (int)pict->data[0]);
7946 /* Return the Picture timestamp as the frame number */
7947 /* we subtract 1 because it is added on utils.c */
7948 avctx->frame_number = s->picture_number - 1;
7950 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbour-availability flags for the current MB:
 * a neighbour is available when it lies inside the picture and belongs to
 * the same slice (slice_table entry equals h->slice_num).
 * Indices 0..2 are the top-left/top/top-right row, 3 is the left neighbour.
 * NOTE(review): chunk is elided — the original-file gap before index 0
 * presumably held an `s->mb_y` guard for the top row (TODO confirm);
 * without it the [mb_xy - mb_stride] accesses would underflow on row 0.
 */
7953 static inline void fill_mb_avail(H264Context *h){
7954 MpegEncContext * const s = &h->s;
7955 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7958 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7959 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7960 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7966 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7967 h->mb_avail[4]= 1; //FIXME move out
7968 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self-test fragment (the enclosing test main() starts in an
 * elided part of this chunk — its header is not visible here).
 * It exercises, in order: unsigned/signed Exp-Golomb round trips,
 * the 4x4 (I)DCT, the quantizer, and NAL escaping/unescaping
 * (encode_nal/decode_nal round trip).
 */
7976 #define SIZE (COUNT*40)
7982 // int int_temp[10000];
7984 AVCodecContext avctx;
7986 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: write 0..COUNT-1 then read them back --- */
7988 init_put_bits(&pb, temp, SIZE);
7989 printf("testing unsigned exp golomb\n");
7990 for(i=0; i<COUNT; i++){
7992 set_ue_golomb(&pb, i);
7993 STOP_TIMER("set_ue_golomb");
7995 flush_put_bits(&pb);
7997 init_get_bits(&gb, temp, 8*SIZE);
7998 for(i=0; i<COUNT; i++){
8001 s= show_bits(&gb, 24);
8004 j= get_ue_golomb(&gb);
8006 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8009 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb: write/read values centred on zero --- */
8013 init_put_bits(&pb, temp, SIZE);
8014 printf("testing signed exp golomb\n");
8015 for(i=0; i<COUNT; i++){
8017 set_se_golomb(&pb, i - COUNT/2);
8018 STOP_TIMER("set_se_golomb");
8020 flush_put_bits(&pb);
8022 init_get_bits(&gb, temp, 8*SIZE);
8023 for(i=0; i<COUNT; i++){
8026 s= show_bits(&gb, 24);
8029 j= get_se_golomb(&gb);
8030 if(j != i - COUNT/2){
8031 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8034 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round trip on random residuals; report mean and
 * maximum reconstruction error --- */
8038 printf("testing 4x4 (I)DCT\n");
8041 uint8_t src[16], ref[16];
8042 uint64_t error= 0, max_error=0;
8044 for(i=0; i<COUNT; i++){
8046 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8047 for(j=0; j<16; j++){
8048 ref[j]= random()%255;
8049 src[j]= random()%255;
8052 h264_diff_dct_c(block, src, ref, 4);
/* Approximate quantize/dequantize scaling of the coefficients. */
8055 for(j=0; j<16; j++){
8056 // printf("%d ", block[j]);
8057 block[j]= block[j]*4;
8058 if(j&1) block[j]= (block[j]*4 + 2)/5;
8059 if(j&4) block[j]= (block[j]*4 + 2)/5;
8063 s->dsp.h264_idct_add(ref, block, 4);
8064 /* for(j=0; j<16; j++){
8065 printf("%d ", ref[j]);
8069 for(j=0; j<16; j++){
8070 int diff= FFABS(src[j] - ref[j]);
8073 max_error= FFMAX(max_error, diff);
8076 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values --- */
8077 printf("testing quantizer\n");
8078 for(qp=0; qp<52; qp++){
8080 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: build a random bitstream with injected zero runs,
 * escape it with encode_nal, unescape with decode_nal, and verify the
 * length, consumed count and payload all match --- */
8083 printf("Testing NAL layer\n");
8085 uint8_t bitstream[COUNT];
8086 uint8_t nal[COUNT*2];
8088 memset(&h, 0, sizeof(H264Context));
8090 for(i=0; i<COUNT; i++){
8098 for(j=0; j<COUNT; j++){
8099 bitstream[j]= (random() % 255) + 1;
8102 for(j=0; j<zeros; j++){
8103 int pos= random() % COUNT;
8104 while(bitstream[pos] == 0){
8113 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8115 printf("encoding failed\n");
8119 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8123 if(out_length != COUNT){
8124 printf("incorrect length %d %d\n", out_length, COUNT);
8128 if(consumed != nal_length){
8129 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8133 if(memcmp(bitstream, out, COUNT)){
8134 printf("mismatch\n");
8140 printf("Testing RBSP\n");
/**
 * AVCodec.close callback: release per-decoder resources — both RBSP
 * unescape buffers and the macroblock tables allocated at init.
 * NOTE(review): chunk is elided — the MPV/common cleanup call and the
 * return statement are not visible here.
 */
8148 static av_cold int decode_end(AVCodecContext *avctx)
8150 H264Context *h = avctx->priv_data;
8151 MpegEncContext *s = &h->s;
8153 av_freep(&h->rbsp_buffer[0]);
8154 av_freep(&h->rbsp_buffer[1]);
8155 free_tables(h); //FIXME cleanup init stuff perhaps
8158 // memset(h, 0, sizeof(H264Context));
/*
 * Codec registration entry for the H.264 decoder.
 * Capabilities: direct rendering (DR1), truncated-input reassembly, and
 * delayed output (needed for the POC-ordered delayed_pic queue above).
 * NOTE(review): several initializer fields (name, type, id, callbacks)
 * fall in elided lines of this chunk.
 */
8164 AVCodec h264_decoder = {
8168 sizeof(H264Context),
8173 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8175 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),