2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC code tables (coeff_token, total_zeros, run_before and their chroma-DC
 * variants). NOTE(review): presumably built once during decoder init and
 * read-only afterwards -- confirm against the VLC init code (not in view). */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations: SVQ3 shares this decoder's infrastructure, and the
 * loop-filter entry points are defined later in the file. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
/**
 * ff_rem6[qp] == qp % 6 for qp in [0,51].
 * Lookup table so the dequantisation code can split a QP into its scaling
 * table index (qp%6) without a runtime division.
 * Fix: the initializer had lost its closing "};".
 */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/**
 * ff_div6[qp] == qp / 6 for qp in [0,51].
 * Companion of ff_rem6: gives the right-shift amount used during
 * dequantisation without a runtime division.
 * Fix: the initializer had lost its closing "};".
 */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * fill_caches(): loads the per-macroblock prediction caches (intra sample
 * availability, intra4x4 modes, non-zero-count, motion vectors, reference
 * indices, mvd, B-direct flags) from the stored state of the neighbouring
 * macroblocks, handling MBAFF frame/field neighbour remapping.
 * NOTE(review): this excerpt is garbled -- stray original line numbers are
 * embedded at the start of each line and several lines (closing braces,
 * loop headers, else branches) are missing. Comments below annotate only
 * the visible logic; do not treat the block as compilable as-is.
 */
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
88 int topleft_partition= -1;
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour indices: left, topleft, top, topright. */
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF neighbour remapping: when the current MB pair and a neighbour pair
 * differ in frame/field coding, the neighbour index must be shifted by one
 * MB row so prediction reads the spatially-correct half of the pair. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
/* Publish the resolved neighbour indices for later use (deblocking etc.). */
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a slice_table entry below 255 marks a decoded MB, so
 * neighbours are usable even across slice boundaries here. */
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
/* Reload this MB's own luma nnz bits and motion data into the caches. */
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Normal decode path: neighbours count only when in the same slice. */
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction sample-availability bitmasks; start from "everything
 * available" and clear bits for missing/constrained neighbours. */
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 mode cache: copy the bottom row of the top neighbour and the
 * right column of the left neighbour(s); -1 / DC_PRED fallbacks elsewhere. */
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
/* Non-zero coefficient count cache from the top neighbour (luma row 3 and
 * the chroma DC/AC positions); 0 or 64 markers when unavailable. */
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern state (top/left CBP). */
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter path: fill motion-vector and reference caches from neighbours. */
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
/* Top neighbour: its bottom row of 4 MVs and bottom pair of refs. */
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left neighbour(s): two MV/ref pairs per left MB half. */
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* Topleft / topright corner MVs (topleft_partition adjusts the MBAFF case). */
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: per-8x8 direct-mode flags from the top/left neighbours. */
484 if(h->slice_type == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: MAP_F2F rescales neighbour refs/MVs between frame and field
 * coordinate systems (ref index doubles/halves, vertical MV halves/doubles). */
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
/* Remember neighbours' 8x8 DCT usage for the transform-size CABAC context. */
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 MpegEncContext * const s = &h->s;
554 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
556 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
557 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
558 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
559 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
560 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
561 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
562 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * check_intra4x4_pred_mode(): validates the cached intra4x4 modes against
 * the availability masks; unavailable directional modes are remapped to a
 * DC fallback via the static top[]/left[] tables, -1 entries are errors.
 * NOTE(review): excerpt is garbled -- loop headers, the "status<0" error
 * returns and the final "return 0;" are missing, and each line carries a
 * stray embedded line number.
 */
566 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
568 static inline int check_intra4x4_pred_mode(H264Context *h){
569 MpegEncContext * const s = &h->s;
/* Remap tables: index = requested mode; -1 keeps the mode, negative-other
 * values signal an invalid mode for that missing neighbour. */
570 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
571 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top row unavailable: remap the four modes of the top 4x4 row. */
574 if(!(h->top_samples_available&0x8000)){
576 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
578 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left column unavailable: remap the four modes of the left 4x4 column. */
586 if(!(h->left_samples_available&0x8000)){
588 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
590 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
593 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
599 } //FIXME cleanup like next
/**
 * check_intra_pred_mode(): same idea as check_intra4x4_pred_mode but for a
 * whole-MB (16x16 luma / 8x8 chroma) prediction mode: validates the range
 * and substitutes a DC variant when the needed neighbour is unavailable.
 * NOTE(review): excerpt is garbled -- the mode reassignments, error returns
 * and "return mode;" are missing, plus stray embedded line numbers.
 */
602 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
604 static inline int check_intra_pred_mode(H264Context *h, int mode){
605 MpegEncContext * const s = &h->s;
/* Remap tables indexed by mode; negative-other values are hard errors. */
606 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
607 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
610 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
614 if(!(h->top_samples_available&0x8000)){
617 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
622 if(!(h->left_samples_available&0x8000)){
625 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 * gets the predicted intra4x4 prediction mode.
636 static inline int pred_intra_mode(H264Context *h, int n){
637 const int index8= scan8[n];
638 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640 const int min= FFMIN(left, top);
642 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
644 if(min<0) return DC_PRED;
/**
 * write_back_non_zero_count(): stores the right column / bottom row of the
 * non-zero-count cache (luma + chroma positions) back into per-macroblock
 * storage, and packs all 16 luma nnz flags into a 16-bit word at index 14
 * for the deblocker.
 * NOTE(review): excerpt is garbled -- the surrounding conditional for the
 * packed-flags section, the loop header and closing braces are missing,
 * and each line carries a stray embedded line number.
 */
648 static inline void write_back_non_zero_count(H264Context *h){
649 MpegEncContext * const s = &h->s;
650 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Luma edge (indices 0-6) from the cache's bottom row / right column. */
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma positions (indices 7-12). */
660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
669 // store all luma nnzs, for deblocking
672 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
673 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
678 * gets the predicted number of non zero coefficients.
679 * @param n block index
681 static inline int pred_non_zero_count(H264Context *h, int n){
682 const int index8= scan8[n];
683 const int left= h->non_zero_count_cache[index8 - 1];
684 const int top = h->non_zero_count_cache[index8 - 8];
687 if(i<64) i= (i+1)>>1;
689 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * fetch_diagonal_mv(): returns (via *C and the return value) the top-right
 * neighbour MV/ref for MV prediction, falling back to the top-left when
 * the top-right partition is unavailable. Contains special MBAFF handling
 * that rescales MVs/refs between frame and field neighbours in place.
 * NOTE(review): excerpt is garbled -- several condition lines, braces and
 * the #undef of SET_DIAG_MV are missing; stray line numbers are embedded.
 */
694 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
695 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
696 MpegEncContext *s = &h->s;
698 /* there is no consistent mapping of mvs to neighboring locations that will
699 * make mbaff happy, so we can't move all this logic to fill_caches */
701 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot scan8[0]-2 holds a locally rescaled MV for the caller. */
703 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
704 *C = h->mv_cache[list][scan8[0]-2];
707 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
708 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
709 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: fetch the MV/ref at 4x4 position (x4,y4) from the current
 * picture, store the (re)scaled MV in the scratch slot and return the
 * correspondingly scaled reference index. */
710 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
711 const int x4 = X4, y4 = Y4;\
712 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
713 if(!USES_LIST(mb_type,list))\
714 return LIST_NOT_USED;\
715 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
716 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
717 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
718 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
720 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* Top-right unavailable: try the left neighbour column instead. */
723 if(topright_ref == PART_NOT_AVAILABLE
724 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
725 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
727 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
728 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
731 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
733 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
734 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF (or no special case hit): plain cache lookups. */
740 if(topright_ref != PART_NOT_AVAILABLE){
741 *C= h->mv_cache[list][ i - 8 + part_width ];
744 tprintf(s->avctx, "topright MV not available\n");
/* Fall back to the top-left neighbour. */
746 *C= h->mv_cache[list][ i - 8 - 1 ];
747 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * pred_motion(): median MV prediction per H.264 -- predictor is the
 * component-wise median of left (A), top (B) and top-right/diagonal (C)
 * neighbours; single-match and unavailability special cases select one
 * neighbour directly.
 * NOTE(review): excerpt is garbled -- the single-match branch bodies, the
 * left-only special case assignments and closing braces are missing, with
 * stray embedded line numbers on every line.
 */
752 * gets the predicted MV.
753 * @param n the block index
754 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
755 * @param mx the x component of the predicted motion vector
756 * @param my the y component of the predicted motion vector
758 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
759 const int index8= scan8[n];
760 const int top_ref= h->ref_cache[list][ index8 - 8 ];
761 const int left_ref= h->ref_cache[list][ index8 - 1 ];
762 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
763 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
765 int diagonal_ref, match_count;
767 assert(part_width==1 || part_width==2 || part_width==4);
/* C (and its ref) come from the top-right, or top-left as fallback. */
777 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
778 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
779 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
780 if(match_count > 1){ //most common
781 *mx= mid_pred(A[0], B[0], C[0]);
782 *my= mid_pred(A[1], B[1], C[1]);
783 }else if(match_count==1){
/* Exactly one neighbour uses this ref: take it verbatim. */
787 }else if(top_ref==ref){
/* No single match: median unless only the left neighbour exists. */
795 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
799 *mx= mid_pred(A[0], B[0], C[0]);
800 *my= mid_pred(A[1], B[1], C[1]);
804 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * pred_16x8_motion(): directional predictor for 16x8 partitions -- the top
 * partition prefers the top neighbour (B), the bottom partition the left
 * neighbour (A), falling back to the generic median predictor otherwise.
 * NOTE(review): excerpt is garbled -- partition selection conditionals,
 * the *mx/*my assignments and returns are missing, with stray embedded
 * line numbers on every line.
 */
808 * gets the directionally predicted 16x8 MV.
809 * @param n the block index
810 * @param mx the x component of the predicted motion vector
811 * @param my the y component of the predicted motion vector
/* Top 16x8 half: prefer B (top neighbour) when it uses the same ref. */
813 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
815 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
816 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
818 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Bottom 16x8 half: prefer A (left neighbour) when it uses the same ref. */
826 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
827 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
829 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* No directional match: fall back to the median predictor. */
839 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * pred_8x16_motion(): directional predictor for 8x16 partitions -- the left
 * partition prefers the left neighbour (A), the right partition the
 * diagonal neighbour (C), falling back to the median predictor otherwise.
 * NOTE(review): excerpt is garbled -- partition selection conditionals,
 * assignments and returns are missing, with stray embedded line numbers.
 */
843 * gets the directionally predicted 8x16 MV.
844 * @param n the block index
845 * @param mx the x component of the predicted motion vector
846 * @param my the y component of the predicted motion vector
/* Left 8x16 half: prefer A (left neighbour) when it uses the same ref. */
848 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
850 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
851 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
853 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Right 8x16 half: prefer the diagonal neighbour C. */
864 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
866 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
868 if(diagonal_ref == ref){
/* No directional match: fall back to the median predictor. */
876 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * pred_pskip_motion(): MV prediction for P_Skip macroblocks -- the MV is
 * forced to (0,0) when a neighbour is unavailable or when a neighbour with
 * ref 0 already has a zero MV; otherwise the normal median predictor of
 * the whole MB is used.
 * NOTE(review): excerpt is garbled -- the "*mx=*my=0; return;" body of the
 * zero-MV case and closing braces are missing; stray line numbers embedded.
 */
879 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
880 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
881 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
883 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
/* Zero-MV conditions from the spec's P_Skip derivation. */
885 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
886 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
887 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* Otherwise: ordinary 16x16 median prediction with ref 0, list 0. */
893 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * direct_dist_scale_factor(): precomputes, per list-0 reference, the
 * temporal-direct distance scale factor (tb*tx+32)>>6 clipped to
 * [-1024,1023], with 256 (unit scale) when the POC distance td is 0.
 * Also mirrors the values into the field array (visible tail; the guarding
 * condition for that section is missing from this excerpt).
 * NOTE(review): excerpt is garbled -- closing braces and the FRAME_MBAFF
 * conditional are missing; stray line numbers embedded.
 */
898 static inline void direct_dist_scale_factor(H264Context * const h){
899 const int poc = h->s.current_picture_ptr->poc;
900 const int poc1 = h->ref_list[1][0].poc;
902 for(i=0; i<h->ref_count[0]; i++){
903 int poc0 = h->ref_list[0][i].poc;
/* td = POC distance between the L1 anchor and this L0 reference. */
904 int td = av_clip(poc1 - poc0, -128, 127);
905 if(td == 0 /* FIXME || pic0 is a long-term ref */){
906 h->dist_scale_factor[i] = 256;
908 int tb = av_clip(poc - poc0, -128, 127);
909 int tx = (16384 + (FFABS(td) >> 1)) / td;
910 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Duplicate per-frame factors for both field parities. */
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor_field[2*i] =
916 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * direct_ref_list_init(): records the current picture's reference counts
 * and POCs, then builds map_col_to_list0[][], which maps each reference of
 * the co-located (list-1 anchor) picture to the current list-0 reference
 * with the same POC -- needed for temporal direct mode. The tail mirrors
 * the mapping into the per-field arrays.
 * NOTE(review): excerpt is garbled -- some braces, the early return's body
 * and the FRAME_MBAFF conditional are missing; stray line numbers embedded.
 */
920 static inline void direct_ref_list_init(H264Context * const h){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 Picture * const cur = s->current_picture_ptr;
925 if(cur->pict_type == FF_I_TYPE)
926 cur->ref_count[0] = 0;
927 if(cur->pict_type != FF_B_TYPE)
928 cur->ref_count[1] = 0;
/* Snapshot ref counts and POCs so future B frames can use them as the
 * co-located picture's data. */
929 for(list=0; list<2; list++){
930 cur->ref_count[list] = h->ref_count[list];
931 for(j=0; j<h->ref_count[list]; j++)
932 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* Mapping only needed for temporal direct B frames. */
934 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
936 for(list=0; list<2; list++){
937 for(i=0; i<ref1->ref_count[list]; i++){
938 const int poc = ref1->ref_poc[list][i];
939 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
940 for(j=0; j<h->ref_count[list]; j++)
941 if(h->ref_list[list][j].poc == poc){
942 h->map_col_to_list0[list][i] = j;
/* Duplicate the mapping for both field parities. */
948 for(list=0; list<2; list++){
949 for(i=0; i<ref1->ref_count[list]; i++){
950 j = h->map_col_to_list0[list][i];
951 h->map_col_to_list0_field[list][2*i] = 2*j;
952 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    /* Fill mv_cache/ref_cache for a B-direct macroblock, using either
     * spatial direct prediction (h->direct_spatial_mv_pred) or temporal
     * direct prediction (scaling the co-located MVs of ref_list[1][0]).
     * May rewrite *mb_type and the sub_mb_type entries to the partition
     * sizes actually derived. */
    MpegEncContext * const s = &h->s;
    const int mb_xy =   s->mb_x +   s->mb_y*s->mb_stride;
    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;   /* position in 8x8-block units */
    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;    /* position in 4x4-block units */
    const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy]; /* co-located MB type in L1[0] */
    /* co-located motion vectors / reference indices for both lists */
    const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
    const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
    const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
    const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
    if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
        /* FIXME save sub mb types from previous frames (or derive from MVs)
         * so we know exactly what block size to use */
        sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
    }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
        *mb_type =    MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
        *mb_type |= MB_TYPE_DIRECT2;
        *mb_type |= MB_TYPE_INTERLACED;

    tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);

    if(h->direct_spatial_mv_pred){
        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
        /* ref = min(neighbors) — left (A), top (B), top-right (C; falls back to
         * top-left when C is unavailable) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
            if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
        /* no valid neighbor reference in either list: direct-zero prediction */
        if(ref[0] < 0 && ref[1] < 0){
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
            for(list=0; list<2; list++){
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                    mv[list][0] = mv[list][1] = 0;
            /* list without a valid reference is dropped from the MB/sub-MB types */
            *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
            /* current and co-located MBs differ in field/frame coding: re-address the
             * co-located data so each 8x8 block reads from the matching parity */
            int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
            int mb_types_col[2];
            int b8_stride = h->b8_stride;
            int b4_stride = h->b_stride;

            *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;

            if(IS_INTERLACED(*mb_type)){
                mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
                    l1ref0 -= 2*b8_stride;
                    l1ref1 -= 2*b8_stride;
                    l1mv0  -= 4*b4_stride;
                    l1mv1  -= 4*b4_stride;
                int cur_poc = s->current_picture_ptr->poc;
                int *col_poc = h->ref_list[1]->field_poc;
                /* pick the co-located field whose POC is closer to the current picture */
                int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
                int dy = 2*col_parity - (s->mb_y&1);
                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
                l1ref0 += dy*b8_stride;
                l1ref1 += dy*b8_stride;
                l1mv0  += 2*dy*b4_stride;
                l1mv1  += 2*dy*b4_stride;

            for(i8=0; i8<4; i8++){
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* spec: co-located block refs L1[0]/L0[0] with a tiny MV => use zero MV */
                if(!IS_INTRA(mb_types_col[y8])
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                        a= pack16to32(mv[0][0],mv[0][1]);
                        b= pack16to32(mv[1][0],mv[1][1]);
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
        }else if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            /* the x264_build check works around an old x264 bug; 0 means "not x264" */
            if(!IS_INTRA(mb_type_col)
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* zero out sub-blocks whose co-located MV is (near) zero */
                if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
                                              || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
                                                  && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        for(i4=0; i4<4; i4++){
                            const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                            if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                                    *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;

        /* field macroblocks use the per-field remap/scale tables */
        if(IS_INTERLACED(*mb_type)){
            map_col_to_list0[0] = h->map_col_to_list0_field[0];
            map_col_to_list0[1] = h->map_col_to_list0_field[1];
            dist_scale_factor = h->dist_scale_factor_field;
        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
            /* FIXME assumes direct_8x8_inference == 1 */
            const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
            int mb_types_col[2];

            *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
                     | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
                     | (*mb_type & MB_TYPE_INTERLACED);
            sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;

            if(IS_INTERLACED(*mb_type)){
                /* frame to field scaling */
                mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
                    l1ref0 -= 2*h->b8_stride;
                    l1ref1 -= 2*h->b8_stride;
                    l1mv0  -= 4*h->b_stride;
                    l1mv1  -= 4*h->b_stride;
                if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
                   && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
                    *mb_type |= MB_TYPE_16x8;
                    *mb_type |= MB_TYPE_8x8;
                /* field to frame scaling */
                /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
                 * but in MBAFF, top and bottom POC are equal */
                int dy = (s->mb_y&1) ? 1 : 2;
                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
                l1ref0 += dy*h->b8_stride;
                l1ref1 += dy*h->b8_stride;
                l1mv0  += 2*dy*h->b_stride;
                l1mv1  += 2*dy*h->b_stride;
                if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
                    *mb_type |= MB_TYPE_16x16;
                    *mb_type |= MB_TYPE_8x8;

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_types_col[y8])){
                    /* intra co-located block: direct prediction uses ref 0 with zero MVs */
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
                    ref0 = map_col_to_list0[0][ref0*2>>y_shift];
                    ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                    const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
                    int my_col = (mv_col[1]<<y_shift)/2;  /* vertical MV rescaled across field/frame */
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col    + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    /* L1 MV is the L0 MV minus the co-located MV (temporal direct) */
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col)){
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
                                                : map_col_to_list0[1][l1ref1[0]];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col)){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    ref0 = l1ref0[x8 + y8*h->b8_stride];
                    ref0 = map_col_to_list0[0][ref0];
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                        int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                        *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                            pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
static inline void write_back_motion(H264Context *h, int mb_type){
    /* Copy the per-MB motion data (MVs, reference indices, CABAC mvd values,
     * direct flags) from the decode caches back into the frame-wide tables of
     * the current picture. */
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;    /* 4x4-block position */
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;   /* 8x8-block position */

    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        if(!USES_LIST(mb_type, list))
            /* two 64-bit stores cover one row of four 16-bit MV pairs */
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        if( h->pps.cabac ) {
            if(IS_SKIP(mb_type))
                /* skipped MBs carry no MV differences */
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
                *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
            /* one reference index per 8x8 block */
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];

    if(h->slice_type == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            /* remember which 8x8 partitions used direct mode (CABAC context) */
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1376 * Decodes a network abstraction layer unit.
1377 * @param consumed is the number of bytes used as input
1378 * @param length is the length of the array
1379 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1380 * @returns decoded bytes, might be src+1 if no escapes
static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    /* Parse the NAL header and un-escape the RBSP: remove 00 00 03 emulation
     * prevention bytes.  Returns a pointer to the unescaped payload (may be
     * src+1 when no escapes are present, avoiding a copy). */
    // src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
    /* scan in steps of two: an escape sequence always contains a zero at an
     * even or odd position, so only every other byte needs checking */
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            /* startcode, so we must be past the end */

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
    dst= h->rbsp_buffer[bufidx];

//printf("decoding esc\n");
        //remove escapes (very rare 1:2^22)
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
            if(src[si+2]==3){ //escape
            }else //next start code

        dst[di++]= src[si++];

    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1446 * identifies the exact end of the bitstream
1447 * @return the length of the trailing, or 0 if damaged
/* Locate the rbsp_stop_one_bit to find the exact end of the bitstream. */
static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
    tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1463 * idct tranforms the 16 dc values and dequantize them.
1464 * @param qp quantization parameter
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    /* 4x4 Hadamard inverse transform of the 16 luma DC coefficients followed
     * by dequantization with qmul; results are written back to the DC
     * positions of the 16 4x4 blocks inside `block`. */
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

//memset(block, 64, 2*256);
        /* horizontal pass: butterflies over the four DC values of one row */
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];
        /* vertical pass plus dequant: (x*qmul + 128) >> 8 rounds to nearest */
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1504 * dct tranforms the 16 dc values.
1505 * @param qp quantization parameter ??? FIXME
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
    /* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
     * mirrors h264_luma_dc_dequant_idct_c without the dequant step. */
//  const int qmul= dequant_coeff[qp][0];
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

        /* horizontal butterfly pass into temp[] */
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        /* vertical butterfly pass; >>1 normalizes the transform gain */
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    /* 2x2 inverse Hadamard transform plus dequantization of the four chroma
     * DC coefficients, written back in place. */
    const int stride= 16*2;
    const int xStride= 16;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* butterfly outputs scaled by qmul; >>7 removes the quant scale */
    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
static void chroma_dc_dct_c(DCTELEM *block){
    /* Forward 2x2 Hadamard transform of the four chroma DC values
     * (encoder side counterpart of chroma_dc_dequant_idct_c). */
    const int stride= 16*2;
    const int xStride= 16;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
1590 * gets the chroma qp.
/* Map a luma qscale to the chroma QP via the PPS lookup table;
 * t selects the chroma component (Cb/Cr offsets differ). */
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
    return h->pps.chroma_qp_table[t][qscale & 0xff];
1596 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1597 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
    /* Quantize a block in scan order (encoder side).  bias is 1/3 of the
     * quant step for intra and 1/6 for inter; the threshold trick skips the
     * full rounding for coefficients that quantize to zero.  Returns the
     * index of the last non-zero coefficient. */
    const int * const quant_table= quant_coeff[qscale];
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
    const unsigned int threshold2= (threshold1<<1);

            /* DC with 2-bit finer quantization (QUANT_SHIFT-2) */
            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
            const unsigned int dc_threshold2= (dc_threshold1<<1);

            int level= block[0]*quant_coeff[qscale+18][0];
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
//                    last_non_zero = i;
            /* DC with 1-bit coarser quantization (QUANT_SHIFT+1) */
            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
            const unsigned int dc_threshold2= (dc_threshold1<<1);

            int level= block[0]*quant_table[0];
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
//                    last_non_zero = i;

        /* AC coefficients in scan order */
        const int j= scantable[i];
        int level= block[j]*quant_table[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
                level= (bias + level)>>QUANT_SHIFT;
                level= (bias - level)>>QUANT_SHIFT;

    return last_non_zero;
/* Motion compensation for one partition in one direction (one list):
 * quarter-pel luma interpolation and eighth-pel chroma interpolation,
 * with edge emulation when the MV points outside the picture. */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;  /* quarter-pel x */
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;  /* quarter-pel y */
    const int luma_xy= (mx&3) + ((my&3)<<2);   /* fractional part selects the qpel filter */
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames

    /* fractional MVs need 3 extra filter-tap pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?

        /* non-square partition: second half at `delta` offset */
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

    if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;

        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);

    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cb= s->edge_emu_buffer;
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cr= s->edge_emu_buffer;
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation for one partition: L0 prediction with the
 * `put` functions, then (for bi-prediction) L1 averaged in with `avg`. */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance dest pointers to the partition, then convert offsets to
     * picture coordinates for the source fetch */
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);

        /* second direction averages on top of the first */
        chroma_op= chroma_avg;

        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);
/* Weighted motion compensation for one partition: explicit (use_weight==1)
 * or implicit (use_weight==2) weighted prediction, uni- or bi-directional. */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        /* bi-directional: predict L1 into a scratch buffer, then blend with L0 */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weights: w0 + w1 == 64, looked up per reference pair */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
            /* explicit weights/offsets signalled in the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        /* uni-directional: predict in place, then apply the weight */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch one partition to weighted or standard motion compensation.
 * Implicit bi-prediction with equal weights (32/32) is equivalent to a
 * plain average, so it takes the cheaper unweighted path. */
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1){
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
static inline void prefetch_motion(H264Context *h, int list){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
        /* estimated source position: current 16x16 MV plus 4-MB lookahead (+64 below) */
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        /* chroma planes are half-size; prefetch both via the plane gap */
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Perform motion compensation for a whole inter macroblock, walking the
 * partition tree (16x16 / 16x8 / 8x16 / 8x8 with 8x4, 4x8, 4x4 sub-parts)
 * and calling mc_part() with the matching qpel/chroma function sizes. */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                &weight_op[0], &weight_avg[0],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        /* top and bottom halves, each with its own direction flags */
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* left and right halves; delta is a row offset for the second block */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));

        assert(IS_8X8(mb_type));

            /* per-8x8 sub-partition dispatch */
            const int sub_mb_type= h->sub_mb_type[i];
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                assert(IS_SUB_4X4(sub_mb_type));
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));

    prefetch_motion(h, 1);
static av_cold void decode_init_vlc(void){
    /* One-time initialization of all CAVLC tables (coeff_token, total_zeros,
     * run_before and their chroma-DC variants); guarded by `done` since the
     * VLC tables are static and shared. */
    static int done = 0;

        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1, 1);

            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                     &coeff_token_len [i][0], 1, 1,
                     &coeff_token_bits[i][0], 1, 1, 1);

            init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
                     &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
        for(i=0; i<15; i++){
            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1, 1);

            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1, 1);
        /* zeros_left >= 7 uses a single dedicated table */
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1, 1);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread-context buffers, and release cached SPS/PPS.
 * Safe to call on partially-allocated state (av_freep handles NULL).
 */
1997 static void free_tables(H264Context *h){
2000 av_freep(&h->intra4x4_pred_mode);
2001 av_freep(&h->chroma_pred_mode_table);
2002 av_freep(&h->cbp_table);
2003 av_freep(&h->mvd_table[0]);
2004 av_freep(&h->mvd_table[1]);
2005 av_freep(&h->direct_table);
2006 av_freep(&h->non_zero_count);
2007 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear the derived pointer too */
2008 h->slice_table= NULL;
2010 av_freep(&h->mb2b_xy);
2011 av_freep(&h->mb2b8_xy);
2013 for(i = 0; i < MAX_SPS_COUNT; i++)
2014 av_freep(h->sps_buffers + i);
2016 for(i = 0; i < MAX_PPS_COUNT; i++)
2017 av_freep(h->pps_buffers + i);
/* per-thread buffers are owned by each thread context, not shared */
2019 for(i = 0; i < h->s.avctx->thread_count; i++) {
2020 hx = h->thread_context[i];
2022 av_freep(&hx->top_borders[1]);
2023 av_freep(&hx->top_borders[0]);
2024 av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization tables for all 52 QP values, applying the
 * PPS scaling matrices. If intra (i=0) and inter (i=1) scaling matrices
 * are identical, the inter table aliases the intra buffer to save work.
 * The coefficients are optionally stored transposed to match the layout
 * expected by the non-C IDCT implementation.
 */
2028 static void init_dequant8_coeff_table(H264Context *h){
2030 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2031 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2032 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2034 for(i=0; i<2; i++ ){
/* identical matrices: share table 0 and skip recomputation */
2035 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2036 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2040 for(q=0; q<52; q++){
/* qp = 6*div6[q] + rem6[q]; shift by div6, base coeff indexed by rem6 */
2041 int shift = ff_div6[q];
2042 int idx = ff_rem6[q];
2044 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2045 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2046 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb, Cr)
 * for all 52 QP values, applying the PPS scaling matrices. A matrix that
 * equals an earlier one aliases that earlier buffer instead of being
 * recomputed. Coefficients are stored transposed when a non-C IDCT is used.
 */
2051 static void init_dequant4_coeff_table(H264Context *h){
2053 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2054 for(i=0; i<6; i++ ){
2055 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier table when the scaling matrices match */
2057 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2058 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2065 for(q=0; q<52; q++){
/* +2: 4x4 dequant uses an extra scaling factor of 4 relative to 8x8 */
2066 int shift = ff_div6[q] + 2;
2067 int idx = ff_rem6[q];
2069 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2070 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2071 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequantization tables for the current PPS/SPS.
 * 8x8 tables are only needed when the PPS enables 8x8 transforms.
 * With lossless transform bypass, QP 0 entries are forced to the
 * identity scale (1<<6) so bypass blocks pass through unscaled.
 */
2076 static void init_dequant_tables(H264Context *h){
2078 init_dequant4_coeff_table(h);
2079 if(h->pps.transform_8x8_mode)
2080 init_dequant8_coeff_table(h);
2081 if(h->sps.transform_bypass){
2084 h->dequant4_coeff[i][0][x] = 1<<6;
2085 if(h->pps.transform_8x8_mode)
2088 h->dequant8_coeff[i][0][x] = 1<<6;
2095 * needs width/height
2097 static int alloc_tables(H264Context *h){
2098 MpegEncContext * const s = &h->s;
/* +1 row of padding above the frame for neighbour accesses */
2099 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2102 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2105 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2106 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2108 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2109 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2110 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2111 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset into the padded base array */
2113 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2114 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* precompute macroblock-index -> 4x4-block / 8x8-block index maps */
2116 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2117 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2118 for(y=0; y<s->mb_height; y++){
2119 for(x=0; x<s->mb_width; x++){
2120 const int mb_xy= x + y*s->mb_stride;
2121 const int b_xy = 4*x + 4*y*h->b_stride;
2122 const int b8_xy= 2*x + 2*y*h->b8_stride;
2124 h->mb2b_xy [mb_xy]= b_xy;
2125 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated later in frame_start() once linesize is known */
2129 s->obmc_scratchpad = NULL;
2131 if(!h->dequant4_coeff[0])
2132 init_dequant_tables(h);
2141 * Mimic alloc_tables(), but for every context thread.
 * Shares (does not copy) the big per-frame tables between thread contexts;
 * only truly per-thread state (scratchpad, prediction fn pointers) is
 * set up fresh for dst.
2143 static void clone_tables(H264Context *dst, H264Context *src){
2144 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2145 dst->non_zero_count = src->non_zero_count;
2146 dst->slice_table = src->slice_table;
2147 dst->cbp_table = src->cbp_table;
2148 dst->mb2b_xy = src->mb2b_xy;
2149 dst->mb2b8_xy = src->mb2b8_xy;
2150 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2151 dst->mvd_table[0] = src->mvd_table[0];
2152 dst->mvd_table[1] = src->mvd_table[1];
2153 dst->direct_table = src->direct_table;
/* allocated lazily per thread in frame_start() */
2155 dst->s.obmc_scratchpad = NULL;
2156 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2161 * Allocate buffers which are not shared amongst multiple threads.
2163 static int context_init(H264Context *h){
2164 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2165 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2169 return -1; // free_tables will clean up for us
/**
 * Initialization shared by all users of the H264Context (decoder, and
 * SVQ3 which reuses this codec): copy dimensions from the AVCodecContext,
 * set up intra prediction, and default both scaling matrices to flat 16
 * (i.e. no custom scaling until an SPS/PPS overrides them).
 */
2172 static av_cold void common_init(H264Context *h){
2173 MpegEncContext * const s = &h->s;
2175 s->width = s->avctx->width;
2176 s->height = s->avctx->height;
2177 s->codec_id= s->avctx->codec->id;
2179 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1: force dequant table rebuild when the first PPS is seen */
2181 h->dequant_coeff_pps= -1;
2182 s->unrestricted_mv=1;
2183 s->decode=1; //FIXME
2185 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2186 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback for the H.264 decoder.
 * Sets MpegEncContext defaults, output format, and detects AVCC-style
 * extradata (first byte == 1) vs. raw Annex B.
 * NOTE(review): the handling inside the extradata branch is elided in
 * this view — confirm against the full file.
 */
2189 static av_cold int decode_init(AVCodecContext *avctx){
2190 H264Context *h= avctx->priv_data;
2191 MpegEncContext * const s = &h->s;
2193 MPV_decode_defaults(s);
2198 s->out_format = FMT_H264;
2199 s->workaround_bugs= avctx->workaround_bugs;
2202 // s->decode_mb= ff_h263_decode_mb;
2203 s->quarter_sample = 1;
2205 avctx->pix_fmt= PIX_FMT_YUV420P;
/* leading byte 1 => avcC (MP4-style) extradata rather than Annex B */
2209 if(avctx->extradata_size > 0 && avctx->extradata &&
2210 *(char *)avctx->extradata == 1){
/* thread context 0 is the main context itself */
2217 h->thread_context[0] = h;
/**
 * Per-frame setup: start the MPV frame and error resilience, reset
 * key_frame (H.264 derives it from IDR markings, not pict_type),
 * precompute block offsets for frame and field geometry, and allocate
 * the per-thread bipred scratchpad now that linesize is known.
 * @return negative on MPV_frame_start failure
 */
2221 static int frame_start(H264Context *h){
2222 MpegEncContext * const s = &h->s;
2225 if(MPV_frame_start(s, s->avctx) < 0)
2227 ff_er_frame_start(s);
2229 * MPV_frame_start uses pict_type to derive key_frame.
2230 * This is incorrect for H.264; IDR markings must be used.
2231 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2232 * See decode_nal_units().
2234 s->current_picture_ptr->key_frame= 0;
2236 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame geometry; [24..47]: field (doubled stride) */
2238 for(i=0; i<16; i++){
2239 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2240 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2243 h->block_offset[16+i]=
2244 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2245 h->block_offset[24+16+i]=
2246 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2249 /* can't be in alloc_tables because linesize isn't known there.
2250 * FIXME: redo bipred weight to not require extra buffer? */
2251 for(i = 0; i < s->avctx->thread_count; i++)
2252 if(!h->thread_context[i]->s.obmc_scratchpad)
2253 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2255 /* some macroblocks will be accessed before they're available */
2256 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2257 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2259 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Save the right column (left_border for the next MB) and bottom row
 * (top_borders for the MB row below) of the just-decoded macroblock,
 * before the deblocking filter overwrites them.
 * Chroma is skipped in grayscale-only decoding.
 */
2263 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2264 MpegEncContext * const s = &h->s;
2268 src_cb -= uvlinesize;
2269 src_cr -= uvlinesize;
2271 // There are two lines saved, the line above the top macroblock of a pair,
2272 // and the line above the bottom macroblock
2273 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2274 for(i=1; i<17; i++){
2275 h->left_border[i]= src_y[15+i* linesize];
/* save the bottom luma row (16 bytes) as next row's top border */
2278 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2279 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2281 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2282 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2283 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2285 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2286 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2288 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2289 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved left/top border pixels with
 * the current macroblock's neighbours, so intra prediction sees unfiltered
 * neighbour samples while the picture itself keeps the deblocked ones.
 * With deblocking_filter==2 (slice-local filtering), borders are only
 * exchanged across same-slice neighbours.
 */
2293 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2294 MpegEncContext * const s = &h->s;
2301 if(h->deblocking_filter == 2) {
2302 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2303 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2304 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2306 deblock_left = (s->mb_x > 0);
2307 deblock_top = (s->mb_y > 0);
/* step back to the row above / column left of the MB */
2310 src_y -= linesize + 1;
2311 src_cb -= uvlinesize + 1;
2312 src_cr -= uvlinesize + 1;
2314 #define XCHG(a,b,t,xchg)\
2321 for(i = !deblock_top; i<17; i++){
2322 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2327 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2328 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right samples come from the next MB's saved top border */
2329 if(s->mb_x+1 < s->mb_width){
2330 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2334 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2336 for(i = !deblock_top; i<9; i++){
2337 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2338 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2342 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2343 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves borders for a whole
 * macroblock pair (two MBs stacked vertically), hence two top-border
 * lines and a 34-entry left border (2 + 2*16 luma rows).
 */
2348 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2349 MpegEncContext * const s = &h->s;
2352 src_y -= 2 * linesize;
2353 src_cb -= 2 * uvlinesize;
2354 src_cr -= 2 * uvlinesize;
2356 // There are two lines saved, the line above the top macroblock of a pair,
2357 // and the line above the bottom macroblock
2358 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2359 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2360 for(i=2; i<34; i++){
2361 h->left_border[i]= src_y[15+i* linesize];
/* bottom two luma rows of the pair become the next pair-row's top borders */
2364 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2365 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2366 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2367 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2369 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2370 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2371 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2372 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2373 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2374 for(i=2; i<18; i++){
2375 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2376 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2378 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2379 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2380 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2381 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap/restore the saved borders of a
 * macroblock pair with the neighbouring pixels, covering both top-border
 * lines and the doubled-height left border.
 */
2385 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2386 MpegEncContext * const s = &h->s;
2389 int deblock_left = (s->mb_x > 0);
/* > 1: the top row of pairs has no pair above it */
2390 int deblock_top = (s->mb_y > 1);
2392 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2394 src_y -= 2 * linesize + 1;
2395 src_cb -= 2 * uvlinesize + 1;
2396 src_cr -= 2 * uvlinesize + 1;
2398 #define XCHG(a,b,t,xchg)\
2405 for(i = (!deblock_top)<<1; i<34; i++){
2406 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2411 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2412 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2413 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2414 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2415 if(s->mb_x+1 < s->mb_width){
2416 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2417 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2421 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2423 for(i = (!deblock_top) << 1; i<18; i++){
2424 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2425 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2429 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2430 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2431 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2432 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT/add for luma and chroma, plus border bookkeeping and
 * the deblocking filter. 'simple' (compile-time constant via the inline
 * wrappers) disables the rarely-taken paths: MBAFF, PCM, SVQ3, grayscale.
 * NOTE(review): several lines are elided in this view; structure comments
 * below reflect the visible code only.
 */
2437 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2438 MpegEncContext * const s = &h->s;
2439 const int mb_x= s->mb_x;
2440 const int mb_y= s->mb_y;
2441 const int mb_xy= mb_x + mb_y*s->mb_stride;
2442 const int mb_type= s->current_picture.mb_type[mb_xy];
2443 uint8_t *dest_y, *dest_cb, *dest_cr;
2444 int linesize, uvlinesize /*dct_offset*/;
2446 int *block_offset = &h->block_offset[0];
2447 const unsigned int bottom = mb_y & 1;
2448 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2449 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2450 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers for this MB in the current picture */
2452 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2453 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2454 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2456 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2457 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use field block offsets,
 * and for the bottom field start one line down */
2459 if (!simple && MB_FIELD) {
2460 linesize = h->mb_linesize = s->linesize * 2;
2461 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2462 block_offset = &h->block_offset[24];
2463 if(mb_y&1){ //FIXME move out of this func?
2464 dest_y -= s->linesize*15;
2465 dest_cb-= s->uvlinesize*7;
2466 dest_cr-= s->uvlinesize*7;
/* remap ref indices to distinguish top/bottom field references */
2470 for(list=0; list<h->list_count; list++){
2471 if(!USES_LIST(mb_type, list))
2473 if(IS_16X16(mb_type)){
2474 int8_t *ref = &h->ref_cache[list][scan8[0]];
2475 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2477 for(i=0; i<16; i+=4){
2478 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2479 int ref = h->ref_cache[list][scan8[i]];
2481 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2487 linesize = h->mb_linesize = s->linesize;
2488 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2489 // dct_offset = s->linesize * 16;
/* select IDCT-add functions: bypass, 8x8, or 4x4 */
2492 if(transform_bypass){
2494 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2495 }else if(IS_8x8DCT(mb_type)){
2496 idct_dc_add = s->dsp.h264_idct8_dc_add;
2497 idct_add = s->dsp.h264_idct8_add;
2499 idct_dc_add = s->dsp.h264_idct_dc_add;
2500 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: swap in unfiltered pair borders before prediction */
2503 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2504 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2505 int mbt_y = mb_y&~1;
2506 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2507 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2508 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2509 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: copy raw samples straight from h->mb to the picture */
2512 if (!simple && IS_INTRA_PCM(mb_type)) {
2515 // The pixels are stored in h->mb array in the same order as levels,
2516 // copy them in output in the correct order.
2517 for(i=0; i<16; i++) {
2518 for (y=0; y<4; y++) {
2519 for (x=0; x<4; x++) {
2520 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2524 for(i=16; i<16+4; i++) {
2525 for (y=0; y<4; y++) {
2526 for (x=0; x<4; x++) {
2527 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2531 for(i=20; i<20+4; i++) {
2532 for (y=0; y<4; y++) {
2533 for (x=0; x<4; x++) {
2534 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra macroblock: spatial prediction (chroma, then luma 4x4/8x8/16x16) */
2539 if(IS_INTRA(mb_type)){
2540 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2541 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2543 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2544 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2545 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2548 if(IS_INTRA4x4(mb_type)){
2549 if(simple || !s->encoding){
2550 if(IS_8x8DCT(mb_type)){
2551 for(i=0; i<16; i+=4){
2552 uint8_t * const ptr= dest_y + block_offset[i];
2553 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2554 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2555 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2556 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only a DC coeff: cheaper DC-only add */
2558 if(nnz == 1 && h->mb[i*16])
2559 idct_dc_add(ptr, h->mb + i*16, linesize);
2561 idct_add(ptr, h->mb + i*16, linesize);
2565 for(i=0; i<16; i++){
2566 uint8_t * const ptr= dest_y + block_offset[i];
2568 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* fake missing top-right samples by replicating the last top pixel */
2571 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2572 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2573 assert(mb_y || linesize <= block_offset[i]);
2574 if(!topright_avail){
2575 tr= ptr[3 - linesize]*0x01010101;
2576 topright= (uint8_t*) &tr;
2578 topright= ptr + 4 - linesize;
2582 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2583 nnz = h->non_zero_count_cache[ scan8[i] ];
2586 if(nnz == 1 && h->mb[i*16])
2587 idct_dc_add(ptr, h->mb + i*16, linesize);
2589 idct_add(ptr, h->mb + i*16, linesize);
2591 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: predict whole luma block, then dequant/IDCT the DC plane */
2596 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2598 if(!transform_bypass)
2599 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2601 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2603 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2604 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation */
2606 hl_motion(h, dest_y, dest_cb, dest_cr,
2607 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2608 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2609 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals (intra4x4 residuals were added during prediction) */
2613 if(!IS_INTRA4x4(mb_type)){
2615 if(IS_INTRA16x16(mb_type)){
2616 for(i=0; i<16; i++){
2617 if(h->non_zero_count_cache[ scan8[i] ])
2618 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2619 else if(h->mb[i*16])
2620 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2623 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2624 for(i=0; i<16; i+=di){
2625 int nnz = h->non_zero_count_cache[ scan8[i] ];
2627 if(nnz==1 && h->mb[i*16])
2628 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2630 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2635 for(i=0; i<16; i++){
2636 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2637 uint8_t * const ptr= dest_y + block_offset[i];
2638 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals: dequant DC, then per-4x4 IDCT-add into Cb/Cr */
2644 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2645 uint8_t *dest[2] = {dest_cb, dest_cr};
2646 if(transform_bypass){
2647 idct_add = idct_dc_add = s->dsp.add_pixels4;
2649 idct_add = s->dsp.h264_idct_add;
2650 idct_dc_add = s->dsp.h264_idct_dc_add;
2651 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2652 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2655 for(i=16; i<16+8; i++){
2656 if(h->non_zero_count_cache[ scan8[i] ])
2657 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2658 else if(h->mb[i*16])
2659 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2662 for(i=16; i<16+8; i++){
2663 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2664 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2665 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: MBAFF filters the completed pair; otherwise filter this MB */
2671 if(h->deblocking_filter) {
2672 if (!simple && FRAME_MBAFF) {
2673 //FIXME try deblocking one mb at a time?
2674 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2675 const int mb_y = s->mb_y - 1;
2676 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2677 const int mb_xy= mb_x + mb_y*s->mb_stride;
2678 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2679 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2680 if (!bottom) return;
2681 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2682 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2683 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2685 if(IS_INTRA(mb_type_top | mb_type_bottom))
2686 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2688 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2692 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2693 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2694 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2695 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2696 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2699 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2700 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2701 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2702 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2703 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2705 tprintf(h->s.avctx, "call filter_mb\n");
2706 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2707 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2708 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2714 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * (simple=1 lets the compiler drop the MBAFF/PCM/SVQ3/gray paths entirely.)
2716 static void hl_decode_mb_simple(H264Context *h){
2717 hl_decode_mb_internal(h, 1);
2721 * Process a macroblock; this handles edge cases, such as interlacing.
 * (av_noinline keeps this rare path out of the hot simple path's code.)
2723 static void av_noinline hl_decode_mb_complex(H264Context *h){
2724 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the fast simple path or the
 * complex path, depending on whether any of the expensive features
 * (MBAFF, field MBs, PCM, non-H264 codec, grayscale, encoding) applies.
 */
2727 static void hl_decode_mb(H264Context *h){
2728 MpegEncContext * const s = &h->s;
2729 const int mb_x= s->mb_x;
2730 const int mb_y= s->mb_y;
2731 const int mb_xy= mb_x + mb_y*s->mb_stride;
2732 const int mb_type= s->current_picture.mb_type[mb_xy];
2733 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2739 hl_decode_mb_complex(h);
2740 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture (in place) into a single field of the given
 * parity: double the line strides, and for the bottom field advance the
 * data pointers by one line so they address the bottom-field samples.
 */
2743 static void pic_as_field(Picture *pic, const int parity){
2745 for (i = 0; i < 4; ++i) {
2746 if (parity == PICT_BOTTOM_FIELD)
2747 pic->data[i] += pic->linesize[i];
2748 pic->reference = parity;
2749 pic->linesize[i] *= 2;
/**
 * Copy src into dest as a field of the requested parity, if src is a
 * reference for that parity. id_add is added to the resulting pic_id
 * (used to distinguish top/bottom field pic_nums).
 * @return 1 if a field was emitted, 0 if src has no matching parity
 */
2753 static int split_field_copy(Picture *dest, Picture *src,
2754 int parity, int id_add){
2755 int match = !!(src->reference & parity);
2759 pic_as_field(dest, parity);
2761 dest->pic_id += id_add;
2768 * Split one reference list into field parts, interleaving by parity
2769 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2770 * set to look at the actual start of data for that field.
2772 * @param dest output list
2773 * @param dest_len maximum number of fields to put in dest
2774 * @param src the source reference list containing fields and/or field pairs
2775 * (aka short_ref/long_ref, or
2776 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2777 * @param src_len number of Picture's in source (pairs and unmatched fields)
2778 * @param parity the parity of the picture being decoded/needing
2779 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2780 * @return number of fields placed in dest
2782 static int split_field_half_ref_list(Picture *dest, int dest_len,
2783 Picture *src, int src_len, int parity){
/* alternate between same-parity and opposite-parity candidates,
 * preferring same parity first, as the spec's interleave requires */
2784 int same_parity = 1;
2790 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2791 if (same_parity && same_i < src_len) {
2792 field_output = split_field_copy(dest + out_i, src + same_i,
/* a failed copy (no matching field) keeps us on the same parity */
2794 same_parity = !field_output;
2797 } else if (opp_i < src_len) {
2798 field_output = split_field_copy(dest + out_i, src + opp_i,
2799 PICT_FRAME - parity, 0);
2800 same_parity = field_output;
2812 * Split the reference frame list into a reference field list.
2813 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2814 * The input list contains both reference field pairs and
2815 * unmatched reference fields; it is ordered as spec describes
2816 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2817 * unmatched field pairs are also present. Conceptually this is equivalent
2818 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2820 * @param dest output reference list where ordered fields are to be placed
2821 * @param dest_len max number of fields to place at dest
2822 * @param src source reference list, as described above
2823 * @param src_len number of pictures (pairs and unmatched fields) in src
2824 * @param parity parity of field being currently decoded
2825 * (one of PICT_{TOP,BOTTOM}_FIELD)
2826 * @param long_i index into src array that holds first long reference picture,
2827 * or src_len if no long refs present.
2829 static int split_field_ref_list(Picture *dest, int dest_len,
2830 Picture *src, int src_len,
2831 int parity, int long_i){
/* short-term refs first, then long-term refs appended after them */
2833 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2837 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2838 src_len - long_i, parity);
2843 * fills the default_ref_list.
 * Builds RefPicList0 (and, for B slices, RefPicList1) in the default
 * order before any reordering commands: POC-sorted around the current
 * picture for B slices, frame_num order for P slices, with long-term
 * refs appended. For field pictures the frame lists are afterwards
 * split into per-field lists via split_field_ref_list().
 * NOTE(review): several lines are elided in this view; comments describe
 * the visible code only.
2845 static int fill_default_ref_list(H264Context *h){
2846 MpegEncContext * const s = &h->s;
2848 int smallest_poc_greater_than_current = -1;
2850 Picture sorted_short_ref[32];
2851 Picture field_entry_list[2][32];
2852 Picture *frame_list[2];
/* field decoding: build frame lists into scratch arrays first, then
 * split into field lists; frame decoding writes default_ref_list directly */
2854 if (FIELD_PICTURE) {
2855 structure_sel = PICT_FRAME;
2856 frame_list[0] = field_entry_list[0];
2857 frame_list[1] = field_entry_list[1];
2860 frame_list[0] = h->default_ref_list[0];
2861 frame_list[1] = h->default_ref_list[1];
2864 if(h->slice_type==FF_B_TYPE){
2871 /* sort frame according to poc in B slice */
2872 for(out_i=0; out_i<h->short_ref_count; out_i++){
2874 int best_poc=INT_MAX;
/* selection sort: next-smallest POC above the running limit */
2876 for(i=0; i<h->short_ref_count; i++){
2877 const int poc= h->short_ref[i]->poc;
2878 if(poc > limit && poc < best_poc){
2884 assert(best_i != INT_MIN);
2887 sorted_short_ref[out_i]= *h->short_ref[best_i];
2888 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
2889 if (-1 == smallest_poc_greater_than_current) {
2890 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2891 smallest_poc_greater_than_current = out_i;
2896 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2898 // find the largest poc
/* L0 walks backwards (past first) from the split point, L1 forwards */
2899 for(list=0; list<2; list++){
2902 int step= list ? -1 : 1;
2904 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2906 while(j<0 || j>= h->short_ref_count){
2907 if(j != -99 && step == (list ? -1 : 1))
2910 j= smallest_poc_greater_than_current + (step>>1);
2912 sel = sorted_short_ref[j].reference | structure_sel;
2913 if(sel != PICT_FRAME) continue;
2914 frame_list[list][index ]= sorted_short_ref[j];
2915 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2917 short_len[list] = index;
/* append long-term refs, pic_id = long-term index */
2919 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2921 if(h->long_ref[i] == NULL) continue;
2922 sel = h->long_ref[i]->reference | structure_sel;
2923 if(sel != PICT_FRAME) continue;
2925 frame_list[ list ][index ]= *h->long_ref[i];
2926 frame_list[ list ][index++].pic_id= i;
2931 for(list=0; list<2; list++){
2933 len[list] = split_field_ref_list(h->default_ref_list[list],
2937 s->picture_structure,
2940 // swap the two first elements of L1 when L0 and L1 are identical
2941 if(list && len[0] > 1 && len[0] == len[1])
2942 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2944 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2948 if(len[list] < h->ref_count[ list ])
2949 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* P/SP slices: short-term refs in list order, then long-term refs */
2956 for(i=0; i<h->short_ref_count; i++){
2958 sel = h->short_ref[i]->reference | structure_sel;
2959 if(sel != PICT_FRAME) continue;
2960 frame_list[0][index ]= *h->short_ref[i];
2961 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2964 for(i = 0; i < 16; i++){
2966 if(h->long_ref[i] == NULL) continue;
2967 sel = h->long_ref[i]->reference | structure_sel;
2968 if(sel != PICT_FRAME) continue;
2969 frame_list[0][index ]= *h->long_ref[i];
2970 frame_list[0][index++].pic_id= i;
2974 index = split_field_ref_list(h->default_ref_list[0],
2975 h->ref_count[0], frame_list[0],
2976 index, s->picture_structure,
2979 if(index < h->ref_count[0])
2980 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2983 for (i=0; i<h->ref_count[0]; i++) {
2984 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2986 if(h->slice_type==FF_B_TYPE){
2987 for (i=0; i<h->ref_count[1]; i++) {
2988 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2995 static void print_short_term(H264Context *h);
2996 static void print_long_term(H264Context *h);
2999 * Extract structure information about the picture described by pic_num in
3000 * the current decoding context (frame or field). Note that pic_num is
3001 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3002 * @param pic_num picture number for which to extract structure information
3003 * @param structure one of PICT_XXX describing structure of picture
3005 * @return frame number (short term) or long term index of picture
3006 * described by pic_num
3008 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3009 MpegEncContext * const s = &h->s;
3011 *structure = s->picture_structure;
3014 /* opposite field */
3015 *structure ^= PICT_FRAME;
/**
 * Parses ref_pic_list_reordering() from the slice header and rebuilds
 * h->ref_list[] from h->default_ref_list[] according to the reordering
 * commands in the bitstream. Returns 0 on success, negative on error.
 * NOTE(review): several original lines (break/return paths, loop closers)
 * are elided from this partial listing.
 */
3022 static int decode_ref_pic_list_reordering(H264Context *h){
3023 MpegEncContext * const s = &h->s;
3024 int list, index, pic_structure;
3026 print_short_term(h);
3028 if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
3030 for(list=0; list<h->list_count; list++){
/* Start from the default list; reordering commands permute/override entries. */
3031 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3033 if(get_bits1(&s->gb)){
3034 int pred= h->curr_pic_num;
3036 for(index=0; ; index++){
3037 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3038 unsigned int pic_id;
3040 Picture *ref = NULL;
/* idc==3 terminates the command list. */
3042 if(reordering_of_pic_nums_idc==3)
3045 if(index >= h->ref_count[list]){
3046 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3050 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term reference, addressed by a signed pic_num delta. */
3051 if(reordering_of_pic_nums_idc<2){
3052 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3055 if(abs_diff_pic_num > h->max_pic_num){
3056 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3060 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3061 else pred+= abs_diff_pic_num;
/* Wrap the predicted pic_num into [0, max_pic_num). */
3062 pred &= h->max_pic_num - 1;
3064 frame_num = pic_num_extract(h, pred, &pic_structure);
/* Search the short-term list newest-to-oldest for a matching frame_num. */
3066 for(i= h->short_ref_count-1; i>=0; i--){
3067 ref = h->short_ref[i];
3068 assert(ref->reference);
3069 assert(!ref->long_ref);
3070 if(ref->data[0] != NULL &&
3071 ref->frame_num == frame_num &&
3072 (ref->reference & pic_structure) &&
3073 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc 2: long-term reference, addressed directly by index. */
3080 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3082 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3085 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3088 ref = h->long_ref[long_idx];
3089 assert(!(ref && !ref->reference));
3090 if(ref && (ref->reference & pic_structure)){
3091 ref->pic_id= pic_id;
3092 assert(ref->long_ref);
3100 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3101 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Insert the found picture at 'index', shifting later entries down. */
3103 for(i=index; i+1<h->ref_count[list]; i++){
3104 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3107 for(; i > index; i--){
3108 h->ref_list[list][i]= h->ref_list[list][i-1];
3110 h->ref_list[list][index]= *ref;
3112 pic_as_field(&h->ref_list[list][index], pic_structure);
3116 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* Replace any empty list slots with the current picture (error concealment). */
3122 for(list=0; list<h->list_count; list++){
3123 for(index= 0; index < h->ref_count[list]; index++){
3124 if(!h->ref_list[list][index].data[0])
3125 h->ref_list[list][index]= s->current_picture;
3129 if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
3130 direct_dist_scale_factor(h);
3131 direct_ref_list_init(h);
/**
 * For MBAFF decoding: derives per-field reference entries (and matching
 * weighted-prediction tables) from the frame entries of h->ref_list[].
 * Field i of list entries is stored at offset 16+2*i (top) and 16+2*i+1
 * (bottom).
 */
3135 static void fill_mbaff_ref_list(H264Context *h){
3137 for(list=0; list<2; list++){ //FIXME try list_count
3138 for(i=0; i<h->ref_count[list]; i++){
3139 Picture *frame = &h->ref_list[list][i];
3140 Picture *field = &h->ref_list[list][16+2*i];
/* Field pictures have twice the line stride of the interleaved frame. */
3143 field[0].linesize[j] <<= 1;
3144 field[0].reference = PICT_TOP_FIELD;
3145 field[1] = field[0];
/* Bottom field starts one (frame) line below the top field. */
3147 field[1].data[j] += frame->linesize[j];
3148 field[1].reference = PICT_BOTTOM_FIELD;
/* Duplicate the frame's explicit weights/offsets for both derived fields. */
3150 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3151 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3153 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3154 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* Mirror implicit weights along both dimensions of the table. */
3158 for(j=0; j<h->ref_count[1]; j++){
3159 for(i=0; i<h->ref_count[0]; i++)
3160 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3161 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3162 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table() from the slice header: explicit weighted
 * prediction weights/offsets for luma and chroma, per list and reference.
 * Absent weights fall back to the defaults 1<<log2_denom and offset 0.
 */
3166 static int pred_weight_table(H264Context *h){
3167 MpegEncContext * const s = &h->s;
3169 int luma_def, chroma_def;
3172 h->use_weight_chroma= 0;
3173 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3174 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* Default weight = 1.0 in fixed point, i.e. 1 << denom. */
3175 luma_def = 1<<h->luma_log2_weight_denom;
3176 chroma_def = 1<<h->chroma_log2_weight_denom;
3178 for(list=0; list<2; list++){
3179 for(i=0; i<h->ref_count[list]; i++){
3180 int luma_weight_flag, chroma_weight_flag;
3182 luma_weight_flag= get_bits1(&s->gb);
3183 if(luma_weight_flag){
3184 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3185 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* Only mark weighting as in use if the values differ from the defaults. */
3186 if( h->luma_weight[list][i] != luma_def
3187 || h->luma_offset[list][i] != 0)
3190 h->luma_weight[list][i]= luma_def;
3191 h->luma_offset[list][i]= 0;
3194 chroma_weight_flag= get_bits1(&s->gb);
3195 if(chroma_weight_flag){
3198 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3199 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3200 if( h->chroma_weight[list][i][j] != chroma_def
3201 || h->chroma_offset[list][i][j] != 0)
3202 h->use_weight_chroma= 1;
3207 h->chroma_weight[list][i][j]= chroma_def;
3208 h->chroma_offset[list][i][j]= 0;
/* List 1 weights are only present for B slices. */
3212 if(h->slice_type != FF_B_TYPE) break;
3214 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Fills h->implicit_weight[][] for implicit weighted bi-prediction
 * (weighted_bipred_idc == 2): weights derived from POC distances of the
 * two references relative to the current picture.
 */
3218 static void implicit_weight_table(H264Context *h){
3219 MpegEncContext * const s = &h->s;
3221 int cur_poc = s->current_picture_ptr->poc;
/* Single symmetric reference pair around the current POC: no weighting. */
3223 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3224 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3226 h->use_weight_chroma= 0;
3231 h->use_weight_chroma= 2;
/* Implicit mode always uses denominator 2^5 = 32. */
3232 h->luma_log2_weight_denom= 5;
3233 h->chroma_log2_weight_denom= 5;
3235 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3236 int poc0 = h->ref_list[0][ref0].poc;
3237 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3238 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb clipping and the tx formula follow the spec's distance scaling. */
3239 int td = av_clip(poc1 - poc0, -128, 127);
3241 int tb = av_clip(cur_poc - poc0, -128, 127);
3242 int tx = (16384 + (FFABS(td) >> 1)) / td;
3243 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range scale factors fall back to the equal weight 32. */
3244 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3245 h->implicit_weight[ref0][ref1] = 32;
3247 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3249 h->implicit_weight[ref0][ref1] = 32;
3255 * Mark a picture as no longer needed for reference. The refmask
3256 * argument allows unreferencing of individual fields or the whole frame.
3257 * If the picture becomes entirely unreferenced, but is being held for
3258 * display purposes, it is marked as such.
3259 * @param refmask mask of fields to unreference; the mask is bitwise
3260 * anded with the reference marking of pic
3261 * @return non-zero if pic becomes entirely unreferenced (except possibly
3262 * for display purposes) zero if one of the fields remains in
3265 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* If any field bit survives the mask, the picture stays referenced. */
3267 if (pic->reference &= refmask) {
/* Otherwise keep a DELAYED_PIC_REF marking if the pic is queued for output. */
3270 if(pic == h->delayed_output_pic)
3271 pic->reference=DELAYED_PIC_REF;
3273 for(i = 0; h->delayed_pic[i]; i++)
3274 if(pic == h->delayed_pic[i]){
3275 pic->reference=DELAYED_PIC_REF;
3284 * instantaneous decoder refresh.
/* Drops every short-term and long-term reference, as required when an
 * IDR slice is decoded. */
3286 static void idr(H264Context *h){
3289 for(i=0; i<16; i++){
3290 if (h->long_ref[i] != NULL) {
/* refmask 0 fully unreferences the picture (display hold handled inside). */
3291 unreference_pic(h, h->long_ref[i], 0);
3292 h->long_ref[i]= NULL;
3295 h->long_ref_count=0;
3297 for(i=0; i<h->short_ref_count; i++){
3298 unreference_pic(h, h->short_ref[i], 0);
3299 h->short_ref[i]= NULL;
3301 h->short_ref_count=0;
3304 /* forget old pics after a seek */
3305 static void flush_dpb(AVCodecContext *avctx){
3306 H264Context *h= avctx->priv_data;
/* Drop all delayed-output pictures and clear their reference marks. */
3308 for(i=0; i<16; i++) {
3309 if(h->delayed_pic[i])
3310 h->delayed_pic[i]->reference= 0;
3311 h->delayed_pic[i]= NULL;
3313 if(h->delayed_output_pic)
3314 h->delayed_output_pic->reference= 0;
3315 h->delayed_output_pic= NULL;
3317 if(h->s.current_picture_ptr)
3318 h->s.current_picture_ptr->reference= 0;
/* Forget any pending first field so the next picture starts a fresh pair. */
3319 h->s.first_field= 0;
3320 ff_mpeg_flush(avctx);
3324 * Find a Picture in the short term reference list by frame number.
3325 * @param frame_num frame number to search for
3326 * @param idx the index into h->short_ref where returned picture is found
3327 * undefined if no picture found.
3328 * @return pointer to the found picture, or NULL if no pic with the provided
3329 * frame number is found
3331 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3332 MpegEncContext * const s = &h->s;
/* Linear scan; the short-term list is small (<= 16 entries). */
3335 for(i=0; i<h->short_ref_count; i++){
3336 Picture *pic= h->short_ref[i];
3337 if(s->avctx->debug&FF_DEBUG_MMCO)
3338 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3339 if(pic->frame_num == frame_num) {
3348 * Remove a picture from the short term reference list by its index in
3349 * that list. This does no checking on the provided index; it is assumed
3350 * to be valid. Other list entries are shifted down.
3351 * @param i index into h->short_ref of picture to remove.
3353 static void remove_short_at_index(H264Context *h, int i){
3354 assert(i > 0 && i < h->short_ref_count);
3355 h->short_ref[i]= NULL;
3356 if (--h->short_ref_count)
3357 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3362 * @return the removed picture or NULL if an error occurs
/* Remove a picture from the short-term list by frame number (find + remove). */
3364 static Picture * remove_short(H264Context *h, int frame_num){
3365 MpegEncContext * const s = &h->s;
3369 if(s->avctx->debug&FF_DEBUG_MMCO)
3370 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3372 pic = find_short(h, frame_num, &i);
/* Only remove when found; otherwise the NULL pic is returned unchanged. */
3374 remove_short_at_index(h, i);
3380 * Remove a picture from the long term reference list by its index in
3381 * that list. This does no checking on the provided index; it is assumed
3382 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3383 * @param i index into h->long_ref of picture to remove.
3385 static void remove_long_at_index(H264Context *h, int i){
3386 h->long_ref[i]= NULL;
3387 h->long_ref_count--;
3392 * @return the removed picture or NULL if an error occurs
/* Remove the long-term reference at index i and return it (may be NULL). */
3394 static Picture * remove_long(H264Context *h, int i){
3397 pic= h->long_ref[i];
3399 remove_long_at_index(h, i);
3405 * print short term list
/* Debug helper: dumps the short-term list when FF_DEBUG_MMCO is enabled. */
3407 static void print_short_term(H264Context *h) {
3409 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3410 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3411 for(i=0; i<h->short_ref_count; i++){
3412 Picture *pic= h->short_ref[i];
3413 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3419 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is enabled. */
3421 static void print_long_term(H264Context *h) {
3423 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3424 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3425 for(i = 0; i < 16; i++){
3426 Picture *pic= h->long_ref[i];
3428 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3435 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO commands to the short/long reference lists and
 * finally inserts the current picture as a reference (sliding window when
 * no explicit long-term assignment was made).
 * NOTE(review): many original lines are elided from this listing, including
 * several break statements and closing braces. */
3437 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3438 MpegEncContext * const s = &h->s;
3440 int current_ref_assigned=0;
3443 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3444 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3446 for(i=0; i<mmco_count; i++){
3447 int structure, frame_num, unref_pic;
3448 if(s->avctx->debug&FF_DEBUG_MMCO)
3449 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3451 switch(mmco[i].opcode){
/* MMCO 1: mark a short-term picture (or one of its fields) unused. */
3452 case MMCO_SHORT2UNUSED:
3453 if(s->avctx->debug&FF_DEBUG_MMCO)
3454 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3455 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3456 pic = find_short(h, frame_num, &j);
3458 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3459 remove_short_at_index(h, j);
3460 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3461 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* MMCO 3: move a short-term picture to a long-term index. */
3463 case MMCO_SHORT2LONG:
3464 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3465 h->long_ref[mmco[i].long_arg]->frame_num ==
3466 mmco[i].short_pic_num / 2) {
3467 /* do nothing, we've already moved this field pair. */
3469 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* Evict any previous occupant of the target long-term slot first. */
3471 pic= remove_long(h, mmco[i].long_arg);
3472 if(pic) unreference_pic(h, pic, 0);
3474 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3475 if (h->long_ref[ mmco[i].long_arg ]){
3476 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3477 h->long_ref_count++;
/* MMCO 2: mark a long-term picture (or one of its fields) unused. */
3481 case MMCO_LONG2UNUSED:
3482 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3483 pic = h->long_ref[j];
3485 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3486 remove_long_at_index(h, j);
3487 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3488 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* MMCO 6: assign the current picture a long-term index. */
3492 if (FIELD_PICTURE && !s->first_field) {
3493 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3494 /* Just mark second field as referenced */
3496 } else if (s->current_picture_ptr->reference) {
3497 /* First field in pair is in short term list or
3498 * at a different long term index.
3499 * This is not allowed; see 7.4.3, notes 2 and 3.
3500 * Report the problem and keep the pair where it is,
3501 * and mark this field valid.
3503 av_log(h->s.avctx, AV_LOG_ERROR,
3504 "illegal long term reference assignment for second "
3505 "field in complementary field pair (first field is "
3506 "short term or has non-matching long index)\n");
3512 pic= remove_long(h, mmco[i].long_arg);
3513 if(pic) unreference_pic(h, pic, 0);
3515 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3516 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3517 h->long_ref_count++;
3520 s->current_picture_ptr->reference |= s->picture_structure;
3521 current_ref_assigned=1;
/* MMCO 4: set max long-term index; drop entries above the new max. */
3523 case MMCO_SET_MAX_LONG:
3524 assert(mmco[i].long_arg <= 16);
3525 // just remove the long term which index is greater than new max
3526 for(j = mmco[i].long_arg; j<16; j++){
3527 pic = remove_long(h, j);
3528 if (pic) unreference_pic(h, pic, 0);
/* MMCO 5 (reset): empty both reference lists. */
3532 while(h->short_ref_count){
3533 pic= remove_short(h, h->short_ref[0]->frame_num);
3534 if(pic) unreference_pic(h, pic, 0);
3536 for(j = 0; j < 16; j++) {
3537 pic= remove_long(h, j);
3538 if(pic) unreference_pic(h, pic, 0);
/* Second field of a pair whose first field is already a reference. */
3545 if (!current_ref_assigned && FIELD_PICTURE &&
3546 !s->first_field && s->current_picture_ptr->reference) {
3548 /* Second field of complementary field pair; the first field of
3549 * which is already referenced. If short referenced, it
3550 * should be first entry in short_ref. If not, it must exist
3551 * in long_ref; trying to put it on the short list here is an
3552 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3554 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3555 /* Just mark the second field valid */
3556 s->current_picture_ptr->reference = PICT_FRAME;
3557 } else if (s->current_picture_ptr->long_ref) {
3558 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3559 "assignment for second field "
3560 "in complementary field pair "
3561 "(first field is long term)\n");
3564 * First field in reference, but not in any sensible place on our
3565 * reference lists. This shouldn't happen unless reference
3566 * handling somewhere else is wrong.
3570 current_ref_assigned = 1;
/* Default sliding-window behavior: push the current picture onto the
 * front of the short-term list. */
3573 if(!current_ref_assigned){
3574 pic= remove_short(h, s->current_picture_ptr->frame_num);
3576 unreference_pic(h, pic, 0);
3577 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3580 if(h->short_ref_count)
3581 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3583 h->short_ref[0]= s->current_picture_ptr;
3584 h->short_ref[0]->long_ref=0;
3585 h->short_ref_count++;
3586 s->current_picture_ptr->reference |= s->picture_structure;
/* Guard against buffer overrun on corrupt streams: discard one reference. */
3589 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3591 /* We have too many reference frames, probably due to corrupted
3592 * stream. Need to discard one frame. Prevents overrun of the
3593 * short_ref and long_ref buffers.
3595 av_log(h->s.avctx, AV_LOG_ERROR,
3596 "number of reference frames exceeds max (probably "
3597 "corrupt input), discarding one\n");
3599 if (h->long_ref_count) {
3600 for (i = 0; i < 16; ++i)
3605 pic = h->long_ref[i];
3606 remove_long_at_index(h, i);
3608 pic = h->short_ref[h->short_ref_count - 1];
3609 remove_short_at_index(h, h->short_ref_count - 1);
3611 unreference_pic(h, pic, 0);
3614 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices this synthesizes the equivalent MMCO commands; otherwise
 * it reads explicit adaptive marking commands, or sets up sliding-window
 * removal of the oldest short-term reference when the buffer is full.
 */
3619 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3620 MpegEncContext * const s = &h->s;
3623 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag: 1 -> broken_link 0, 0 -> -1. */
3624 s->broken_link= get_bits1(gb) -1;
3625 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3626 if(h->mmco[0].long_arg == -1)
3629 h->mmco[0].opcode= MMCO_LONG;
3633 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3634 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3635 MMCOOpcode opcode= get_ue_golomb(gb);
3637 h->mmco[i].opcode= opcode;
3638 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, wrapped. */
3639 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3640 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3641 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3645 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3646 unsigned int long_arg= get_ue_golomb(gb);
/* Field coding allows indices up to 31 only for LONG2UNUSED. */
3647 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3648 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3651 h->mmco[i].long_arg= long_arg;
3654 if(opcode > (unsigned)MMCO_LONG){
3655 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3658 if(opcode == MMCO_END)
/* Sliding window: no adaptive commands present. */
3663 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3665 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3666 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3667 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3668 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* When decoding fields, both fields of the oldest frame are released. */
3670 if (FIELD_PICTURE) {
3671 h->mmco[0].short_pic_num *= 2;
3672 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3673 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types (0: lsb/msb from slice header, 1: expected POC from SPS
 * cycle, 2: derived from frame_num), and stores the per-field and frame
 * POC values into the current picture.
 */
3684 static int init_poc(H264Context *h){
3685 MpegEncContext * const s = &h->s;
3686 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3689 if(h->nal_unit_type == NAL_IDR_SLICE){
3690 h->frame_num_offset= 0;
/* frame_num wrapped since the previous picture: bump the offset. */
3692 if(h->frame_num < h->prev_frame_num)
3693 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3695 h->frame_num_offset= h->prev_frame_num_offset;
3698 if(h->sps.poc_type==0){
3699 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3701 if(h->nal_unit_type == NAL_IDR_SLICE){
/* Standard msb derivation: detect lsb wrap relative to the previous POC. */
3706 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3707 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3708 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3709 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3711 h->poc_msb = h->prev_poc_msb;
3712 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3714 field_poc[1] = h->poc_msb + h->poc_lsb;
3715 if(s->picture_structure == PICT_FRAME)
3716 field_poc[1] += h->delta_poc_bottom;
3717 }else if(h->sps.poc_type==1){
3718 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3721 if(h->sps.poc_cycle_length != 0)
3722 abs_frame_num = h->frame_num_offset + h->frame_num;
3726 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3729 expected_delta_per_poc_cycle = 0;
3730 for(i=0; i < h->sps.poc_cycle_length; i++)
3731 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3733 if(abs_frame_num > 0){
3734 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3735 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3737 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3738 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3739 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3743 if(h->nal_ref_idc == 0)
3744 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3746 field_poc[0] = expectedpoc + h->delta_poc[0];
3747 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3749 if(s->picture_structure == PICT_FRAME)
3750 field_poc[1] += h->delta_poc[1];
/* POC type 2: POC follows decoding order, derived from frame_num. */
3753 if(h->nal_unit_type == NAL_IDR_SLICE){
3756 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3757 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* Store per-field POCs; a frame gets both, a field only its own. */
3763 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3764 s->current_picture_ptr->field_poc[0]= field_poc[0];
3765 s->current_picture_ptr->poc = field_poc[0];
3767 if(s->picture_structure != PICT_TOP_FIELD) {
3768 s->current_picture_ptr->field_poc[1]= field_poc[1];
3769 s->current_picture_ptr->poc = field_poc[1];
/* Complete frame (or second field): frame POC = min of the field POCs. */
3771 if(!FIELD_PICTURE || !s->first_field) {
3772 Picture *cur = s->current_picture_ptr;
3773 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3781 * initialize scan tables
/* Sets up the zigzag/field scan orders. When the DSP uses the C IDCT the
 * canonical tables are copied verbatim; otherwise the T() macros permute
 * them to match the (transposed) coefficient layout of the optimized IDCT. */
3783 static void init_scan_tables(H264Context *h){
3784 MpegEncContext * const s = &h->s;
3786 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3787 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3788 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3790 for(i=0; i<16; i++){
/* Swap row/column within the 4x4 index (transpose). */
3791 #define T(x) (x>>2) | ((x<<2) & 0xF)
3792 h->zigzag_scan[i] = T(zigzag_scan[i]);
3793 h-> field_scan[i] = T( field_scan[i]);
3797 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3798 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3799 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3800 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3801 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3803 for(i=0; i<64; i++){
/* Swap row/column within the 8x8 index (transpose). */
3804 #define T(x) (x>>3) | ((x&7)<<3)
3805 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3806 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3807 h->field_scan8x8[i] = T(field_scan8x8[i]);
3808 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* Lossless (transform bypass, qp==0) always uses the untransposed tables. */
3812 if(h->sps.transform_bypass){ //FIXME same ugly
3813 h->zigzag_scan_q0 = zigzag_scan;
3814 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3815 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3816 h->field_scan_q0 = field_scan;
3817 h->field_scan8x8_q0 = field_scan8x8;
3818 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3820 h->zigzag_scan_q0 = h->zigzag_scan;
3821 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3822 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3823 h->field_scan_q0 = h->field_scan;
3824 h->field_scan8x8_q0 = h->field_scan8x8;
3825 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3830 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies per-picture decoding state from the master context so
 * slice threads see consistent reference lists, POC state and dequant
 * tables. Pointer members are shared, not deep-copied. */
3832 static void clone_slice(H264Context *dst, H264Context *src)
3834 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3835 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3836 dst->s.current_picture = src->s.current_picture;
3837 dst->s.linesize = src->s.linesize;
3838 dst->s.uvlinesize = src->s.uvlinesize;
3839 dst->s.first_field = src->s.first_field;
3841 dst->prev_poc_msb = src->prev_poc_msb;
3842 dst->prev_poc_lsb = src->prev_poc_lsb;
3843 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3844 dst->prev_frame_num = src->prev_frame_num;
3845 dst->short_ref_count = src->short_ref_count;
3847 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3848 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3849 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3850 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3852 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3853 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3857 * decodes a slice header.
3858 * this will allso call MPV_common_init() and frame_start() as needed
3860 * @param h h264context
3861 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3863 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3865 static int decode_slice_header(H264Context *h, H264Context *h0){
3866 MpegEncContext * const s = &h->s;
3867 MpegEncContext * const s0 = &h0->s;
3868 unsigned int first_mb_in_slice;
3869 unsigned int pps_id;
3870 int num_ref_idx_active_override_flag;
3871 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3872 unsigned int slice_type, tmp, i;
3873 int default_ref_list_done = 0;
3874 int last_pic_structure;
3876 s->dropable= h->nal_ref_idc == 0;
3878 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3879 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3880 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3882 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3883 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3886 first_mb_in_slice= get_ue_golomb(&s->gb);
3888 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3889 h0->current_slice = 0;
3890 if (!s0->first_field)
3891 s->current_picture_ptr= NULL;
3894 slice_type= get_ue_golomb(&s->gb);
3896 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3901 h->slice_type_fixed=1;
3903 h->slice_type_fixed=0;
3905 slice_type= slice_type_map[ slice_type ];
3906 if (slice_type == FF_I_TYPE
3907 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3908 default_ref_list_done = 1;
3910 h->slice_type= slice_type;
3912 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3913 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3914 av_log(h->s.avctx, AV_LOG_ERROR,
3915 "B picture before any references, skipping\n");
3919 pps_id= get_ue_golomb(&s->gb);
3920 if(pps_id>=MAX_PPS_COUNT){
3921 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3924 if(!h0->pps_buffers[pps_id]) {
3925 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3928 h->pps= *h0->pps_buffers[pps_id];
3930 if(!h0->sps_buffers[h->pps.sps_id]) {
3931 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3934 h->sps = *h0->sps_buffers[h->pps.sps_id];
3936 if(h == h0 && h->dequant_coeff_pps != pps_id){
3937 h->dequant_coeff_pps = pps_id;
3938 init_dequant_tables(h);
3941 s->mb_width= h->sps.mb_width;
3942 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3944 h->b_stride= s->mb_width*4;
3945 h->b8_stride= s->mb_width*2;
3947 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3948 if(h->sps.frame_mbs_only_flag)
3949 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3951 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3953 if (s->context_initialized
3954 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3956 return -1; // width / height changed during parallelized decoding
3960 if (!s->context_initialized) {
3962 return -1; // we cant (re-)initialize context during parallel decoding
3963 if (MPV_common_init(s) < 0)
3967 init_scan_tables(h);
3970 for(i = 1; i < s->avctx->thread_count; i++) {
3972 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3973 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3974 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3977 init_scan_tables(c);
3981 for(i = 0; i < s->avctx->thread_count; i++)
3982 if(context_init(h->thread_context[i]) < 0)
3985 s->avctx->width = s->width;
3986 s->avctx->height = s->height;
3987 s->avctx->sample_aspect_ratio= h->sps.sar;
3988 if(!s->avctx->sample_aspect_ratio.den)
3989 s->avctx->sample_aspect_ratio.den = 1;
3991 if(h->sps.timing_info_present_flag){
3992 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3993 if(h->x264_build > 0 && h->x264_build < 44)
3994 s->avctx->time_base.den *= 2;
3995 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3996 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4000 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4003 h->mb_aff_frame = 0;
4004 last_pic_structure = s0->picture_structure;
4005 if(h->sps.frame_mbs_only_flag){
4006 s->picture_structure= PICT_FRAME;
4008 if(get_bits1(&s->gb)) { //field_pic_flag
4009 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4011 s->picture_structure= PICT_FRAME;
4012 h->mb_aff_frame = h->sps.mb_aff;
4016 if(h0->current_slice == 0){
4017 /* See if we have a decoded first field looking for a pair... */
4018 if (s0->first_field) {
4019 assert(s0->current_picture_ptr);
4020 assert(s0->current_picture_ptr->data[0]);
4021 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4023 /* figure out if we have a complementary field pair */
4024 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4026 * Previous field is unmatched. Don't display it, but let it
4027 * remain for reference if marked as such.
4029 s0->current_picture_ptr = NULL;
4030 s0->first_field = FIELD_PICTURE;
4033 if (h->nal_ref_idc &&
4034 s0->current_picture_ptr->reference &&
4035 s0->current_picture_ptr->frame_num != h->frame_num) {
4037 * This and previous field were reference, but had
4038 * different frame_nums. Consider this field first in
4039 * pair. Throw away previous field except for reference
4042 s0->first_field = 1;
4043 s0->current_picture_ptr = NULL;
4046 /* Second field in complementary pair */
4047 s0->first_field = 0;
4052 /* Frame or first field in a potentially complementary pair */
4053 assert(!s0->current_picture_ptr);
4054 s0->first_field = FIELD_PICTURE;
4057 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4058 s0->first_field = 0;
4065 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4067 assert(s->mb_num == s->mb_width * s->mb_height);
4068 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4069 first_mb_in_slice >= s->mb_num){
4070 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4073 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4074 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4075 if (s->picture_structure == PICT_BOTTOM_FIELD)
4076 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4077 assert(s->mb_y < s->mb_height);
4079 if(s->picture_structure==PICT_FRAME){
4080 h->curr_pic_num= h->frame_num;
4081 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4083 h->curr_pic_num= 2*h->frame_num + 1;
4084 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4087 if(h->nal_unit_type == NAL_IDR_SLICE){
4088 get_ue_golomb(&s->gb); /* idr_pic_id */
4091 if(h->sps.poc_type==0){
4092 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4094 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4095 h->delta_poc_bottom= get_se_golomb(&s->gb);
4099 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4100 h->delta_poc[0]= get_se_golomb(&s->gb);
4102 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4103 h->delta_poc[1]= get_se_golomb(&s->gb);
4108 if(h->pps.redundant_pic_cnt_present){
4109 h->redundant_pic_count= get_ue_golomb(&s->gb);
4112 //set defaults, might be overriden a few line later
4113 h->ref_count[0]= h->pps.ref_count[0];
4114 h->ref_count[1]= h->pps.ref_count[1];
4116 if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
4117 if(h->slice_type == FF_B_TYPE){
4118 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4120 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4122 if(num_ref_idx_active_override_flag){
4123 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4124 if(h->slice_type==FF_B_TYPE)
4125 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4127 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4128 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4129 h->ref_count[0]= h->ref_count[1]= 1;
4133 if(h->slice_type == FF_B_TYPE)
4140 if(!default_ref_list_done){
4141 fill_default_ref_list(h);
4144 if(decode_ref_pic_list_reordering(h) < 0)
4147 if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
4148 || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
4149 pred_weight_table(h);
4150 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
4151 implicit_weight_table(h);
4156 decode_ref_pic_marking(h0, &s->gb);
4159 fill_mbaff_ref_list(h);
4161 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
4162 tmp = get_ue_golomb(&s->gb);
4164 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4167 h->cabac_init_idc= tmp;
4170 h->last_qscale_diff = 0;
4171 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4173 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4177 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4178 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4179 //FIXME qscale / qp ... stuff
4180 if(h->slice_type == FF_SP_TYPE){
4181 get_bits1(&s->gb); /* sp_for_switch_flag */
4183 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4184 get_se_golomb(&s->gb); /* slice_qs_delta */
4187 h->deblocking_filter = 1;
4188 h->slice_alpha_c0_offset = 0;
4189 h->slice_beta_offset = 0;
4190 if( h->pps.deblocking_filter_parameters_present ) {
4191 tmp= get_ue_golomb(&s->gb);
4193 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4196 h->deblocking_filter= tmp;
4197 if(h->deblocking_filter < 2)
4198 h->deblocking_filter^= 1; // 1<->0
4200 if( h->deblocking_filter ) {
4201 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4202 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4206 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4207 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
4208 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
4209 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4210 h->deblocking_filter= 0;
4212 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4213 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4214 /* Cheat slightly for speed:
4215 Do not bother to deblock across slices. */
4216 h->deblocking_filter = 2;
4218 h0->max_contexts = 1;
4219 if(!h0->single_decode_warning) {
4220 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4221 h0->single_decode_warning = 1;
4224 return 1; // deblocking switched inside frame
4229 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4230 slice_group_change_cycle= get_bits(&s->gb, ?);
4233 h0->last_slice_type = slice_type;
4234 h->slice_num = ++h0->current_slice;
4236 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4237 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4239 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4240 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4242 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4244 av_get_pict_type_char(h->slice_type),
4245 pps_id, h->frame_num,
4246 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4247 h->ref_count[0], h->ref_count[1],
4249 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4251 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4261 static inline int get_level_prefix(GetBitContext *gb){
4265 OPEN_READER(re, gb);
4266 UPDATE_CACHE(re, gb);
4267 buf=GET_CACHE(re, gb);
4269 log= 32 - av_log2(buf);
4271 print_bin(buf>>(32-log), log);
4272 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4275 LAST_SKIP_BITS(re, gb, log);
4276 CLOSE_READER(re, gb);
4281 static inline int get_dct8x8_allowed(H264Context *h){
4284 if(!IS_SUB_8X8(h->sub_mb_type[i])
4285 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4292 * decodes a residual block.
4293 * @param n block index
4294 * @param scantable scantable
4295 * @param max_coeff number of coefficients in the block
4296 * @return <0 if an error occurred
4298 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4299 MpegEncContext * const s = &h->s;
4300 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4302 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4304 //FIXME put trailing_onex into the context
4306 if(n == CHROMA_DC_BLOCK_INDEX){
4307 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4308 total_coeff= coeff_token>>2;
4310 if(n == LUMA_DC_BLOCK_INDEX){
4311 total_coeff= pred_non_zero_count(h, 0);
4312 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4313 total_coeff= coeff_token>>2;
4315 total_coeff= pred_non_zero_count(h, n);
4316 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4317 total_coeff= coeff_token>>2;
4318 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4322 //FIXME set last_non_zero?
4326 if(total_coeff > (unsigned)max_coeff) {
4327 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4331 trailing_ones= coeff_token&3;
4332 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4333 assert(total_coeff<=16);
4335 for(i=0; i<trailing_ones; i++){
4336 level[i]= 1 - 2*get_bits1(gb);
4340 int level_code, mask;
4341 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4342 int prefix= get_level_prefix(gb);
4344 //first coefficient has suffix_length equal to 0 or 1
4345 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4347 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4349 level_code= (prefix<<suffix_length); //part
4350 }else if(prefix==14){
4352 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4354 level_code= prefix + get_bits(gb, 4); //part
4355 }else if(prefix==15){
4356 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4357 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4359 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4363 if(trailing_ones < 3) level_code += 2;
4368 mask= -(level_code&1);
4369 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4372 //remaining coefficients have suffix_length > 0
4373 for(;i<total_coeff;i++) {
4374 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4375 prefix = get_level_prefix(gb);
4377 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4378 }else if(prefix==15){
4379 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4381 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4384 mask= -(level_code&1);
4385 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4386 if(level_code > suffix_limit[suffix_length])
4391 if(total_coeff == max_coeff)
4394 if(n == CHROMA_DC_BLOCK_INDEX)
4395 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4397 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4400 coeff_num = zeros_left + total_coeff - 1;
4401 j = scantable[coeff_num];
4403 block[j] = level[0];
4404 for(i=1;i<total_coeff;i++) {
4407 else if(zeros_left < 7){
4408 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4410 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4412 zeros_left -= run_before;
4413 coeff_num -= 1 + run_before;
4414 j= scantable[ coeff_num ];
4419 block[j] = (level[0] * qmul[j] + 32)>>6;
4420 for(i=1;i<total_coeff;i++) {
4423 else if(zeros_left < 7){
4424 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4426 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4428 zeros_left -= run_before;
4429 coeff_num -= 1 + run_before;
4430 j= scantable[ coeff_num ];
4432 block[j]= (level[i] * qmul[j] + 32)>>6;
4437 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4444 static void predict_field_decoding_flag(H264Context *h){
4445 MpegEncContext * const s = &h->s;
4446 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4447 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4448 ? s->current_picture.mb_type[mb_xy-1]
4449 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4450 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4452 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4456 * decodes a P_SKIP or B_SKIP macroblock
4458 static void decode_mb_skip(H264Context *h){
4459 MpegEncContext * const s = &h->s;
4460 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4463 memset(h->non_zero_count[mb_xy], 0, 16);
4464 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4467 mb_type|= MB_TYPE_INTERLACED;
4469 if( h->slice_type == FF_B_TYPE )
4471 // just for fill_caches. pred_direct_motion will set the real mb_type
4472 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4474 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4475 pred_direct_motion(h, &mb_type);
4476 mb_type|= MB_TYPE_SKIP;
4481 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4483 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4484 pred_pskip_motion(h, &mx, &my);
4485 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4486 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4489 write_back_motion(h, mb_type);
4490 s->current_picture.mb_type[mb_xy]= mb_type;
4491 s->current_picture.qscale_table[mb_xy]= s->qscale;
4492 h->slice_table[ mb_xy ]= h->slice_num;
4493 h->prev_mb_skipped= 1;
4497 * decodes a macroblock
4498 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4500 static int decode_mb_cavlc(H264Context *h){
4501 MpegEncContext * const s = &h->s;
4502 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4503 int partition_count;
4504 unsigned int mb_type, cbp;
4505 int dct8x8_allowed= h->pps.transform_8x8_mode;
4507 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4509 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4510 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4512 if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
4513 if(s->mb_skip_run==-1)
4514 s->mb_skip_run= get_ue_golomb(&s->gb);
4516 if (s->mb_skip_run--) {
4517 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4518 if(s->mb_skip_run==0)
4519 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4521 predict_field_decoding_flag(h);
4528 if( (s->mb_y&1) == 0 )
4529 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4531 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4533 h->prev_mb_skipped= 0;
4535 mb_type= get_ue_golomb(&s->gb);
4536 if(h->slice_type == FF_B_TYPE){
4538 partition_count= b_mb_type_info[mb_type].partition_count;
4539 mb_type= b_mb_type_info[mb_type].type;
4542 goto decode_intra_mb;
4544 }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
4546 partition_count= p_mb_type_info[mb_type].partition_count;
4547 mb_type= p_mb_type_info[mb_type].type;
4550 goto decode_intra_mb;
4553 assert(h->slice_type == FF_I_TYPE);
4556 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4560 cbp= i_mb_type_info[mb_type].cbp;
4561 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4562 mb_type= i_mb_type_info[mb_type].type;
4566 mb_type |= MB_TYPE_INTERLACED;
4568 h->slice_table[ mb_xy ]= h->slice_num;
4570 if(IS_INTRA_PCM(mb_type)){
4573 // We assume these blocks are very rare so we do not optimize it.
4574 align_get_bits(&s->gb);
4576 // The pixels are stored in the same order as levels in h->mb array.
4577 for(y=0; y<16; y++){
4578 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4579 for(x=0; x<16; x++){
4580 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4581 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4585 const int index= 256 + 4*(y&3) + 32*(y>>2);
4587 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4588 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4592 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4594 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4595 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4599 // In deblocking, the quantizer is 0
4600 s->current_picture.qscale_table[mb_xy]= 0;
4601 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4602 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4603 // All coeffs are present
4604 memset(h->non_zero_count[mb_xy], 16, 16);
4606 s->current_picture.mb_type[mb_xy]= mb_type;
4611 h->ref_count[0] <<= 1;
4612 h->ref_count[1] <<= 1;
4615 fill_caches(h, mb_type, 0);
4618 if(IS_INTRA(mb_type)){
4620 // init_top_left_availability(h);
4621 if(IS_INTRA4x4(mb_type)){
4624 if(dct8x8_allowed && get_bits1(&s->gb)){
4625 mb_type |= MB_TYPE_8x8DCT;
4629 // fill_intra4x4_pred_table(h);
4630 for(i=0; i<16; i+=di){
4631 int mode= pred_intra_mode(h, i);
4633 if(!get_bits1(&s->gb)){
4634 const int rem_mode= get_bits(&s->gb, 3);
4635 mode = rem_mode + (rem_mode >= mode);
4639 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4641 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4643 write_back_intra_pred_mode(h);
4644 if( check_intra4x4_pred_mode(h) < 0)
4647 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4648 if(h->intra16x16_pred_mode < 0)
4652 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4655 h->chroma_pred_mode= pred_mode;
4656 }else if(partition_count==4){
4657 int i, j, sub_partition_count[4], list, ref[2][4];
4659 if(h->slice_type == FF_B_TYPE){
4661 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4662 if(h->sub_mb_type[i] >=13){
4663 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4666 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4667 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4669 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4670 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4671 pred_direct_motion(h, &mb_type);
4672 h->ref_cache[0][scan8[4]] =
4673 h->ref_cache[1][scan8[4]] =
4674 h->ref_cache[0][scan8[12]] =
4675 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4678 assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
4680 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4681 if(h->sub_mb_type[i] >=4){
4682 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4685 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4686 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4690 for(list=0; list<h->list_count; list++){
4691 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4693 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4694 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4695 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4697 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4709 dct8x8_allowed = get_dct8x8_allowed(h);
4711 for(list=0; list<h->list_count; list++){
4713 if(IS_DIRECT(h->sub_mb_type[i])) {
4714 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4717 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4718 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4720 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4721 const int sub_mb_type= h->sub_mb_type[i];
4722 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4723 for(j=0; j<sub_partition_count[i]; j++){
4725 const int index= 4*i + block_width*j;
4726 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4727 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4728 mx += get_se_golomb(&s->gb);
4729 my += get_se_golomb(&s->gb);
4730 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4732 if(IS_SUB_8X8(sub_mb_type)){
4734 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4736 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4737 }else if(IS_SUB_8X4(sub_mb_type)){
4738 mv_cache[ 1 ][0]= mx;
4739 mv_cache[ 1 ][1]= my;
4740 }else if(IS_SUB_4X8(sub_mb_type)){
4741 mv_cache[ 8 ][0]= mx;
4742 mv_cache[ 8 ][1]= my;
4744 mv_cache[ 0 ][0]= mx;
4745 mv_cache[ 0 ][1]= my;
4748 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4754 }else if(IS_DIRECT(mb_type)){
4755 pred_direct_motion(h, &mb_type);
4756 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4758 int list, mx, my, i;
4759 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4760 if(IS_16X16(mb_type)){
4761 for(list=0; list<h->list_count; list++){
4763 if(IS_DIR(mb_type, 0, list)){
4764 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4765 if(val >= h->ref_count[list]){
4766 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4770 val= LIST_NOT_USED&0xFF;
4771 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4773 for(list=0; list<h->list_count; list++){
4775 if(IS_DIR(mb_type, 0, list)){
4776 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4777 mx += get_se_golomb(&s->gb);
4778 my += get_se_golomb(&s->gb);
4779 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4781 val= pack16to32(mx,my);
4784 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4787 else if(IS_16X8(mb_type)){
4788 for(list=0; list<h->list_count; list++){
4791 if(IS_DIR(mb_type, i, list)){
4792 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4793 if(val >= h->ref_count[list]){
4794 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4798 val= LIST_NOT_USED&0xFF;
4799 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4802 for(list=0; list<h->list_count; list++){
4805 if(IS_DIR(mb_type, i, list)){
4806 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4807 mx += get_se_golomb(&s->gb);
4808 my += get_se_golomb(&s->gb);
4809 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4811 val= pack16to32(mx,my);
4814 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4818 assert(IS_8X16(mb_type));
4819 for(list=0; list<h->list_count; list++){
4822 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4823 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4824 if(val >= h->ref_count[list]){
4825 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4829 val= LIST_NOT_USED&0xFF;
4830 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4833 for(list=0; list<h->list_count; list++){
4836 if(IS_DIR(mb_type, i, list)){
4837 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4838 mx += get_se_golomb(&s->gb);
4839 my += get_se_golomb(&s->gb);
4840 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4842 val= pack16to32(mx,my);
4845 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4851 if(IS_INTER(mb_type))
4852 write_back_motion(h, mb_type);
4854 if(!IS_INTRA16x16(mb_type)){
4855 cbp= get_ue_golomb(&s->gb);
4857 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4861 if(IS_INTRA4x4(mb_type))
4862 cbp= golomb_to_intra4x4_cbp[cbp];
4864 cbp= golomb_to_inter_cbp[cbp];
4868 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4869 if(get_bits1(&s->gb))
4870 mb_type |= MB_TYPE_8x8DCT;
4872 s->current_picture.mb_type[mb_xy]= mb_type;
4874 if(cbp || IS_INTRA16x16(mb_type)){
4875 int i8x8, i4x4, chroma_idx;
4877 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4878 const uint8_t *scan, *scan8x8, *dc_scan;
4880 // fill_non_zero_count_cache(h);
4882 if(IS_INTERLACED(mb_type)){
4883 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4884 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4885 dc_scan= luma_dc_field_scan;
4887 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4888 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4889 dc_scan= luma_dc_zigzag_scan;
4892 dquant= get_se_golomb(&s->gb);
4894 if( dquant > 25 || dquant < -26 ){
4895 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4899 s->qscale += dquant;
4900 if(((unsigned)s->qscale) > 51){
4901 if(s->qscale<0) s->qscale+= 52;
4902 else s->qscale-= 52;
4905 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4906 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4907 if(IS_INTRA16x16(mb_type)){
4908 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4909 return -1; //FIXME continue if partitioned and other return -1 too
4912 assert((cbp&15) == 0 || (cbp&15) == 15);
4915 for(i8x8=0; i8x8<4; i8x8++){
4916 for(i4x4=0; i4x4<4; i4x4++){
4917 const int index= i4x4 + 4*i8x8;
4918 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4924 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4927 for(i8x8=0; i8x8<4; i8x8++){
4928 if(cbp & (1<<i8x8)){
4929 if(IS_8x8DCT(mb_type)){
4930 DCTELEM *buf = &h->mb[64*i8x8];
4932 for(i4x4=0; i4x4<4; i4x4++){
4933 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4934 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4937 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4938 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4940 for(i4x4=0; i4x4<4; i4x4++){
4941 const int index= i4x4 + 4*i8x8;
4943 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4949 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4950 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4956 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4957 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4963 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4964 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4965 for(i4x4=0; i4x4<4; i4x4++){
4966 const int index= 16 + 4*chroma_idx + i4x4;
4967 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4973 uint8_t * const nnz= &h->non_zero_count_cache[0];
4974 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4975 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4978 uint8_t * const nnz= &h->non_zero_count_cache[0];
4979 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4980 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4981 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4983 s->current_picture.qscale_table[mb_xy]= s->qscale;
4984 write_back_non_zero_count(h);
4987 h->ref_count[0] >>= 1;
4988 h->ref_count[1] >>= 1;
4994 static int decode_cabac_field_decoding_flag(H264Context *h) {
4995 MpegEncContext * const s = &h->s;
4996 const int mb_x = s->mb_x;
4997 const int mb_y = s->mb_y & ~1;
4998 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4999 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5001 unsigned int ctx = 0;
5003 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5006 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5010 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
5013 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5014 uint8_t *state= &h->cabac_state[ctx_base];
5018 MpegEncContext * const s = &h->s;
5019 const int mba_xy = h->left_mb_xy[0];
5020 const int mbb_xy = h->top_mb_xy;
5022 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5024 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5026 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5027 return 0; /* I4x4 */
5030 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5031 return 0; /* I4x4 */
5034 if( get_cabac_terminate( &h->cabac ) )
5035 return 25; /* PCM */
5037 mb_type = 1; /* I16x16 */
5038 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5039 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5040 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5041 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5042 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
5046 static int decode_cabac_mb_type( H264Context *h ) {
5047 MpegEncContext * const s = &h->s;
5049 if( h->slice_type == FF_I_TYPE ) {
5050 return decode_cabac_intra_mb_type(h, 3, 1);
5051 } else if( h->slice_type == FF_P_TYPE ) {
5052 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5054 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5055 /* P_L0_D16x16, P_8x8 */
5056 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5058 /* P_L0_D8x16, P_L0_D16x8 */
5059 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5062 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5064 } else if( h->slice_type == FF_B_TYPE ) {
5065 const int mba_xy = h->left_mb_xy[0];
5066 const int mbb_xy = h->top_mb_xy;
5070 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5072 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5075 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5076 return 0; /* B_Direct_16x16 */
5078 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5079 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5082 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5083 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5084 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5085 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5087 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5088 else if( bits == 13 ) {
5089 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5090 } else if( bits == 14 )
5091 return 11; /* B_L1_L0_8x16 */
5092 else if( bits == 15 )
5093 return 22; /* B_8x8 */
5095 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5096 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5098 /* TODO SI/SP frames? */
5103 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5104 MpegEncContext * const s = &h->s;
5108 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5109 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5112 && h->slice_table[mba_xy] == h->slice_num
5113 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5114 mba_xy += s->mb_stride;
5116 mbb_xy = mb_xy - s->mb_stride;
5118 && h->slice_table[mbb_xy] == h->slice_num
5119 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5120 mbb_xy -= s->mb_stride;
5122 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5124 int mb_xy = mb_x + mb_y*s->mb_stride;
5126 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5129 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5131 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5134 if( h->slice_type == FF_B_TYPE )
5136 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5139 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5142 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5145 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5146 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5147 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5149 if( mode >= pred_mode )
5155 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5156 const int mba_xy = h->left_mb_xy[0];
5157 const int mbb_xy = h->top_mb_xy;
5161 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5162 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5165 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5168 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5171 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5173 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5179 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5180 int cbp_b, cbp_a, ctx, cbp = 0;
5182 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5183 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5185 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5186 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5187 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5188 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5189 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5190 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5191 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5192 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5195 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5199 cbp_a = (h->left_cbp>>4)&0x03;
5200 cbp_b = (h-> top_cbp>>4)&0x03;
5203 if( cbp_a > 0 ) ctx++;
5204 if( cbp_b > 0 ) ctx += 2;
5205 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5209 if( cbp_a == 2 ) ctx++;
5210 if( cbp_b == 2 ) ctx += 2;
5211 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5213 static int decode_cabac_mb_dqp( H264Context *h) {
5217 if( h->last_qscale_diff != 0 )
5220 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5226 if(val > 102) //prevent infinite loop
5233 return -(val + 1)/2;
5235 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5236 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5238 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5240 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes the sub-macroblock type of one 8x8 block in a B slice
 * (CABAC states 36..39); the result indexes b_sub_mb_type_info.
 * NOTE(review): partial listing — the declaration of 'type' and some
 * intermediate lines are elided.
 */
5244 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5246 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5247 return 0; /* B_Direct_8x8 */
5248 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5249 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5251 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5252 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5253 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types: two suffix bins (state 39) select the exact type */
5256 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5257 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; the context (states 399..401) depends
 * on how many already-decoded neighbours use the 8x8 transform,
 * as cached in h->neighbor_transform_size. */
5261 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5262 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index for block n of the given list
 * (unary binarization, CABAC states 54..59).
 * The context derives from the cached left/top reference indices;
 * in B slices, direct-predicted neighbours do not count as "ref > 0".
 * NOTE(review): partial listing — ctx/ref declarations and the loop
 * body are elided.
 */
5265 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5266 int refa = h->ref_cache[list][scan8[n] - 1];
5267 int refb = h->ref_cache[list][scan8[n] - 8];
5271 if( h->slice_type == FF_B_TYPE) {
5272 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5274 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5283 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity bound: guard against corrupted streams producing huge indices */
5289 if(ref >= 32 /*h->ref_list[list]*/){
5290 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5291 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion-vector-difference component (l: 0 = x, 1 = y)
 * for block n of the given list.
 * Binarization: up to 9 context-coded unary bins (context base 40 for
 * x, 47 for y), then a bypass-coded exp-golomb-style suffix, then a
 * bypass-coded sign.  The first-bin context depends on the summed
 * magnitude of the neighbouring cached MVDs (amvd).
 * NOTE(review): partial listing — ctx/mvd/k declarations and several
 * loop-body lines are elided.
 */
5297 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5298 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5299 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5300 int ctxbase = (l == 0) ? 40 : 47;
5305 else if( amvd > 32 )
5310 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5315 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* suffix: bypass-coded exponential part */
5323 while( get_cabac_bypass( &h->cabac ) ) {
5327 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5332 if( get_cabac_bypass( &h->cabac ) )
/* final bypass bin decides the sign */
5336 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the CABAC context offset for the coded_block_flag of one
 * residual block, from the left/top neighbour non-zero information;
 * the returned index is ctx + 4*cat.
 * cat: 0 = luma DC, 1 = luma AC (16x16), 2 = luma 4x4,
 *      3 = chroma DC, 4 = chroma AC.
 * NOTE(review): partial listing — the nza/nzb declarations, the cat==0
 * branch header and the ctx computation from nza/nzb are elided.
 */
5339 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* luma DC: the neighbour's flag is cached as bit 8 of its cbp */
5344 nza = h->left_cbp&0x100;
5345 nzb = h-> top_cbp&0x100;
5346 } else if( cat == 1 || cat == 2 ) {
5347 nza = h->non_zero_count_cache[scan8[idx] - 1];
5348 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5349 } else if( cat == 3 ) {
/* chroma DC: per-component flags live in cbp bits 6..7 (idx = iCbCr) */
5350 nza = (h->left_cbp>>(6+idx))&0x01;
5351 nzb = (h-> top_cbp>>(6+idx))&0x01;
5354 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5355 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5364 return ctx + 4 * cat;
/* Maps each of the 63 scan positions of an 8x8 block to the context
 * increment used for its last_significant_coeff_flag. */
5367 DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
5368 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5369 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5370 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5371 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one residual block with CABAC: coded_block_flag, the
 * significance map, and the coefficient levels/signs, writing the
 * (optionally dequantized, via qmul) coefficients into 'block' in
 * 'scantable' order and updating the non-zero-count caches.
 * NOTE(review): partial listing — several declarations (index[],
 * abslevel1, the CABACContext cc, parts of the DECODE_SIGNIFICANCE
 * macro and of the level loop) are elided between the numbered lines.
 */
5374 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5375 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context base tables, indexed [MB_FIELD][cat]; the second row holds
 * the separate field-coding contexts */
5376 static const int significant_coeff_flag_offset[2][6] = {
5377 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5378 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5380 static const int last_coeff_flag_offset[2][6] = {
5381 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5382 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5384 static const int coeff_abs_level_m1_offset[6] = {
5385 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* significance-map context increments for the 63 8x8 scan positions,
 * [0] = frame coding, [1] = field coding */
5387 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5388 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5389 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5390 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5391 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5392 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5393 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5394 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5395 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5401 int coeff_count = 0;
5404 int abslevelgt1 = 0;
5406 uint8_t *significant_coeff_ctx_base;
5407 uint8_t *last_coeff_ctx_base;
5408 uint8_t *abs_level_m1_ctx_base;
/* work on a local copy of the CABAC state so the compiler can keep it
 * in registers; copied back before every return */
5411 #define CABAC_ON_STACK
5413 #ifdef CABAC_ON_STACK
5416 cc.range = h->cabac.range;
5417 cc.low = h->cabac.low;
5418 cc.bytestream= h->cabac.bytestream;
5420 #define CC &h->cabac
5424 /* cat: 0-> DC 16x16 n = 0
5425 * 1-> AC 16x16 n = luma4x4idx
5426 * 2-> Luma4x4 n = luma4x4idx
5427 * 3-> DC Chroma n = iCbCr
5428 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5429 * 5-> Luma8x8 n = 4 * luma8x8idx
5432 /* read coded block flag */
5434 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients: clear the non-zero-count cache entry and return */
5435 if( cat == 1 || cat == 2 )
5436 h->non_zero_count_cache[scan8[n]] = 0;
5438 h->non_zero_count_cache[scan8[16+n]] = 0;
5439 #ifdef CABAC_ON_STACK
5440 h->cabac.range = cc.range ;
5441 h->cabac.low = cc.low ;
5442 h->cabac.bytestream= cc.bytestream;
5448 significant_coeff_ctx_base = h->cabac_state
5449 + significant_coeff_flag_offset[MB_FIELD][cat];
5450 last_coeff_ctx_base = h->cabac_state
5451 + last_coeff_flag_offset[MB_FIELD][cat];
5452 abs_level_m1_ctx_base = h->cabac_state
5453 + coeff_abs_level_m1_offset[cat];
/* significance map: record positions of non-zero coefficients in
 * index[], stopping early when last_coeff_flag fires */
5456 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5457 for(last= 0; last < coefs; last++) { \
5458 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5459 if( get_cabac( CC, sig_ctx )) { \
5460 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5461 index[coeff_count++] = last; \
5462 if( get_cabac( CC, last_ctx ) ) { \
5468 if( last == max_coeff -1 ) {\
5469 index[coeff_count++] = last;\
5471 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths; the generic C macro is the fallback */
5472 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5473 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5475 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5477 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5479 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5482 assert(coeff_count > 0);
/* record per-category non-zero info in the caches/cbp table */
5485 h->cbp_table[mb_xy] |= 0x100;
5486 else if( cat == 1 || cat == 2 )
5487 h->non_zero_count_cache[scan8[n]] = coeff_count;
5489 h->cbp_table[mb_xy] |= 0x40 << n;
5491 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5494 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* decode levels in reverse scan order: coeff_abs_level_minus1 (unary
 * prefix + bypass exp-golomb tail) and a bypass sign bit */
5497 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5498 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5499 int j= scantable[index[coeff_count]];
5501 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: only the sign remains */
5503 block[j] = get_cabac_bypass_sign( CC, -1);
5505 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5511 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5512 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5516 if( coeff_abs >= 15 ) {
/* escape: bypass-coded exponential suffix for large magnitudes */
5518 while( get_cabac_bypass( CC ) ) {
5524 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5530 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5531 else block[j] = coeff_abs;
5533 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5534 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5540 #ifdef CABAC_ON_STACK
5541 h->cabac.range = cc.range ;
5542 h->cabac.low = cc.low ;
5543 h->cabac.bytestream= cc.bytestream;
/**
 * Computes h->top_mb_xy and h->left_mb_xy[] for the current macroblock,
 * with the MBAFF adjustments: when the neighbour pair uses a different
 * frame/field coding mode than the current MB, the neighbour index is
 * moved to the appropriate member of its MB pair.
 * NOTE(review): partial listing — the MBAFF guard around the pair logic
 * and some closing braces are elided.
 */
5548 static inline void compute_mb_neighbors(H264Context *h)
5550 MpegEncContext * const s = &h->s;
5551 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
/* default (progressive) neighbours: directly above and to the left */
5552 h->top_mb_xy = mb_xy - s->mb_stride;
5553 h->left_mb_xy[0] = mb_xy - 1;
5555 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5556 const int top_pair_xy = pair_xy - s->mb_stride;
5557 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5558 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5559 const int curr_mb_frame_flag = !MB_FIELD;
5560 const int bottom = (s->mb_y & 1);
5562 ? !curr_mb_frame_flag // bottom macroblock
5563 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5565 h->top_mb_xy -= s->mb_stride;
5567 if (left_mb_frame_flag != curr_mb_frame_flag) {
5568 h->left_mb_xy[0] = pair_xy - 1;
5570 } else if (FIELD_PICTURE) {
/* field pictures: rows are interleaved, so "above" is one MB row further */
5571 h->top_mb_xy -= s->mb_stride;
5577 * decodes a macroblock
5578 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): partial listing — many lines (skip handling branches,
 * declarations, else branches, closing braces) are elided between the
 * numbered lines of this function. */
5580 static int decode_mb_cabac(H264Context *h) {
5581 MpegEncContext * const s = &h->s;
5582 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5583 int mb_type, partition_count, cbp = 0;
5584 int dct8x8_allowed= h->pps.transform_8x8_mode;
5586 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5588 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (inter slices only) --- */
5589 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
5591 /* a skipped mb needs the aff flag from the following mb */
5592 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5593 predict_field_decoding_flag(h);
5594 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5595 skip = h->next_mb_skipped;
5597 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5598 /* read skip flags */
5600 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5601 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5602 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5603 if(h->next_mb_skipped)
5604 predict_field_decoding_flag(h);
5606 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5611 h->cbp_table[mb_xy] = 0;
5612 h->chroma_pred_mode_table[mb_xy] = 0;
5613 h->last_qscale_diff = 0;
5620 if( (s->mb_y&1) == 0 )
5622 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5624 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5626 h->prev_mb_skipped = 0;
/* --- macroblock type --- */
5628 compute_mb_neighbors(h);
5629 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5630 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the raw type through the per-slice-type info tables; inter
 * slices fall through to the intra path for intra types */
5634 if( h->slice_type == FF_B_TYPE ) {
5636 partition_count= b_mb_type_info[mb_type].partition_count;
5637 mb_type= b_mb_type_info[mb_type].type;
5640 goto decode_intra_mb;
5642 } else if( h->slice_type == FF_P_TYPE ) {
5644 partition_count= p_mb_type_info[mb_type].partition_count;
5645 mb_type= p_mb_type_info[mb_type].type;
5648 goto decode_intra_mb;
5651 assert(h->slice_type == FF_I_TYPE);
5653 partition_count = 0;
5654 cbp= i_mb_type_info[mb_type].cbp;
5655 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5656 mb_type= i_mb_type_info[mb_type].type;
5659 mb_type |= MB_TYPE_INTERLACED;
5661 h->slice_table[ mb_xy ]= h->slice_num;
/* --- PCM macroblock: raw samples follow in the bytestream --- */
5663 if(IS_INTRA_PCM(mb_type)) {
5667 // We assume these blocks are very rare so we do not optimize it.
5668 // FIXME The two following lines get the bitstream position in the cabac
5669 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5670 ptr= h->cabac.bytestream;
5671 if(h->cabac.low&0x1) ptr--;
5673 if(h->cabac.low&0x1FF) ptr--;
5676 // The pixels are stored in the same order as levels in h->mb array.
5677 for(y=0; y<16; y++){
5678 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5679 for(x=0; x<16; x++){
5680 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5681 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5685 const int index= 256 + 4*(y&3) + 32*(y>>2);
5687 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5688 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5692 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5694 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5695 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC decoder after the raw PCM bytes */
5699 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5701 // All blocks are present
5702 h->cbp_table[mb_xy] = 0x1ef;
5703 h->chroma_pred_mode_table[mb_xy] = 0;
5704 // In deblocking, the quantizer is 0
5705 s->current_picture.qscale_table[mb_xy]= 0;
5706 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5707 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5708 // All coeffs are present
5709 memset(h->non_zero_count[mb_xy], 16, 16);
5710 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF: the caches index references per field, so double temporarily */
5715 h->ref_count[0] <<= 1;
5716 h->ref_count[1] <<= 1;
5719 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5721 if( IS_INTRA( mb_type ) ) {
5723 if( IS_INTRA4x4( mb_type ) ) {
5724 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5725 mb_type |= MB_TYPE_8x8DCT;
5726 for( i = 0; i < 16; i+=4 ) {
5727 int pred = pred_intra_mode( h, i );
5728 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5729 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5732 for( i = 0; i < 16; i++ ) {
5733 int pred = pred_intra_mode( h, i );
5734 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5736 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5739 write_back_intra_pred_mode(h);
5740 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5742 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5743 if( h->intra16x16_pred_mode < 0 ) return -1;
5745 h->chroma_pred_mode_table[mb_xy] =
5746 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5748 pred_mode= check_intra_pred_mode( h, pred_mode );
5749 if( pred_mode < 0 ) return -1;
5750 h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitions: sub-mb types, refs and MVs per sub-block --- */
5751 } else if( partition_count == 4 ) {
5752 int i, j, sub_partition_count[4], list, ref[2][4];
5754 if( h->slice_type == FF_B_TYPE ) {
5755 for( i = 0; i < 4; i++ ) {
5756 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5757 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5758 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5760 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5761 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5762 pred_direct_motion(h, &mb_type);
5763 h->ref_cache[0][scan8[4]] =
5764 h->ref_cache[1][scan8[4]] =
5765 h->ref_cache[0][scan8[12]] =
5766 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5767 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5768 for( i = 0; i < 4; i++ )
5769 if( IS_DIRECT(h->sub_mb_type[i]) )
5770 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5774 for( i = 0; i < 4; i++ ) {
5775 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5776 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5777 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5781 for( list = 0; list < h->list_count; list++ ) {
5782 for( i = 0; i < 4; i++ ) {
5783 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5784 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5785 if( h->ref_count[list] > 1 )
5786 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5792 h->ref_cache[list][ scan8[4*i]+1 ]=
5793 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5798 dct8x8_allowed = get_dct8x8_allowed(h);
5800 for(list=0; list<h->list_count; list++){
5802 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5803 if(IS_DIRECT(h->sub_mb_type[i])){
5804 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5808 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5809 const int sub_mb_type= h->sub_mb_type[i];
5810 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5811 for(j=0; j<sub_partition_count[i]; j++){
5814 const int index= 4*i + block_width*j;
5815 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5816 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5817 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5819 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5820 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5821 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV/MVD across the cache cells the partition covers */
5823 if(IS_SUB_8X8(sub_mb_type)){
5825 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5827 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5830 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5832 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5833 }else if(IS_SUB_8X4(sub_mb_type)){
5834 mv_cache[ 1 ][0]= mx;
5835 mv_cache[ 1 ][1]= my;
5837 mvd_cache[ 1 ][0]= mx - mpx;
5838 mvd_cache[ 1 ][1]= my - mpy;
5839 }else if(IS_SUB_4X8(sub_mb_type)){
5840 mv_cache[ 8 ][0]= mx;
5841 mv_cache[ 8 ][1]= my;
5843 mvd_cache[ 8 ][0]= mx - mpx;
5844 mvd_cache[ 8 ][1]= my - mpy;
5846 mv_cache[ 0 ][0]= mx;
5847 mv_cache[ 0 ][1]= my;
5849 mvd_cache[ 0 ][0]= mx - mpx;
5850 mvd_cache[ 0 ][1]= my - mpy;
5853 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5854 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5855 p[0] = p[1] = p[8] = p[9] = 0;
5856 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- direct macroblock --- */
5860 } else if( IS_DIRECT(mb_type) ) {
5861 pred_direct_motion(h, &mb_type);
5862 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5863 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5864 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5866 int list, mx, my, i, mpx, mpy;
5867 if(IS_16X16(mb_type)){
5868 for(list=0; list<h->list_count; list++){
5869 if(IS_DIR(mb_type, 0, list)){
5870 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5871 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5873 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5875 for(list=0; list<h->list_count; list++){
5876 if(IS_DIR(mb_type, 0, list)){
5877 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5879 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5880 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5881 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5883 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5884 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5886 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5889 else if(IS_16X8(mb_type)){
5890 for(list=0; list<h->list_count; list++){
5892 if(IS_DIR(mb_type, i, list)){
5893 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5894 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5896 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5899 for(list=0; list<h->list_count; list++){
5901 if(IS_DIR(mb_type, i, list)){
5902 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5903 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5904 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5905 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5907 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5908 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5910 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5911 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5916 assert(IS_8X16(mb_type));
5917 for(list=0; list<h->list_count; list++){
5919 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5920 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5921 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5923 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5926 for(list=0; list<h->list_count; list++){
5928 if(IS_DIR(mb_type, i, list)){
5929 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5930 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5931 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5933 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5934 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5935 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5937 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5938 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5945 if( IS_INTER( mb_type ) ) {
5946 h->chroma_pred_mode_table[mb_xy] = 0;
5947 write_back_motion( h, mb_type );
/* --- coded block pattern (not signalled for intra16x16) --- */
5950 if( !IS_INTRA16x16( mb_type ) ) {
5951 cbp = decode_cabac_mb_cbp_luma( h );
5952 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5955 h->cbp_table[mb_xy] = h->cbp = cbp;
5957 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5958 if( decode_cabac_mb_transform_size( h ) )
5959 mb_type |= MB_TYPE_8x8DCT;
5961 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
5963 if( cbp || IS_INTRA16x16( mb_type ) ) {
5964 const uint8_t *scan, *scan8x8, *dc_scan;
5965 const uint32_t *qmul;
5968 if(IS_INTERLACED(mb_type)){
5969 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5970 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5971 dc_scan= luma_dc_field_scan;
5973 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5974 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5975 dc_scan= luma_dc_zigzag_scan;
5978 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5979 if( dqp == INT_MIN ){
5980 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into the legal 0..51 range */
5984 if(((unsigned)s->qscale) > 51){
5985 if(s->qscale<0) s->qscale+= 52;
5986 else s->qscale-= 52;
5988 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5989 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5991 if( IS_INTRA16x16( mb_type ) ) {
5993 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5994 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5997 qmul = h->dequant4_coeff[0][s->qscale];
5998 for( i = 0; i < 16; i++ ) {
5999 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6000 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6003 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6007 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6008 if( cbp & (1<<i8x8) ) {
6009 if( IS_8x8DCT(mb_type) ) {
6010 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6011 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6013 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6014 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6015 const int index = 4*i8x8 + i4x4;
6016 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6018 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6019 //STOP_TIMER("decode_residual")
6023 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6024 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6031 for( c = 0; c < 2; c++ ) {
6032 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6033 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6039 for( c = 0; c < 2; c++ ) {
6040 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6041 for( i = 0; i < 4; i++ ) {
6042 const int index = 16 + 4 * c + i;
6043 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6044 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6048 uint8_t * const nnz= &h->non_zero_count_cache[0];
6049 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6050 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual at all: clear all non-zero-count caches */
6053 uint8_t * const nnz= &h->non_zero_count_cache[0];
6054 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6055 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6056 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6057 h->last_qscale_diff = 0;
6060 s->current_picture.qscale_table[mb_xy]= s->qscale;
6061 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling done above */
6064 h->ref_count[0] >>= 1;
6065 h->ref_count[1] >>= 1;
/**
 * Deblocks one vertical luma edge (16 pixels tall).
 * For bS < 4 the clipped normal filter is delegated to the DSP
 * function; bS == 4 (intra edge) uses the strong filter inline.
 * NOTE(review): partial listing — the bS<4 branch header, loop
 * variables and some braces are elided.
 */
6072 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6074 const int index_a = qp + h->slice_alpha_c0_offset;
6075 const int alpha = (alpha_table+52)[index_a];
6076 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6081 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6082 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6084 /* 16px edge length, because bS=4 is triggered by being at
6085 * the edge of an intra MB, so all 4 bS are the same */
6086 for( d = 0; d < 16; d++ ) {
6087 const int p0 = pix[-1];
6088 const int p1 = pix[-2];
6089 const int p2 = pix[-3];
6091 const int q0 = pix[0];
6092 const int q1 = pix[1];
6093 const int q2 = pix[2];
/* filter only if the edge looks like a blocking artifact, not real detail */
6095 if( FFABS( p0 - q0 ) < alpha &&
6096 FFABS( p1 - p0 ) < beta &&
6097 FFABS( q1 - q0 ) < beta ) {
6099 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6100 if( FFABS( p2 - p0 ) < beta)
6102 const int p3 = pix[-4];
/* strong filtering of the p side (3 samples) */
6104 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6105 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6106 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6109 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6111 if( FFABS( q2 - q0 ) < beta)
6113 const int q3 = pix[3];
/* strong filtering of the q side (3 samples) */
6115 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6116 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6117 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6120 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6124 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6125 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6127 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge; both the clipped (bS < 4) and
 * intra-strength filters are delegated to the DSP functions.
 * NOTE(review): partial listing — the tc[] declaration, loop header
 * and branch structure are elided.
 */
6133 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6135 const int index_a = qp + h->slice_alpha_c0_offset;
6136 const int alpha = (alpha_table+52)[index_a];
6137 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 (and 0 as the "skip" marker) per the chroma filter DSP contract */
6142 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6143 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6145 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks one vertical luma edge of an MBAFF macroblock, row by row:
 * each of the 16 rows can have its own bS and (via qp[2]) its own QP,
 * so the filter is applied per-pixel-row rather than via the DSP call.
 * NOTE(review): partial listing — parts of the bS_index computation
 * and several braces are elided.
 */
6149 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6151 for( i = 0; i < 16; i++, pix += stride) {
6157 int bS_index = (i >> 1);
6160 bS_index |= (i & 1);
6163 if( bS[bS_index] == 0 ) {
/* pick the QP of whichever field/neighbour this row belongs to */
6167 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6168 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6169 alpha = (alpha_table+52)[index_a];
6170 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (clipped) filter for bS 1..3 */
6172 if( bS[bS_index] < 4 ) {
6173 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6174 const int p0 = pix[-1];
6175 const int p1 = pix[-2];
6176 const int p2 = pix[-3];
6177 const int q0 = pix[0];
6178 const int q1 = pix[1];
6179 const int q2 = pix[2];
6181 if( FFABS( p0 - q0 ) < alpha &&
6182 FFABS( p1 - p0 ) < beta &&
6183 FFABS( q1 - q0 ) < beta ) {
6187 if( FFABS( p2 - p0 ) < beta ) {
6188 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6191 if( FFABS( q2 - q0 ) < beta ) {
6192 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6196 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6197 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6198 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6199 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (intra, bS == 4) filter */
6202 const int p0 = pix[-1];
6203 const int p1 = pix[-2];
6204 const int p2 = pix[-3];
6206 const int q0 = pix[0];
6207 const int q1 = pix[1];
6208 const int q2 = pix[2];
6210 if( FFABS( p0 - q0 ) < alpha &&
6211 FFABS( p1 - p0 ) < beta &&
6212 FFABS( q1 - q0 ) < beta ) {
6214 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6215 if( FFABS( p2 - p0 ) < beta)
6217 const int p3 = pix[-4];
6219 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6220 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6221 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6224 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6226 if( FFABS( q2 - q0 ) < beta)
6228 const int q3 = pix[3];
6230 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6231 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6232 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6235 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6239 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6240 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6242 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one vertical chroma edge of an MBAFF macroblock, row by row
 * (8 chroma rows), selecting bS and QP per row as in the luma variant.
 * Chroma only modifies p0/q0.
 * NOTE(review): partial listing — the bS_index computation and several
 * braces are elided.
 */
6247 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6249 for( i = 0; i < 8; i++, pix += stride) {
6257 if( bS[bS_index] == 0 ) {
6261 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6262 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6263 alpha = (alpha_table+52)[index_a];
6264 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (clipped) chroma filter: tc = tc0 + 1 */
6266 if( bS[bS_index] < 4 ) {
6267 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6268 const int p0 = pix[-1];
6269 const int p1 = pix[-2];
6270 const int q0 = pix[0];
6271 const int q1 = pix[1];
6273 if( FFABS( p0 - q0 ) < alpha &&
6274 FFABS( p1 - p0 ) < beta &&
6275 FFABS( q1 - q0 ) < beta ) {
6276 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6278 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6279 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6280 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (intra, bS == 4) chroma filter */
6283 const int p0 = pix[-1];
6284 const int p1 = pix[-2];
6285 const int q0 = pix[0];
6286 const int q1 = pix[1];
6288 if( FFABS( p0 - q0 ) < alpha &&
6289 FFABS( p1 - p0 ) < beta &&
6290 FFABS( q1 - q0 ) < beta ) {
6292 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6293 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6294 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge; the horizontal counterpart of
 * filter_mb_edgev, addressing samples across rows via pix_next.
 * For bS < 4 the clipped filter is delegated to the DSP function;
 * bS == 4 uses the strong filter inline.
 * NOTE(review): partial listing — the bS<4 branch header, loop
 * variables and some braces are elided.
 */
6300 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6302 const int index_a = qp + h->slice_alpha_c0_offset;
6303 const int alpha = (alpha_table+52)[index_a];
6304 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6305 const int pix_next = stride;
6310 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6311 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6313 /* 16px edge length, see filter_mb_edgev */
6314 for( d = 0; d < 16; d++ ) {
6315 const int p0 = pix[-1*pix_next];
6316 const int p1 = pix[-2*pix_next];
6317 const int p2 = pix[-3*pix_next];
6318 const int q0 = pix[0];
6319 const int q1 = pix[1*pix_next];
6320 const int q2 = pix[2*pix_next];
6322 if( FFABS( p0 - q0 ) < alpha &&
6323 FFABS( p1 - p0 ) < beta &&
6324 FFABS( q1 - q0 ) < beta ) {
6326 const int p3 = pix[-4*pix_next];
6327 const int q3 = pix[ 3*pix_next];
6329 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6330 if( FFABS( p2 - p0 ) < beta) {
/* strong filtering of the p side (3 samples) */
6332 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6333 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6334 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6337 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6339 if( FFABS( q2 - q0 ) < beta) {
/* strong filtering of the q side (3 samples) */
6341 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6342 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6343 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6346 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6350 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6351 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6353 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge; both the clipped (bS < 4) and
 * intra-strength filters are delegated to the DSP functions.
 * NOTE(review): partial listing — the tc[] declaration, loop header
 * and branch structure are elided.
 */
6360 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6362 const int index_a = qp + h->slice_alpha_c0_offset;
6363 const int alpha = (alpha_table+52)[index_a];
6364 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 (and 0 as the "skip" marker) per the chroma filter DSP contract */
6369 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6370 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6372 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock (non-MBAFF only, see assert).
 * Falls back to the general filter_mb() for border macroblocks and other
 * cases the fast path cannot handle; otherwise computes edge strengths
 * (bS) directly and calls the per-edge filters.
 * NOTE(review): many interior lines are missing from this excerpt (gaps
 * in the embedded numbering); braces and some branches are not visible. */
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    /* in a bottom-field picture the first macroblock row is row 1 */
    int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
    mb_xy = mb_x + mb_y*s->mb_stride;
    /* fall back to the full filter at picture borders, when no optimized
     * strength function exists, when cb/cr use different QP offsets, or
     * when deblocking==2 would cross a slice boundary */
    if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
       (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
                                      h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
        filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
    assert(!FRAME_MBAFF);
    mb_type = s->current_picture.mb_type[mb_xy];
    /* edge QPs are the average of the two adjacent macroblocks' QPs */
    qp = s->current_picture.qscale_table[mb_xy];
    qp0 = s->current_picture.qscale_table[mb_xy-1];
    qp1 = s->current_picture.qscale_table[h->top_mb_xy];
    qpc = get_chroma_qp( h, 0, qp );
    qpc0 = get_chroma_qp( h, 0, qp0 );
    qpc1 = get_chroma_qp( h, 0, qp1 );
    qp0 = (qp + qp0 + 1) >> 1;
    qp1 = (qp + qp1 + 1) >> 1;
    qpc0 = (qpc + qpc0 + 1) >> 1;
    qpc1 = (qpc + qpc1 + 1) >> 1;
    /* below this threshold the filter provably changes nothing, so skip */
    qp_thresh = 15 - h->slice_alpha_c0_offset;
    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
    if( IS_INTRA(mb_type) ) {
        /* intra MBs use fixed strengths: 4 on MB borders (3 in fields), 3 inside */
        int16_t bS4[4] = {4,4,4,4};
        int16_t bS3[4] = {3,3,3,3};
        int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
        if( IS_8x8DCT(mb_type) ) {
            /* with 8x8 transform only every other luma edge is filtered */
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
        /* chroma: only edges 0 and 2 exist at half resolution */
        filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
        /* inter MB: pack four 16-bit strengths per edge into one uint64 */
        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
            bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
            /* mask_edge: how often to recheck mv-based bS between edges */
            int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
                             (mb_type & MB_TYPE_16x8) ? 1 : 0;
            int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
                             && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
            int step = IS_8x8DCT(mb_type) ? 2 : 1;
            edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
            s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
                                              (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
        /* intra neighbours force maximum strength on the shared edge */
        if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
            bSv[0][0] = 0x0004000400040004ULL;
        if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
            bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* apply one luma + (for even edges) chroma filter pass; dir 0 = vertical */
#define FILTER(hv,dir,edge)\
        if(bSv[dir][edge]) {\
            filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
                filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
                filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
        } else if( IS_8x8DCT(mb_type) ) {
/* General (slow-path) deblocking of one macroblock, handling MBAFF,
 * field pictures and slice-boundary rules that filter_mb_fast() cannot.
 * Computes boundary strength bS per 4-pixel edge segment from intra
 * status, coded coefficients, reference frames and motion-vector deltas,
 * then calls the per-edge luma/chroma filters.
 * NOTE(review): many interior lines are missing from this excerpt (gaps
 * in the embedded numbering); braces and some branches are not visible. */
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type = s->current_picture.mb_type[mb_xy];
    /* vertical mv threshold is halved for interlaced blocks */
    const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
    int first_vertical_edge_done = 0;
    /* FIXME: A given frame may occupy more than one position in
     * the reference list. So ref2frm should be populated with
     * frame numbers, not indices. */
    static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
                                    16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
    //for sufficiently low qp, filtering wouldn't do anything
    //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
    int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
    int qp = s->current_picture.qscale_table[mb_xy];
    && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
    && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
    // left mb is in picture
    && h->slice_table[mb_xy-1] != 255
    // and current and left pair do not have the same interlaced type
    && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
    // and left mb is in the same slice if deblocking_filter == 2
    && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
        /* First vertical edge is different in MBAFF frames
         * There are 8 different bS to compute and 2 different Qp
         */
        const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
        const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
        int mb_qp, mbn0_qp, mbn1_qp;
        first_vertical_edge_done = 1;
        if( IS_INTRA(mb_type) )
            bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
            for( i = 0; i < 8; i++ ) {
                /* field MBs pair segments vertically, frame MBs alternate */
                int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
                if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
                else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
                         /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
                         h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
        /* two QP sets: one per left-neighbour field of the MB pair */
        mb_qp = s->current_picture.qscale_table[mb_xy];
        mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
        mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
        qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
        bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
        rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
        qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
        bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
        rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
        tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
        { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
        filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
        filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
        filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
    /* dir : 0 -> vertical edge, 1 -> horizontal edge */
    for( dir = 0; dir < 2; dir++ )
        const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
        const int mbm_type = s->current_picture.mb_type[mbm_xy];
        /* 255 marks "no slice" -> neighbour outside picture, skip edge 0 */
        int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
        const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
                          == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
        // how often to recheck mv-based bS when iterating between edges
        const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
                              (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
        // how often to recheck mv-based bS when iterating along each edge
        const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
        if (first_vertical_edge_done) {
            first_vertical_edge_done = 0;
        if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
        if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
            && !IS_INTERLACED(mb_type)
            && IS_INTERLACED(mbm_type)
            // This is a special case in the norm where the filtering must
            // be done twice (one each of the field) even if we are in a
            // frame macroblock.
            static const int nnz_idx[4] = {4,5,6,3};
            unsigned int tmp_linesize   = 2 *   linesize;
            unsigned int tmp_uvlinesize = 2 * uvlinesize;
            int mbn_xy = mb_xy - 2 * s->mb_stride;
            for(j=0; j<2; j++, mbn_xy += s->mb_stride){
                if( IS_INTRA(mb_type) ||
                    IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
                    bS[0] = bS[1] = bS[2] = bS[3] = 3;
                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
                    for( i = 0; i < 4; i++ ) {
                        if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
                            mbn_nnz[nnz_idx[i]] != 0 )
                // Do not use s->qscale as luma quantizer because it has not the same
                // value in IPCM macroblocks.
                qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
                tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
                { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
                filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
                filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
        for( edge = start; edge < edges; edge++ ) {
            /* mbn_xy: neighbor macroblock */
            const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
            const int mbn_type = s->current_picture.mb_type[mbn_xy];
            /* with 8x8 transform, odd inner edges carry no block boundary */
            if( (edge&1) && IS_8x8DCT(mb_type) )
            if( IS_INTRA(mb_type) ||
                IS_INTRA(mbn_type) ) {
                if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
                     || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
                bS[0] = bS[1] = bS[2] = bS[3] = value;
                if( edge & mask_edge ) {
                    bS[0] = bS[1] = bS[2] = bS[3] = 0;
                else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
                    bS[0] = bS[1] = bS[2] = bS[3] = 1;
                else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
                    int b_idx= 8 + 4 + edge * (dir ? 8:1);
                    int bn_idx= b_idx - (dir ? 8:1);
                    for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
                        /* bS=1 if refs differ or any mv component differs by >= 1 px */
                        v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
                    bS[0] = bS[1] = bS[2] = bS[3] = v;
                    for( i = 0; i < 4; i++ ) {
                        int x = dir == 0 ? edge : i;
                        int y = dir == 0 ? i    : edge;
                        int b_idx= 8 + 4 + x + 8*y;
                        int bn_idx= b_idx - (dir ? 8:1);
                        if( h->non_zero_count_cache[b_idx] != 0 ||
                            h->non_zero_count_cache[bn_idx] != 0 ) {
                            for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
                                if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
                                    FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                                    FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
            if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
            // Do not use s->qscale as luma quantizer because it has not the same
            // value in IPCM macroblocks.
            qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
            //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
            tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
            { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
                filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
                if( (edge&1) == 0 ) {
                    filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
                                      ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                    filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
                                      ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
                if( (edge&1) == 0 ) {
                    filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
                                      ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                    filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
                                      ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6750 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6751 MpegEncContext * const s = &h->s;
6752 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6756 if( h->pps.cabac ) {
6760 align_get_bits( &s->gb );
6763 ff_init_cabac_states( &h->cabac);
6764 ff_init_cabac_decoder( &h->cabac,
6765 s->gb.buffer + get_bits_count(&s->gb)/8,
6766 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6767 /* calculate pre-state */
6768 for( i= 0; i < 460; i++ ) {
6770 if( h->slice_type == FF_I_TYPE )
6771 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6773 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6776 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6778 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6783 int ret = decode_mb_cabac(h);
6785 //STOP_TIMER("decode_mb_cabac")
6787 if(ret>=0) hl_decode_mb(h);
6789 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6792 if(ret>=0) ret = decode_mb_cabac(h);
6794 if(ret>=0) hl_decode_mb(h);
6797 eos = get_cabac_terminate( &h->cabac );
6799 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6800 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6801 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6805 if( ++s->mb_x >= s->mb_width ) {
6807 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6809 if(FIELD_OR_MBAFF_PICTURE) {
6814 if( eos || s->mb_y >= s->mb_height ) {
6815 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6816 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6823 int ret = decode_mb_cavlc(h);
6825 if(ret>=0) hl_decode_mb(h);
6827 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6829 ret = decode_mb_cavlc(h);
6831 if(ret>=0) hl_decode_mb(h);
6836 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6837 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6842 if(++s->mb_x >= s->mb_width){
6844 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6846 if(FIELD_OR_MBAFF_PICTURE) {
6849 if(s->mb_y >= s->mb_height){
6850 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6852 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6853 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6857 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6864 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6865 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6866 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6867 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6871 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6880 for(;s->mb_y < s->mb_height; s->mb_y++){
6881 for(;s->mb_x < s->mb_width; s->mb_x++){
6882 int ret= decode_mb(h);
6887 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6888 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6893 if(++s->mb_x >= s->mb_width){
6895 if(++s->mb_y >= s->mb_height){
6896 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6897 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6901 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6908 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6909 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6910 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6914 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6921 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6924 return -1; //not reached
/* Parse an SEI "user data unregistered" payload: copy up to 16+256 bytes,
 * detect an x264 encoder version string (stored in h->x264_build for
 * bug workarounds), optionally log the text, and skip any remainder.
 * NOTE(review): some interior lines are missing from this excerpt (gaps
 * in the embedded numbering). */
static int decode_unregistered_user_data(H264Context *h, int size){
    MpegEncContext * const s = &h->s;
    /* first 16 bytes are the UUID, text payload follows */
    uint8_t user_data[16+256];
    for(i=0; i<sizeof(user_data)-1 && i<size; i++){
        user_data[i]= get_bits(&s->gb, 8);
    e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
    if(e==1 && build>=0)
        h->x264_build= build;
    if(s->avctx->debug & FF_DEBUG_BUGS)
        av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
    /* consume bytes of the payload not captured above */
    skip_bits(&s->gb, 8);
/* Parse SEI NAL: iterate payloads while bits remain. Payload type and
 * size each use the 0xFF-extension coding (each 255 byte adds 255 and
 * continues). Unregistered user data is decoded; others are skipped.
 * NOTE(review): some interior lines are missing from this excerpt (gaps
 * in the embedded numbering). */
static int decode_sei(H264Context *h){
    MpegEncContext * const s = &h->s;
    while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
            type+= show_bits(&s->gb, 8);
        }while(get_bits(&s->gb, 8) == 255);
            size+= show_bits(&s->gb, 8);
        }while(get_bits(&s->gb, 8) == 255);
            if(decode_unregistered_user_data(h, size) < 0)
            /* unknown payload type: skip it wholesale */
            skip_bits(&s->gb, 8*size);
    //FIXME check bits here
    align_get_bits(&s->gb);
/* Parse (and discard) the hypothetical reference decoder (HRD) parameters
 * from the VUI. Nothing is stored; the bitstream position is simply
 * advanced past cpb_count coded-picture-buffer entries and the delay
 * length fields.
 * NOTE(review): a few lines are missing from this excerpt (gaps in the
 * embedded numbering). */
static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    cpb_count = get_ue_golomb(&s->gb) + 1;
    get_bits(&s->gb, 4); /* bit_rate_scale */
    get_bits(&s->gb, 4); /* cpb_size_scale */
    for(i=0; i<cpb_count; i++){
        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
        get_bits1(&s->gb);     /* cbr_flag */
    get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
    get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
    get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
    get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the SPS VUI parameters: sample aspect ratio, (mostly discarded)
 * video signal / chroma location info, timing info, optional HRD
 * parameters and the bitstream restriction block (num_reorder_frames is
 * validated and stored).
 * NOTE(review): some interior lines are missing from this excerpt (gaps
 * in the embedded numbering). */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    int aspect_ratio_info_present_flag;
    unsigned int aspect_ratio_idc;
    int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
    aspect_ratio_info_present_flag= get_bits1(&s->gb);
    if( aspect_ratio_info_present_flag ) {
        aspect_ratio_idc= get_bits(&s->gb, 8);
        if( aspect_ratio_idc == EXTENDED_SAR ) {
            /* explicit 16-bit num/den pair */
            sps->sar.num= get_bits(&s->gb, 16);
            sps->sar.den= get_bits(&s->gb, 16);
        }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
            /* table-defined SAR */
            sps->sar= pixel_aspect[aspect_ratio_idc];
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
        get_bits1(&s->gb);      /* overscan_appropriate_flag */
    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
        get_bits(&s->gb, 3);    /* video_format */
        get_bits1(&s->gb);      /* video_full_range_flag */
        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
            get_bits(&s->gb, 8); /* colour_primaries */
            get_bits(&s->gb, 8); /* transfer_characteristics */
            get_bits(&s->gb, 8); /* matrix_coefficients */
    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
    sps->timing_info_present_flag = get_bits1(&s->gb);
    if(sps->timing_info_present_flag){
        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
        sps->time_scale = get_bits_long(&s->gb, 32);
        sps->fixed_frame_rate_flag = get_bits1(&s->gb);
    nal_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(nal_hrd_parameters_present_flag)
        decode_hrd_parameters(h, sps);
    vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(vcl_hrd_parameters_present_flag)
        decode_hrd_parameters(h, sps);
    if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
        get_bits1(&s->gb);     /* low_delay_hrd_flag */
    get_bits1(&s->gb);         /* pic_struct_present_flag */
    sps->bitstream_restriction_flag = get_bits1(&s->gb);
    if(sps->bitstream_restriction_flag){
        unsigned int num_reorder_frames;
        get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
        get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
        get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
        get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
        get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
        num_reorder_frames= get_ue_golomb(&s->gb);
        get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
        /* bound check before storing — golomb can return huge values */
        if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
        sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list (16 or 64 entries, zigzag order).
 * If the "present" bit is absent, copy the fallback list; if the first
 * delta yields 0, use the JVT default list; otherwise delta-decode with
 * "repeat last value" semantics when next becomes 0.
 * NOTE(review): the else/break lines are missing from this excerpt (gaps
 * in the embedded numbering). */
static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
                                const uint8_t *jvt_list, const uint8_t *fallback_list){
    MpegEncContext * const s = &h->s;
    int i, last = 8, next = 8;
    const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
        memcpy(factors, fallback_list, size*sizeof(uint8_t));
        for(i=0;i<size;i++){
            /* deltas are signed golomb, values wrap mod 256 */
            next = (last + get_se_golomb(&s->gb)) & 0xff;
            if(!i && !next){ /* matrix not written, we use the preset one */
                memcpy(factors, jvt_list, size*sizeof(uint8_t));
            last = factors[scan[i]] = next ? next : last;
/* Parse all scaling matrices of an SPS or PPS.
 * Fallback chain per the standard: a PPS falls back to the SPS matrices
 * when the SPS had them, otherwise to the flat/JVT defaults; within a
 * parameter set, each chroma list falls back to the previous list.
 * NOTE(review): some lines (array terminator, closing braces) are missing
 * from this excerpt (gaps in the embedded numbering). */
static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
    MpegEncContext * const s = &h->s;
    int fallback_sps = !is_sps && sps->scaling_matrix_present;
    const uint8_t *fallback[4] = {
        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
    if(get_bits1(&s->gb)){
        sps->scaling_matrix_present |= is_sps;
        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]);        // Intra, Y
        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]);        // Inter, Y
        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
        if(is_sps || pps->transform_8x8_mode){
            /* 8x8 lists only exist for SPS or when the PPS enables 8x8 DCT */
            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
    } else if(fallback_sps) {
        /* no PPS matrices present: inherit the SPS matrices wholesale */
        memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
        memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/*
 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
 * Validates that id < max; the entry is lazily zero-allocated on first use.
 * NOTE(review): several lines (return statements, braces) are missing from
 * this excerpt (gaps in the embedded numbering).
 */
alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
                    const size_t size, const char *name)
        av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
        vec[id] = av_mallocz(size);
            av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
7152 static inline int decode_seq_parameter_set(H264Context *h){
7153 MpegEncContext * const s = &h->s;
7154 int profile_idc, level_idc;
7155 unsigned int sps_id, tmp, mb_width, mb_height;
7159 profile_idc= get_bits(&s->gb, 8);
7160 get_bits1(&s->gb); //constraint_set0_flag
7161 get_bits1(&s->gb); //constraint_set1_flag
7162 get_bits1(&s->gb); //constraint_set2_flag
7163 get_bits1(&s->gb); //constraint_set3_flag
7164 get_bits(&s->gb, 4); // reserved
7165 level_idc= get_bits(&s->gb, 8);
7166 sps_id= get_ue_golomb(&s->gb);
7168 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7172 sps->profile_idc= profile_idc;
7173 sps->level_idc= level_idc;
7175 if(sps->profile_idc >= 100){ //high profile
7176 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7177 get_bits1(&s->gb); //residual_color_transform_flag
7178 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7179 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7180 sps->transform_bypass = get_bits1(&s->gb);
7181 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7183 sps->scaling_matrix_present = 0;
7185 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7186 sps->poc_type= get_ue_golomb(&s->gb);
7188 if(sps->poc_type == 0){ //FIXME #define
7189 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7190 } else if(sps->poc_type == 1){//FIXME #define
7191 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7192 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7193 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7194 tmp= get_ue_golomb(&s->gb);
7196 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7197 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7200 sps->poc_cycle_length= tmp;
7202 for(i=0; i<sps->poc_cycle_length; i++)
7203 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7204 }else if(sps->poc_type != 2){
7205 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7209 tmp= get_ue_golomb(&s->gb);
7210 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7211 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7214 sps->ref_frame_count= tmp;
7215 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7216 mb_width= get_ue_golomb(&s->gb) + 1;
7217 mb_height= get_ue_golomb(&s->gb) + 1;
7218 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7219 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7220 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7223 sps->mb_width = mb_width;
7224 sps->mb_height= mb_height;
7226 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7227 if(!sps->frame_mbs_only_flag)
7228 sps->mb_aff= get_bits1(&s->gb);
7232 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7234 #ifndef ALLOW_INTERLACE
7236 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7238 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7239 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7241 sps->crop= get_bits1(&s->gb);
7243 sps->crop_left = get_ue_golomb(&s->gb);
7244 sps->crop_right = get_ue_golomb(&s->gb);
7245 sps->crop_top = get_ue_golomb(&s->gb);
7246 sps->crop_bottom= get_ue_golomb(&s->gb);
7247 if(sps->crop_left || sps->crop_top){
7248 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7250 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7251 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7257 sps->crop_bottom= 0;
7260 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7261 if( sps->vui_parameters_present_flag )
7262 decode_vui_parameters(h, sps);
7264 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7265 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7266 sps_id, sps->profile_idc, sps->level_idc,
7268 sps->ref_frame_count,
7269 sps->mb_width, sps->mb_height,
7270 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7271 sps->direct_8x8_inference_flag ? "8B8" : "",
7272 sps->crop_left, sps->crop_right,
7273 sps->crop_top, sps->crop_bottom,
7274 sps->vui_parameters_present_flag ? "VUI" : ""
/* Fill pps->chroma_qp_table[t]: maps each luma QP (plus the chroma QP
 * index offset, clipped to [0,51]) to the chroma QP via chroma_qp[].
 * NOTE(review): the loop runs i < 255, so table entry 255 is never
 * written — looks off-by-one versus the 256-entry table; confirm against
 * how negative/wrapped QP indices reach this table.
 * NOTE(review): the opening brace / declaration line is missing from this
 * excerpt (gap in the embedded numbering). */
build_qp_table(PPS *pps, int t, int index)
    for(i = 0; i < 255; i++)
        pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL and store it in h->pps_buffers.
 * Reads entropy mode, slice groups (FMO unsupported — only logged),
 * reference counts, weighting, QP offsets, deblocking/intra-pred flags,
 * then the optional trailing 8x8-transform + scaling-matrix + second
 * chroma offset fields, and finally builds the chroma QP lookup tables.
 * NOTE(review): many interior lines are missing from this excerpt (gaps
 * in the embedded numbering); the block comment around the FMO syntax
 * table and several braces are not visible. */
static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
    MpegEncContext * const s = &h->s;
    unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
    pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
    tmp= get_ue_golomb(&s->gb);
    /* referenced SPS must exist before this PPS can be interpreted */
    if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
    pps->cabac= get_bits1(&s->gb);
    pps->pic_order_present= get_bits1(&s->gb);
    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
    if(pps->slice_group_count > 1 ){
        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
        av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
        switch(pps->mb_slice_group_map_type){
|   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
|    run_length[ i ]                                |1  |ue(v) |
|   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |      |
|    top_left_mb[ i ]                               |1  |ue(v) |
|    bottom_right_mb[ i ]                           |1  |ue(v) |
|   slice_group_change_direction_flag               |1  |u(1)  |
|   slice_group_change_rate_minus1                  |1  |ue(v) |
|   slice_group_id_cnt_minus1                       |1  |ue(v) |
|   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |      |
|    slice_group_id[ i ]                            |1  |u(v)  |
    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
    if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
        av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
        pps->ref_count[0]= pps->ref_count[1]= 1;
    pps->weighted_pred= get_bits1(&s->gb);
    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
    pps->init_qp= get_se_golomb(&s->gb) + 26;
    pps->init_qs= get_se_golomb(&s->gb) + 26;
    pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
    pps->constrained_intra_pred= get_bits1(&s->gb);
    pps->redundant_pic_cnt_present = get_bits1(&s->gb);
    pps->transform_8x8_mode= 0;
    h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
    /* flat (all-16) matrices as default until parsed otherwise */
    memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
    /* optional trailing fields: only present if bits remain in the NAL */
    if(get_bits_count(&s->gb) < bit_length){
        pps->transform_8x8_mode= get_bits1(&s->gb);
        decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
        pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
        pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
        build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
        h->pps.chroma_qp_diff= 1;
        /* same offsets -> share table 0 for cb and cr */
        memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
               pps_id, pps->sps_id,
               pps->cabac ? "CABAC" : "CAVLC",
               pps->slice_group_count,
               pps->ref_count[0], pps->ref_count[1],
               pps->weighted_pred ? "weighted" : "",
               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
               pps->deblocking_filter_parameters_present ? "LPAR" : "",
               pps->constrained_intra_pred ? "CONSTR" : "",
               pps->redundant_pic_cnt_present ? "REDU" : "",
               pps->transform_8x8_mode ? "8x8DCT" : ""
/**
7400  * Call decode_slice() for each context.
 *
7402  * @param h h264 master context
7403  * @param context_count number of contexts to execute
 */
/* NOTE(review): lines are missing from this extract (non-contiguous original
 * numbering) — the single-context `else` branch structure and closing braces
 * are not fully visible; code left byte-identical. */
7405 static void execute_decode_slices(H264Context *h, int context_count){
7406     MpegEncContext * const s = &h->s;
7407     AVCodecContext * const avctx= s->avctx;
/* Fast path: one context decodes directly on the calling thread. */
7411     if(context_count == 1) {
7412         decode_slice(avctx, h);
/* Multi-context path: propagate resilience settings to each worker context
 * and reset its per-slice error count before dispatch. */
7414         for(i = 1; i < context_count; i++) {
7415             hx = h->thread_context[i];
7416             hx->s.error_resilience = avctx->error_resilience;
7417             hx->s.error_count = 0;
/* Run decode_slice over all thread contexts via the codec's executor. */
7420         avctx->execute(avctx, (void *)decode_slice,
7421                        (void **)h->thread_context, NULL, context_count);
7423         /* pull back stuff from slices to master context */
7424         hx = h->thread_context[context_count - 1];
7425         s->mb_x = hx->s.mb_x;
7426         s->mb_y = hx->s.mb_y;
7427         s->dropable = hx->s.dropable;
7428         s->picture_structure = hx->s.picture_structure;
/* Accumulate per-context error counts into the master context. */
7429         for(i = 1; i < context_count; i++)
7430             h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units (AVC length-prefixed or Annex-B
 * start-code delimited), unescape each via decode_nal(), and dispatch on
 * nal_unit_type (slices, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
 * Slices are queued across up to h->max_contexts thread contexts and flushed
 * through execute_decode_slices().
 * NOTE(review): many lines are missing from this extract (non-contiguous
 * original numbering) — the main `for(;;)` loop header, several case labels,
 * error-path gotos and the final return are not visible; code left
 * byte-identical. */
7435 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7436     MpegEncContext * const s = &h->s;
7437     AVCodecContext * const avctx= s->avctx;
7439     H264Context *hx; ///< thread context
7440     int context_count = 0;
7442     h->max_contexts = avctx->thread_count;
/* Debug hexdump of the first bytes of the buffer (normally compiled out —
 * presumably under a #if 0 not visible in this extract). */
7445     for(i=0; i<50; i++){
7446         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Unless decoding in chunks, a new call means a new access unit: reset the
 * slice counter and (if not waiting for a second field) the current picture. */
7449     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7450         h->current_slice = 0;
7451         if (!s->first_field)
7452             s->current_picture_ptr= NULL;
7464         if(buf_index >= buf_size) break;
/* AVC mode: read the big-endian NAL length prefix (nal_length_size bytes). */
7466         for(i = 0; i < h->nal_length_size; i++)
7467             nalsize = (nalsize << 8) | buf[buf_index++];
7468         if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7473             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7478         // start code prefix search
7479         for(; buf_index + 3 < buf_size; buf_index++){
7480             // This should always succeed in the first iteration.
7481             if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7485         if(buf_index+3 >= buf_size) break;
/* Decode this NAL into the next free thread context's rbsp buffer. */
7490         hx = h->thread_context[context_count];
7492         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7493         if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then subtract the rbsp_stop_one_bit padding to
 * get the exact payload bit length.
 * NOTE(review): the condition order reads ptr[dst_length-1] before checking
 * dst_length > 0 — out-of-bounds read when dst_length==0; the operands look
 * swapped, confirm against upstream. */
7496         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7498         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7500         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7501             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7504         if (h->is_avc && (nalsize != consumed)){
7505             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7509         buf_index += consumed;
/* Skip non-reference NALs entirely when hurrying or discarding non-refs. */
7511         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7512            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7517         switch(hx->nal_unit_type){
/* (case NAL_IDR_SLICE, per the check below) */
7519             if (h->nal_unit_type != NAL_IDR_SLICE) {
7520                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7523             idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular (non-partitioned) slice: all data comes from one bitstream. */
7525             init_get_bits(&hx->s.gb, ptr, bit_length);
7527             hx->inter_gb_ptr= &hx->s.gb;
7528             hx->s.data_partitioning = 0;
7530             if((err = decode_slice_header(hx, h)))
7533             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Queue the slice for decoding unless skip settings say to drop it. */
7534             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7535                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7536                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=FF_B_TYPE)
7537                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7538                && avctx->skip_frame < AVDISCARD_ALL)
/* Data-partitioned slice, partition A: slice header + MB data. */
7542             init_get_bits(&hx->s.gb, ptr, bit_length);
7544             hx->inter_gb_ptr= NULL;
7545             hx->s.data_partitioning = 1;
7547             err = decode_slice_header(hx, h);
/* Partition B: intra residual bitstream. */
7550             init_get_bits(&hx->intra_gb, ptr, bit_length);
7551             hx->intra_gb_ptr= &hx->intra_gb;
/* Partition C: inter residual bitstream; with A+B present the slice can run. */
7554             init_get_bits(&hx->inter_gb, ptr, bit_length);
7555             hx->inter_gb_ptr= &hx->inter_gb;
7557             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7558                && s->context_initialized
7560                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7561                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=FF_B_TYPE)
7562                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7563                && avctx->skip_frame < AVDISCARD_ALL)
/* (case NAL_SEI, presumably — the label itself is not visible here) */
7567             init_get_bits(&s->gb, ptr, bit_length);
/* (case NAL_SPS) */
7571             init_get_bits(&s->gb, ptr, bit_length);
7572             decode_seq_parameter_set(h);
7574             if(s->flags& CODEC_FLAG_LOW_DELAY)
7577             if(avctx->has_b_frames < 2)
7578                 avctx->has_b_frames= !s->low_delay;
/* (case NAL_PPS) */
7581             init_get_bits(&s->gb, ptr, bit_length);
7583             decode_picture_parameter_set(h, bit_length);
7587         case NAL_END_SEQUENCE:
7588         case NAL_END_STREAM:
7589         case NAL_FILLER_DATA:
7591         case NAL_AUXILIARY_SLICE:
7594             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush queued slices once all contexts are occupied. */
7597         if(context_count == h->max_contexts) {
7598             execute_decode_slices(h, context_count);
7603             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7605             /* Slice could not be decoded in parallel mode, copy down
7606              * NAL unit stuff to context 0 and restart. Note that
7607              * rbsp_buffer is not transfered, but since we no longer
7608              * run in parallel mode this should not be an issue. */
7609             h->nal_unit_type = hx->nal_unit_type;
7610             h->nal_ref_idc   = hx->nal_ref_idc;
/* Flush any remaining queued slices before returning. */
7616         execute_decode_slices(h, context_count);
/**
7621  * returns the number of bytes consumed for building the current frame
 */
/* NOTE(review): lines are missing from this extract (non-contiguous original
 * numbering) — the return statement is not visible; code left byte-identical. */
7623 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* In truncated mode part of the buffer belongs to the next frame; subtract
 * what the parser carried over. */
7624     if(s->flags&CODEC_FLAG_TRUNCATED){
7625         pos -= s->parse_context.last_index;
7626         if(pos<0) pos=0; // FIXME remove (unneeded?)
7630         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7631         if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback: handle flush/drain (buf_size == 0),
 * optional truncated-stream reassembly, one-time avcC extradata parsing,
 * then decode the NAL units and emit at most one reordered frame into *pict.
 * Returns the number of input bytes consumed.
 * NOTE(review): many lines are missing from this extract (non-contiguous
 * original numbering) — several declarations, braces, returns and the
 * field-picture handling are not visible; code left byte-identical. */
7637 static int decode_frame(AVCodecContext *avctx,
7638                              void *data, int *data_size,
7639                              const uint8_t *buf, int buf_size)
7641     H264Context *h = avctx->priv_data;
7642     MpegEncContext *s = &h->s;
7643     AVFrame *pict = data;
7646     s->flags= avctx->flags;
7647     s->flags2= avctx->flags2;
7649    /* no supplementary picture */
/* Drain path: on flush, output the lowest-poc delayed picture (up to the
 * next keyframe) and shift the delayed_pic queue down. */
7650     if (buf_size == 0) {
7654         //FIXME factorize this with the output code below
7655         out = h->delayed_pic[0];
7657         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7658             if(h->delayed_pic[i]->poc < out->poc){
7659                 out = h->delayed_pic[i];
7663         for(i=out_idx; h->delayed_pic[i]; i++)
7664             h->delayed_pic[i] = h->delayed_pic[i+1];
7667             *data_size = sizeof(AVFrame);
7668             *pict= *(AVFrame*)out;
/* Truncated-stream mode: find the frame boundary and accumulate input until
 * a full frame is available. */
7674     if(s->flags&CODEC_FLAG_TRUNCATED){
7675         int next= ff_h264_find_frame_end(h, buf, buf_size);
7677         if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7679 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* One-time parse of the avcC extradata (AVCDecoderConfigurationRecord):
 * version byte, then SPS and PPS arrays with 2-byte length prefixes. */
7682     if(h->is_avc && !h->got_avcC) {
7683         int i, cnt, nalsize;
7684         unsigned char *p = avctx->extradata;
7685         if(avctx->extradata_size < 7) {
7686             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7690             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7693         /* sps and pps in the avcC always have length coded with 2 bytes,
7694            so put a fake nal_length_size = 2 while parsing them */
7695         h->nal_length_size = 2;
7696         // Decode sps from avcC
7697         cnt = *(p+5) & 0x1f; // Number of sps
7699         for (i = 0; i < cnt; i++) {
7700             nalsize = AV_RB16(p) + 2;
7701             if(decode_nal_units(h, p, nalsize) < 0) {
7702                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7707         // Decode pps from avcC
7708         cnt = *(p++); // Number of pps
/* NOTE(review): the SPS loop above accepts any non-negative result while this
 * PPS loop requires `!= nalsize` — inconsistent success criteria; confirm
 * which is intended upstream. */
7709         for (i = 0; i < cnt; i++) {
7710             nalsize = AV_RB16(p) + 2;
7711             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7712                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7717         // Now store right nal length size, that will be use to parse all other nals
7718         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7719         // Do not reparse avcC
/* Annex-B extradata (non-avcC) is decoded once before the first frame. */
7723     if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7724         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7728     buf_index=decode_nal_units(h, buf, buf_size);
7732     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7733         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7734         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A picture is complete: finish it, run reference marking, then reorder. */
7738     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7739         Picture *out = s->current_picture_ptr;
7740         Picture *cur = s->current_picture_ptr;
7741         Picture *prev = h->delayed_output_pic;
7742         int i, pics, cross_idr, out_of_order, out_idx;
7746         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7747         s->current_picture_ptr->pict_type= s->pict_type;
/* Save POC/frame_num state for the next picture's POC computation. */
7749         h->prev_frame_num_offset= h->frame_num_offset;
7750         h->prev_frame_num= h->frame_num;
7752         h->prev_poc_msb= h->poc_msb;
7753         h->prev_poc_lsb= h->poc_lsb;
7754         execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        /*
7758          * FIXME: Error handling code does not seem to support interlaced
7759          * when slices span multiple rows
7760          * The ff_er_add_slice calls don't work right for bottom
7761          * fields; they cause massive erroneous error concealing
7762          * Error marking covers both fields (top and bottom).
7763          * This causes a mismatched s->error_count
7764          * and a bad error table. Further, the error count goes to
7765          * INT_MAX when called for bottom field, because mb_y is
7766          * past end by one (callers fault) and resync_mb_y != 0
7767          * causes problems for the first MB line, too.
         */
7774         if (s->first_field) {
7775             /* Wait for second field. */
7779         cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7780         /* Derive top_field_first from field pocs. */
7781         cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7783     //FIXME do something with unavailable reference frames
7785 #if 0 //decode order
7786         *data_size = sizeof(AVFrame);
        /* Sort B-frames into display order */
7790         if(h->sps.bitstream_restriction_flag
7791            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7792             s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the current picture to the delayed-output queue. */
7797         while(h->delayed_pic[pics]) pics++;
7799         assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7801         h->delayed_pic[pics++] = cur;
/* Pin non-reference pictures while they sit in the reorder queue. */
7802         if(cur->reference == 0)
7803             cur->reference = DELAYED_PIC_REF;
7806         for(i=0; h->delayed_pic[i]; i++)
7807             if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the lowest-poc delayed picture (up to the next keyframe) for output. */
7810         out = h->delayed_pic[0];
7812         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7813             if(h->delayed_pic[i]->poc < out->poc){
7814                 out = h->delayed_pic[i];
/* Heuristically grow has_b_frames when output-order violations show that the
 * stream reorders more than currently assumed. */
7818         out_of_order = !cross_idr && prev && out->poc < prev->poc;
7819         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7821         else if(prev && pics <= s->avctx->has_b_frames)
7823         else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7825            ((!cross_idr && prev && out->poc > prev->poc + 2)
7826             || cur->pict_type == FF_B_TYPE)))
7829             s->avctx->has_b_frames++;
7832         else if(out_of_order)
/* Emit the chosen picture and compact the queue. */
7835         if(out_of_order || pics > s->avctx->has_b_frames){
7836             for(i=out_idx; h->delayed_pic[i]; i++)
7837                 h->delayed_pic[i] = h->delayed_pic[i+1];
7843             *data_size = sizeof(AVFrame);
/* Unpin the previously output picture now that it leaves the queue. */
7844             if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7845                 prev->reference = 0;
7846             h->delayed_output_pic = out;
7850             *pict= *(AVFrame*)out;
7852             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7856     assert(pict->data[0] || !*data_size);
7857     ff_print_debug_info(s, pict);
7858 //printf("out %d\n", (int)pict->data[0]);
7861     /* Return the Picture timestamp as the frame number */
7862     /* we subtract 1 because it is added on utils.c */
7863     avctx->frame_number = s->picture_number - 1;
7865     return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: [0]=top-left, [1]=top, [2]=top-right, [3]=left. A neighbour is
 * available only if it belongs to the same slice (slice_table match) and is
 * inside the picture. Entries [4] and [5] are constants (see FIXMEs).
 * NOTE(review): lines are missing from this extract (non-contiguous original
 * numbering) — the mb_y==0 guard for the top row is not visible here. */
7868 static inline void fill_mb_avail(H264Context *h){
7869     MpegEncContext * const s = &h->s;
7870     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7873         h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7874         h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7875         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7881     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7882     h->mb_avail[4]= 1; //FIXME move out
7883     h->mb_avail[5]= 0; //FIXME move out
/* Built-in self-test harness (presumably compiled under #ifdef TEST — the
 * guard and the function opening are not visible in this extract). Exercises:
 * unsigned/signed Exp-Golomb write+read round-trips, the 4x4 (I)DCT error
 * bound, the quantizer, and NAL escape/unescape round-trips.
 * NOTE(review): many lines are missing (non-contiguous original numbering);
 * code left byte-identical. */
7891 #define SIZE (COUNT*40)
7897 //    int int_temp[10000];
7899     AVCodecContext avctx;
7901     dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb round-trip --- */
7903     init_put_bits(&pb, temp, SIZE);
7904     printf("testing unsigned exp golomb\n");
7905     for(i=0; i<COUNT; i++){
7907         set_ue_golomb(&pb, i);
7908         STOP_TIMER("set_ue_golomb");
7910     flush_put_bits(&pb);
7912     init_get_bits(&gb, temp, 8*SIZE);
7913     for(i=0; i<COUNT; i++){
7916         s= show_bits(&gb, 24);
7919         j= get_ue_golomb(&gb);
7921             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7924         STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb round-trip (values centred on zero) --- */
7928     init_put_bits(&pb, temp, SIZE);
7929     printf("testing signed exp golomb\n");
7930     for(i=0; i<COUNT; i++){
7932         set_se_golomb(&pb, i - COUNT/2);
7933         STOP_TIMER("set_se_golomb");
7935     flush_put_bits(&pb);
7937     init_get_bits(&gb, temp, 8*SIZE);
7938     for(i=0; i<COUNT; i++){
7941         s= show_bits(&gb, 24);
7944         j= get_se_golomb(&gb);
7945         if(j != i - COUNT/2){
7946             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7949         STOP_TIMER("get_se_golomb");
/* --- 4x4 forward/inverse DCT accuracy on random 8-bit data --- */
7953     printf("testing 4x4 (I)DCT\n");
7956         uint8_t src[16], ref[16];
7957         uint64_t error= 0, max_error=0;
7959         for(i=0; i<COUNT; i++){
7961 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7962             for(j=0; j<16; j++){
7963                 ref[j]= random()%255;
7964                 src[j]= random()%255;
7967             h264_diff_dct_c(block, src, ref, 4);
/* Scale coefficients to compensate the transform's non-uniform norms. */
7970             for(j=0; j<16; j++){
7971 //                printf("%d ", block[j]);
7972                 block[j]= block[j]*4;
7973                 if(j&1) block[j]= (block[j]*4 + 2)/5;
7974                 if(j&4) block[j]= (block[j]*4 + 2)/5;
7978             s->dsp.h264_idct_add(ref, block, 4);
7979 /*            for(j=0; j<16; j++){
7980                 printf("%d ", ref[j]);
/* Accumulate reconstruction error versus the original source block. */
7984             for(j=0; j<16; j++){
7985                 int diff= FFABS(src[j] - ref[j]);
7988                 max_error= FFMAX(max_error, diff);
7991         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values --- */
7992         printf("testing quantizer\n");
7993         for(qp=0; qp<52; qp++){
7995                 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escape/unescape round-trip on random bitstreams with forced zero
 * runs (to exercise emulation prevention) --- */
7998         printf("Testing NAL layer\n");
8000         uint8_t bitstream[COUNT];
8001         uint8_t nal[COUNT*2];
8003         memset(&h, 0, sizeof(H264Context));
8005         for(i=0; i<COUNT; i++){
8013             for(j=0; j<COUNT; j++){
8014                 bitstream[j]= (random() % 255) + 1;
8017             for(j=0; j<zeros; j++){
8018                 int pos= random() % COUNT;
8019                 while(bitstream[pos] == 0){
8028             nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8030                 printf("encoding failed\n");
8034             out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8038             if(out_length != COUNT){
8039                 printf("incorrect length %d %d\n", out_length, COUNT);
8043             if(consumed != nal_length){
8044                 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8048             if(memcmp(bitstream, out, COUNT)){
8049                 printf("mismatch\n");
8055     printf("Testing RBSP\n");
/* AVCodec close callback: free the per-context RBSP unescape buffers and the
 * decoder's allocated tables.
 * NOTE(review): lines are missing from this extract (non-contiguous original
 * numbering) — the MPV_common_end() call and return are not visible; code
 * left byte-identical. */
8063 static av_cold int decode_end(AVCodecContext *avctx)
8065     H264Context *h = avctx->priv_data;
8066     MpegEncContext *s = &h->s;
8068     av_freep(&h->rbsp_buffer[0]);
8069     av_freep(&h->rbsp_buffer[1]);
8070     free_tables(h); //FIXME cleanup init stuff perhaps
8073 //    memset(h, 0, sizeof(H264Context));
8079 AVCodec h264_decoder = {
8083 sizeof(H264Context),
8088 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,