2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC code tables (coeff_token, total_zeros, run_before and their chroma-DC
 * variants). Presumably initialized once at decoder init — init code not
 * visible in this chunk; confirm against the VLC setup routine. */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
64 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two 16-bit values into a single 32-bit word, with the operand order
 * swapped on big-endian hosts so the packed word has a consistent in-memory
 * byte layout across endiannesses.
 * NOTE(review): the #else/#endif and closing brace are elided in this view.
 */
66 static av_always_inline uint32_t pack16to32(int a, int b){
67 #ifdef WORDS_BIGENDIAN
68 return (b&0xFFFF) + (a<<16);
70 return (a&0xFFFF) + (b<<16);
/* Lookup tables over the H.264 QP range [0,51]:
 * ff_rem6[qp] == qp % 6 and ff_div6[qp] == qp / 6 — avoids runtime
 * division/modulo in dequantization scaling. */
74 const uint8_t ff_rem6[52]={
75 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
78 const uint8_t ff_div6[52]={
79 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/**
 * Fills the per-macroblock neighbor caches (intra pred modes, non-zero-count,
 * mv/ref/mvd caches, direct-mode cache, sample-availability masks) from the
 * already-decoded neighboring macroblocks, so that prediction and deblocking
 * can read neighbors through a uniform scan8-indexed layout.
 * NOTE(review): many lines of this function are elided in this view; the
 * visible statement order is preserved untouched below.
 *
 * @param h          decoder context
 * @param mb_type    mb_type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the deblocking filter
 *                    (neighbor availability then uses slice_table < 255 rather
 *                    than same-slice equality)
 */
83 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
84 MpegEncContext * const s = &h->s;
85 const int mb_xy= h->mb_xy;
86 int topleft_xy, top_xy, topright_xy, left_xy[2];
87 int topleft_type, top_type, topright_type, left_type[2];
89 int topleft_partition= -1;
92 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
94 //FIXME deblocking could skip the intra and nnz parts.
95 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
98 /* Wow, what a mess, why didn't they simplify the interlacing & intra
99 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbor addresses: topleft/top/topright/left. */
101 topleft_xy = top_xy - 1;
102 topright_xy= top_xy + 1;
103 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbor addresses depend on the frame/field flags of the current
 * MB pair and of each neighboring pair (H.264 6.4.8 neighbor derivation). */
113 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
114 const int top_pair_xy = pair_xy - s->mb_stride;
115 const int topleft_pair_xy = top_pair_xy - 1;
116 const int topright_pair_xy = top_pair_xy + 1;
117 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
118 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
119 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
120 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
121 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
122 const int bottom = (s->mb_y & 1);
123 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
125 ? !curr_mb_frame_flag // bottom macroblock
126 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
128 top_xy -= s->mb_stride;
131 ? !curr_mb_frame_flag // bottom macroblock
132 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
134 topleft_xy -= s->mb_stride;
135 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
136 topleft_xy += s->mb_stride;
137 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
138 topleft_partition = 0;
141 ? !curr_mb_frame_flag // bottom macroblock
142 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
144 topright_xy -= s->mb_stride;
146 if (left_mb_frame_flag != curr_mb_frame_flag) {
147 left_xy[1] = left_xy[0] = pair_xy - 1;
148 if (curr_mb_frame_flag) {
169 left_xy[1] += s->mb_stride;
/* Publish resolved neighbor addresses for later users (e.g. deblocking). */
182 h->top_mb_xy = top_xy;
183 h->left_mb_xy[0] = left_xy[0];
184 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbor counts as available if its slice_table entry
 * is < 255 (i.e. decoded), regardless of slice identity. */
188 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
189 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
190 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
192 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
/* Re-load the current MB's own nnz/mv/ref into the cache; the packed luma
 * nnz bits were stored at non_zero_count[mb_xy][14] by write-back. */
194 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
196 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
197 for(list=0; list<h->list_count; list++){
198 if(USES_LIST(mb_type,list)){
199 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
200 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
201 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
202 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
208 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
209 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
211 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
212 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
214 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
215 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decode path: a neighbor is available only if it belongs to this slice. */
220 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
221 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
222 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
223 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
224 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra: compute per-4x4-block sample-availability bitmasks, then clear the
 * bits for unavailable (or constrained-intra-excluded) neighbors. */
227 if(IS_INTRA(mb_type)){
228 h->topleft_samples_available=
229 h->top_samples_available=
230 h->left_samples_available= 0xFFFF;
231 h->topright_samples_available= 0xEEEA;
233 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
234 h->topleft_samples_available= 0xB3FF;
235 h->top_samples_available= 0x33FF;
236 h->topright_samples_available= 0x26EA;
239 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
240 h->topleft_samples_available&= 0xDF5F;
241 h->left_samples_available&= 0x5F5F;
245 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
246 h->topleft_samples_available&= 0x7FFF;
248 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
249 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: import the neighbor's 4x4 prediction modes into the top row and
 * left column of the prediction-mode cache. */
251 if(IS_INTRA4x4(mb_type)){
252 if(IS_INTRA4x4(top_type)){
253 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
254 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
255 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
256 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
259 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
264 h->intra4x4_pred_mode_cache[4+8*0]=
265 h->intra4x4_pred_mode_cache[5+8*0]=
266 h->intra4x4_pred_mode_cache[6+8*0]=
267 h->intra4x4_pred_mode_cache[7+8*0]= pred;
270 if(IS_INTRA4x4(left_type[i])){
271 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
272 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
275 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
280 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
281 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
296 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Non-zero-count cache: copy the neighbor's per-block coefficient counts;
 * unavailable neighbors get 0 (CABAC inter) or 64 (otherwise). */
298 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
299 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
300 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
301 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
303 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
304 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
306 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
307 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
310 h->non_zero_count_cache[4+8*0]=
311 h->non_zero_count_cache[5+8*0]=
312 h->non_zero_count_cache[6+8*0]=
313 h->non_zero_count_cache[7+8*0]=
315 h->non_zero_count_cache[1+8*0]=
316 h->non_zero_count_cache[2+8*0]=
318 h->non_zero_count_cache[1+8*3]=
319 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
323 for (i=0; i<2; i++) {
325 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
326 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
327 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
328 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
330 h->non_zero_count_cache[3+8*1 + 2*8*i]=
331 h->non_zero_count_cache[3+8*2 + 2*8*i]=
332 h->non_zero_count_cache[0+8*1 + 8*i]=
333 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of the top/left neighbors, used for CABAC context selection. */
340 h->top_cbp = h->cbp_table[top_xy];
341 } else if(IS_INTRA(mb_type)) {
348 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
349 } else if(IS_INTRA(mb_type)) {
355 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
358 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct: load neighbor motion vectors and reference indices into the
 * scan8-addressed mv_cache / ref_cache borders. */
363 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
365 for(list=0; list<h->list_count; list++){
366 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
367 /*if(!h->mv_cache_clean[list]){
368 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
369 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
370 h->mv_cache_clean[list]= 1;
374 h->mv_cache_clean[list]= 0;
376 if(USES_LIST(top_type, list)){
377 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
378 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
383 h->ref_cache[list][scan8[0] + 0 - 1*8]=
384 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
385 h->ref_cache[list][scan8[0] + 2 - 1*8]=
386 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
392 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
396 int cache_idx = scan8[0] - 1 + i*2*8;
397 if(USES_LIST(left_type[i], list)){
398 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
399 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
400 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
401 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
402 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
403 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
405 *(uint32_t*)h->mv_cache [list][cache_idx ]=
406 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
407 h->ref_cache[list][cache_idx ]=
408 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
412 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
415 if(USES_LIST(topleft_type, list)){
416 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
417 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
418 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
419 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
421 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
422 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
425 if(USES_LIST(topright_type, list)){
426 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
427 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
428 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
429 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
431 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
432 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
435 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
438 h->ref_cache[list][scan8[5 ]+1] =
439 h->ref_cache[list][scan8[7 ]+1] =
440 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
441 h->ref_cache[list][scan8[4 ]] =
442 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
443 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
445 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
446 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
447 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
450 /* XXX beurk, Load mvd */
451 if(USES_LIST(top_type, list)){
452 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
463 if(USES_LIST(left_type[0], list)){
464 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
471 if(USES_LIST(left_type[1], list)){
472 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
474 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
477 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
479 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
481 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
482 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
483 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
485 if(h->slice_type_nos == FF_B_TYPE){
486 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
488 if(IS_DIRECT(top_type)){
489 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
490 }else if(IS_8X8(top_type)){
491 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
492 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
493 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
495 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
498 if(IS_DIRECT(left_type[0]))
499 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
500 else if(IS_8X8(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
503 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
505 if(IS_DIRECT(left_type[1]))
506 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
507 else if(IS_8X8(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
510 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
516 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
517 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
522 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
524 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
525 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
527 #define MAP_F2F(idx, mb_type)\
528 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
529 h->ref_cache[list][idx] <<= 1;\
530 h->mv_cache[list][idx][1] /= 2;\
531 h->mvd_cache[list][idx][1] /= 2;\
536 #define MAP_F2F(idx, mb_type)\
537 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
538 h->ref_cache[list][idx] >>= 1;\
539 h->mv_cache[list][idx][1] <<= 1;\
540 h->mvd_cache[list][idx][1] <<= 1;\
550 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the current macroblock's intra4x4 prediction modes from the
 * scan8-indexed cache back into the per-MB intra4x4_pred_mode table
 * (right column and bottom row, i.e. what future neighbors will read).
 */
553 static inline void write_back_intra_pred_mode(H264Context *h){
554 const int mb_xy= h->mb_xy;
556 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
557 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
558 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
559 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
560 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
561 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
562 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
566 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Remaps each cached intra4x4 mode through the top[]/left[] tables when the
 * corresponding neighbor samples are unavailable; a negative remap result
 * means the mode is illegal without that neighbor and is reported as an
 * error (error-return path elided in this view). */
568 static inline int check_intra4x4_pred_mode(H264Context *h){
569 MpegEncContext * const s = &h->s;
570 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
571 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 if(!(h->top_samples_available&0x8000)){
576 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
578 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
586 if(!(h->left_samples_available&0x8000)){
588 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
590 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
593 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
599 } //FIXME cleanup like next
602 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for a whole-MB (16x16 luma /
 * chroma) prediction mode; also range-checks the requested mode. The
 * remapping statements and final return are elided in this view. */
604 static inline int check_intra_pred_mode(H264Context *h, int mode){
605 MpegEncContext * const s = &h->s;
606 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
607 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
610 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
614 if(!(h->top_samples_available&0x8000)){
617 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
622 if(!(h->left_samples_available&0x8000)){
625 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 * gets the predicted intra4x4 prediction mode.
/* Standard H.264 most-probable-mode rule: min(left, top) of the cached
 * neighbor modes; DC_PRED when either neighbor is unavailable (negative). */
636 static inline int pred_intra_mode(H264Context *h, int n){
637 const int index8= scan8[n];
638 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640 const int min= FFMIN(left, top);
642 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
644 if(min<0) return DC_PRED;
/**
 * Writes the current MB's non-zero coefficient counts from the scan8 cache
 * back to the per-MB non_zero_count table, and additionally packs all 16
 * luma nnz flags into a 16-bit word at index [14] for the deblocking filter.
 */
648 static inline void write_back_non_zero_count(H264Context *h){
649 const int mb_xy= h->mb_xy;
651 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
652 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
653 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
654 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
655 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
656 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
657 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
659 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
660 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
661 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
663 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
664 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
665 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
668 // store all luma nnzs, for deblocking
671 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
672 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
/* Average of the left and top neighbor nnz counts, rounded up, used as the
 * CAVLC coeff_token context. The combining/availability logic between these
 * lines is elided in this view. */
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Returns the reference index of the diagonal (top-right, falling back to
 * top-left) neighbor used for motion vector prediction, and points *C at its
 * motion vector in the cache. Contains special-case handling for MBAFF,
 * where frame/field neighbor MVs must be rescaled (SET_DIAG_MV halves or
 * doubles the vertical component and shifts the ref index accordingly).
 */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot scan8[0]-2 holds the rescaled MV for the MBAFF cases below. */
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use top-right if available, else fall back to top-left. */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
/* H.264 median MV prediction (spec 8.4.1.3): median of left (A), top (B)
 * and diagonal (C) neighbor MVs; with exactly one neighbor matching the
 * reference index, that neighbor's MV is used directly (elided branches). */
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
/* No match: left MV alone if only the left neighbor exists, else median. */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
/* 16x8 shortcut (spec 8.4.1.3.2): top partition may take the top neighbor's
 * MV, bottom partition the left neighbor's, when the reference matches;
 * otherwise falls through to generic median prediction. */
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
/* 8x16 shortcut: left partition may take the left neighbor's MV, right
 * partition the diagonal neighbor's, when the reference matches; otherwise
 * falls through to generic median prediction. */
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
875 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * P-Skip MV prediction (spec 8.4.1.1): the skip MV is zero when either the
 * top or left neighbor is unavailable, or is ref 0 with a zero MV;
 * otherwise it is the ordinary median-predicted MV for the whole MB.
 */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct distance scale factors (spec 8.4.1.2.3):
 * dist_scale_factor[i] = clip((tb*tx + 32) >> 6) per list-0 reference, with
 * td/tb the clipped POC distances; 256 (unity) when td is zero. Also
 * duplicates the factors into the per-field table.
 */
897 static inline void direct_dist_scale_factor(H264Context * const h){
898 const int poc = h->s.current_picture_ptr->poc;
899 const int poc1 = h->ref_list[1][0].poc;
901 for(i=0; i<h->ref_count[0]; i++){
902 int poc0 = h->ref_list[0][i].poc;
903 int td = av_clip(poc1 - poc0, -128, 127);
904 if(td == 0 /* FIXME || pic0 is a long-term ref */){
905 h->dist_scale_factor[i] = 256;
907 int tb = av_clip(poc - poc0, -128, 127);
908 int tx = (16384 + (FFABS(td) >> 1)) / td;
909 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
913 for(i=0; i<h->ref_count[0]; i++){
914 h->dist_scale_factor_field[2*i] =
915 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Records the current picture's reference counts/POCs (so future co-located
 * lookups can use them), and for temporal direct mode builds
 * map_col_to_list0: for each reference of the co-located picture
 * (ref_list[1][0]), the index of the list-0 reference with matching POC
 * (0 when no match is found). Also fills the per-field variant of the map.
 */
919 static inline void direct_ref_list_init(H264Context * const h){
920 MpegEncContext * const s = &h->s;
921 Picture * const ref1 = &h->ref_list[1][0];
922 Picture * const cur = s->current_picture_ptr;
924 if(cur->pict_type == FF_I_TYPE)
925 cur->ref_count[0] = 0;
926 if(cur->pict_type != FF_B_TYPE)
927 cur->ref_count[1] = 0;
928 for(list=0; list<2; list++){
929 cur->ref_count[list] = h->ref_count[list];
930 for(j=0; j<h->ref_count[list]; j++)
931 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* Spatial direct mode does not need the col-to-list0 mapping. */
933 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
935 for(list=0; list<2; list++){
936 for(i=0; i<ref1->ref_count[list]; i++){
937 const int poc = ref1->ref_poc[list][i];
938 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
939 for(j=0; j<h->ref_count[list]; j++)
940 if(h->ref_list[list][j].poc == poc){
941 h->map_col_to_list0[list][i] = j;
947 for(list=0; list<2; list++){
948 for(i=0; i<ref1->ref_count[list]; i++){
949 j = h->map_col_to_list0[list][i];
950 h->map_col_to_list0_field[list][2*i] = 2*j;
951 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives the motion of a B-direct macroblock (spatial or temporal direct
 * mode) from spatial neighbors and/or the co-located macroblock in
 * ref_list[1][0]. Chooses the sub-partition type from the co-located MB's
 * type, then fills mv/ref caches accordingly.
 * NOTE(review): this function continues past the end of the visible chunk;
 * only the visible portion is annotated here.
 */
957 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
958 MpegEncContext * const s = &h->s;
959 const int mb_xy = h->mb_xy;
960 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
961 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
962 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
963 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
964 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
965 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
966 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
967 const int is_b8x8 = IS_8X8(*mb_type);
968 unsigned int sub_mb_type;
/* Pick mb/sub-mb partitioning from the co-located MB's type. */
971 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
972 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
973 /* FIXME save sub mb types from previous frames (or derive from MVs)
974 * so we know exactly what block size to use */
975 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
976 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
977 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
978 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
979 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
981 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
982 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
985 *mb_type |= MB_TYPE_DIRECT2;
987 *mb_type |= MB_TYPE_INTERLACED;
989 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
991 if(h->direct_spatial_mv_pred){
996 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
998 /* ref = min(neighbors) */
999 for(list=0; list<2; list++){
1000 int refa = h->ref_cache[list][scan8[0] - 1];
1001 int refb = h->ref_cache[list][scan8[0] - 8];
1002 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1004 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1005 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* Both refs invalid -> use ref 0 with zero MVs (spatial direct rule). */
1010 if(ref[0] < 0 && ref[1] < 0){
1011 ref[0] = ref[1] = 0;
1012 mv[0][0] = mv[0][1] =
1013 mv[1][0] = mv[1][1] = 0;
1015 for(list=0; list<2; list++){
1017 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1019 mv[list][0] = mv[list][1] = 0;
/* Drop the unused list when its spatial ref turned out invalid. */
1025 *mb_type &= ~MB_TYPE_L1;
1026 sub_mb_type &= ~MB_TYPE_L1;
1027 }else if(ref[0] < 0){
1029 *mb_type &= ~MB_TYPE_L0;
1030 sub_mb_type &= ~MB_TYPE_L0;
/* Frame/field mismatch with the co-located MB: re-address the co-located
 * motion data at field granularity. */
1033 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1034 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1035 int mb_types_col[2];
1036 int b8_stride = h->b8_stride;
1037 int b4_stride = h->b_stride;
1039 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1041 if(IS_INTERLACED(*mb_type)){
1042 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1043 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1045 l1ref0 -= 2*b8_stride;
1046 l1ref1 -= 2*b8_stride;
1047 l1mv0 -= 4*b4_stride;
1048 l1mv1 -= 4*b4_stride;
1053 int cur_poc = s->current_picture_ptr->poc;
1054 int *col_poc = h->ref_list[1]->field_poc;
1055 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1056 int dy = 2*col_parity - (s->mb_y&1);
1058 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1059 l1ref0 += dy*b8_stride;
1060 l1ref1 += dy*b8_stride;
1061 l1mv0 += 2*dy*b4_stride;
1062 l1mv1 += 2*dy*b4_stride;
1066 for(i8=0; i8<4; i8++){
1069 int xy8 = x8+y8*b8_stride;
1070 int xy4 = 3*x8+y8*b4_stride;
1073 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1075 h->sub_mb_type[i8] = sub_mb_type;
1077 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1078 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1079 if(!IS_INTRA(mb_types_col[y8])
1080 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1081 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1083 a= pack16to32(mv[0][0],mv[0][1]);
1085 b= pack16to32(mv[1][0],mv[1][1]);
1087 a= pack16to32(mv[0][0],mv[0][1]);
1088 b= pack16to32(mv[1][0],mv[1][1]);
1090 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1091 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1093 }else if(IS_16X16(*mb_type)){
1096 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1097 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1098 if(!IS_INTRA(mb_type_col)
1099 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1100 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1101 && (h->x264_build>33 || !h->x264_build)))){
1103 a= pack16to32(mv[0][0],mv[0][1]);
1105 b= pack16to32(mv[1][0],mv[1][1]);
1107 a= pack16to32(mv[0][0],mv[0][1]);
1108 b= pack16to32(mv[1][0],mv[1][1]);
1110 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1111 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1113 for(i8=0; i8<4; i8++){
1114 const int x8 = i8&1;
1115 const int y8 = i8>>1;
1117 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1119 h->sub_mb_type[i8] = sub_mb_type;
1121 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1122 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1123 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1124 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1127 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1128 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1129 && (h->x264_build>33 || !h->x264_build)))){
1130 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1131 if(IS_SUB_8X8(sub_mb_type)){
1132 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1133 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1135 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1137 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1140 for(i4=0; i4<4; i4++){
1141 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1142 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1144 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1146 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1152 }else{ /* direct temporal mv pred */
1153 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1154 const int *dist_scale_factor = h->dist_scale_factor;
1157 if(IS_INTERLACED(*mb_type)){
1158 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1159 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1160 dist_scale_factor = h->dist_scale_factor_field;
1162 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1163 /* FIXME assumes direct_8x8_inference == 1 */
1164 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1165 int mb_types_col[2];
1168 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1169 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1170 | (*mb_type & MB_TYPE_INTERLACED);
1171 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1173 if(IS_INTERLACED(*mb_type)){
1174 /* frame to field scaling */
1175 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1176 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1178 l1ref0 -= 2*h->b8_stride;
1179 l1ref1 -= 2*h->b8_stride;
1180 l1mv0 -= 4*h->b_stride;
1181 l1mv1 -= 4*h->b_stride;
1185 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1186 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1188 *mb_type |= MB_TYPE_16x8;
1190 *mb_type |= MB_TYPE_8x8;
1192 /* field to frame scaling */
1193 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1194 * but in MBAFF, top and bottom POC are equal */
1195 int dy = (s->mb_y&1) ? 1 : 2;
1197 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1198 l1ref0 += dy*h->b8_stride;
1199 l1ref1 += dy*h->b8_stride;
1200 l1mv0 += 2*dy*h->b_stride;
1201 l1mv1 += 2*dy*h->b_stride;
1204 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1206 *mb_type |= MB_TYPE_16x16;
1208 *mb_type |= MB_TYPE_8x8;
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 if(IS_INTRA(mb_types_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1231 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1252 /* one-to-one mv scaling */
1254 if(IS_16X16(*mb_type)){
1257 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1258 if(IS_INTRA(mb_type_col)){
1261 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1262 : map_col_to_list0[1][l1ref1[0]];
1263 const int scale = dist_scale_factor[ref0];
1264 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1266 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1267 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1269 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1270 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1272 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1273 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1274 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1276 for(i8=0; i8<4; i8++){
1277 const int x8 = i8&1;
1278 const int y8 = i8>>1;
1280 const int16_t (*l1mv)[2]= l1mv0;
1282 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1284 h->sub_mb_type[i8] = sub_mb_type;
1285 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1286 if(IS_INTRA(mb_type_col)){
1287 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1288 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1289 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1293 ref0 = l1ref0[x8 + y8*h->b8_stride];
1295 ref0 = map_col_to_list0[0][ref0];
1297 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1300 scale = dist_scale_factor[ref0];
1302 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1303 if(IS_SUB_8X8(sub_mb_type)){
1304 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1305 int mx = (scale * mv_col[0] + 128) >> 8;
1306 int my = (scale * mv_col[1] + 128) >> 8;
1307 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1308 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1310 for(i4=0; i4<4; i4++){
1311 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1312 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1313 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1314 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1315 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1316 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1323 static inline void write_back_motion(H264Context *h, int mb_type){
1324 MpegEncContext * const s = &h->s;
1325 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1326 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1329 if(!USES_LIST(mb_type, 0))
1330 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1332 for(list=0; list<h->list_count; list++){
1334 if(!USES_LIST(mb_type, list))
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1341 if( h->pps.cabac ) {
1342 if(IS_SKIP(mb_type))
1343 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1346 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1347 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1352 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1353 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1354 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1355 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1356 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1360 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1361 if(IS_8X8(mb_type)){
1362 uint8_t *direct_table = &h->direct_table[b8_xy];
1363 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1364 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1365 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1371 * Decodes a network abstraction layer unit.
1372 * @param consumed is the number of bytes used as input
1373 * @param length is the length of the array
1374 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1375 * @returns decoded bytes, might be src+1 if no escapes
1377 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1382 // src[0]&0x80; //forbidden bit
1383 h->nal_ref_idc= src[0]>>5;
1384 h->nal_unit_type= src[0]&0x1F;
1388 for(i=0; i<length; i++)
1389 printf("%2X ", src[i]);
1391 for(i=0; i+1<length; i+=2){
1392 if(src[i]) continue;
1393 if(i>0 && src[i-1]==0) i--;
1394 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1396 /* startcode, so we must be past the end */
1403 if(i>=length-1){ //no escaped 0
1404 *dst_length= length;
1405 *consumed= length+1; //+1 for the header
1409 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1410 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1411 dst= h->rbsp_buffer[bufidx];
1417 //printf("decoding esc\n");
1420 //remove escapes (very rare 1:2^22)
1421 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1422 if(src[si+2]==3){ //escape
1427 }else //next start code
1431 dst[di++]= src[si++];
1435 *consumed= si + 1;//+1 for the header
1436 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1441 * identifies the exact end of the bitstream
1442 * @return the length of the trailing, or 0 if damaged
1444 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1448 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1458 * IDCT transforms the 16 dc values and dequantizes them.
1459 * @param qp quantization parameter
1461 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1464 int temp[16]; //FIXME check if this is a good idea
1465 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1466 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1468 //memset(block, 64, 2*256);
1471 const int offset= y_offset[i];
1472 const int z0= block[offset+stride*0] + block[offset+stride*4];
1473 const int z1= block[offset+stride*0] - block[offset+stride*4];
1474 const int z2= block[offset+stride*1] - block[offset+stride*5];
1475 const int z3= block[offset+stride*1] + block[offset+stride*5];
1484 const int offset= x_offset[i];
1485 const int z0= temp[4*0+i] + temp[4*2+i];
1486 const int z1= temp[4*0+i] - temp[4*2+i];
1487 const int z2= temp[4*1+i] - temp[4*3+i];
1488 const int z3= temp[4*1+i] + temp[4*3+i];
1490 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1491 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1492 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1493 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1499 * DCT transforms the 16 dc values.
1500 * @param qp quantization parameter ??? FIXME
1502 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1503 // const int qmul= dequant_coeff[qp][0];
1505 int temp[16]; //FIXME check if this is a good idea
1506 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1507 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1510 const int offset= y_offset[i];
1511 const int z0= block[offset+stride*0] + block[offset+stride*4];
1512 const int z1= block[offset+stride*0] - block[offset+stride*4];
1513 const int z2= block[offset+stride*1] - block[offset+stride*5];
1514 const int z3= block[offset+stride*1] + block[offset+stride*5];
1523 const int offset= x_offset[i];
1524 const int z0= temp[4*0+i] + temp[4*2+i];
1525 const int z1= temp[4*0+i] - temp[4*2+i];
1526 const int z2= temp[4*1+i] - temp[4*3+i];
1527 const int z3= temp[4*1+i] + temp[4*3+i];
1529 block[stride*0 +offset]= (z0 + z3)>>1;
1530 block[stride*2 +offset]= (z1 + z2)>>1;
1531 block[stride*8 +offset]= (z1 - z2)>>1;
1532 block[stride*10+offset]= (z0 - z3)>>1;
1540 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1541 const int stride= 16*2;
1542 const int xStride= 16;
1545 a= block[stride*0 + xStride*0];
1546 b= block[stride*0 + xStride*1];
1547 c= block[stride*1 + xStride*0];
1548 d= block[stride*1 + xStride*1];
1555 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1556 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1557 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1558 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1562 static void chroma_dc_dct_c(DCTELEM *block){
1563 const int stride= 16*2;
1564 const int xStride= 16;
1567 a= block[stride*0 + xStride*0];
1568 b= block[stride*0 + xStride*1];
1569 c= block[stride*1 + xStride*0];
1570 d= block[stride*1 + xStride*1];
1577 block[stride*0 + xStride*0]= (a+c);
1578 block[stride*0 + xStride*1]= (e+b);
1579 block[stride*1 + xStride*0]= (a-c);
1580 block[stride*1 + xStride*1]= (e-b);
1585 * gets the chroma qp.
1587 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1588 return h->pps.chroma_qp_table[t][qscale];
1591 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1592 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1593 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1595 const int * const quant_table= quant_coeff[qscale];
1596 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1597 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1598 const unsigned int threshold2= (threshold1<<1);
1604 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1605 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1606 const unsigned int dc_threshold2= (dc_threshold1<<1);
1608 int level= block[0]*quant_coeff[qscale+18][0];
1609 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1611 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1614 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1617 // last_non_zero = i;
1622 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1623 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1624 const unsigned int dc_threshold2= (dc_threshold1<<1);
1626 int level= block[0]*quant_table[0];
1627 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1629 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1632 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1635 // last_non_zero = i;
1648 const int j= scantable[i];
1649 int level= block[j]*quant_table[j];
1651 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1652 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1653 if(((unsigned)(level+threshold1))>threshold2){
1655 level= (bias + level)>>QUANT_SHIFT;
1658 level= (bias - level)>>QUANT_SHIFT;
1667 return last_non_zero;
1670 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1671 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1672 int src_x_offset, int src_y_offset,
1673 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1674 MpegEncContext * const s = &h->s;
1675 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1676 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1677 const int luma_xy= (mx&3) + ((my&3)<<2);
1678 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1679 uint8_t * src_cb, * src_cr;
1680 int extra_width= h->emu_edge_width;
1681 int extra_height= h->emu_edge_height;
1683 const int full_mx= mx>>2;
1684 const int full_my= my>>2;
1685 const int pic_width = 16*s->mb_width;
1686 const int pic_height = 16*s->mb_height >> MB_FIELD;
1688 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1691 if(mx&7) extra_width -= 3;
1692 if(my&7) extra_height -= 3;
1694 if( full_mx < 0-extra_width
1695 || full_my < 0-extra_height
1696 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1697 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1698 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1699 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1703 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1705 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1708 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1711 // chroma offset when predicting from a field of opposite parity
1712 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1713 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1715 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1716 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1719 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1720 src_cb= s->edge_emu_buffer;
1722 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1725 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1726 src_cr= s->edge_emu_buffer;
1728 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1731 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1732 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1733 int x_offset, int y_offset,
1734 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1735 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1736 int list0, int list1){
1737 MpegEncContext * const s = &h->s;
1738 qpel_mc_func *qpix_op= qpix_put;
1739 h264_chroma_mc_func chroma_op= chroma_put;
1741 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1742 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1743 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1744 x_offset += 8*s->mb_x;
1745 y_offset += 8*(s->mb_y >> MB_FIELD);
1748 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1749 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1750 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1751 qpix_op, chroma_op);
1754 chroma_op= chroma_avg;
1758 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1759 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1760 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1761 qpix_op, chroma_op);
1765 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1766 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1767 int x_offset, int y_offset,
1768 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1769 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1770 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1771 int list0, int list1){
1772 MpegEncContext * const s = &h->s;
1774 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1775 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1776 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1777 x_offset += 8*s->mb_x;
1778 y_offset += 8*(s->mb_y >> MB_FIELD);
1781 /* don't optimize for luma-only case, since B-frames usually
1782 * use implicit weights => chroma too. */
1783 uint8_t *tmp_cb = s->obmc_scratchpad;
1784 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1785 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1786 int refn0 = h->ref_cache[0][ scan8[n] ];
1787 int refn1 = h->ref_cache[1][ scan8[n] ];
1789 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1790 dest_y, dest_cb, dest_cr,
1791 x_offset, y_offset, qpix_put, chroma_put);
1792 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1793 tmp_y, tmp_cb, tmp_cr,
1794 x_offset, y_offset, qpix_put, chroma_put);
1796 if(h->use_weight == 2){
1797 int weight0 = h->implicit_weight[refn0][refn1];
1798 int weight1 = 64 - weight0;
1799 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1801 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1803 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1804 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1805 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1806 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1807 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1808 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1809 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1810 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1811 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1814 int list = list1 ? 1 : 0;
1815 int refn = h->ref_cache[list][ scan8[n] ];
1816 Picture *ref= &h->ref_list[list][refn];
1817 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1818 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1819 qpix_put, chroma_put);
1821 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1822 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1823 if(h->use_weight_chroma){
1824 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1825 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1826 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1832 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1833 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1834 int x_offset, int y_offset,
1835 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1836 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1837 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1838 int list0, int list1){
1839 if((h->use_weight==2 && list0 && list1
1840 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1841 || h->use_weight==1)
1842 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1843 x_offset, y_offset, qpix_put, chroma_put,
1844 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1846 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1847 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1850 static inline void prefetch_motion(H264Context *h, int list){
1851 /* fetch pixels for estimated mv 4 macroblocks ahead
1852 * optimized for 64byte cache lines */
1853 MpegEncContext * const s = &h->s;
1854 const int refn = h->ref_cache[list][scan8[0]];
1856 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1857 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1858 uint8_t **src= h->ref_list[list][refn].data;
1859 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1860 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1861 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1862 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1866 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1867 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1868 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1869 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1870 MpegEncContext * const s = &h->s;
1871 const int mb_xy= h->mb_xy;
1872 const int mb_type= s->current_picture.mb_type[mb_xy];
1874 assert(IS_INTER(mb_type));
1876 prefetch_motion(h, 0);
1878 if(IS_16X16(mb_type)){
1879 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1880 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1881 &weight_op[0], &weight_avg[0],
1882 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1883 }else if(IS_16X8(mb_type)){
1884 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1885 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1886 &weight_op[1], &weight_avg[1],
1887 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1888 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1889 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1890 &weight_op[1], &weight_avg[1],
1891 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1892 }else if(IS_8X16(mb_type)){
1893 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1894 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1895 &weight_op[2], &weight_avg[2],
1896 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1897 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1898 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1899 &weight_op[2], &weight_avg[2],
1900 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1904 assert(IS_8X8(mb_type));
1907 const int sub_mb_type= h->sub_mb_type[i];
1909 int x_offset= (i&1)<<2;
1910 int y_offset= (i&2)<<1;
1912 if(IS_SUB_8X8(sub_mb_type)){
1913 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1914 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1915 &weight_op[3], &weight_avg[3],
1916 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1917 }else if(IS_SUB_8X4(sub_mb_type)){
1918 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1919 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1920 &weight_op[4], &weight_avg[4],
1921 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1922 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1923 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1924 &weight_op[4], &weight_avg[4],
1925 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1926 }else if(IS_SUB_4X8(sub_mb_type)){
1927 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1928 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1929 &weight_op[5], &weight_avg[5],
1930 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1931 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1932 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1933 &weight_op[5], &weight_avg[5],
1934 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1937 assert(IS_SUB_4X4(sub_mb_type));
1939 int sub_x_offset= x_offset + 2*(j&1);
1940 int sub_y_offset= y_offset + (j&2);
1941 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1942 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1943 &weight_op[6], &weight_avg[6],
1944 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1950 prefetch_motion(h, 1);
/**
 * Build the static CAVLC VLC decoding tables (coeff_token, total_zeros,
 * their chroma-DC variants, and run_before) from the hard-coded
 * length/bits arrays defined in the CAVLC data tables.
 * All tables are file-scope statics shared by every decoder instance.
 * NOTE(review): this listing has gaps (the `done` guard body, loop headers
 * and closing brace are not visible); comments cover only visible lines.
 */
1953 static av_cold void decode_init_vlc(void){
// one-shot guard: the shared static tables must be initialized only once
1954 static int done = 0;
// chroma DC coeff_token: 4 trailing-ones values x 5 total-coeff values
1960 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1961 &chroma_dc_coeff_token_len [0], 1, 1,
1962 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// luma coeff_token: 4 trailing-ones values x 17 total-coeff values per context i
1965 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1966 &coeff_token_len [i][0], 1, 1,
1967 &coeff_token_bits[i][0], 1, 1, 1);
// chroma DC total_zeros: up to 4 coefficients -> 4 symbols per table
1971 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1972 &chroma_dc_total_zeros_len [i][0], 1, 1,
1973 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
// luma total_zeros: one table per total-coeff count 1..15
1975 for(i=0; i<15; i++){
1976 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1977 &total_zeros_len [i][0], 1, 1,
1978 &total_zeros_bits[i][0], 1, 1, 1);
// run_before tables for zerosLeft 1..6 (7 symbols each)
1982 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1983 &run_len [i][0], 1, 1,
1984 &run_bits[i][0], 1, 1, 1);
// separate table for zerosLeft > 6 (16 symbols)
1986 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1987 &run_len [6][0], 1, 1,
1988 &run_bits[6][0], 1, 1, 1);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * plus the cached SPS/PPS buffers and per-thread scratch buffers.
 * Safe to call on partially-allocated state: av_freep() tolerates NULL
 * and also NULLs each pointer, preventing double frees.
 * NOTE(review): listing has gaps (loop braces etc. not visible).
 */
1992 static void free_tables(H264Context *h){
1995 av_freep(&h->intra4x4_pred_mode);
1996 av_freep(&h->chroma_pred_mode_table);
1997 av_freep(&h->cbp_table);
1998 av_freep(&h->mvd_table[0]);
1999 av_freep(&h->mvd_table[1]);
2000 av_freep(&h->direct_table);
2001 av_freep(&h->non_zero_count);
2002 av_freep(&h->slice_table_base);
// slice_table is an offset view into slice_table_base, never freed directly
2003 h->slice_table= NULL;
2005 av_freep(&h->mb2b_xy);
2006 av_freep(&h->mb2b8_xy);
2008 for(i = 0; i < MAX_SPS_COUNT; i++)
2009 av_freep(h->sps_buffers + i);
2011 for(i = 0; i < MAX_PPS_COUNT; i++)
2012 av_freep(h->pps_buffers + i);
// per-thread buffers allocated by context_init()/frame_start()
2014 for(i = 0; i < h->s.avctx->thread_count; i++) {
2015 hx = h->thread_context[i];
2017 av_freep(&hx->top_borders[1]);
2018 av_freep(&hx->top_borders[0]);
2019 av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If both 8x8 scaling matrices are identical, the
 * second table aliases the first to save work and memory.
 * The coefficient layout is transposed when a non-C IDCT is in use.
 * NOTE(review): the inner x-loop header and braces are not visible here.
 */
2023 static void init_dequant8_coeff_table(H264Context *h){
// SIMD IDCTs expect a transposed coefficient layout; detect via function pointer
2025 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2026 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2027 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2029 for(i=0; i<2; i++ ){
// identical intra/inter matrices -> share one table and skip recomputation
2030 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2031 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2035 for(q=0; q<52; q++){
// qp = 6*div6 + rem6; shift grows by one per 6 QP steps
2036 int shift = ff_div6[q];
2037 int idx = ff_rem6[q];
// combine the base dequant constant with the per-position scaling matrix entry
2039 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2040 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2041 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb,
 * Cr) for all 52 QP values from the PPS scaling matrices. Matrices that
 * compare equal to an earlier one alias its table instead of recomputing.
 * NOTE(review): the j-loop header and inner x-loop are not visible here.
 */
2046 static void init_dequant4_coeff_table(H264Context *h){
// non-C IDCT implementations want a transposed coefficient layout
2048 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2049 for(i=0; i<6; i++ ){
2050 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// duplicate scaling matrix -> reuse the already-built table j
2052 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2060 for(q=0; q<52; q++){
// +2 keeps extra precision relative to the 8x8 tables
2061 int shift = ff_div6[q] + 2;
2062 int idx = ff_rem6[q];
2064 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2065 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2066 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequantization tables for the current PPS/SPS.
 * The 8x8 tables are only needed when the PPS enables 8x8 transforms.
 * With lossless transform bypass, QP 0 entries are forced to the neutral
 * value 1<<6 so dequantization becomes an identity.
 * NOTE(review): the i/x loop headers are not visible in this listing.
 */
2071 static void init_dequant_tables(H264Context *h){
2073 init_dequant4_coeff_table(h);
2074 if(h->pps.transform_8x8_mode)
2075 init_dequant8_coeff_table(h);
2076 if(h->sps.transform_bypass){
// 1<<6 cancels the implicit >>6 of the dequant, i.e. scale factor 1.0
2079 h->dequant4_coeff[i][0][x] = 1<<6;
2080 if(h->pps.transform_8x8_mode)
2083 h->dequant8_coeff[i][0][x] = 1<<6;
2090 * needs width/height
/**
 * Allocate per-picture-geometry decoder tables; requires mb_width/mb_height
 * (i.e. SPS dimensions) to be known. Returns 0 on success; CHECKED_ALLOCZ
 * presumably jumps to a cleanup path on allocation failure — the fail label
 * is not visible in this listing.
 */
2092 static int alloc_tables(H264Context *h){
2093 MpegEncContext * const s = &h->s;
// one extra mb row as guard for edge-macroblock neighbor accesses
2094 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2097 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2099 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2103 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2105 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2106 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table points past the guard rows/column
2108 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2109 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// mb index -> 4x4 (b) and 8x8 (b8) block index lookup tables
2111 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2112 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2113 for(y=0; y<s->mb_height; y++){
2114 for(x=0; x<s->mb_width; x++){
2115 const int mb_xy= x + y*s->mb_stride;
2116 const int b_xy = 4*x + 4*y*h->b_stride;
2117 const int b8_xy= 2*x + 2*y*h->b8_stride;
2119 h->mb2b_xy [mb_xy]= b_xy;
2120 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated lazily in frame_start() once linesize is known
2124 s->obmc_scratchpad = NULL;
2126 if(!h->dequant4_coeff[0])
2127 init_dequant_tables(h);
2136 * Mimic alloc_tables(), but for every context thread.
/**
 * Share the big per-picture tables of @p src with the thread context
 * @p dst (shallow pointer copies — src remains the owner; free_tables()
 * must only release them once). Mirrors alloc_tables() field-for-field.
 */
2138 static void clone_tables(H264Context *dst, H264Context *src){
2139 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2140 dst->non_zero_count = src->non_zero_count;
2141 dst->slice_table = src->slice_table;
2142 dst->cbp_table = src->cbp_table;
2143 dst->mb2b_xy = src->mb2b_xy;
2144 dst->mb2b8_xy = src->mb2b8_xy;
2145 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2146 dst->mvd_table[0] = src->mvd_table[0];
2147 dst->mvd_table[1] = src->mvd_table[1];
2148 dst->direct_table = src->direct_table;
// scratchpad is per-thread, not shared; allocated lazily in frame_start()
2150 dst->s.obmc_scratchpad = NULL;
2151 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2156 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate the per-thread top-border buffers (16 luma + 8+8 chroma bytes
 * per macroblock column). Returns 0 on success; on allocation failure the
 * (not visible) fail path returns -1 and relies on free_tables() for cleanup.
 */
2158 static int context_init(H264Context *h){
2159 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2160 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2164 return -1; // free_tables will clean up for us
/**
 * Initialization shared by decoder (and encoder) setup: copies dimensions
 * from the AVCodecContext, sets up intra prediction, and installs flat
 * (all-16) default scaling matrices so dequant works before any PPS with
 * custom matrices is parsed.
 */
2167 static av_cold void common_init(H264Context *h){
2168 MpegEncContext * const s = &h->s;
2170 s->width = s->avctx->width;
2171 s->height = s->avctx->height;
2172 s->codec_id= s->avctx->codec->id;
2174 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 = "no PPS seen yet", forces dequant tables to be rebuilt on first PPS
2176 h->dequant_coeff_pps= -1;
2177 s->unrestricted_mv=1;
2178 s->decode=1; //FIXME
// value 16 is the neutral scaling factor (flat matrix)
2180 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2181 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up MpegEncContext defaults, pick the output
 * pixel format, and detect AVC ("avcC") extradata by its leading version
 * byte (1) as opposed to Annex-B start codes.
 * NOTE(review): several lines (common_init call, is_avc/nal-length setup,
 * return) are not visible in this listing.
 */
2184 static av_cold int decode_init(AVCodecContext *avctx){
2185 H264Context *h= avctx->priv_data;
2186 MpegEncContext * const s = &h->s;
2188 MPV_decode_defaults(s);
2193 s->out_format = FMT_H264;
2194 s->workaround_bugs= avctx->workaround_bugs;
2197 // s->decode_mb= ff_h263_decode_mb;
2198 s->quarter_sample = 1;
// SVQ3 uses full-range (JPEG) YUV; plain H.264 uses limited range
2201 if(avctx->codec_id == CODEC_ID_SVQ3)
2202 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2204 avctx->pix_fmt= PIX_FMT_YUV420P;
// avcC extradata starts with configurationVersion == 1
2208 if(avctx->extradata_size > 0 && avctx->extradata &&
2209 *(char *)avctx->extradata == 1){
// slice threads clone from context 0
2216 h->thread_context[0] = h;
/**
 * Per-frame setup: start the MPV frame and error resilience, reset
 * key_frame (IDR flags are ORed in later, see comment below), compute the
 * per-block destination offsets for progressive and field (MBAFF) layout,
 * and lazily allocate the per-thread bipred scratch buffer now that
 * linesize is known. Returns 0 on success (return paths partly not
 * visible in this listing).
 */
2220 static int frame_start(H264Context *h){
2221 MpegEncContext * const s = &h->s;
2224 if(MPV_frame_start(s, s->avctx) < 0)
2226 ff_er_frame_start(s);
/*
2228 * MPV_frame_start uses pict_type to derive key_frame.
2229 * This is incorrect for H.264; IDR markings must be used.
2230 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2231 * See decode_nal_units().
 */
2233 s->current_picture_ptr->key_frame= 0;
2235 assert(s->linesize && s->uvlinesize);
// block_offset[0..23]: frame-coded offsets; [24..47]: field-coded (doubled stride)
2237 for(i=0; i<16; i++){
2238 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2239 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2242 h->block_offset[16+i]=
2243 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2244 h->block_offset[24+16+i]=
2245 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
/*
2248 can't be in alloc_tables because linesize isn't known there.
2249 * FIXME: redo bipred weight to not require extra buffer? */
2250 for(i = 0; i < s->avctx->thread_count; i++)
2251 if(!h->thread_context[i]->s.obmc_scratchpad)
2252 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
/*
2254 some macroblocks will be accessed before they're available */
2255 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2256 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2258 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2260 // We mark the current picture as non-reference after allocating it, so
2261 // that if we break out due to an error it can be released automatically
2262 // in the next MPV_frame_start().
2263 // SVQ3 as well as most other codecs have only last/next/current and thus
2264 // get released even with set reference, besides SVQ3 and others do not
2265 // mark frames as reference later "naturally".
2266 if(s->codec_id != CODEC_ID_SVQ3)
2267 s->current_picture_ptr->reference= 0;
// POCs are filled in per field later; INT_MAX marks "not yet decoded"
2269 s->current_picture_ptr->field_poc[0]=
2270 s->current_picture_ptr->field_poc[1]= INT_MAX;
2271 assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row of the just-decoded macroblock into top_borders[0]
 * and its right column into left_border, so the deblocking filter of the
 * neighbors below/right can read unfiltered pixels later.
 * Chroma is skipped in grayscale-only decoding mode.
 * NOTE(review): the chroma i-loop header is not visible in this listing.
 */
2276 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2277 MpegEncContext * const s = &h->s;
// step back one row so [16*linesize] below addresses the MB's last row
2281 src_cb -= uvlinesize;
2282 src_cr -= uvlinesize;
2284 // There are two lines saved, the line above the top macroblock of a pair,
2285 // and the line above the bottom macroblock
// left_border[0] keeps the old top-left corner pixel before it is overwritten
2286 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2287 for(i=1; i<17; i++){
2288 h->left_border[i]= src_y[15+i* linesize];
// copy the 16 luma bottom-row pixels in two 8-byte chunks
2291 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2292 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2294 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2295 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2296 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2298 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2299 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2301 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the macroblock's top and left border
 * pixels with the backed-up unfiltered copies, so intra prediction sees
 * unfiltered neighbors while the deblocked picture keeps filtered ones.
 * With deblocking_filter==2 the borders are only exchanged across slice
 * boundaries within the same slice.
 * NOTE(review): the XCHG macro body and some closing braces are not
 * visible in this listing.
 */
2306 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2307 MpegEncContext * const s = &h->s;
2314 if(h->deblocking_filter == 2) {
// mode 2: filter only inside the slice, so compare slice ids of neighbors
2316 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2317 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2319 deblock_left = (s->mb_x > 0);
2320 deblock_top = (s->mb_y > 0);
// move to the top-left neighbor pixel (one row up, one column left)
2323 src_y -= linesize + 1;
2324 src_cb -= uvlinesize + 1;
2325 src_cr -= uvlinesize + 1;
2327 #define XCHG(a,b,t,xchg)\
2334 for(i = !deblock_top; i<17; i++){
2335 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2340 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2341 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right neighbor pixels, needed by diagonal intra prediction modes
2342 if(s->mb_x+1 < s->mb_width){
2343 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2347 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2349 for(i = !deblock_top; i<9; i++){
2350 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2351 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2355 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2356 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): save the two bottom rows of a
 * macroblock pair into top_borders[0]/[1] and the 32 right-column luma
 * (plus 16+16 chroma) pixels into left_border, for use by the deblocking
 * of the pair below/right.
 */
2361 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2362 MpegEncContext * const s = &h->s;
// step back two rows so offsets 32/33 below hit the pair's last two rows
2365 src_y -= 2 * linesize;
2366 src_cb -= 2 * uvlinesize;
2367 src_cr -= 2 * uvlinesize;
2369 // There are two lines saved, the line above the top macroblock of a pair,
2370 // and the line above the bottom macroblock
2371 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2372 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2373 for(i=2; i<34; i++){
2374 h->left_border[i]= src_y[15+i* linesize];
2377 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2378 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2379 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2380 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
// chroma is skipped entirely in grayscale-only decoding
2382 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2383 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2384 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2385 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2386 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2387 for(i=2; i<18; i++){
2388 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2389 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2391 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2392 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2393 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2394 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap (xchg=1) or restore (xchg=0)
 * the two-row top border and the double-height left border of a
 * macroblock pair with their backed-up unfiltered copies.
 * NOTE(review): the XCHG macro body and some braces are not visible here.
 */
2398 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2399 MpegEncContext * const s = &h->s;
2402 int deblock_left = (s->mb_x > 0);
// > 1 because the top neighbor of a pair is two mb rows up
2403 int deblock_top = (s->mb_y > 1);
2405 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// move to the top-left neighbor of the pair (two rows up, one column left)
2407 src_y -= 2 * linesize + 1;
2408 src_cb -= 2 * uvlinesize + 1;
2409 src_cr -= 2 * uvlinesize + 1;
2411 #define XCHG(a,b,t,xchg)\
// skip the two topmost entries when there is no top neighbor to exchange with
2418 for(i = (!deblock_top)<<1; i<34; i++){
2419 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2424 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2425 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2426 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2427 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// top-right pixels for diagonal intra prediction of the next pair
2428 if(s->mb_x+1 < s->mb_width){
2429 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2430 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2434 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2436 for(i = (!deblock_top) << 1; i<18; i++){
2437 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2438 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2442 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2443 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2444 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2445 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * IDCT/residual addition for luma and chroma, and in-loop deblocking.
 * @param simple nonzero selects the fast path that assumes plain H.264,
 *               progressive (no MBAFF/field), no PCM, no grayscale mode;
 *               always-inlined so each caller gets a specialized variant.
 * NOTE(review): this listing has gaps (several else/closing braces and a
 * few loop headers are missing); comments cover only what is visible.
 */
2450 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2451 MpegEncContext * const s = &h->s;
2452 const int mb_x= s->mb_x;
2453 const int mb_y= s->mb_y;
2454 const int mb_xy= h->mb_xy;
2455 const int mb_type= s->current_picture.mb_type[mb_xy];
2456 uint8_t *dest_y, *dest_cb, *dest_cr;
2457 int linesize, uvlinesize /*dct_offset*/;
2459 int *block_offset = &h->block_offset[0];
// bottom macroblock of an MBAFF pair?
2460 const unsigned int bottom = mb_y & 1;
2461 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2462 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2463 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination pointers into the current picture planes
2465 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2466 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2467 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2469 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2470 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double the stride and use the field block offsets
2472 if (!simple && MB_FIELD) {
2473 linesize = h->mb_linesize = s->linesize * 2;
2474 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2475 block_offset = &h->block_offset[24];
2476 if(mb_y&1){ //FIXME move out of this function?
2477 dest_y -= s->linesize*15;
2478 dest_cb-= s->uvlinesize*7;
2479 dest_cr-= s->uvlinesize*7;
// rewrite ref indices so field parity is encoded in the cached value
2483 for(list=0; list<h->list_count; list++){
2484 if(!USES_LIST(mb_type, list))
2486 if(IS_16X16(mb_type)){
2487 int8_t *ref = &h->ref_cache[list][scan8[0]];
2488 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2490 for(i=0; i<16; i+=4){
2491 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2492 int ref = h->ref_cache[list][scan8[i]];
2494 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2500 linesize = h->mb_linesize = s->linesize;
2501 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2502 // dct_offset = s->linesize * 16;
// choose residual-add functions: bypass / 8x8 / 4x4 transform
2505 if(transform_bypass){
2507 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2508 }else if(IS_8x8DCT(mb_type)){
2509 idct_dc_add = s->dsp.h264_idct8_dc_add;
2510 idct_add = s->dsp.h264_idct8_add;
2512 idct_dc_add = s->dsp.h264_idct_dc_add;
2513 idct_add = s->dsp.h264_idct_add;
// MBAFF intra: expose unfiltered borders of the whole pair for prediction
2516 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2517 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2518 int mbt_y = mb_y&~1;
2519 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2520 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2521 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2522 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM: samples are stored verbatim in h->mb; copy them out directly
2525 if (!simple && IS_INTRA_PCM(mb_type)) {
2528 // The pixels are stored in h->mb array in the same order as levels,
2529 // copy them in output in the correct order.
2530 for(i=0; i<16; i++) {
2531 for (y=0; y<4; y++) {
2532 for (x=0; x<4; x++) {
2533 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2537 for(i=16; i<16+4; i++) {
2538 for (y=0; y<4; y++) {
2539 for (x=0; x<4; x++) {
2540 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2544 for(i=20; i<20+4; i++) {
2545 for (y=0; y<4; y++) {
2546 for (x=0; x<4; x++) {
2547 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// intra macroblock: prediction (chroma, then 4x4/8x8/16x16 luma)
2552 if(IS_INTRA(mb_type)){
2553 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2554 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2556 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2557 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2558 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2561 if(IS_INTRA4x4(mb_type)){
2562 if(simple || !s->encoding){
2563 if(IS_8x8DCT(mb_type)){
2564 for(i=0; i<16; i+=4){
2565 uint8_t * const ptr= dest_y + block_offset[i];
2566 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2567 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2568 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2569 (h->topright_samples_available<<i)&0x4000, linesize);
// DC-only block: cheaper dedicated add
2571 if(nnz == 1 && h->mb[i*16])
2572 idct_dc_add(ptr, h->mb + i*16, linesize);
2574 idct_add(ptr, h->mb + i*16, linesize);
2578 for(i=0; i<16; i++){
2579 uint8_t * const ptr= dest_y + block_offset[i];
2581 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// diagonal modes read top-right samples; synthesize them if unavailable
2584 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2585 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2586 assert(mb_y || linesize <= block_offset[i]);
2587 if(!topright_avail){
// replicate the rightmost available top sample across 4 bytes
2588 tr= ptr[3 - linesize]*0x01010101;
2589 topright= (uint8_t*) &tr;
2591 topright= ptr + 4 - linesize;
2595 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2596 nnz = h->non_zero_count_cache[ scan8[i] ];
2599 if(nnz == 1 && h->mb[i*16])
2600 idct_dc_add(ptr, h->mb + i*16, linesize);
2602 idct_add(ptr, h->mb + i*16, linesize);
// SVQ3 path uses its own residual transform
2604 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra 16x16: full-plane prediction then luma DC transform
2609 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2611 if(!transform_bypass)
2612 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2614 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2616 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2617 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter macroblock: motion compensation
2619 hl_motion(h, dest_y, dest_cb, dest_cr,
2620 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2621 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2622 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add luma residuals (intra4x4 already added them during prediction)
2626 if(!IS_INTRA4x4(mb_type)){
2628 if(IS_INTRA16x16(mb_type)){
2629 for(i=0; i<16; i++){
2630 if(h->non_zero_count_cache[ scan8[i] ])
2631 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2632 else if(h->mb[i*16])
2633 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// step 4 blocks at a time for 8x8 transform, 1 otherwise
2636 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2637 for(i=0; i<16; i+=di){
2638 int nnz = h->non_zero_count_cache[ scan8[i] ];
2640 if(nnz==1 && h->mb[i*16])
2641 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2643 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2648 for(i=0; i<16; i++){
2649 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2650 uint8_t * const ptr= dest_y + block_offset[i];
2651 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// add chroma residuals (dest[0]=Cb for blocks 16..19, dest[1]=Cr for 20..23)
2657 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2658 uint8_t *dest[2] = {dest_cb, dest_cr};
2659 if(transform_bypass){
2660 idct_add = idct_dc_add = s->dsp.add_pixels4;
2662 idct_add = s->dsp.h264_idct_add;
2663 idct_dc_add = s->dsp.h264_idct_dc_add;
// dequant table index 1/2 for intra Cb/Cr, 4/5 for inter Cb/Cr
2664 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2665 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2668 for(i=16; i<16+8; i++){
2669 if(h->non_zero_count_cache[ scan8[i] ])
2670 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2671 else if(h->mb[i*16])
2672 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2675 for(i=16; i<16+8; i++){
2676 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2677 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2678 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// in-loop deblocking; for MBAFF a whole mb pair is filtered at once
2684 if(h->deblocking_filter) {
2685 if (!simple && FRAME_MBAFF) {
2686 //FIXME try deblocking one mb at a time?
2687 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2688 const int mb_y = s->mb_y - 1;
2689 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2690 const int mb_xy= mb_x + mb_y*s->mb_stride;
2691 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2692 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// wait until the bottom mb of the pair is decoded before filtering
2693 if (!bottom) return;
2694 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2695 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2696 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2698 if(IS_INTRA(mb_type_top | mb_type_bottom))
2699 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2701 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// temporarily rewind the position to filter the top mb of the pair
2704 s->mb_y--; h->mb_xy -= s->mb_stride;
2705 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2706 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2707 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2708 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2709 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
// restore position and filter the bottom mb
2711 s->mb_y++; h->mb_xy += s->mb_stride;
2712 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2713 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2714 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2715 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2716 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2718 tprintf(h->s.avctx, "call filter_mb\n");
2719 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2720 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2721 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2722 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2723 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2729 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/**
 * Fast-path macroblock reconstruction: inlines hl_decode_mb_internal()
 * with simple=1, compiling out MBAFF/field/PCM/grayscale handling.
 */
2731 static void hl_decode_mb_simple(H264Context *h){
2732 hl_decode_mb_internal(h, 1);
2736 * Process a macroblock; this handles edge cases, such as interlacing.
/**
 * Full-featured macroblock reconstruction (MBAFF, fields, PCM, gray,
 * SVQ3): hl_decode_mb_internal() with simple=0; av_noinline keeps the
 * rarely-taken heavy path out of the fast caller.
 */
2738 static void av_noinline hl_decode_mb_complex(H264Context *h){
2739 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex variant
 * based on the features the current macroblock/stream actually needs.
 */
2742 static void hl_decode_mb(H264Context *h){
2743 MpegEncContext * const s = &h->s;
2744 const int mb_xy= h->mb_xy;
2745 const int mb_type= s->current_picture.mb_type[mb_xy];
// any of these features requires the full (complex) reconstruction path
2746 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2747 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
// encoder-only mode may skip pixel reconstruction entirely
2749 if(ENABLE_H264_ENCODER && !s->decode)
2753 hl_decode_mb_complex(h);
2754 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture (in place) into a single-field view: offset the
 * data pointers to the requested parity's rows and double the linesizes.
 * @param parity PICT_TOP_FIELD or PICT_BOTTOM_FIELD
 */
2757 static void pic_as_field(Picture *pic, const int parity){
2759 for (i = 0; i < 4; ++i) {
// bottom field starts one row down in each plane
2760 if (parity == PICT_BOTTOM_FIELD)
2761 pic->data[i] += pic->linesize[i];
// setting reference inside the loop is redundant but harmless
2762 pic->reference = parity;
2763 pic->linesize[i] *= 2;
/**
 * Copy @p src into @p dest as a field of the given parity if src is a
 * reference of that parity. Returns 1 if a field was emitted, 0 otherwise.
 * @param id_add added to pic_id — presumably distinguishes the two fields
 *               of a pair (TODO confirm against callers).
 * NOTE(review): the guard around the copy is not visible in this listing.
 */
2767 static int split_field_copy(Picture *dest, Picture *src,
2768 int parity, int id_add){
// src->reference is a PICT_* bitmask; match requested parity
2769 int match = !!(src->reference & parity);
2773 pic_as_field(dest, parity);
2775 dest->pic_id += id_add;
2782 * Split one reference list into field parts, interleaving by parity
2783 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2784 * set to look at the actual start of data for that field.
2786 * @param dest output list
2787 * @param dest_len maximum number of fields to put in dest
2788 * @param src the source reference list containing fields and/or field pairs
2789 * (aka short_ref/long_ref, or
2790 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2791 * @param src_len number of Picture's in source (pairs and unmatched fields)
2792 * @param parity the parity of the picture being decoded/needing
2793 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2794 * @return number of fields placed in dest
/**
 * Interleave one half (short-term or long-term) of a frame reference list
 * into a field list, alternating between same-parity and opposite-parity
 * candidates as per H.264 8.2.4.2.5: prefer same parity, fall back to the
 * opposite when the preferred side runs out.
 * @return number of fields written to dest (return not visible here).
 */
2796 static int split_field_half_ref_list(Picture *dest, int dest_len,
2797 Picture *src, int src_len, int parity){
// start by trying to emit a same-parity field
2798 int same_parity = 1;
2804 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2805 if (same_parity && same_i < src_len) {
2806 field_output = split_field_copy(dest + out_i, src + same_i,
// if nothing was emitted, try the same side again next iteration
2808 same_parity = !field_output;
2811 } else if (opp_i < src_len) {
2812 field_output = split_field_copy(dest + out_i, src + opp_i,
// PICT_FRAME - parity flips TOP <-> BOTTOM
2813 PICT_FRAME - parity, 0);
2814 same_parity = field_output;
2826 * Split the reference frame list into a reference field list.
2827 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2828 * The input list contains both reference field pairs and
2829 * unmatched reference fields; it is ordered as spec describes
2830 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2831 * unmatched field pairs are also present. Conceptually this is equivalent
2832 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2834 * @param dest output reference list where ordered fields are to be placed
2835 * @param dest_len max number of fields to place at dest
2836 * @param src source reference list, as described above
2837 * @param src_len number of pictures (pairs and unmatched fields) in src
2838 * @param parity parity of field being currently decoded
2839 * (one of PICT_{TOP,BOTTOM}_FIELD)
2840 * @param long_i index into src array that holds first long reference picture,
2841 * or src_len if no long refs present.
/**
 * Split a combined (short-term then long-term) frame reference list into
 * an ordered field reference list: process the short-term prefix
 * [0, long_i) first, then the long-term tail, each via
 * split_field_half_ref_list(). Returns the total number of fields placed.
 * NOTE(review): the dest/dest_len adjustment between the two calls is not
 * visible in this listing.
 */
2843 static int split_field_ref_list(Picture *dest, int dest_len,
2844 Picture *src, int src_len,
2845 int parity, int long_i){
2847 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2851 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2852 src_len - long_i, parity);
2857 * fills the default_ref_list.
/**
 * Build the default (unmodified) reference picture lists L0/L1 per
 * H.264 8.2.4.2: for B slices, short-term refs sorted by POC around the
 * current picture (L0 descending below / ascending above, L1 mirrored),
 * followed by long-term refs; for P slices, short-term by recency then
 * long-term by index. For field pictures the frame lists are built into
 * temporaries and then split into field lists.
 * NOTE(review): this listing has gaps (several loop/branch lines and the
 * return are missing); comments cover only visible lines.
 */
2859 static int fill_default_ref_list(H264Context *h){
2860 MpegEncContext * const s = &h->s;
2862 int smallest_poc_greater_than_current = -1;
2864 Picture sorted_short_ref[32];
2865 Picture field_entry_list[2][32];
2866 Picture *frame_list[2];
// field decoding: build frame lists in temporaries, split into fields later
2868 if (FIELD_PICTURE) {
2869 structure_sel = PICT_FRAME;
2870 frame_list[0] = field_entry_list[0];
2871 frame_list[1] = field_entry_list[1];
2874 frame_list[0] = h->default_ref_list[0];
2875 frame_list[1] = h->default_ref_list[1];
2878 if(h->slice_type_nos==FF_B_TYPE){
2885 /* sort frame according to POC in B slice */
// selection sort of short_ref[] by ascending POC into sorted_short_ref[]
2886 for(out_i=0; out_i<h->short_ref_count; out_i++){
2888 int best_poc=INT_MAX;
2890 for(i=0; i<h->short_ref_count; i++){
2891 const int poc= h->short_ref[i]->poc;
2892 if(poc > limit && poc < best_poc){
2898 assert(best_i != INT_MIN);
2901 sorted_short_ref[out_i]= *h->short_ref[best_i];
2902 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where refs switch from past (POC<current) to future
2903 if (-1 == smallest_poc_greater_than_current) {
2904 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2905 smallest_poc_greater_than_current = out_i;
2910 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2912 // find the largest POC
// L0 walks past refs backward then future forward; L1 the mirror image
2913 for(list=0; list<2; list++){
2916 int step= list ? -1 : 1;
2918 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
// wrap around the past/future boundary when one side is exhausted
2920 while(j<0 || j>= h->short_ref_count){
2921 if(j != -99 && step == (list ? -1 : 1))
2924 j= smallest_poc_greater_than_current + (step>>1);
// for frame decoding only complete frame pairs are usable
2926 sel = sorted_short_ref[j].reference | structure_sel;
2927 if(sel != PICT_FRAME) continue;
2928 frame_list[list][index ]= sorted_short_ref[j];
2929 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2931 short_len[list] = index;
// append long-term refs in ascending long-term index order
2933 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2935 if(h->long_ref[i] == NULL) continue;
2936 sel = h->long_ref[i]->reference | structure_sel;
2937 if(sel != PICT_FRAME) continue;
2939 frame_list[ list ][index ]= *h->long_ref[i];
2940 frame_list[ list ][index++].pic_id= i;
2945 for(list=0; list<2; list++){
2947 len[list] = split_field_ref_list(h->default_ref_list[list],
2951 s->picture_structure,
2954 // swap the two first elements of L1 when L0 and L1 are identical
2955 if(list && len[0] > 1 && len[0] == len[1])
2956 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2958 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
// zero the unused tail so stale entries are never referenced
2962 if(len[list] < h->ref_count[ list ])
2963 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// P/SP slice: short-term refs (most recent first, order from short_ref[])
2970 for(i=0; i<h->short_ref_count; i++){
2972 sel = h->short_ref[i]->reference | structure_sel;
2973 if(sel != PICT_FRAME) continue;
2974 frame_list[0][index ]= *h->short_ref[i];
2975 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2978 for(i = 0; i < 16; i++){
2980 if(h->long_ref[i] == NULL) continue;
2981 sel = h->long_ref[i]->reference | structure_sel;
2982 if(sel != PICT_FRAME) continue;
2983 frame_list[0][index ]= *h->long_ref[i];
2984 frame_list[0][index++].pic_id= i;
2988 index = split_field_ref_list(h->default_ref_list[0],
2989 h->ref_count[0], frame_list[0],
2990 index, s->picture_structure,
2993 if(index < h->ref_count[0])
2994 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
// debug dump of the resulting lists
2997 for (i=0; i<h->ref_count[0]; i++) {
2998 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3000 if(h->slice_type_nos==FF_B_TYPE){
3001 for (i=0; i<h->ref_count[1]; i++) {
3002 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3009 static void print_short_term(H264Context *h);
3010 static void print_long_term(H264Context *h);
3013 * Extract structure information about the picture described by pic_num in
3014 * the current decoding context (frame or field). Note that pic_num is
3015 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3016 * @param pic_num picture number for which to extract structure information
3017 * @param structure one of PICT_XXX describing structure of picture
3019 * @return frame number (short term) or long term index of picture
3020 * described by pic_num
3022 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3023 MpegEncContext * const s = &h->s;
/* Default: the picture referred to has the same structure as the one
 * currently being decoded. */
3025 *structure = s->picture_structure;
/* NOTE(review): the condition selecting this branch is elided from this
 * listing; in field coding an even/odd pic_num distinguishes same vs.
 * opposite parity — confirm against the full source. */
3028 /* opposite field */
3029 *structure ^= PICT_FRAME;
/*
 * Parse the ref_pic_list_reordering() slice-header syntax and apply the
 * requested reordering to h->ref_list[list], starting from the default
 * reference lists. Returns 0 on success (error paths are partly elided
 * from this listing).
 */
3036 static int decode_ref_pic_list_reordering(H264Context *h){
3037 MpegEncContext * const s = &h->s;
3038 int list, index, pic_structure;
3040 print_short_term(h);
3042 if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before function
3044 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering commands then move entries */
3045 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3047 if(get_bits1(&s->gb)){ /* ref_pic_list_reordering_flag */
3048 int pred= h->curr_pic_num;
3050 for(index=0; ; index++){
3051 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3052 unsigned int pic_id;
3054 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
3056 if(reordering_of_pic_nums_idc==3)
3059 if(index >= h->ref_count[list]){
3060 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3064 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term picture addressed by abs_diff_pic_num */
3065 if(reordering_of_pic_nums_idc<2){
3066 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3069 if(abs_diff_pic_num > h->max_pic_num){
3070 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts, idc 1 adds; result wraps modulo max_pic_num */
3074 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3075 else pred+= abs_diff_pic_num;
3076 pred &= h->max_pic_num - 1;
3078 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search the short-term list, newest entries last */
3080 for(i= h->short_ref_count-1; i>=0; i--){
3081 ref = h->short_ref[i];
3082 assert(ref->reference);
3083 assert(!ref->long_ref);
3084 if(ref->data[0] != NULL &&
3085 ref->frame_num == frame_num &&
3086 (ref->reference & pic_structure) &&
3087 ref->long_ref == 0) // ignore non-existing pictures by testing data[0] pointer
/* idc 2: long-term picture addressed by its long-term index */
3094 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3096 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3099 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3102 ref = h->long_ref[long_idx];
3103 assert(!(ref && !ref->reference));
3104 if(ref && (ref->reference & pic_structure)){
3105 ref->pic_id= pic_id;
3106 assert(ref->long_ref);
3114 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3115 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift existing entries down to insert the reordered picture at 'index';
 * a matching entry already in the list is overwritten by the shift */
3117 for(i=index; i+1<h->ref_count[list]; i++){
3118 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3121 for(; i > index; i--){
3122 h->ref_list[list][i]= h->ref_list[list][i-1];
3124 h->ref_list[list][index]= *ref;
/* in field decoding, reduce the frame entry to the addressed field */
3126 pic_as_field(&h->ref_list[list][index], pic_structure);
3130 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* replace any hole (missing reference) with the current picture */
3136 for(list=0; list<h->list_count; list++){
3137 for(index= 0; index < h->ref_count[list]; index++){
3138 if(!h->ref_list[list][index].data[0])
3139 h->ref_list[list][index]= s->current_picture;
/* temporal direct mode needs the distance scale factors */
3143 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3144 direct_dist_scale_factor(h);
3145 direct_ref_list_init(h);
/*
 * For MBAFF decoding, derive per-field reference entries from each frame
 * in ref_list: the two fields of frame i are stored at indices 16+2*i
 * (top) and 16+2*i+1 (bottom), with halved-height/doubled-stride planes,
 * and the weighted-prediction tables are duplicated accordingly.
 */
3149 static void fill_mbaff_ref_list(H264Context *h){
3151 for(list=0; list<2; list++){ //FIXME try list_count
3152 for(i=0; i<h->ref_count[list]; i++){
3153 Picture *frame = &h->ref_list[list][i];
3154 Picture *field = &h->ref_list[list][16+2*i];
/* top field: same data pointers as the frame but doubled line stride */
3157 field[0].linesize[j] <<= 1;
3158 field[0].reference = PICT_TOP_FIELD;
3159 field[1] = field[0];
/* bottom field: offset each plane by one (frame) line */
3161 field[1].data[j] += frame->linesize[j];
3162 field[1].reference = PICT_BOTTOM_FIELD;
/* both field entries inherit the frame's explicit weights/offsets */
3164 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3165 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3167 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3168 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are duplicated along both ref0 and ref1 axes */
3172 for(j=0; j<h->ref_count[1]; j++){
3173 for(i=0; i<h->ref_count[0]; i++)
3174 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3175 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3176 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/*
 * Parse the pred_weight_table() slice-header syntax: explicit luma and
 * chroma weights/offsets for every reference in each list. Sets
 * h->use_weight / h->use_weight_chroma when any weight differs from the
 * identity (weight == 1<<denom, offset == 0).
 */
3180 static int pred_weight_table(H264Context *h){
3181 MpegEncContext * const s = &h->s;
3183 int luma_def, chroma_def;
3186 h->use_weight_chroma= 0;
3187 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3188 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* identity weight for each component */
3189 luma_def = 1<<h->luma_log2_weight_denom;
3190 chroma_def = 1<<h->chroma_log2_weight_denom;
3192 for(list=0; list<2; list++){
3193 for(i=0; i<h->ref_count[list]; i++){
3194 int luma_weight_flag, chroma_weight_flag;
3196 luma_weight_flag= get_bits1(&s->gb);
3197 if(luma_weight_flag){
3198 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3199 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* weighting is only "used" if it actually changes the prediction */
3200 if( h->luma_weight[list][i] != luma_def
3201 || h->luma_offset[list][i] != 0)
/* flag absent: fall back to the identity weight */
3204 h->luma_weight[list][i]= luma_def;
3205 h->luma_offset[list][i]= 0;
3208 chroma_weight_flag= get_bits1(&s->gb);
3209 if(chroma_weight_flag){
3212 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3213 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3214 if( h->chroma_weight[list][i][j] != chroma_def
3215 || h->chroma_offset[list][i][j] != 0)
3216 h->use_weight_chroma= 1;
3221 h->chroma_weight[list][i][j]= chroma_def;
3222 h->chroma_offset[list][i][j]= 0;
/* list 1 is only present for B slices */
3226 if(h->slice_type_nos != FF_B_TYPE) break;
3228 h->use_weight= h->use_weight || h->use_weight_chroma;
/*
 * Compute implicit bi-prediction weights from picture order counts
 * (weighted_bipred_idc == 2). Each (ref0, ref1) pair gets a weight
 * derived from the POC distances; out-of-range distances fall back to
 * the equal weight 32 (of 64).
 */
3232 static void implicit_weight_table(H264Context *h){
3233 MpegEncContext * const s = &h->s;
3235 int cur_poc = s->current_picture_ptr->poc;
/* special case: single refs equidistant from the current picture */
3237 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3238 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3240 h->use_weight_chroma= 0;
3245 h->use_weight_chroma= 2;
3246 h->luma_log2_weight_denom= 5;
3247 h->chroma_log2_weight_denom= 5;
3249 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3250 int poc0 = h->ref_list[0][ref0].poc;
3251 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3252 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb clipping and the tx formula follow the spec's fixed-point
 * derivation of DistScaleFactor (H.264 8.4.2.3.1-style arithmetic) */
3253 int td = av_clip(poc1 - poc0, -128, 127);
3255 int tb = av_clip(cur_poc - poc0, -128, 127);
3256 int tx = (16384 + (FFABS(td) >> 1)) / td;
3257 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3258 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3259 h->implicit_weight[ref0][ref1] = 32;
3261 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3263 h->implicit_weight[ref0][ref1] = 32;
3269 * Mark a picture as no longer needed for reference. The refmask
3270 * argument allows unreferencing of individual fields or the whole frame.
3271 * If the picture becomes entirely unreferenced, but is being held for
3272 * display purposes, it is marked as such.
3273 * @param refmask mask of fields to unreference; the mask is bitwise
3274 * anded with the reference marking of pic
3275 * @return non-zero if pic becomes entirely unreferenced (except possibly
3276 * for display purposes), zero if one of the fields remains in
3279 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* keep only the reference bits allowed by refmask; non-zero means at
 * least one field is still referenced */
3281 if (pic->reference &= refmask) {
/* fully unreferenced: if the pic is queued for display, keep it alive
 * with the special DELAYED_PIC_REF marker */
3284 for(i = 0; i < h->delayed_pic[i]; i++)
3285 if(pic == h->delayed_pic[i]){
3286 pic->reference=DELAYED_PIC_REF;
3294 * instantaneous decoder refresh.
/*
 * Handle an IDR (instantaneous decoder refresh): drop every long- and
 * short-term reference and reset the frame-number prediction state.
 */
3296 static void idr(H264Context *h){
3299 for(i=0; i<16; i++){
3300 remove_long(h, i, 0);
3302 assert(h->long_ref_count==0);
3304 for(i=0; i<h->short_ref_count; i++){
3305 unreference_pic(h, h->short_ref[i], 0);
3306 h->short_ref[i]= NULL;
3308 h->short_ref_count=0;
3309 h->prev_frame_num= 0;
3310 h->prev_frame_num_offset= 0;
3315 /* forget old pics after a seek */
/* forget old pics after a seek */
3316 static void flush_dpb(AVCodecContext *avctx){
3317 H264Context *h= avctx->priv_data;
/* drop all pictures held only for delayed output */
3319 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3320 if(h->delayed_pic[i])
3321 h->delayed_pic[i]->reference= 0;
3322 h->delayed_pic[i]= NULL;
/* reset output-ordering state so the next output POC is accepted */
3324 h->outputed_poc= INT_MIN;
3326 if(h->s.current_picture_ptr)
3327 h->s.current_picture_ptr->reference= 0;
3328 h->s.first_field= 0;
3329 ff_mpeg_flush(avctx);
3333 * Find a Picture in the short term reference list by frame number.
3334 * @param frame_num frame number to search for
3335 * @param idx the index into h->short_ref where returned picture is found
3336 * undefined if no picture found.
3337 * @return pointer to the found picture, or NULL if no pic with the provided
3338 * frame number is found
3340 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3341 MpegEncContext * const s = &h->s;
/* linear scan of the short-term list for a matching frame number */
3344 for(i=0; i<h->short_ref_count; i++){
3345 Picture *pic= h->short_ref[i];
3346 if(s->avctx->debug&FF_DEBUG_MMCO)
3347 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3348 if(pic->frame_num == frame_num) {
3357 * Remove a picture from the short term reference list by its index in
3358 * that list. This does no checking on the provided index; it is assumed
3359 * to be valid. Other list entries are shifted down.
3360 * @param i index into h->short_ref of picture to remove.
3362 static void remove_short_at_index(H264Context *h, int i){
3363 assert(i >= 0 && i < h->short_ref_count);
3364 h->short_ref[i]= NULL;
/* close the gap left by the removed entry (list stays contiguous) */
3365 if (--h->short_ref_count)
3366 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3371 * @return the removed picture or NULL if an error occurs
/*
 * Remove (unreference) the short-term picture with the given frame_num;
 * the entry only leaves the list once no field of it remains referenced.
 * @return the removed picture or NULL if an error occurs
 */
3373 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3374 MpegEncContext * const s = &h->s;
3378 if(s->avctx->debug&FF_DEBUG_MMCO)
3379 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3381 pic = find_short(h, frame_num, &i);
/* only drop the list entry when the picture is fully unreferenced */
3383 if(unreference_pic(h, pic, ref_mask))
3384 remove_short_at_index(h, i);
3391 * Remove a picture from the long term reference list by its index in
3393 * @return the removed picture or NULL if an error occurs
3395 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3398 pic= h->long_ref[i];
/* clear the slot only once no field of the picture stays referenced */
3400 if(unreference_pic(h, pic, ref_mask)){
3401 assert(h->long_ref[i]->long_ref == 1);
3402 h->long_ref[i]->long_ref= 0;
3403 h->long_ref[i]= NULL;
3404 h->long_ref_count--;
3412 * print short term list
/* Dump the short-term reference list when FF_DEBUG_MMCO is enabled. */
3414 static void print_short_term(H264Context *h) {
3416 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3417 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3418 for(i=0; i<h->short_ref_count; i++){
3419 Picture *pic= h->short_ref[i];
3420 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3426 * print long term list
/* Dump the long-term reference list when FF_DEBUG_MMCO is enabled. */
3428 static void print_long_term(H264Context *h) {
3430 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3431 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
/* long_ref has 16 fixed slots; empty ones are skipped (check elided) */
3432 for(i = 0; i < 16; i++){
3433 Picture *pic= h->long_ref[i];
3435 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3442 * Executes the reference picture marking (memory management control operations).
/*
 * Execute the decoded memory management control operations (MMCOs):
 * move pictures between the short- and long-term lists, unreference
 * them, and finally insert the current picture as a reference, enforcing
 * the sps.ref_frame_count limit.
 */
3444 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3445 MpegEncContext * const s = &h->s;
3447 int current_ref_assigned=0;
3450 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3451 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3453 for(i=0; i<mmco_count; i++){
3454 int structure, frame_num;
3455 if(s->avctx->debug&FF_DEBUG_MMCO)
3456 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* both short-term ops address the target via its (wrapped) pic num */
3458 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3459 || mmco[i].opcode == MMCO_SHORT2LONG){
3460 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3461 pic = find_short(h, frame_num, &j);
3463 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3464 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3465 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3470 switch(mmco[i].opcode){
3471 case MMCO_SHORT2UNUSED:
3472 if(s->avctx->debug&FF_DEBUG_MMCO)
3473 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* unreference only the addressed field (mask is the opposite of a
 * full-frame reference for field pictures) */
3474 remove_short(h, frame_num, structure ^ PICT_FRAME);
3476 case MMCO_SHORT2LONG:
/* evict any different picture occupying the target long-term slot */
3477 if (h->long_ref[mmco[i].long_arg] != pic)
3478 remove_long(h, mmco[i].long_arg, 0);
3480 remove_short_at_index(h, j);
3481 h->long_ref[ mmco[i].long_arg ]= pic;
3482 if (h->long_ref[ mmco[i].long_arg ]){
3483 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3484 h->long_ref_count++;
3487 case MMCO_LONG2UNUSED:
3488 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3489 pic = h->long_ref[j];
3491 remove_long(h, j, structure ^ PICT_FRAME);
3492 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3493 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3496 // Comment below left from previous code as it is an interesting note.
3497 /* First field in pair is in short term list or
3498 * at a different long term index.
3499 * This is not allowed; see 7.4.3, notes 2 and 3.
3500 * Report the problem and keep the pair where it is,
3501 * and mark this field valid.
/* MMCO_LONG: mark the current picture long-term at long_arg */
3504 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3505 remove_long(h, mmco[i].long_arg, 0);
3507 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3508 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3509 h->long_ref_count++;
3512 s->current_picture_ptr->reference |= s->picture_structure;
3513 current_ref_assigned=1;
3515 case MMCO_SET_MAX_LONG:
3516 assert(mmco[i].long_arg <= 16);
3517 // just remove the long term which index is greater than new max
3518 for(j = mmco[i].long_arg; j<16; j++){
3519 remove_long(h, j, 0);
/* MMCO_RESET (case label elided): drop everything and restart POC */
3523 while(h->short_ref_count){
3524 remove_short(h, h->short_ref[0]->frame_num, 0);
3526 for(j = 0; j < 16; j++) {
3527 remove_long(h, j, 0);
3529 s->current_picture_ptr->poc=
3530 s->current_picture_ptr->field_poc[0]=
3531 s->current_picture_ptr->field_poc[1]=
3535 s->current_picture_ptr->frame_num= 0;
3541 if (!current_ref_assigned) {
3542 /* Second field of complementary field pair; the first field of
3543 * which is already referenced. If short referenced, it
3544 * should be first entry in short_ref. If not, it must exist
3545 * in long_ref; trying to put it on the short list here is an
3546 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3548 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3549 /* Just mark the second field valid */
3550 s->current_picture_ptr->reference = PICT_FRAME;
3551 } else if (s->current_picture_ptr->long_ref) {
3552 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3553 "assignment for second field "
3554 "in complementary field pair "
3555 "(first field is long term)\n");
/* normal case: push the current picture onto the short-term list;
 * a stale entry with the same frame_num indicates a broken stream */
3557 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3559 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3562 if(h->short_ref_count)
3563 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3565 h->short_ref[0]= s->current_picture_ptr;
3566 h->short_ref_count++;
3567 s->current_picture_ptr->reference |= s->picture_structure;
3571 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3573 /* We have too many reference frames, probably due to corrupted
3574 * stream. Need to discard one frame. Prevents overrun of the
3575 * short_ref and long_ref buffers.
3577 av_log(h->s.avctx, AV_LOG_ERROR,
3578 "number of reference frames exceeds max (probably "
3579 "corrupt input), discarding one\n");
3581 if (h->long_ref_count && !h->short_ref_count) {
3582 for (i = 0; i < 16; ++i)
3587 remove_long(h, i, 0);
/* otherwise discard the oldest short-term reference */
3589 pic = h->short_ref[h->short_ref_count - 1];
3590 remove_short(h, pic->frame_num, 0);
3594 print_short_term(h);
/*
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices a synthetic MMCO_LONG may be generated; for non-IDR
 * slices with the sliding-window mode an implicit SHORT2UNUSED is
 * generated when the reference buffer is full.
 */
3599 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3600 MpegEncContext * const s = &h->s;
3604 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, mapped onto broken_link semantics */
3605 s->broken_link= get_bits1(gb) -1;
3607 h->mmco[0].opcode= MMCO_LONG;
3608 h->mmco[0].long_arg= 0;
3612 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3613 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3614 MMCOOpcode opcode= get_ue_golomb(gb);
3616 h->mmco[i].opcode= opcode;
3617 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1, wrapped modulo max_pic_num */
3618 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3619 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3620 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3624 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3625 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures address 32 long-term field slots, frames 16 */
3626 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3627 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3630 h->mmco[i].long_arg= long_arg;
3633 if(opcode > (unsigned)MMCO_LONG){
3634 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3637 if(opcode == MMCO_END)
3642 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* sliding window: drop the oldest short-term ref when buffer is full,
 * except for the second field of a reference pair */
3644 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3645 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3646 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3647 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3649 if (FIELD_PICTURE) {
/* in field mode, unreference both fields of the frame */
3650 h->mmco[0].short_pic_num *= 2;
3651 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3652 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/*
 * Compute the picture order count (POC) of the current picture for the
 * three POC types of the SPS, filling field_poc[0]/[1] and cur->poc.
 */
3662 static int init_poc(H264Context *h){
3663 MpegEncContext * const s = &h->s;
3664 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3666 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture -> bump the offset */
3668 h->frame_num_offset= h->prev_frame_num_offset;
3669 if(h->frame_num < h->prev_frame_num)
3670 h->frame_num_offset += max_frame_num;
3672 if(h->sps.poc_type==0){
/* type 0: explicit poc_lsb in the bitstream, MSB inferred from wrap */
3673 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3675 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3676 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3677 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3678 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3680 h->poc_msb = h->prev_poc_msb;
3681 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3683 field_poc[1] = h->poc_msb + h->poc_lsb;
3684 if(s->picture_structure == PICT_FRAME)
3685 field_poc[1] += h->delta_poc_bottom;
3686 }else if(h->sps.poc_type==1){
/* type 1: POC derived from frame_num and the SPS offset cycle */
3687 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3690 if(h->sps.poc_cycle_length != 0)
3691 abs_frame_num = h->frame_num_offset + h->frame_num;
3695 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3698 expected_delta_per_poc_cycle = 0;
3699 for(i=0; i < h->sps.poc_cycle_length; i++)
3700 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3702 if(abs_frame_num > 0){
3703 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3704 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3706 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3707 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3708 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3712 if(h->nal_ref_idc == 0)
3713 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3715 field_poc[0] = expectedpoc + h->delta_poc[0];
3716 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3718 if(s->picture_structure == PICT_FRAME)
3719 field_poc[1] += h->delta_poc[1];
/* type 2 (else branch): POC is simply 2*frame count (minus 1 for
 * non-reference pictures; that adjustment is elided from this listing) */
3721 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of the field(s) actually being decoded */
3730 if(s->picture_structure != PICT_BOTTOM_FIELD)
3731 s->current_picture_ptr->field_poc[0]= field_poc[0];
3732 if(s->picture_structure != PICT_TOP_FIELD)
3733 s->current_picture_ptr->field_poc[1]= field_poc[1];
3734 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3741 * initialize scan tables
/*
 * initialize scan tables
 *
 * Copies the canonical zigzag/field scan orders, permuting them when the
 * DSP context uses a non-reference IDCT whose coefficient layout differs
 * (the T() macros swap halves of the 4x4 / 8x8 index). Also selects the
 * q0 (lossless transform-bypass) scan pointers.
 */
3743 static void init_scan_tables(H264Context *h){
3744 MpegEncContext * const s = &h->s;
3746 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3747 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3748 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3750 for(i=0; i<16; i++){
3751 #define T(x) (x>>2) | ((x<<2) & 0xF)
3752 h->zigzag_scan[i] = T(zigzag_scan[i]);
3753 h-> field_scan[i] = T( field_scan[i]);
3757 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3758 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3759 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3760 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3761 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3763 for(i=0; i<64; i++){
3764 #define T(x) (x>>3) | ((x&7)<<3)
3765 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3766 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3767 h->field_scan8x8[i] = T(field_scan8x8[i]);
3768 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* with transform bypass, qp==0 blocks use the unpermuted tables */
3772 if(h->sps.transform_bypass){ //FIXME same ugly
3773 h->zigzag_scan_q0 = zigzag_scan;
3774 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3775 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3776 h->field_scan_q0 = field_scan;
3777 h->field_scan8x8_q0 = field_scan8x8;
3778 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3780 h->zigzag_scan_q0 = h->zigzag_scan;
3781 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3782 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3783 h->field_scan_q0 = h->field_scan;
3784 h->field_scan8x8_q0 = h->field_scan8x8;
3785 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3790 * Replicates H264 "master" context to thread contexts.
/*
 * Replicates H264 "master" context state to a per-slice thread context
 * (per-picture decode state: current picture, strides, POC prediction,
 * reference lists and dequant tables).
 */
3792 static void clone_slice(H264Context *dst, H264Context *src)
3794 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3795 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3796 dst->s.current_picture = src->s.current_picture;
3797 dst->s.linesize = src->s.linesize;
3798 dst->s.uvlinesize = src->s.uvlinesize;
3799 dst->s.first_field = src->s.first_field;
3801 dst->prev_poc_msb = src->prev_poc_msb;
3802 dst->prev_poc_lsb = src->prev_poc_lsb;
3803 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3804 dst->prev_frame_num = src->prev_frame_num;
3805 dst->short_ref_count = src->short_ref_count;
3807 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3808 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3809 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3810 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3812 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3813 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3817 * decodes a slice header.
3818 * This will also call MPV_common_init() and frame_start() as needed.
3820 * @param h h264context
3821 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3823 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3825 static int decode_slice_header(H264Context *h, H264Context *h0){
3826 MpegEncContext * const s = &h->s;
3827 MpegEncContext * const s0 = &h0->s;
3828 unsigned int first_mb_in_slice;
3829 unsigned int pps_id;
3830 int num_ref_idx_active_override_flag;
3831 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3832 unsigned int slice_type, tmp, i, j;
3833 int default_ref_list_done = 0;
3834 int last_pic_structure;
3836 s->dropable= h->nal_ref_idc == 0;
3838 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3839 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3840 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3842 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3843 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3846 first_mb_in_slice= get_ue_golomb(&s->gb);
3848 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3849 h0->current_slice = 0;
3850 if (!s0->first_field)
3851 s->current_picture_ptr= NULL;
3854 slice_type= get_ue_golomb(&s->gb);
3856 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3861 h->slice_type_fixed=1;
3863 h->slice_type_fixed=0;
3865 slice_type= slice_type_map[ slice_type ];
3866 if (slice_type == FF_I_TYPE
3867 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3868 default_ref_list_done = 1;
3870 h->slice_type= slice_type;
3871 h->slice_type_nos= slice_type & 3;
3873 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3874 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3875 av_log(h->s.avctx, AV_LOG_ERROR,
3876 "B picture before any references, skipping\n");
3880 pps_id= get_ue_golomb(&s->gb);
3881 if(pps_id>=MAX_PPS_COUNT){
3882 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3885 if(!h0->pps_buffers[pps_id]) {
3886 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3889 h->pps= *h0->pps_buffers[pps_id];
3891 if(!h0->sps_buffers[h->pps.sps_id]) {
3892 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3895 h->sps = *h0->sps_buffers[h->pps.sps_id];
3897 if(h == h0 && h->dequant_coeff_pps != pps_id){
3898 h->dequant_coeff_pps = pps_id;
3899 init_dequant_tables(h);
3902 s->mb_width= h->sps.mb_width;
3903 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3905 h->b_stride= s->mb_width*4;
3906 h->b8_stride= s->mb_width*2;
3908 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3909 if(h->sps.frame_mbs_only_flag)
3910 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3912 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3914 if (s->context_initialized
3915 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3917 return -1; // width / height changed during parallelized decoding
3921 if (!s->context_initialized) {
3923 return -1; // we cant (re-)initialize context during parallel decoding
3924 if (MPV_common_init(s) < 0)
3928 init_scan_tables(h);
3931 for(i = 1; i < s->avctx->thread_count; i++) {
3933 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3934 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3935 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3938 init_scan_tables(c);
3942 for(i = 0; i < s->avctx->thread_count; i++)
3943 if(context_init(h->thread_context[i]) < 0)
3946 s->avctx->width = s->width;
3947 s->avctx->height = s->height;
3948 s->avctx->sample_aspect_ratio= h->sps.sar;
3949 if(!s->avctx->sample_aspect_ratio.den)
3950 s->avctx->sample_aspect_ratio.den = 1;
3952 if(h->sps.timing_info_present_flag){
3953 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3954 if(h->x264_build > 0 && h->x264_build < 44)
3955 s->avctx->time_base.den *= 2;
3956 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3957 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3961 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3964 h->mb_aff_frame = 0;
3965 last_pic_structure = s0->picture_structure;
3966 if(h->sps.frame_mbs_only_flag){
3967 s->picture_structure= PICT_FRAME;
3969 if(get_bits1(&s->gb)) { //field_pic_flag
3970 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3972 s->picture_structure= PICT_FRAME;
3973 h->mb_aff_frame = h->sps.mb_aff;
3977 if(h0->current_slice == 0){
3978 while(h->frame_num != h->prev_frame_num &&
3979 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3980 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3982 h->prev_frame_num++;
3983 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3984 s->current_picture_ptr->frame_num= h->prev_frame_num;
3985 execute_ref_pic_marking(h, NULL, 0);
3988 /* See if we have a decoded first field looking for a pair... */
3989 if (s0->first_field) {
3990 assert(s0->current_picture_ptr);
3991 assert(s0->current_picture_ptr->data[0]);
3992 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3994 /* figure out if we have a complementary field pair */
3995 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3997 * Previous field is unmatched. Don't display it, but let it
3998 * remain for reference if marked as such.
4000 s0->current_picture_ptr = NULL;
4001 s0->first_field = FIELD_PICTURE;
4004 if (h->nal_ref_idc &&
4005 s0->current_picture_ptr->reference &&
4006 s0->current_picture_ptr->frame_num != h->frame_num) {
4008 * This and previous field were reference, but had
4009 * different frame_nums. Consider this field first in
4010 * pair. Throw away previous field except for reference
4013 s0->first_field = 1;
4014 s0->current_picture_ptr = NULL;
4017 /* Second field in complementary pair */
4018 s0->first_field = 0;
4023 /* Frame or first field in a potentially complementary pair */
4024 assert(!s0->current_picture_ptr);
4025 s0->first_field = FIELD_PICTURE;
4028 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4029 s0->first_field = 0;
4036 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4038 assert(s->mb_num == s->mb_width * s->mb_height);
4039 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4040 first_mb_in_slice >= s->mb_num){
4041 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4044 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4045 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4046 if (s->picture_structure == PICT_BOTTOM_FIELD)
4047 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4048 assert(s->mb_y < s->mb_height);
4050 if(s->picture_structure==PICT_FRAME){
4051 h->curr_pic_num= h->frame_num;
4052 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4054 h->curr_pic_num= 2*h->frame_num + 1;
4055 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4058 if(h->nal_unit_type == NAL_IDR_SLICE){
4059 get_ue_golomb(&s->gb); /* idr_pic_id */
4062 if(h->sps.poc_type==0){
4063 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4065 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4066 h->delta_poc_bottom= get_se_golomb(&s->gb);
4070 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4071 h->delta_poc[0]= get_se_golomb(&s->gb);
4073 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4074 h->delta_poc[1]= get_se_golomb(&s->gb);
4079 if(h->pps.redundant_pic_cnt_present){
4080 h->redundant_pic_count= get_ue_golomb(&s->gb);
4083 //set defaults, might be overridden a few lines later
4084 h->ref_count[0]= h->pps.ref_count[0];
4085 h->ref_count[1]= h->pps.ref_count[1];
4087 if(h->slice_type_nos != FF_I_TYPE){
4088 if(h->slice_type_nos == FF_B_TYPE){
4089 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4091 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4093 if(num_ref_idx_active_override_flag){
4094 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4095 if(h->slice_type_nos==FF_B_TYPE)
4096 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4098 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4099 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4100 h->ref_count[0]= h->ref_count[1]= 1;
4104 if(h->slice_type_nos == FF_B_TYPE)
4111 if(!default_ref_list_done){
4112 fill_default_ref_list(h);
4115 if(decode_ref_pic_list_reordering(h) < 0)
4118 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4119 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4120 pred_weight_table(h);
4121 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4122 implicit_weight_table(h);
4127 decode_ref_pic_marking(h0, &s->gb);
4130 fill_mbaff_ref_list(h);
4132 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4133 tmp = get_ue_golomb(&s->gb);
4135 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4138 h->cabac_init_idc= tmp;
4141 h->last_qscale_diff = 0;
4142 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4144 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4148 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4149 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4150 //FIXME qscale / qp ... stuff
4151 if(h->slice_type == FF_SP_TYPE){
4152 get_bits1(&s->gb); /* sp_for_switch_flag */
4154 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4155 get_se_golomb(&s->gb); /* slice_qs_delta */
4158 h->deblocking_filter = 1;
4159 h->slice_alpha_c0_offset = 0;
4160 h->slice_beta_offset = 0;
4161 if( h->pps.deblocking_filter_parameters_present ) {
4162 tmp= get_ue_golomb(&s->gb);
4164 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4167 h->deblocking_filter= tmp;
4168 if(h->deblocking_filter < 2)
4169 h->deblocking_filter^= 1; // 1<->0
4171 if( h->deblocking_filter ) {
4172 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4173 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4177 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4178 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4179 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4180 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4181 h->deblocking_filter= 0;
4183 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4184 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4185 /* Cheat slightly for speed:
4186 Do not bother to deblock across slices. */
4187 h->deblocking_filter = 2;
4189 h0->max_contexts = 1;
4190 if(!h0->single_decode_warning) {
4191 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4192 h0->single_decode_warning = 1;
4195 return 1; // deblocking switched inside frame
4200 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4201 slice_group_change_cycle= get_bits(&s->gb, ?);
4204 h0->last_slice_type = slice_type;
4205 h->slice_num = ++h0->current_slice;
4208 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4212 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4213 +(h->ref_list[j][i].reference&3);
4216 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4217 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4219 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4220 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4222 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4224 av_get_pict_type_char(h->slice_type),
4225 pps_id, h->frame_num,
4226 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4227 h->ref_count[0], h->ref_count[1],
4229 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4231 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4232 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the unary "level_prefix" element of a CAVLC coefficient level:
 * counts leading zero bits by examining the cached bitstream word.
 * NOTE(review): this extract is missing intermediate source lines (the
 * final return and closing brace are not visible); code reproduced as-is.
 */
4242 static inline int get_level_prefix(GetBitContext *gb){
4246 OPEN_READER(re, gb);
4247 UPDATE_CACHE(re, gb);
4248 buf=GET_CACHE(re, gb);
/* number of significant bits up to and including the leading 1 bit */
4250 log= 32 - av_log2(buf);
4252 print_bin(buf>>(32-log), log);
4253 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the prefix bits (zeros plus the terminating 1) */
4256 LAST_SKIP_BITS(re, gb, log);
4257 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * it is disallowed if any sub-partition is smaller than 8x8, or if a direct
 * sub-partition is present without direct_8x8_inference_flag.
 * NOTE(review): loop header, return statements and closing brace are not
 * visible in this extract; code reproduced as-is.
 */
4262 static inline int get_dct8x8_allowed(H264Context *h){
4265 if(!IS_SUB_8X8(h->sub_mb_type[i])
4266 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4273 * decodes a residual block.
4274 * @param n block index
4275 * @param scantable scantable
4276 * @param max_coeff number of coefficients in the block
4277 * @return <0 if an error occurred
/*
 * CAVLC residual decoding (H.264 clause 9.2): reads coeff_token,
 * trailing-one signs, level prefixes/suffixes, total_zeros and run_before,
 * then scatters the levels into `block` via `scantable` (optionally
 * dequantizing with `qmul`).
 * NOTE(review): this extract is missing intermediate source lines (several
 * closing braces, early returns and loop bodies); code reproduced as-is.
 */
4279 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4280 MpegEncContext * const s = &h->s;
4281 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4283 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4285 //FIXME put trailing_onex into the context
/* coeff_token: packed (total_coeff<<2)|trailing_ones; VLC table choice
 * depends on the block class and the predicted non-zero count */
4287 if(n == CHROMA_DC_BLOCK_INDEX){
4288 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4289 total_coeff= coeff_token>>2;
4291 if(n == LUMA_DC_BLOCK_INDEX){
4292 total_coeff= pred_non_zero_count(h, 0);
4293 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4294 total_coeff= coeff_token>>2;
4296 total_coeff= pred_non_zero_count(h, n);
4297 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4298 total_coeff= coeff_token>>2;
4299 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4303 //FIXME set last_non_zero?
/* sanity check against corrupted bitstreams */
4307 if(total_coeff > (unsigned)max_coeff) {
4308 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4312 trailing_ones= coeff_token&3;
4313 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4314 assert(total_coeff<=16);
/* trailing ones are coded as sign bits only: 0 -> +1, 1 -> -1 */
4316 for(i=0; i<trailing_ones; i++){
4317 level[i]= 1 - 2*get_bits1(gb);
4321 int level_code, mask;
4322 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4323 int prefix= get_level_prefix(gb);
4325 //first coefficient has suffix_length equal to 0 or 1
4326 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4328 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4330 level_code= (prefix<<suffix_length); //part
4331 }else if(prefix==14){
4333 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4335 level_code= prefix + get_bits(gb, 4); //part
4337 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4338 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4340 level_code += (1<<(prefix-3))-4096;
4343 if(trailing_ones < 3) level_code += 2;
/* convert level_code to signed magnitude: even -> positive, odd -> negative */
4348 mask= -(level_code&1);
4349 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4352 //remaining coefficients have suffix_length > 0
4353 for(;i<total_coeff;i++) {
4354 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4355 prefix = get_level_prefix(gb);
4357 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4359 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4361 level_code += (1<<(prefix-3))-4096;
4363 mask= -(level_code&1);
4364 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adaptively grow suffix_length as magnitudes grow */
4365 if(level_code > suffix_limit[suffix_length])
4370 if(total_coeff == max_coeff)
4373 if(n == CHROMA_DC_BLOCK_INDEX)
4374 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4376 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place levels from highest scan position downward; the two copies below
 * differ only in whether dequantization (qmul) is applied */
4379 coeff_num = zeros_left + total_coeff - 1;
4380 j = scantable[coeff_num];
4382 block[j] = level[0];
4383 for(i=1;i<total_coeff;i++) {
4386 else if(zeros_left < 7){
4387 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4389 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4391 zeros_left -= run_before;
4392 coeff_num -= 1 + run_before;
4393 j= scantable[ coeff_num ];
4398 block[j] = (level[0] * qmul[j] + 32)>>6;
4399 for(i=1;i<total_coeff;i++) {
4402 else if(zeros_left < 7){
4403 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4405 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4407 zeros_left -= run_before;
4408 coeff_num -= 1 + run_before;
4409 j= scantable[ coeff_num ];
4411 block[j]= (level[i] * qmul[j] + 32)>>6;
4416 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair from
 * the left neighbour (preferred) or the top neighbour, provided they belong
 * to the same slice.
 * NOTE(review): the final fallback of the conditional chain (when neither
 * neighbour is available) is not visible in this extract.
 */
4423 static void predict_field_decoding_flag(H264Context *h){
4424 MpegEncContext * const s = &h->s;
4425 const int mb_xy= h->mb_xy;
4426 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4427 ? s->current_picture.mb_type[mb_xy-1]
4428 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4429 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4431 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4435 * decodes a P_SKIP or B_SKIP macroblock
/*
 * Clears the coefficient state, synthesizes a skip mb_type and fills the
 * motion caches: B-skip uses direct prediction, P-skip uses the P_SKIP
 * motion vector predictor with reference index 0.
 * NOTE(review): intermediate source lines (branch braces, mb_type init)
 * are missing from this extract; code reproduced as-is.
 */
4437 static void decode_mb_skip(H264Context *h){
4438 MpegEncContext * const s = &h->s;
4439 const int mb_xy= h->mb_xy;
/* skipped MBs carry no residual: zero all non-zero-count state */
4442 memset(h->non_zero_count[mb_xy], 0, 16);
4443 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4446 mb_type|= MB_TYPE_INTERLACED;
4448 if( h->slice_type_nos == FF_B_TYPE )
4450 // just for fill_caches. pred_direct_motion will set the real mb_type
4451 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4453 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4454 pred_direct_motion(h, &mb_type);
4455 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path: 16x16 list-0 prediction with the skip MV predictor */
4460 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4462 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4463 pred_pskip_motion(h, &mx, &my);
4464 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4465 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4468 write_back_motion(h, mb_type);
4469 s->current_picture.mb_type[mb_xy]= mb_type;
4470 s->current_picture.qscale_table[mb_xy]= s->qscale;
4471 h->slice_table[ mb_xy ]= h->slice_num;
4472 h->prev_mb_skipped= 1;
4476 * decodes a macroblock
4477 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * CAVLC macroblock-layer decoding: skip-run handling, mb_type, intra
 * prediction modes or inter motion data, CBP, delta-QP and residual blocks.
 * NOTE(review): this extract is missing many intermediate source lines
 * (braces, error returns, loop headers); code reproduced as-is, comments
 * describe only what the visible lines establish.
 */
4479 static int decode_mb_cavlc(H264Context *h){
4480 MpegEncContext * const s = &h->s;
4482 int partition_count;
4483 unsigned int mb_type, cbp;
4484 int dct8x8_allowed= h->pps.transform_8x8_mode;
4486 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4488 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4490 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4491 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- skip-run handling (P/B slices only) --- */
4493 if(h->slice_type_nos != FF_I_TYPE){
4494 if(s->mb_skip_run==-1)
4495 s->mb_skip_run= get_ue_golomb(&s->gb);
4497 if (s->mb_skip_run--) {
4498 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4499 if(s->mb_skip_run==0)
4500 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4502 predict_field_decoding_flag(h);
4509 if( (s->mb_y&1) == 0 )
4510 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4512 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4514 h->prev_mb_skipped= 0;
/* --- mb_type: tables map the ue(v) code to type/partition_count --- */
4516 mb_type= get_ue_golomb(&s->gb);
4517 if(h->slice_type_nos == FF_B_TYPE){
4519 partition_count= b_mb_type_info[mb_type].partition_count;
4520 mb_type= b_mb_type_info[mb_type].type;
4523 goto decode_intra_mb;
4525 }else if(h->slice_type_nos == FF_P_TYPE){
4527 partition_count= p_mb_type_info[mb_type].partition_count;
4528 mb_type= p_mb_type_info[mb_type].type;
4531 goto decode_intra_mb;
4534 assert(h->slice_type_nos == FF_I_TYPE);
4535 if(h->slice_type == FF_SI_TYPE && mb_type)
4539 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4543 cbp= i_mb_type_info[mb_type].cbp;
4544 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4545 mb_type= i_mb_type_info[mb_type].type;
4549 mb_type |= MB_TYPE_INTERLACED;
4551 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw byte-aligned samples, no prediction or residual --- */
4553 if(IS_INTRA_PCM(mb_type)){
4556 // We assume these blocks are very rare so we do not optimize it.
4557 align_get_bits(&s->gb);
4559 // The pixels are stored in the same order as levels in h->mb array.
4560 for(y=0; y<16; y++){
4561 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4562 for(x=0; x<16; x++){
4563 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4564 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4568 const int index= 256 + 4*(y&3) + 32*(y>>2);
4570 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4571 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4575 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4577 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4578 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4582 // In deblocking, the quantizer is 0
4583 s->current_picture.qscale_table[mb_xy]= 0;
4584 // All coeffs are present
4585 memset(h->non_zero_count[mb_xy], 16, 16);
4587 s->current_picture.mb_type[mb_xy]= mb_type;
/* doubled for MBAFF field pairs; undone at the end of the function */
4592 h->ref_count[0] <<= 1;
4593 h->ref_count[1] <<= 1;
4596 fill_caches(h, mb_type, 0);
/* --- intra prediction mode parsing --- */
4599 if(IS_INTRA(mb_type)){
4601 // init_top_left_availability(h);
4602 if(IS_INTRA4x4(mb_type)){
4605 if(dct8x8_allowed && get_bits1(&s->gb)){
4606 mb_type |= MB_TYPE_8x8DCT;
4610 // fill_intra4x4_pred_table(h);
4611 for(i=0; i<16; i+=di){
4612 int mode= pred_intra_mode(h, i);
4614 if(!get_bits1(&s->gb)){
4615 const int rem_mode= get_bits(&s->gb, 3);
4616 mode = rem_mode + (rem_mode >= mode);
4620 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4622 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4624 write_back_intra_pred_mode(h);
4625 if( check_intra4x4_pred_mode(h) < 0)
4628 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4629 if(h->intra16x16_pred_mode < 0)
4633 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4636 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-partition (P_8x8 / B_8x8) parsing --- */
4637 }else if(partition_count==4){
4638 int i, j, sub_partition_count[4], list, ref[2][4];
4640 if(h->slice_type_nos == FF_B_TYPE){
4642 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4643 if(h->sub_mb_type[i] >=13){
4644 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4647 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4648 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4650 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4651 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4652 pred_direct_motion(h, &mb_type);
4653 h->ref_cache[0][scan8[4]] =
4654 h->ref_cache[1][scan8[4]] =
4655 h->ref_cache[0][scan8[12]] =
4656 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4659 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4661 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4662 if(h->sub_mb_type[i] >=4){
4663 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4666 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4667 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per sub-block */
4671 for(list=0; list<h->list_count; list++){
4672 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4674 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4675 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4676 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4678 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4690 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors per sub-partition */
4692 for(list=0; list<h->list_count; list++){
4694 if(IS_DIRECT(h->sub_mb_type[i])) {
4695 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4698 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4699 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4701 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4702 const int sub_mb_type= h->sub_mb_type[i];
4703 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4704 for(j=0; j<sub_partition_count[i]; j++){
4706 const int index= 4*i + block_width*j;
4707 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4708 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4709 mx += get_se_golomb(&s->gb);
4710 my += get_se_golomb(&s->gb);
4711 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4713 if(IS_SUB_8X8(sub_mb_type)){
4715 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4717 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4718 }else if(IS_SUB_8X4(sub_mb_type)){
4719 mv_cache[ 1 ][0]= mx;
4720 mv_cache[ 1 ][1]= my;
4721 }else if(IS_SUB_4X8(sub_mb_type)){
4722 mv_cache[ 8 ][0]= mx;
4723 mv_cache[ 8 ][1]= my;
4725 mv_cache[ 0 ][0]= mx;
4726 mv_cache[ 0 ][1]= my;
4729 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4735 }else if(IS_DIRECT(mb_type)){
4736 pred_direct_motion(h, &mb_type);
4737 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter macroblocks --- */
4739 int list, mx, my, i;
4740 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4741 if(IS_16X16(mb_type)){
4742 for(list=0; list<h->list_count; list++){
4744 if(IS_DIR(mb_type, 0, list)){
4745 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4746 if(val >= h->ref_count[list]){
4747 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4751 val= LIST_NOT_USED&0xFF;
4752 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4754 for(list=0; list<h->list_count; list++){
4756 if(IS_DIR(mb_type, 0, list)){
4757 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4758 mx += get_se_golomb(&s->gb);
4759 my += get_se_golomb(&s->gb);
4760 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4762 val= pack16to32(mx,my);
4765 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4768 else if(IS_16X8(mb_type)){
4769 for(list=0; list<h->list_count; list++){
4772 if(IS_DIR(mb_type, i, list)){
4773 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4774 if(val >= h->ref_count[list]){
4775 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4779 val= LIST_NOT_USED&0xFF;
4780 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4783 for(list=0; list<h->list_count; list++){
4786 if(IS_DIR(mb_type, i, list)){
4787 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4788 mx += get_se_golomb(&s->gb);
4789 my += get_se_golomb(&s->gb);
4790 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4792 val= pack16to32(mx,my);
4795 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4799 assert(IS_8X16(mb_type));
4800 for(list=0; list<h->list_count; list++){
4803 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4804 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4805 if(val >= h->ref_count[list]){
4806 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4810 val= LIST_NOT_USED&0xFF;
4811 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4814 for(list=0; list<h->list_count; list++){
4817 if(IS_DIR(mb_type, i, list)){
4818 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4819 mx += get_se_golomb(&s->gb);
4820 my += get_se_golomb(&s->gb);
4821 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4823 val= pack16to32(mx,my);
4826 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4832 if(IS_INTER(mb_type))
4833 write_back_motion(h, mb_type);
/* --- coded block pattern (not present for Intra_16x16, which encodes
 * its CBP inside mb_type) --- */
4835 if(!IS_INTRA16x16(mb_type)){
4836 cbp= get_ue_golomb(&s->gb);
4838 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4842 if(IS_INTRA4x4(mb_type))
4843 cbp= golomb_to_intra4x4_cbp[cbp];
4845 cbp= golomb_to_inter_cbp[cbp];
4849 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4850 if(get_bits1(&s->gb)){
4851 mb_type |= MB_TYPE_8x8DCT;
4852 h->cbp_table[mb_xy]= cbp;
4855 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding: delta-QP then per-block decode_residual() --- */
4857 if(cbp || IS_INTRA16x16(mb_type)){
4858 int i8x8, i4x4, chroma_idx;
4860 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4861 const uint8_t *scan, *scan8x8, *dc_scan;
4863 // fill_non_zero_count_cache(h);
4865 if(IS_INTERLACED(mb_type)){
4866 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4867 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4868 dc_scan= luma_dc_field_scan;
4870 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4871 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4872 dc_scan= luma_dc_zigzag_scan;
4875 dquant= get_se_golomb(&s->gb);
4877 if( dquant > 25 || dquant < -26 ){
4878 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4882 s->qscale += dquant;
/* QP wraps modulo 52 per the spec */
4883 if(((unsigned)s->qscale) > 51){
4884 if(s->qscale<0) s->qscale+= 52;
4885 else s->qscale-= 52;
4888 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4889 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4890 if(IS_INTRA16x16(mb_type)){
4891 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4892 return -1; //FIXME continue if partitioned and other return -1 too
4895 assert((cbp&15) == 0 || (cbp&15) == 15);
4898 for(i8x8=0; i8x8<4; i8x8++){
4899 for(i4x4=0; i4x4<4; i4x4++){
4900 const int index= i4x4 + 4*i8x8;
4901 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4907 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4910 for(i8x8=0; i8x8<4; i8x8++){
4911 if(cbp & (1<<i8x8)){
4912 if(IS_8x8DCT(mb_type)){
4913 DCTELEM *buf = &h->mb[64*i8x8];
4915 for(i4x4=0; i4x4<4; i4x4++){
4916 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4917 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4920 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4921 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4923 for(i4x4=0; i4x4<4; i4x4++){
4924 const int index= i4x4 + 4*i8x8;
4926 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4932 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4933 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4939 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4940 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4946 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4947 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4948 for(i4x4=0; i4x4<4; i4x4++){
4949 const int index= 16 + 4*chroma_idx + i4x4;
4950 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4956 uint8_t * const nnz= &h->non_zero_count_cache[0];
4957 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4958 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4961 uint8_t * const nnz= &h->non_zero_count_cache[0];
4962 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4963 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4964 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4966 s->current_picture.qscale_table[mb_xy]= s->qscale;
4967 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling from above */
4970 h->ref_count[0] >>= 1;
4971 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag with CABAC; the context (0..2) is derived
 * from whether the left and above macroblock pairs (same slice) are
 * field-coded.
 * NOTE(review): the ctx-increment lines inside the two if-bodies are not
 * visible in this extract; code reproduced as-is.
 */
4977 static int decode_cabac_field_decoding_flag(H264Context *h) {
4978 MpegEncContext * const s = &h->s;
4979 const int mb_x = s->mb_x;
4980 const int mb_y = s->mb_y & ~1;
4981 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4982 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4984 unsigned int ctx = 0;
4986 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4989 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4993 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC starting at ctx_base:
 * returns 0 for I_4x4, 25 for I_PCM, otherwise an I_16x16 code built from
 * cbp-luma, cbp-chroma and prediction-mode bins.
 * NOTE(review): intermediate lines (branch braces, mb_type declaration,
 * final return) are missing from this extract; code reproduced as-is.
 */
4996 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4997 uint8_t *state= &h->cabac_state[ctx_base];
5001 MpegEncContext * const s = &h->s;
5002 const int mba_xy = h->left_mb_xy[0];
5003 const int mbb_xy = h->top_mb_xy;
/* ctx depends on non-I4x4 neighbours in the same slice */
5005 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5007 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5009 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5010 return 0; /* I4x4 */
5013 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5014 return 0; /* I4x4 */
5017 if( get_cabac_terminate( &h->cabac ) )
5018 return 25; /* PCM */
5020 mb_type = 1; /* I16x16 */
5021 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5022 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5023 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5024 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5025 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type:
 * I slices delegate to decode_cabac_intra_mb_type; P and B slices decode
 * the inter mb_type tree, falling back to the intra path for intra MBs.
 * NOTE(review): several intermediate lines (braces, ctx declaration,
 * SI/SP handling) are missing from this extract; code reproduced as-is.
 */
5029 static int decode_cabac_mb_type( H264Context *h ) {
5030 MpegEncContext * const s = &h->s;
5032 if( h->slice_type_nos == FF_I_TYPE ) {
5033 return decode_cabac_intra_mb_type(h, 3, 1);
5034 } else if( h->slice_type_nos == FF_P_TYPE ) {
5035 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5037 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5038 /* P_L0_D16x16, P_8x8 */
5039 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5041 /* P_L0_D8x16, P_L0_D16x8 */
5042 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5045 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5047 } else if( h->slice_type_nos == FF_B_TYPE ) {
5048 const int mba_xy = h->left_mb_xy[0];
5049 const int mbb_xy = h->top_mb_xy;
/* ctx counts non-direct same-slice neighbours */
5053 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5055 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5058 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5059 return 0; /* B_Direct_16x16 */
5061 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5062 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5065 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5066 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5067 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5068 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5070 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5071 else if( bits == 13 ) {
5072 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5073 } else if( bits == 14 )
5074 return 11; /* B_L1_L0_8x16 */
5075 else if( bits == 15 )
5076 return 22; /* B_8x8 */
5078 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5079 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5081 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC; the context depends on whether the left
 * and above macroblocks in the same slice are non-skip. In MBAFF mode the
 * neighbour addresses are adjusted for field/frame macroblock pairs.
 * NOTE(review): some neighbour-setup lines and the mba_xy declarations are
 * missing from this extract; code reproduced as-is.
 */
5086 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5087 MpegEncContext * const s = &h->s;
5091 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5092 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5095 && h->slice_table[mba_xy] == h->slice_num
5096 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5097 mba_xy += s->mb_stride;
5099 mbb_xy = mb_xy - s->mb_stride;
5101 && h->slice_table[mbb_xy] == h->slice_num
5102 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5103 mbb_xy -= s->mb_stride;
5105 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5107 int mb_xy = h->mb_xy;
5109 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5112 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5114 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
/* B slices use a separate context group (offset visible below: 11+ctx) */
5117 if( h->slice_type_nos == FF_B_TYPE )
5119 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC: one bin selects
 * "use predicted mode"; otherwise three bins form rem_intra4x4_pred_mode,
 * adjusted so the predicted mode is skipped in the numbering.
 * NOTE(review): the early return for the predicted-mode case and the final
 * return are not visible in this extract; code reproduced as-is.
 */
5122 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5125 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5128 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5129 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5130 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5132 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC; the first bin's context counts
 * same-slice neighbours with a non-zero chroma prediction mode, the
 * remaining bins use a fixed context.
 * NOTE(review): ctx declaration/increments and the return values between
 * the visible bins are missing from this extract; code reproduced as-is.
 */
5138 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5139 const int mba_xy = h->left_mb_xy[0];
5140 const int mbb_xy = h->top_mb_xy;
5144 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5145 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5148 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5151 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5154 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5156 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC; each bit's context
 * is derived from the already-decoded bits of this MB and the neighbours'
 * stored cbp values (-1 when the neighbour is in another slice).
 * NOTE(review): the final `return cbp;` and closing brace are not visible
 * in this extract; code reproduced as-is.
 */
5162 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5163 int cbp_b, cbp_a, ctx, cbp = 0;
5165 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5166 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5168 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5169 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5170 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5171 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5172 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5173 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5174 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5175 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern (0: none, 1: DC only, 2: DC+AC)
 * with CABAC, using the neighbours' chroma cbp bits for context selection.
 * NOTE(review): variable declarations, a `return 0;` and some braces are
 * missing from this extract; code reproduced as-is.
 */
5178 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5182 cbp_a = (h->left_cbp>>4)&0x03;
5183 cbp_b = (h-> top_cbp>>4)&0x03;
5186 if( cbp_a > 0 ) ctx++;
5187 if( cbp_b > 0 ) ctx += 2;
5188 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin distinguishes DC-only (1) from DC+AC (2) */
5192 if( cbp_a == 2 ) ctx++;
5193 if( cbp_b == 2 ) ctx += 2;
5194 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC as a unary-coded magnitude, then maps the
 * value to the signed delta (odd -> positive, even -> negative; the visible
 * negative branch returns -(val+1)/2).
 * NOTE(review): variable declarations, ctx updates inside the loop and the
 * positive-return path are missing from this extract; code reproduced as-is.
 */
5196 static int decode_cabac_mb_dqp( H264Context *h) {
5200 if( h->last_qscale_diff != 0 )
5203 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5209 if(val > 102) //prevent infinite loop
5216 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type (8x8 sub-partition shape) with CABAC from
 * a three-bin tree.
 * NOTE(review): the return statements between the visible bins and the
 * closing brace are missing from this extract; code reproduced as-is.
 */
5218 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5219 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5221 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5223 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type with CABAC: 0 is B_Direct_8x8, then a
 * bin tree selects among the L0/L1/Bi 8x8..4x4 sub-partition types.
 * NOTE(review): the `type` initialization and final return are missing
 * from this extract; code reproduced as-is.
 */
5227 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5229 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5230 return 0; /* B_Direct_8x8 */
5231 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5232 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5234 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5235 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5236 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5239 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5240 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; the context is offset by the
 * number of neighbouring MBs using the 8x8 transform
 * (h->neighbor_transform_size).
 * NOTE(review): closing brace not visible in this extract.
 */
5244 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5245 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index for list `list`, block `n`, with CABAC:
 * unary-coded with contexts derived from the left/top cached reference
 * indices (direct-predicted B neighbours are treated as unavailable).
 * Returns 0 on overflow after logging (see FIXME below).
 * NOTE(review): ctx declaration/updates and the ref accumulation lines are
 * missing from this extract; code reproduced as-is.
 */
5248 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5249 int refa = h->ref_cache[list][scan8[n] - 1];
5250 int refb = h->ref_cache[list][scan8[n] - 8];
5254 if( h->slice_type_nos == FF_B_TYPE) {
5255 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5257 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5266 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5272 if(ref >= 32 /*h->ref_list[list]*/){
5273 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5274 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion-vector-difference component (l==0: x, l==1: y) for
 * block n using UEG3 binarization: up to 9 context-coded bins
 * (states 40.. / 47..), then an exp-Golomb bypass suffix, then a bypass
 * sign bit. The initial context is chosen from the summed magnitude of
 * the left/top cached mvds.
 */
5280 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* sum of neighbour mvd magnitudes drives the first-bin context */
5281     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5282                abs( h->mvd_cache[list][scan8[n] - 8][l] );
/* separate context bases for horizontal (40) and vertical (47) components */
5283     int ctxbase = (l == 0) ? 40 : 47;
5288     else if( amvd > 32 )
/* first bin == 0 means mvd == 0 */
5293     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* prefix: up to 9 context-coded unary bins */
5298     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* suffix: exp-Golomb, bypass coded */
5306         while( get_cabac_bypass( &h->cabac ) ) {
5310                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5315             if( get_cabac_bypass( &h->cabac ) )
/* final bypass bin gives the sign */
5319     return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context for block category cat / index idx:
 * ctx = (left nonzero) + 2*(top nonzero) + 4*cat. The neighbour
 * nonzero flags come from different places depending on the category:
 * luma DC uses bit 8 of the neighbour CBP, chroma DC a per-component CBP
 * bit, and AC/4x4 blocks the non_zero_count cache.
 */
5322 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* luma DC: bit 0x100 of the neighbour cbp records "has luma DC coeffs" */
5328             nza = h->left_cbp&0x100;
5329             nzb = h-> top_cbp&0x100;
/* chroma DC: one cbp bit per chroma component (bits 6+idx) */
5331             nza = (h->left_cbp>>(6+idx))&0x01;
5332             nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC: neighbour nonzero counts from the cache */
5336             nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5337             nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
/* luma AC / Luma4x4 only remain here */
5339             assert(cat == 1 || cat == 2);
5340             nza = h->non_zero_count_cache[scan8[idx] - 1];
5341             nzb = h->non_zero_count_cache[scan8[idx] - 8];
/* 4 contexts per category */
5351     return ctx + 4 * cat;
/* Context-offset table mapping an 8x8 scan position (0..62) to the
 * last_significant_coeff_flag context used for 8x8 blocks; also referenced
 * from the x86 asm path, hence DECLARE_ASM_CONST. */
5354 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5355     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5356     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5357     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5358     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one residual block with CABAC:
 *  1. coded_block_flag (skipped for cat 5 / 8x8 luma, where CBP implies it),
 *  2. significance map (significant + last flags per scan position),
 *  3. coefficient levels in reverse scan order, with sign and optional
 *     dequantization via qmul.
 * Results are written into block[] at scantable positions; the
 * non_zero_count cache and cbp_table are updated as a side effect.
 * is_dc is a compile-time constant in the _dc/_nondc wrappers so the
 * per-category branches fold away.
 */
5361 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* context base offsets per [MB_FIELD][cat] for the significance map */
5362     static const int significant_coeff_flag_offset[2][6] = {
5363       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5364       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5366     static const int last_coeff_flag_offset[2][6] = {
5367       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5368       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5370     static const int coeff_abs_level_m1_offset[6] = {
5371         227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position context offsets for 8x8 blocks (frame / field) */
5373     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5374       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5375         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5376         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5377        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5378       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5379         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5380         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5381         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5383     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5384      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5385      * map node ctx => cabac ctx for level=1 */
5386     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5387     /* map node ctx => cabac ctx for level>1 */
5388     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5389     static const uint8_t coeff_abs_level_transition[2][8] = {
5390     /* update node ctx after decoding a level=1 */
5391         { 1, 2, 3, 3, 4, 5, 6, 7 },
5392     /* update node ctx after decoding a level>1 */
5393         { 4, 4, 4, 4, 5, 6, 7, 7 }
5399     int coeff_count = 0;
5402     uint8_t *significant_coeff_ctx_base;
5403     uint8_t *last_coeff_ctx_base;
5404     uint8_t *abs_level_m1_ctx_base;
/* work on a stack copy of the CABAC context so the compiler can keep it
 * in registers; written back to h->cabac before every return */
5407 #define CABAC_ON_STACK
5409 #ifdef CABAC_ON_STACK
5412     cc.range     = h->cabac.range;
5413     cc.low       = h->cabac.low;
5414     cc.bytestream= h->cabac.bytestream;
5416 #define CC &h->cabac
5420     /* cat: 0-> DC 16x16  n = 0
5421      *      1-> AC 16x16  n = luma4x4idx
5422      *      2-> Luma4x4   n = luma4x4idx
5423      *      3-> DC Chroma n = iCbCr
5424      *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5425      *      5-> Luma8x8   n = 4 * luma8x8idx
5428     /* read coded block flag */
5429     if( is_dc || cat != 5 ) {
/* flag == 0: no coefficients; clear the nnz cache entry and bail out */
5430         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5433                 h->non_zero_count_cache[scan8[16+n]] = 0;
5435                 h->non_zero_count_cache[scan8[n]] = 0;
5438 #ifdef CABAC_ON_STACK
5439             h->cabac.range     = cc.range     ;
5440             h->cabac.low       = cc.low       ;
5441             h->cabac.bytestream= cc.bytestream;
5447     significant_coeff_ctx_base = h->cabac_state
5448         + significant_coeff_flag_offset[MB_FIELD][cat];
5449     last_coeff_ctx_base = h->cabac_state
5450         + last_coeff_flag_offset[MB_FIELD][cat];
5451     abs_level_m1_ctx_base = h->cabac_state
5452         + coeff_abs_level_m1_offset[cat];
5454     if( !is_dc && cat == 5 ) {
/* significance map: record scan positions of nonzero coeffs in index[];
 * the last-coeff flag terminates the scan early */
5455 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5456         for(last= 0; last < coefs; last++) { \
5457             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5458             if( get_cabac( CC, sig_ctx )) { \
5459                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5460                 index[coeff_count++] = last; \
5461                 if( get_cabac( CC, last_ctx ) ) { \
5467         if( last == max_coeff -1 ) {\
5468             index[coeff_count++] = last;\
5470         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* hand-written x86 significance decoders when available */
5471 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5472         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5474         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5476         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5478         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5481     assert(coeff_count > 0);
/* record "has coefficients" in cbp_table / nnz cache per category */
5485             h->cbp_table[h->mb_xy] |= 0x100;
5487             h->cbp_table[h->mb_xy] |= 0x40 << n;
5490         fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5492         h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5494         assert( cat == 1 || cat == 2 );
5495         h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode levels in reverse scan order, tracking the abs-level node ctx */
5499     while( coeff_count-- ) {
5500         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5502         int j= scantable[index[coeff_count]];
/* level == 1: just the sign; qmul==NULL means DC (no dequant here) */
5504         if( get_cabac( CC, ctx ) == 0 ) {
5505             node_ctx = coeff_abs_level_transition[0][node_ctx];
5507                 block[j] = get_cabac_bypass_sign( CC, -1);
5509                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
/* level > 1: unary up to 14, then exp-Golomb bypass escape */
5513             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5514             node_ctx = coeff_abs_level_transition[1][node_ctx];
5516             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5520             if( coeff_abs >= 15 ) {
5522                 while( get_cabac_bypass( CC ) ) {
5528                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5534                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5536                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
/* flush the register copy back into the context */
5540 #ifdef CABAC_ON_STACK
5541             h->cabac.range     = cc.range     ;
5542             h->cabac.low       = cc.low       ;
5543             h->cabac.bytestream= cc.bytestream;
/* Wrappers around decode_cabac_residual_internal: with CONFIG_SMALL unset,
 * two specializations (is_dc constant-folded) are instantiated and
 * dispatched on cat; otherwise one shared body is used. cat 0 and 3 are
 * the DC categories. */
5548 #ifndef CONFIG_SMALL
/* is_dc == 1 specialization (luma/chroma DC blocks) */
5549 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5550     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* is_dc == 0 specialization (AC / 4x4 / 8x8 blocks) */
5553 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5554     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* public entry: dispatch on DC vs. non-DC category */
5558 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5560     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5562     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5563     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Fills h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The plain case is mb_xy - mb_stride / mb_xy - 1; in MBAFF the indices
 * are adjusted for frame/field coding mismatches between the current MB
 * pair and its neighbours.
 */
5567 static inline void compute_mb_neighbors(H264Context *h)
5569     MpegEncContext * const s = &h->s;
5570     const int mb_xy = h->mb_xy;
5571     h->top_mb_xy     = mb_xy - s->mb_stride;
5572     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: work in macroblock pairs; frame/field status of each neighbour
 * pair decides which row/column the neighbour index points to */
5574         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5575         const int top_pair_xy      = pair_xy     - s->mb_stride;
5576         const int top_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5577         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5578         const int curr_mb_frame_flag = !MB_FIELD;
5579         const int bottom = (s->mb_y & 1);
5581                 ? !curr_mb_frame_flag // bottom macroblock
5582                 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5584             h->top_mb_xy -= s->mb_stride;
5586         if (left_mb_frame_flag != curr_mb_frame_flag) {
5587             h->left_mb_xy[0] = pair_xy - 1;
/* field pictures: the top neighbour is two rows up in mb units */
5589     } else if (FIELD_PICTURE) {
5590         h->top_mb_xy -= s->mb_stride;
5596  * decodes a macroblock
5597  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5599 static int decode_mb_cabac(H264Context *h) {
5600     MpegEncContext * const s = &h->s;
5602     int mb_type, partition_count, cbp = 0;
5603     int dct8x8_allowed= h->pps.transform_8x8_mode;
5605     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5607     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5609     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
    /* ---- skip-flag handling (P/B slices only) ---- */
5610     if( h->slice_type_nos != FF_I_TYPE ) {
5612         /* a skipped mb needs the aff flag from the following mb */
5613         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5614             predict_field_decoding_flag(h);
5615         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5616             skip = h->next_mb_skipped;
5618             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5619         /* read skip flags */
5621             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5622                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5623                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5624                 if(h->next_mb_skipped)
5625                     predict_field_decoding_flag(h);
5627                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
            /* skipped MB: reset per-MB state and return early */
5632             h->cbp_table[mb_xy] = 0;
5633             h->chroma_pred_mode_table[mb_xy] = 0;
5634             h->last_qscale_diff = 0;
    /* ---- mb_field_decoding_flag (MBAFF) ---- */
5641         if( (s->mb_y&1) == 0 )
5643                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5645         h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5647     h->prev_mb_skipped = 0;
    /* ---- macroblock type ---- */
5649     compute_mb_neighbors(h);
5650     if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5651         av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
    /* map the raw mb_type through the per-slice-type info tables;
     * values past the inter range fall through to intra decoding */
5655     if( h->slice_type_nos == FF_B_TYPE ) {
5657             partition_count= b_mb_type_info[mb_type].partition_count;
5658             mb_type=         b_mb_type_info[mb_type].type;
5661             goto decode_intra_mb;
5663     } else if( h->slice_type_nos == FF_P_TYPE ) {
5665             partition_count= p_mb_type_info[mb_type].partition_count;
5666             mb_type=         p_mb_type_info[mb_type].type;
5669             goto decode_intra_mb;
5672         if(h->slice_type == FF_SI_TYPE && mb_type)
5674         assert(h->slice_type_nos == FF_I_TYPE);
5676         partition_count = 0;
5677         cbp= i_mb_type_info[mb_type].cbp;
5678         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5679         mb_type= i_mb_type_info[mb_type].type;
5682         mb_type |= MB_TYPE_INTERLACED;
5684     h->slice_table[ mb_xy ]= h->slice_num;
    /* ---- I_PCM: raw samples follow in the bytestream ---- */
5686     if(IS_INTRA_PCM(mb_type)) {
5690         // We assume these blocks are very rare so we do not optimize it.
5691         // FIXME The two following lines get the bitstream position in the cabac
5692         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5693         ptr= h->cabac.bytestream;
5694         if(h->cabac.low&0x1) ptr--;
5696         if(h->cabac.low&0x1FF) ptr--;
5699         // The pixels are stored in the same order as levels in h->mb array.
5700         for(y=0; y<16; y++){
5701             const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5702             for(x=0; x<16; x++){
5703                 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5704                 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5708             const int index= 256 + 4*(y&3) + 32*(y>>2);
5710                 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5711                 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5715             const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5717                 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5718                 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
        /* restart the CABAC engine after the raw bytes */
5722         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5724         // All blocks are present
5725         h->cbp_table[mb_xy] = 0x1ef;
5726         h->chroma_pred_mode_table[mb_xy] = 0;
5727         // In deblocking, the quantizer is 0
5728         s->current_picture.qscale_table[mb_xy]= 0;
5729         // All coeffs are present
5730         memset(h->non_zero_count[mb_xy], 16, 16);
5731         s->current_picture.mb_type[mb_xy]= mb_type;
5732         h->last_qscale_diff = 0;
    /* MBAFF pairs see doubled ref counts while decoding (restored below) */
5737         h->ref_count[0] <<= 1;
5738         h->ref_count[1] <<= 1;
5741     fill_caches(h, mb_type, 0);
    /* ---- prediction-mode / motion decoding per mb_type class ---- */
5743     if( IS_INTRA( mb_type ) ) {
5745         if( IS_INTRA4x4( mb_type ) ) {
5746             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5747                 mb_type |= MB_TYPE_8x8DCT;
5748                 for( i = 0; i < 16; i+=4 ) {
5749                     int pred = pred_intra_mode( h, i );
5750                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5751                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5754                 for( i = 0; i < 16; i++ ) {
5755                     int pred = pred_intra_mode( h, i );
5756                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5758                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5761             write_back_intra_pred_mode(h);
5762             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5764             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5765             if( h->intra16x16_pred_mode < 0 ) return -1;
5767         h->chroma_pred_mode_table[mb_xy] =
5768         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5770         pred_mode= check_intra_pred_mode( h, pred_mode );
5771         if( pred_mode < 0 ) return -1;
5772         h->chroma_pred_mode= pred_mode;
    /* ---- 8x8 partitions: sub-mb types, refs and mvds per sub-block ---- */
5773     } else if( partition_count == 4 ) {
5774         int i, j, sub_partition_count[4], list, ref[2][4];
5776         if( h->slice_type_nos == FF_B_TYPE ) {
5777             for( i = 0; i < 4; i++ ) {
5778                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5779                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5780                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5782             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5783                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5784                 pred_direct_motion(h, &mb_type);
5785                 h->ref_cache[0][scan8[4]] =
5786                 h->ref_cache[1][scan8[4]] =
5787                 h->ref_cache[0][scan8[12]] =
5788                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5789                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5790                     for( i = 0; i < 4; i++ )
5791                         if( IS_DIRECT(h->sub_mb_type[i]) )
5792                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5796             for( i = 0; i < 4; i++ ) {
5797                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5798                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5799                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
        /* reference indices, only coded when more than one ref is active */
5803         for( list = 0; list < h->list_count; list++ ) {
5804             for( i = 0; i < 4; i++ ) {
5805                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5806                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5807                     if( h->ref_count[list] > 1 )
5808                         ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5814                 h->ref_cache[list][ scan8[4*i]+1 ]=
5815                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5820         dct8x8_allowed = get_dct8x8_allowed(h);
        /* motion vectors: pred_motion + decoded mvd, replicated into the
         * mv/mvd caches according to the sub-partition shape */
5822         for(list=0; list<h->list_count; list++){
5824                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5825                 if(IS_DIRECT(h->sub_mb_type[i])){
5826                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5830                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5831                     const int sub_mb_type= h->sub_mb_type[i];
5832                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5833                     for(j=0; j<sub_partition_count[i]; j++){
5836                         const int index= 4*i + block_width*j;
5837                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5838                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5839                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5841                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5842                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5843                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5845                         if(IS_SUB_8X8(sub_mb_type)){
5847                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5849                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5852                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5854                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5855                         }else if(IS_SUB_8X4(sub_mb_type)){
5856                             mv_cache[ 1 ][0]= mx;
5857                             mv_cache[ 1 ][1]= my;
5859                             mvd_cache[ 1 ][0]= mx - mpx;
5860                             mvd_cache[ 1 ][1]= my - mpy;
5861                         }else if(IS_SUB_4X8(sub_mb_type)){
5862                             mv_cache[ 8 ][0]= mx;
5863                             mv_cache[ 8 ][1]= my;
5865                             mvd_cache[ 8 ][0]= mx - mpx;
5866                             mvd_cache[ 8 ][1]= my - mpy;
5868                         mv_cache[ 0 ][0]= mx;
5869                         mv_cache[ 0 ][1]= my;
5871                         mvd_cache[ 0 ][0]= mx - mpx;
5872                         mvd_cache[ 0 ][1]= my - mpy;
5875                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5876                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5877                     p[0] = p[1] = p[8] = p[9] = 0;
5878                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
    /* ---- direct-predicted MB: no mvds coded ---- */
5882     } else if( IS_DIRECT(mb_type) ) {
5883         pred_direct_motion(h, &mb_type);
5884         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5885         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5886         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    /* ---- 16x16 / 16x8 / 8x16 inter partitions ---- */
5888         int list, mx, my, i, mpx, mpy;
5889         if(IS_16X16(mb_type)){
5890             for(list=0; list<h->list_count; list++){
5891                 if(IS_DIR(mb_type, 0, list)){
5892                     const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5893                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5895                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5897             for(list=0; list<h->list_count; list++){
5898                 if(IS_DIR(mb_type, 0, list)){
5899                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5901                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5902                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5903                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5905                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5906                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5908                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5911         else if(IS_16X8(mb_type)){
5912             for(list=0; list<h->list_count; list++){
5914                         if(IS_DIR(mb_type, i, list)){
5915                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5916                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5918                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5921             for(list=0; list<h->list_count; list++){
5923                     if(IS_DIR(mb_type, i, list)){
5924                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5925                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5926                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5927                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5929                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5930                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5932                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5933                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5938             assert(IS_8X16(mb_type));
5939             for(list=0; list<h->list_count; list++){
5941                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5942                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5943                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5945                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5948             for(list=0; list<h->list_count; list++){
5950                     if(IS_DIR(mb_type, i, list)){
5951                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5952                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5953                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5955                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5956                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5957                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5959                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5960                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5967     if( IS_INTER( mb_type ) ) {
5968         h->chroma_pred_mode_table[mb_xy] = 0;
5969         write_back_motion( h, mb_type );
    /* ---- CBP; intra16x16 carries its CBP inside the mb_type ---- */
5972     if( !IS_INTRA16x16( mb_type ) ) {
5973         cbp  = decode_cabac_mb_cbp_luma( h );
5974         cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5977     h->cbp_table[mb_xy] = h->cbp = cbp;
5979     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5980         if( decode_cabac_mb_transform_size( h ) )
5981             mb_type |= MB_TYPE_8x8DCT;
5983     s->current_picture.mb_type[mb_xy]= mb_type;
    /* ---- residuals: dqp, then luma / chroma coefficient blocks ---- */
5985     if( cbp || IS_INTRA16x16( mb_type ) ) {
5986         const uint8_t *scan, *scan8x8, *dc_scan;
5987         const uint32_t *qmul;
        /* choose frame or field scan order */
5990         if(IS_INTERLACED(mb_type)){
5991             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5992             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5993             dc_scan= luma_dc_field_scan;
5995             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5996             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5997             dc_scan= luma_dc_zigzag_scan;
6000         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6001         if( dqp == INT_MIN ){
6002             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
        /* wrap qscale back into 0..51 */
6006         if(((unsigned)s->qscale) > 51){
6007             if(s->qscale<0) s->qscale+= 52;
6008             else            s->qscale-= 52;
6010         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6011         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6013         if( IS_INTRA16x16( mb_type ) ) {
6015             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6016             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6019                 qmul = h->dequant4_coeff[0][s->qscale];
6020                 for( i = 0; i < 16; i++ ) {
6021                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6022                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6025                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
            /* non-intra16x16: per-8x8 luma blocks, 8x8 DCT or 4x 4x4 */
6029             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6030                 if( cbp & (1<<i8x8) ) {
6031                     if( IS_8x8DCT(mb_type) ) {
6032                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6033                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6035                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6036                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6037                             const int index = 4*i8x8 + i4x4;
6038                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6040                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6041 //STOP_TIMER("decode_residual")
6045                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6046                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
        /* chroma DC (cat 3) then chroma AC (cat 4) for both components */
6053             for( c = 0; c < 2; c++ ) {
6054                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6055                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6061             for( c = 0; c < 2; c++ ) {
6062                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6063                 for( i = 0; i < 4; i++ ) {
6064                     const int index = 16 + 4 * c + i;
6065                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6066                     decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6070             uint8_t * const nnz= &h->non_zero_count_cache[0];
6071             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6072             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
        /* cbp == 0 and not intra16x16: clear all nnz entries */
6075         uint8_t * const nnz= &h->non_zero_count_cache[0];
6076         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6077         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6078         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6079         h->last_qscale_diff = 0;
6082     s->current_picture.qscale_table[mb_xy]= s->qscale;
6083     write_back_non_zero_count(h);
    /* undo the MBAFF ref-count doubling from above */
6086         h->ref_count[0] >>= 1;
6087         h->ref_count[1] >>= 1;
/**
 * Deblocks one vertical luma edge (16 pixels tall). For bS < 4 the DSP
 * h_loop_filter_luma with per-4-pixel tc0 values is used; the bS == 4
 * (strong / intra) case is filtered in C below, writing up to 3 pixels on
 * each side of the edge.
 */
6094 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* thresholds are table lookups indexed by qp + slice offsets; the +52
 * bias allows negative indices */
6096     const int index_a = qp + h->slice_alpha_c0_offset;
6097     const int alpha = (alpha_table+52)[index_a];
6098     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] == -1 marks "no filtering" for that 4-pixel group */
6103             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6104         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6106         /* 16px edge length, because bS=4 is triggered by being at
6107          * the edge of an intra MB, so all 4 bS are the same */
6108         for( d = 0; d < 16; d++ ) {
6109             const int p0 = pix[-1];
6110             const int p1 = pix[-2];
6111             const int p2 = pix[-3];
6113             const int q0 = pix[0];
6114             const int q1 = pix[1];
6115             const int q2 = pix[2];
6117             if( FFABS( p0 - q0 ) < alpha &&
6118                 FFABS( p1 - p0 ) < beta &&
6119                 FFABS( q1 - q0 ) < beta ) {
/* strong filter only when the edge step is small enough */
6121                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6122                     if( FFABS( p2 - p0 ) < beta)
6124                         const int p3 = pix[-4];
6126                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6127                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6128                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6131                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6133                     if( FFABS( q2 - q0 ) < beta)
6135                         const int q3 = pix[3];
6137                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6138                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6139                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6142                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6146                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6147                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6149                 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge: dispatches to the DSP chroma loop
 * filter, using the tc-based variant for bS < 4 and the intra (strong)
 * variant otherwise.
 */
6155 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6157     const int index_a = qp + h->slice_alpha_c0_offset;
6158     const int alpha = (alpha_table+52)[index_a];
6159     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1; tc[i] == 0 disables filtering for that group */
6164             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6165         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6167         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks a vertical luma edge of an MBAFF macroblock pair, row by row.
 * Eight bS values and two qp values apply across the 16 rows; per row the
 * bS index and qp index are derived from the row number and field mode,
 * then the normal (bS<4) or strong (bS==4) luma filter is applied in C.
 */
6171 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6173     for( i = 0; i < 16; i++, pix += stride) {
6179         int bS_index = (i >> 1);
6182             bS_index |= (i & 1);
6185         if( bS[bS_index] == 0 ) {
/* field MBs take qp from the 8-row half, frame MBs alternate per row */
6189         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6190         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6191         alpha = (alpha_table+52)[index_a];
6192         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6194         if( bS[bS_index] < 4 ) {
/* normal filter: clip the delta to +-tc, conditionally touch p1/q1 */
6195             const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6196             const int p0 = pix[-1];
6197             const int p1 = pix[-2];
6198             const int p2 = pix[-3];
6199             const int q0 = pix[0];
6200             const int q1 = pix[1];
6201             const int q2 = pix[2];
6203             if( FFABS( p0 - q0 ) < alpha &&
6204                 FFABS( p1 - p0 ) < beta &&
6205                 FFABS( q1 - q0 ) < beta ) {
6209                 if( FFABS( p2 - p0 ) < beta ) {
6210                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6213                 if( FFABS( q2 - q0 ) < beta ) {
6214                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6218                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6219                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6220                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6221                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filter, same structure as filter_mb_edgev */
6224             const int p0 = pix[-1];
6225             const int p1 = pix[-2];
6226             const int p2 = pix[-3];
6228             const int q0 = pix[0];
6229             const int q1 = pix[1];
6230             const int q2 = pix[2];
6232             if( FFABS( p0 - q0 ) < alpha &&
6233                 FFABS( p1 - p0 ) < beta &&
6234                 FFABS( q1 - q0 ) < beta ) {
6236                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6237                     if( FFABS( p2 - p0 ) < beta)
6239                         const int p3 = pix[-4];
6241                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6242                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6243                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6246                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6248                     if( FFABS( q2 - q0 ) < beta)
6250                         const int q3 = pix[3];
6252                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6253                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6254                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6257                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6261                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6262                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6264                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks a vertical chroma edge of an MBAFF macroblock pair, row by row
 * (8 chroma rows). Chroma filtering only ever modifies p0/q0; bS < 4 uses
 * the clipped-delta filter with tc = tc0+1, bS == 4 the strong averaging
 * filter.
 */
6269 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6271     for( i = 0; i < 8; i++, pix += stride) {
6279         if( bS[bS_index] == 0 ) {
/* field MBs take qp from the 4-row half, frame MBs alternate per row */
6283         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6284         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6285         alpha = (alpha_table+52)[index_a];
6286         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6288         if( bS[bS_index] < 4 ) {
6289             const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6290             const int p0 = pix[-1];
6291             const int p1 = pix[-2];
6292             const int q0 = pix[0];
6293             const int q1 = pix[1];
6295             if( FFABS( p0 - q0 ) < alpha &&
6296                 FFABS( p1 - p0 ) < beta &&
6297                 FFABS( q1 - q0 ) < beta ) {
6298                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6300                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6301                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6302                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6305             const int p0 = pix[-1];
6306             const int p1 = pix[-2];
6307             const int q0 = pix[0];
6308             const int q1 = pix[1];
6310             if( FFABS( p0 - q0 ) < alpha &&
6311                 FFABS( p1 - p0 ) < beta &&
6312                 FFABS( q1 - q0 ) < beta ) {
6314                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6315                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6316                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge: the vertical counterpart of
 * filter_mb_edgev, addressing samples with +-n*stride instead of +-n.
 * bS < 4 uses the DSP v_loop_filter_luma; bS == 4 is the strong filter
 * written out in C.
 */
6322 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6324     const int index_a = qp + h->slice_alpha_c0_offset;
6325     const int alpha = (alpha_table+52)[index_a];
6326     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6327     const int pix_next  = stride;
/* tc[i] == -1 disables filtering for that 4-pixel group */
6332             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6333         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6335         /* 16px edge length, see filter_mb_edgev */
6336         for( d = 0; d < 16; d++ ) {
6337             const int p0 = pix[-1*pix_next];
6338             const int p1 = pix[-2*pix_next];
6339             const int p2 = pix[-3*pix_next];
6340             const int q0 = pix[0];
6341             const int q1 = pix[1*pix_next];
6342             const int q2 = pix[2*pix_next];
6344             if( FFABS( p0 - q0 ) < alpha &&
6345                 FFABS( p1 - p0 ) < beta &&
6346                 FFABS( q1 - q0 ) < beta ) {
6348                 const int p3 = pix[-4*pix_next];
6349                 const int q3 = pix[ 3*pix_next];
6351                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6352                     if( FFABS( p2 - p0 ) < beta) {
6354                         pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6355                         pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6356                         pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6359                         pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6361                     if( FFABS( q2 - q0 ) < beta) {
6363                         pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6364                         pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6365                         pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6368                         pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only the two samples adjacent to the edge */
6372                     pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6373                     pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6375                 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge.  Unlike the luma path above, the
 * clipping value is tc0+1 and bS==0 maps to tc==0; both the normal and the
 * intra (strong) case are delegated to dsputil routines.
 * NOTE(review): intervening lines (tc declaration, branch headers) are
 * missing from this lossy extract. */
6382 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6384 const int index_a = qp + h->slice_alpha_c0_offset;
6385 const int alpha = (alpha_table+52)[index_a];
6386 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* normal filtering (bS < 4) */
6391 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6392 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* strong filtering (intra edge, bS == 4) */
6394 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock: assumes the simple cases
 * (no MBAFF, uniform chroma QP offsets, interior MB, usable
 * h264_loop_filter_strength dsputil entry) and falls back to the general
 * filter_mb() otherwise.
 * NOTE(review): lossy extract - some braces/else lines between the visible
 * lines are missing. */
6398 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6399 MpegEncContext * const s = &h->s;
6400 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6402 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* conditions the fast path cannot handle: picture-border MBs, missing
 * dsputil helper, differing Cb/Cr QP offsets, or deblocking across slice
 * boundaries when deblocking_filter==2 forbids it */
6406 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6408 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6409 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6410 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6413 assert(!FRAME_MBAFF);
6415 mb_type = s->current_picture.mb_type[mb_xy];
/* average this MB's QP with the left/top neighbours' for the respective
 * boundary edges (luma and chroma separately) */
6416 qp = s->current_picture.qscale_table[mb_xy];
6417 qp0 = s->current_picture.qscale_table[mb_xy-1];
6418 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6419 qpc = get_chroma_qp( h, 0, qp );
6420 qpc0 = get_chroma_qp( h, 0, qp0 );
6421 qpc1 = get_chroma_qp( h, 0, qp1 );
6422 qp0 = (qp + qp0 + 1) >> 1;
6423 qp1 = (qp + qp1 + 1) >> 1;
6424 qpc0 = (qpc + qpc0 + 1) >> 1;
6425 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP threshold the filter provably changes nothing - skip */
6426 qp_thresh = 15 - h->slice_alpha_c0_offset;
6427 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6428 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: boundary strengths are constant (4 on MB edges, 3 inside;
 * horizontal MB edge drops to 3 in field pictures), so filter directly */
6431 if( IS_INTRA(mb_type) ) {
6432 int16_t bS4[4] = {4,4,4,4};
6433 int16_t bS3[4] = {3,3,3,3};
6434 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every second internal edge exists */
6435 if( IS_8x8DCT(mb_type) ) {
6436 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6437 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6438 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6439 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6441 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6442 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6443 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6444 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6445 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6446 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6447 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6448 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma has only the MB edge and the single middle edge */
6450 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6451 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6452 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6453 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6454 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6455 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6456 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6457 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS for all edges at once via the dsputil helper,
 * viewing the aligned bS array as 64-bit words for cheap bulk writes */
6460 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6461 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6463 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6465 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the helper which edges cannot have mv-based bS
 * because the partitioning guarantees identical mv/ref across them */
6467 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6468 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6469 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6470 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6472 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6473 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6474 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6475 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* MB edges against intra neighbours are always strength 4 (3 in fields) */
6477 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6478 bSv[0][0] = 0x0004000400040004ULL;
6479 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6480 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* apply the computed strengths; dir 0 = vertical edges, dir 1 =
 * horizontal edges, edge 0 uses the neighbour-averaged QP */
6482 #define FILTER(hv,dir,edge)\
6483 if(bSv[dir][edge]) {\
6484 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6486 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6487 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6493 } else if( IS_8x8DCT(mb_type) ) {
/* General (slow-path) deblocking of one macroblock: handles MBAFF,
 * field pictures, per-plane chroma QP offsets and slice-boundary rules
 * that filter_mb_fast() cannot.
 * NOTE(review): lossy extract - many brace/else/declaration lines between
 * the visible lines are missing; comments describe only visible code. */
6512 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6513 MpegEncContext * const s = &h->s;
6514 const int mb_xy= mb_x + mb_y*s->mb_stride;
6515 const int mb_type = s->current_picture.mb_type[mb_xy];
/* mv difference threshold on the y axis is halved for interlaced MBs */
6516 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6517 int first_vertical_edge_done = 0;
6520 //for sufficiently low qp, filtering wouldn't do anything
6521 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6523 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6524 int qp = s->current_picture.qscale_table[mb_xy];
6526 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6527 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6532 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6533 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6534 int top_type, left_type[2];
6535 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6536 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6537 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the nnz cache entries for 8x8-transformed neighbours from the
 * coded-block-pattern bits instead of the per-4x4 nnz counts */
6539 if(IS_8x8DCT(top_type)){
6540 h->non_zero_count_cache[4+8*0]=
6541 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6542 h->non_zero_count_cache[6+8*0]=
6543 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6545 if(IS_8x8DCT(left_type[0])){
6546 h->non_zero_count_cache[3+8*1]=
6547 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6549 if(IS_8x8DCT(left_type[1])){
6550 h->non_zero_count_cache[3+8*3]=
6551 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* same fixup for the current MB's own 8x8 blocks */
6554 if(IS_8x8DCT(mb_type)){
6555 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6556 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6558 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6559 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6561 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6562 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6564 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6565 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
/* MBAFF special case: left MB pair has a different field/frame type,
 * so the left vertical MB edge needs 8 strengths and 2 QP sets */
6570 // left mb is in picture
6571 && h->slice_table[mb_xy-1] != 255
6572 // and current and left pair do not have the same interlaced type
6573 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6574 // and left mb is in the same slice if deblocking_filter == 2
6575 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6576 /* First vertical edge is different in MBAFF frames
6577 * There are 8 different bS to compute and 2 different Qp
6579 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6580 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6585 int mb_qp, mbn0_qp, mbn1_qp;
6587 first_vertical_edge_done = 1;
6589 if( IS_INTRA(mb_type) )
6590 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6592 for( i = 0; i < 8; i++ ) {
/* pick which of the two left MBs faces row i, depending on whether the
 * current MB is field- or frame-coded */
6593 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6595 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6597 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6598 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6599 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* average QP with each of the two left neighbours, for luma (qp),
 * Cb (bqp) and Cr (rqp) separately */
6606 mb_qp = s->current_picture.qscale_table[mb_xy];
6607 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6608 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6609 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6610 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6611 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6612 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6613 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6614 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6615 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6616 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6617 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6618 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6621 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6622 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6623 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6624 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6625 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6627 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6628 for( dir = 0; dir < 2; dir++ )
/* mbm = neighbour MB across the first edge in this direction */
6631 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6632 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref->frame mapping tables may differ between this slice and the
 * neighbour's slice */
6633 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6634 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
/* start at edge 1 (skip the MB edge) when the neighbour is outside the
 * picture */
6635 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6637 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6638 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6639 // how often to recheck mv-based bS when iterating between edges
6640 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6641 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6642 // how often to recheck mv-based bS when iterating along each edge
6643 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the MBAFF block above already filtered the left MB edge */
6645 if (first_vertical_edge_done) {
6647 first_vertical_edge_done = 0;
6650 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* frame MB above a field MB pair: the top edge must be filtered once
 * per field, against the MB two rows up and the one below it */
6653 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6654 && !IS_INTERLACED(mb_type)
6655 && IS_INTERLACED(mbm_type)
6657 // This is a special case in the norm where the filtering must
6658 // be done twice (one each of the field) even if we are in a
6659 // frame macroblock.
6661 static const int nnz_idx[4] = {4,5,6,3};
6662 unsigned int tmp_linesize = 2 * linesize;
6663 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6664 int mbn_xy = mb_xy - 2 * s->mb_stride;
6669 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6670 if( IS_INTRA(mb_type) ||
6671 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6672 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6674 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6675 for( i = 0; i < 4; i++ ) {
6676 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6677 mbn_nnz[nnz_idx[i]] != 0 )
6683 // Do not use s->qscale as luma quantizer because it has not the same
6684 // value in IPCM macroblocks.
6685 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6686 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6687 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6688 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6689 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6690 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6691 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6692 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* generic per-edge loop: derive bS for each of up to 4 edges and filter */
6699 for( edge = start; edge < edges; edge++ ) {
6700 /* mbn_xy: neighbor macroblock */
6701 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6702 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6703 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* with an 8x8 transform the odd internal edges do not exist */
6707 if( (edge&1) && IS_8x8DCT(mb_type) )
6710 if( IS_INTRA(mb_type) ||
6711 IS_INTRA(mbn_type) ) {
6714 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6715 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6724 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inter: partition layout may guarantee identical mv/ref across this
 * edge, making the mv check unnecessary */
6729 if( edge & mask_edge ) {
6730 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6733 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6734 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6737 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
/* one mv/ref comparison is valid for the whole edge */
6738 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6739 int bn_idx= b_idx - (dir ? 8:1);
6742 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6743 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6744 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6745 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare L0 against the neighbour's L1 and vice versa */
6748 if(h->slice_type_nos == FF_B_TYPE && v){
6750 for( l = 0; !v && l < 2; l++ ) {
6752 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6753 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6754 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6758 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* otherwise compute bS for each 4-pixel group along the edge */
6764 for( i = 0; i < 4; i++ ) {
6765 int x = dir == 0 ? edge : i;
6766 int y = dir == 0 ? i : edge;
6767 int b_idx= 8 + 4 + x + 8*y;
6768 int bn_idx= b_idx - (dir ? 8:1);
/* nonzero residual on either side forces bS >= 2 */
6770 if( h->non_zero_count_cache[b_idx] != 0 ||
6771 h->non_zero_count_cache[bn_idx] != 0 ) {
6777 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6778 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6779 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6780 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6786 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6788 for( l = 0; l < 2; l++ ) {
6790 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6791 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6792 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all four strengths zero: nothing to filter on this edge */
6801 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6806 // Do not use s->qscale as luma quantizer because it has not the same
6807 // value in IPCM macroblocks.
6808 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6809 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6810 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6811 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* apply; chroma has half the edges, so only even edge numbers */
6813 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6814 if( (edge&1) == 0 ) {
6815 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6816 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6817 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6818 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6821 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6822 if( (edge&1) == 0 ) {
6823 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6824 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6825 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6826 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of one slice.  Three decode loops exist: CABAC,
 * CAVLC, and a (dead) simple decode_mb loop; each advances s->mb_x/mb_y,
 * calls hl_decode_mb() per MB, and reports the decoded region to the error
 * concealment via ff_er_add_slice().
 * Fix: original line 6991 contained mis-encoded characters
 * ("get_bits_count(s->?gb) >= s->gb?.size_in_bits") - repaired to match
 * the identical checks on lines 6979/6992.
 * NOTE(review): lossy extract - loop headers, braces and returns between
 * the visible lines are missing. */
6833 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6834 MpegEncContext * const s = &h->s;
/* partitioned streams only conceal AC errors; 0x7F keeps everything */
6835 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6839 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header */
6843 align_get_bits( &s->gb );
6846 ff_init_cabac_states( &h->cabac);
6847 ff_init_cabac_decoder( &h->cabac,
6848 s->gb.buffer + get_bits_count(&s->gb)/8,
6849 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6850 /* calculate pre-state */
/* initialize all 460 context models from the qscale-dependent init
 * tables (I-slice table, or the cabac_init_idc-selected P/B table) */
6851 for( i= 0; i < 460; i++ ) {
6853 if( h->slice_type_nos == FF_I_TYPE )
6854 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6856 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: LSB is the most-probable symbol */
6859 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6861 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop */
6866 int ret = decode_mb_cabac(h);
6868 //STOP_TIMER("decode_mb_cabac")
6870 if(ret>=0) hl_decode_mb(h);
6872 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
/* MBAFF: decode the bottom MB of the pair right away */
6875 if(ret>=0) ret = decode_mb_cabac(h);
6877 if(ret>=0) hl_decode_mb(h);
6880 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond the 2-byte slack means corrupt input */
6882 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6883 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6884 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6888 if( ++s->mb_x >= s->mb_width ) {
6890 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6892 if(FIELD_OR_MBAFF_PICTURE) {
/* end_of_slice_flag set or picture complete: slice decoded fine */
6897 if( eos || s->mb_y >= s->mb_height ) {
6898 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6899 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop */
6906 int ret = decode_mb_cavlc(h);
6908 if(ret>=0) hl_decode_mb(h);
6910 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6912 ret = decode_mb_cavlc(h);
6914 if(ret>=0) hl_decode_mb(h);
6919 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6920 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6925 if(++s->mb_x >= s->mb_width){
6927 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6929 if(FIELD_OR_MBAFF_PICTURE) {
6932 if(s->mb_y >= s->mb_height){
6933 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact bit consumption distinguishes a clean slice end from junk */
6935 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6936 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6940 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mid-row bitstream exhaustion (pending skip_run still counts as data) */
6947 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6948 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6949 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6950 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6954 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* simple decode_mb loop (apparently unreachable, see "not reached") */
6963 for(;s->mb_y < s->mb_height; s->mb_y++){
6964 for(;s->mb_x < s->mb_width; s->mb_x++){
6965 int ret= decode_mb(h);
6970 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6971 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6976 if(++s->mb_x >= s->mb_width){
6978 if(++s->mb_y >= s->mb_height){
6979 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6980 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6984 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6991 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6992 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6993 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6997 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7004 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7007 return -1; //not reached
/* Parse an SEI "unregistered user data" payload: copies up to
 * sizeof(user_data)-1 bytes and scans the text (past the 16-byte UUID)
 * for an x264 version banner, recording the build number for bug
 * workarounds.  Remaining payload bytes are skipped below.
 * NOTE(review): lossy extract - declarations, the NUL-termination of
 * user_data, and the skip loop header are not visible here. */
7010 static int decode_unregistered_user_data(H264Context *h, int size){
7011 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 bytes of text */
7012 uint8_t user_data[16+256];
7018 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7019 user_data[i]= get_bits(&s->gb, 8);
/* text starts after the 16-byte UUID */
7023 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7024 if(e==1 && build>=0)
7025 h->x264_build= build;
7027 if(s->avctx->debug & FF_DEBUG_BUGS)
7028 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes that did not fit in user_data */
7031 skip_bits(&s->gb, 8);
/* Parse SEI NAL unit: each message is (type, size, payload) where type and
 * size use the 0xFF-extension coding (add 255 for every 0xFF byte read).
 * Only the unregistered-user-data payload is interpreted; everything else
 * is skipped.
 * NOTE(review): lossy extract - the type/size accumulator declarations and
 * the payload-type switch are not fully visible. */
7036 static int decode_sei(H264Context *h){
7037 MpegEncContext * const s = &h->s;
/* stop when fewer than 16 bits remain (minimum for type+size bytes) */
7039 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payload type: sum of bytes while each byte equals 255 */
7044 type+= show_bits(&s->gb, 8);
7045 }while(get_bits(&s->gb, 8) == 255);
/* payload size: same extension coding */
7049 size+= show_bits(&s->gb, 8);
7050 }while(get_bits(&s->gb, 8) == 255);
7054 if(decode_unregistered_user_data(h, size) < 0)
/* unknown payload type: skip it wholesale */
7058 skip_bits(&s->gb, 8*size);
7061 //FIXME check bits here
7062 align_get_bits(&s->gb);
/* Parse (and discard) HRD parameters from the VUI; nothing is stored in
 * the SPS - the fields are only consumed to keep the bitstream position
 * correct. */
7068 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7069 MpegEncContext * const s = &h->s;
7071 cpb_count = get_ue_golomb(&s->gb) + 1;
7072 get_bits(&s->gb, 4); /* bit_rate_scale */
7073 get_bits(&s->gb, 4); /* cpb_size_scale */
/* per-CPB rate/size/cbr triple */
7074 for(i=0; i<cpb_count; i++){
7075 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7076 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7077 get_bits1(&s->gb); /* cbr_flag */
7079 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7080 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7081 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7082 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse VUI parameters: stores sample aspect ratio, timing info,
 * bitstream restrictions (num_reorder_frames) into the SPS; most other
 * fields are read and discarded. */
7085 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7086 MpegEncContext * const s = &h->s;
7087 int aspect_ratio_info_present_flag;
7088 unsigned int aspect_ratio_idc;
7089 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7091 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7093 if( aspect_ratio_info_present_flag ) {
7094 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den pair; other idc values index
 * the predefined pixel_aspect[] table */
7095 if( aspect_ratio_idc == EXTENDED_SAR ) {
7096 sps->sar.num= get_bits(&s->gb, 16);
7097 sps->sar.den= get_bits(&s->gb, 16);
7098 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7099 sps->sar= pixel_aspect[aspect_ratio_idc];
7101 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7108 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7110 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7111 get_bits1(&s->gb); /* overscan_appropriate_flag */
7114 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7115 get_bits(&s->gb, 3); /* video_format */
7116 get_bits1(&s->gb); /* video_full_range_flag */
7117 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7118 get_bits(&s->gb, 8); /* colour_primaries */
7119 get_bits(&s->gb, 8); /* transfer_characteristics */
7120 get_bits(&s->gb, 8); /* matrix_coefficients */
7124 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7125 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7126 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* frame rate information, kept in the SPS */
7129 sps->timing_info_present_flag = get_bits1(&s->gb);
7130 if(sps->timing_info_present_flag){
7131 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7132 sps->time_scale = get_bits_long(&s->gb, 32);
7133 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7136 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7137 if(nal_hrd_parameters_present_flag)
7138 decode_hrd_parameters(h, sps);
7139 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7140 if(vcl_hrd_parameters_present_flag)
7141 decode_hrd_parameters(h, sps);
7142 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7143 get_bits1(&s->gb); /* low_delay_hrd_flag */
7144 get_bits1(&s->gb); /* pic_struct_present_flag */
/* bitstream restrictions: only num_reorder_frames is retained (it bounds
 * the decoder's output delay) */
7146 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7147 if(sps->bitstream_restriction_flag){
7148 unsigned int num_reorder_frames;
7149 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7150 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7151 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7152 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7153 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7154 num_reorder_frames= get_ue_golomb(&s->gb);
7155 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7157 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7158 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7162 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list of 16 or 64 factors.  Three outcomes:
 *  - flag bit 0: use fallback_list (previous list / SPS list / default),
 *  - first delta yields next==0: use the JVT preset list,
 *  - otherwise: run-length style decode, repeating the last value when a
 *    delta of 0 is read.  Values are stored in zigzag scan order. */
7168 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7169 const uint8_t *jvt_list, const uint8_t *fallback_list){
7170 MpegEncContext * const s = &h->s;
7171 int i, last = 8, next = 8;
7172 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7173 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7174 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7176 for(i=0;i<size;i++){
/* deltas are signed Exp-Golomb, accumulated modulo 256 */
7178 next = (last + get_se_golomb(&s->gb)) & 0xff;
7179 if(!i && !next){ /* matrix not written, we use the preset one */
7180 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 after the first entry means "repeat previous value" */
7183 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices (six 4x4 lists, and two 8x8
 * lists when the 8x8 transform can be used).  Fallback chain per the
 * spec: each list falls back to the previous same-type list, the first
 * of each kind to the SPS matrix (for a PPS) or to the JVT defaults. */
7187 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7188 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7189 MpegEncContext * const s = &h->s;
/* when parsing a PPS and the SPS carried matrices, those are the
 * fallback; otherwise the flat/default lists are */
7190 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7191 const uint8_t *fallback[4] = {
7192 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7193 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7194 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7195 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7197 if(get_bits1(&s->gb)){
7198 sps->scaling_matrix_present |= is_sps;
7199 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7200 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7201 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7202 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7203 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7204 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7205 if(is_sps || pps->transform_8x8_mode){
7206 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7207 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* no matrices in this PPS: inherit the SPS matrices wholesale */
7209 } else if(fallback_sps) {
7210 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7211 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7216 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Range-checks 'id' against 'max', zero-allocates vec[id] on first use,
 * and returns the (existing or new) entry; 'name' is only used in error
 * messages.  NOTE(review): the return-type line and the surrounding
 * control flow are missing from this lossy extract. */
7219 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7220 const size_t size, const char *name)
/* id out of range for this parameter-set type */
7223 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
/* first reference to this id: allocate a zeroed structure */
7228 vec[id] = av_mallocz(size);
7230 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL unit into h->sps_buffers[sps_id].
 * Validates POC configuration, reference-frame count, picture dimensions
 * and cropping; optionally parses scaling matrices (high profile) and VUI.
 * Fix: the cropping sanity check on original line 7333 consulted
 * h->sps.frame_mbs_only_flag - the *previously active* SPS, which may be
 * stale or uninitialized while this new SPS is being parsed - instead of
 * the frame_mbs_only_flag just read into 'sps' on line 7309.
 * NOTE(review): lossy extract - error-return lines and braces between the
 * visible lines are missing. */
7235 static inline int decode_seq_parameter_set(H264Context *h){
7236 MpegEncContext * const s = &h->s;
7237 int profile_idc, level_idc;
7238 unsigned int sps_id, tmp, mb_width, mb_height;
7242 profile_idc= get_bits(&s->gb, 8);
7243 get_bits1(&s->gb); //constraint_set0_flag
7244 get_bits1(&s->gb); //constraint_set1_flag
7245 get_bits1(&s->gb); //constraint_set2_flag
7246 get_bits1(&s->gb); //constraint_set3_flag
7247 get_bits(&s->gb, 4); // reserved
7248 level_idc= get_bits(&s->gb, 8);
7249 sps_id= get_ue_golomb(&s->gb);
7251 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7255 sps->profile_idc= profile_idc;
7256 sps->level_idc= level_idc;
/* high profile adds chroma format, bit depth and scaling matrices */
7258 if(sps->profile_idc >= 100){ //high profile
7259 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7260 get_bits1(&s->gb); //residual_color_transform_flag
7261 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7262 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7263 sps->transform_bypass = get_bits1(&s->gb);
7264 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7266 sps->scaling_matrix_present = 0;
7268 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7269 sps->poc_type= get_ue_golomb(&s->gb);
/* picture order count: type 0 needs only the lsb width, type 1 carries
 * an explicit offset cycle, type 2 has no extra fields */
7271 if(sps->poc_type == 0){ //FIXME #define
7272 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7273 } else if(sps->poc_type == 1){//FIXME #define
7274 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7275 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7276 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7277 tmp= get_ue_golomb(&s->gb);
/* bound the cycle length by the fixed-size offset array */
7279 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7280 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7283 sps->poc_cycle_length= tmp;
7285 for(i=0; i<sps->poc_cycle_length; i++)
7286 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7287 }else if(sps->poc_type != 2){
7288 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7292 tmp= get_ue_golomb(&s->gb);
7293 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7294 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7297 sps->ref_frame_count= tmp;
7298 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7299 mb_width= get_ue_golomb(&s->gb) + 1;
7300 mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb arithmetic */
7301 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7302 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7303 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7306 sps->mb_width = mb_width;
7307 sps->mb_height= mb_height;
7309 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7310 if(!sps->frame_mbs_only_flag)
7311 sps->mb_aff= get_bits1(&s->gb);
7315 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7317 #ifndef ALLOW_INTERLACE
7319 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7321 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7322 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7324 sps->crop= get_bits1(&s->gb);
7326 sps->crop_left = get_ue_golomb(&s->gb);
7327 sps->crop_right = get_ue_golomb(&s->gb);
7328 sps->crop_top = get_ue_golomb(&s->gb);
7329 sps->crop_bottom= get_ue_golomb(&s->gb);
7330 if(sps->crop_left || sps->crop_top){
7331 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* fixed: validate against the SPS being parsed, not h->sps (stale) */
7333 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7334 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7340 sps->crop_bottom= 0;
7343 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7344 if( sps->vui_parameters_present_flag )
7345 decode_vui_parameters(h, sps);
7347 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7348 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7349 sps_id, sps->profile_idc, sps->level_idc,
7351 sps->ref_frame_count,
7352 sps->mb_width, sps->mb_height,
7353 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7354 sps->direct_8x8_inference_flag ? "8B8" : "",
7355 sps->crop_left, sps->crop_right,
7356 sps->crop_top, sps->crop_bottom,
7357 sps->vui_parameters_present_flag ? "VUI" : ""
/* Precompute the luma-QP -> chroma-QP lookup table for table index t.
 * For each luma QP i in 0..51, the chroma QP is chroma_qp[clip(i+index, 0, 51)],
 * where index is the (second_)chroma_qp_index_offset from the PPS.
 * NOTE(review): the return type / opening brace and the declaration of i are
 * not visible in this extract (missing source lines). */
7364 build_qp_table(PPS *pps, int t, int index)
7367     for(i = 0; i < 52; i++)
7368         pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a Picture Parameter Set (PPS) NAL unit from the bitstream reader.
 *
 * Parses the PPS syntax elements (entropy mode, slice groups, default ref
 * counts, QP offsets, scaling matrices, ...) into a PPS allocated via
 * alloc_parameter_set() and derives the chroma QP tables.
 *
 * @param h          decoder context (bitstream is read from h->s.gb)
 * @param bit_length RBSP length in bits; used to detect the optional
 *                   trailing 8x8/scaling-matrix/second-offset fields
 *
 * NOTE(review): this extract is missing several source lines (error
 * `return -1`s after the av_log calls, the FMO switch body, closing
 * braces); comments below describe only what is visible.
 */
7371 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7372     MpegEncContext * const s = &h->s;
7373     unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7376     pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* seq_parameter_set_id: must reference an SPS that has already been decoded. */
7380     tmp= get_ue_golomb(&s->gb);
7381     if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7382         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7387     pps->cabac= get_bits1(&s->gb);
7388     pps->pic_order_present= get_bits1(&s->gb);
7389     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* Flexible Macroblock Ordering (slice_group_count > 1) is parsed but not
 * supported by this decoder; the spec syntax is kept below as a comment. */
7390     if(pps->slice_group_count > 1 ){
7391         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7392         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7393         switch(pps->mb_slice_group_map_type){
7396 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
7397 |    run_length[ i ]                                |1  |ue(v) |
7402 |   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |      |
7404 |    top_left_mb[ i ]                               |1  |ue(v) |
7405 |    bottom_right_mb[ i ]                           |1  |ue(v) |
7413 |   slice_group_change_direction_flag               |1  |u(1)  |
7414 |   slice_group_change_rate_minus1                  |1  |ue(v) |
7419 |   slice_group_id_cnt_minus1                       |1  |ue(v) |
7420 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |      |
7422 |    slice_group_id[ i ]                            |1  |u(v)  |
/* Default reference list sizes; the spec allows at most 32 entries per list. */
7427     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7428     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7429     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7430         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7431         pps->ref_count[0]= pps->ref_count[1]= 1;
7435     pps->weighted_pred= get_bits1(&s->gb);
7436     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7437     pps->init_qp= get_se_golomb(&s->gb) + 26;
7438     pps->init_qs= get_se_golomb(&s->gb) + 26;
7439     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7440     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7441     pps->constrained_intra_pred= get_bits1(&s->gb);
7442     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7444     pps->transform_8x8_mode= 0;
/* Contents of an SPS/PPS may change even when the id stays the same, so force
 * the cached dequant tables to be rebuilt. Default scaling matrices are flat
 * (all 16s) until the optional per-PPS matrices below override them. */
7445     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7446     memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7447     memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Optional trailing fields: present only if bits remain in the RBSP. */
7449     if(get_bits_count(&s->gb) < bit_length){
7450         pps->transform_8x8_mode= get_bits1(&s->gb);
7451         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7452         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
/* Without the trailing fields, the second chroma QP offset defaults to the first. */
7454         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7457     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7458     build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7459     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7460         h->pps.chroma_qp_diff= 1;
7462     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7463         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7464                pps_id, pps->sps_id,
7465                pps->cabac ? "CABAC" : "CAVLC",
7466                pps->slice_group_count,
7467                pps->ref_count[0], pps->ref_count[1],
7468                pps->weighted_pred ? "weighted" : "",
7469                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7470                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7471                pps->constrained_intra_pred ? "CONSTR" : "",
7472                pps->redundant_pic_cnt_present ? "REDU" : "",
7473                pps->transform_8x8_mode ? "8x8DCT" : ""
7481  * Call decode_slice() for each context.
7483  * @param h h264 master context
7484  * @param context_count number of contexts to execute
/* NOTE(review): the single-context and multi-context paths below appear to be
 * the two arms of an if/else whose `else` line is missing from this extract. */
7486 static void execute_decode_slices(H264Context *h, int context_count){
7487     MpegEncContext * const s = &h->s;
7488     AVCodecContext * const avctx= s->avctx;
/* Fast path: one context decodes directly on the calling thread. */
7492     if(context_count == 1) {
7493         decode_slice(avctx, h);
/* Multi-context path: reset per-thread error state, then run all slice
 * contexts through avctx->execute (the application's thread pool). */
7495         for(i = 1; i < context_count; i++) {
7496             hx = h->thread_context[i];
7497             hx->s.error_resilience = avctx->error_resilience;
7498             hx->s.error_count = 0;
7501         avctx->execute(avctx, (void *)decode_slice,
7502                        (void **)h->thread_context, NULL, context_count);
7504         /* pull back stuff from slices to master context */
7505         hx = h->thread_context[context_count - 1];
7506         s->mb_x = hx->s.mb_x;
7507         s->mb_y = hx->s.mb_y;
7508         s->dropable = hx->s.dropable;
7509         s->picture_structure = hx->s.picture_structure;
/* Accumulate the per-thread error counts into the master context. */
7510         for(i = 1; i < context_count; i++)
7511             h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split buf into NAL units and decode each one.
 *
 * Handles both Annex-B streams (start-code prefixed) and AVC/mp4 streams
 * (length prefixed, h->is_avc). Slice NALs are distributed across up to
 * h->max_contexts thread contexts and flushed via execute_decode_slices().
 *
 * @return number of bytes consumed — TODO confirm; the final return
 *         statement is not visible in this extract.
 *
 * NOTE(review): many source lines are missing here (the outer for(;;) loop
 * header, several case labels of the NAL-type switch, error `continue`/`break`
 * paths, closing braces); comments describe only the visible code.
 */
7516 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7517     MpegEncContext * const s = &h->s;
7518     AVCodecContext * const avctx= s->avctx;
7520     H264Context *hx; ///< thread context
7521     int context_count = 0;
7523     h->max_contexts = avctx->thread_count;
/* presumably debug hex dump of the first 50 input bytes — the guarding
 * #if/if is not visible in this extract. */
7526     for(i=0; i<50; i++){
7527         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Unless the caller feeds arbitrary chunks (CODEC_FLAG2_CHUNKS), each call
 * starts a fresh access unit. */
7530     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7531         h->current_slice = 0;
7532         if (!s->first_field)
7533             s->current_picture_ptr= NULL;
/* AVC/mp4 framing: read a big-endian nal_length_size-byte length prefix. */
7545             if(buf_index >= buf_size) break;
7547             for(i = 0; i < h->nal_length_size; i++)
7548                 nalsize = (nalsize << 8) | buf[buf_index++];
7549             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7554                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7559             // start code prefix search
7560             for(; buf_index + 3 < buf_size; buf_index++){
7561                 // This should always succeed in the first iteration.
7562                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7566             if(buf_index+3 >= buf_size) break;
/* Next NAL is parsed into the next available thread context. */
7571         hx = h->thread_context[context_count];
7573         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7574         if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then compute the RBSP length in bits without
 * the rbsp_trailing_bits. */
7577         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7579         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7581         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7582             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7585         if (h->is_avc && (nalsize != consumed)){
7586             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7590         buf_index += consumed;
/* Skip non-reference NALs when the application asked for it. */
7592         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7593            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
/* NAL-type dispatch; case labels for IDR/slice/DPA/DPB/DPC/SEI/SPS/PPS are
 * partially missing from this extract. */
7598         switch(hx->nal_unit_type){
7600             if (h->nal_unit_type != NAL_IDR_SLICE) {
7601                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7604             idr(h); //FIXME ensure we don't lose some frames if there is reordering
7606             init_get_bits(&hx->s.gb, ptr, bit_length);
7608             hx->inter_gb_ptr= &hx->s.gb;
7609             hx->s.data_partitioning = 0;
7611             if((err = decode_slice_header(hx, h)))
7614             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Only count this context toward decoding if the slice passes all the
 * skip_frame / hurry_up / redundant-picture filters. */
7615             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7616                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7617                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7618                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7619                && avctx->skip_frame < AVDISCARD_ALL)
/* Data partitioning: partition A carries the slice header... */
7623             init_get_bits(&hx->s.gb, ptr, bit_length);
7625             hx->inter_gb_ptr= NULL;
7626             hx->s.data_partitioning = 1;
7628             err = decode_slice_header(hx, h);
/* ...partition B feeds the intra bitstream reader... */
7631             init_get_bits(&hx->intra_gb, ptr, bit_length);
7632             hx->intra_gb_ptr= &hx->intra_gb;
/* ...and partition C feeds the inter bitstream reader. */
7635             init_get_bits(&hx->inter_gb, ptr, bit_length);
7636             hx->inter_gb_ptr= &hx->inter_gb;
7638             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7639                && s->context_initialized
7641                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7642                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7643                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7644                && avctx->skip_frame < AVDISCARD_ALL)
/* presumably NAL_SEI: */
7648             init_get_bits(&s->gb, ptr, bit_length);
/* presumably NAL_SPS: */
7652             init_get_bits(&s->gb, ptr, bit_length);
7653             decode_seq_parameter_set(h);
7655             if(s->flags& CODEC_FLAG_LOW_DELAY)
7658             if(avctx->has_b_frames < 2)
7659                 avctx->has_b_frames= !s->low_delay;
/* presumably NAL_PPS: */
7662             init_get_bits(&s->gb, ptr, bit_length);
7664             decode_picture_parameter_set(h, bit_length);
7668         case NAL_END_SEQUENCE:
7669         case NAL_END_STREAM:
7670         case NAL_FILLER_DATA:
7672         case NAL_AUXILIARY_SLICE:
7675             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush once all thread contexts have a slice queued. */
7678         if(context_count == h->max_contexts) {
7679             execute_decode_slices(h, context_count);
7684             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7686             /* Slice could not be decoded in parallel mode, copy down
7687              * NAL unit stuff to context 0 and restart. Note that
7688              * rbsp_buffer is not transferred, but since we no longer
7689              * run in parallel mode this should not be an issue. */
7690             h->nal_unit_type = hx->nal_unit_type;
7691             h->nal_ref_idc   = hx->nal_ref_idc;
/* Flush any remaining queued slices before returning. */
7697         execute_decode_slices(h, context_count);
7702  * returns the number of bytes consumed for building the current frame
/* Clamp the reported consumption: never 0 (would stall callers that advance
 * by the return value) and treat "almost everything" as the whole buffer.
 * NOTE(review): the final `return pos;` / closing brace are not visible in
 * this extract. */
7704 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7705     if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7706     if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Main libavcodec decode entry point: decode one packet into (at most) one
 * output picture.
 *
 * Flow: parse/decode NAL units, finish the current picture, then run the
 * B-frame reordering buffer (h->delayed_pic) to pick the next picture in
 * display order.
 *
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @return bytes consumed — via get_consumed_bytes()
 *
 * NOTE(review): this extract is missing many source lines (error returns,
 * several closing braces, parts of the avcC parsing); comments describe only
 * the visible code.
 */
7711 static int decode_frame(AVCodecContext *avctx,
7712                         void *data, int *data_size,
7713                         const uint8_t *buf, int buf_size)
7715     H264Context *h = avctx->priv_data;
7716     MpegEncContext *s = &h->s;
7717     AVFrame *pict = data;
7720     s->flags= avctx->flags;
7721     s->flags2= avctx->flags2;
7723    /* end of stream, output what is still in the buffers */
7724     if (buf_size == 0) {
7728         //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC (next in display order). */
7729         out = h->delayed_pic[0];
7731         for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7732             if(h->delayed_pic[i]->poc < out->poc){
7733                 out = h->delayed_pic[i];
/* Remove the chosen picture from the delayed queue by shifting down. */
7737         for(i=out_idx; h->delayed_pic[i]; i++)
7738             h->delayed_pic[i] = h->delayed_pic[i+1];
7741             *data_size = sizeof(AVFrame);
7742             *pict= *(AVFrame*)out;
/* One-time parse of the avcC extradata (mp4/mov style global headers). */
7748     if(h->is_avc && !h->got_avcC) {
7749         int i, cnt, nalsize;
7750         unsigned char *p = avctx->extradata;
7751         if(avctx->extradata_size < 7) {
7752             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7756             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7759         /* sps and pps in the avcC always have length coded with 2 bytes,
7760            so put a fake nal_length_size = 2 while parsing them */
7761         h->nal_length_size = 2;
7762         // Decode sps from avcC
7763         cnt = *(p+5) & 0x1f; // Number of sps
7765         for (i = 0; i < cnt; i++) {
7766             nalsize = AV_RB16(p) + 2;
7767             if(decode_nal_units(h, p, nalsize) < 0) {
7768                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7773         // Decode pps from avcC
7774         cnt = *(p++); // Number of pps
7775         for (i = 0; i < cnt; i++) {
7776             nalsize = AV_RB16(p) + 2;
7777             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7778                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7783         // Now store right nal length size, that will be use to parse all other nals
7784         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7785         // Do not reparse avcC
/* Annex-B style global headers are decoded once before the first frame. */
7789     if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7790         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7794     buf_index=decode_nal_units(h, buf, buf_size);
7798     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7799         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7800         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Picture is complete (or chunked decoding reached the last MB row):
 * finalize it and run display-order reordering. */
7804     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7805         Picture *out = s->current_picture_ptr;
7806         Picture *cur = s->current_picture_ptr;
7807         int i, pics, cross_idr, out_of_order, out_idx;
7811         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7812         s->current_picture_ptr->pict_type= s->pict_type;
/* Apply memory-management control operations and roll the POC state
 * forward for the next picture. */
7815             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7816         h->prev_poc_msb= h->poc_msb;
7817         h->prev_poc_lsb= h->poc_lsb;
7819         h->prev_frame_num_offset= h->frame_num_offset;
7820         h->prev_frame_num= h->frame_num;
7823          * FIXME: Error handling code does not seem to support interlaced
7824          * when slices span multiple rows
7825          * The ff_er_add_slice calls don't work right for bottom
7826          * fields; they cause massive erroneous error concealing
7827          * Error marking covers both fields (top and bottom).
7828          * This causes a mismatched s->error_count
7829          * and a bad error table. Further, the error count goes to
7830          * INT_MAX when called for bottom field, because mb_y is
7831          * past end by one (callers fault) and resync_mb_y != 0
7832          * causes problems for the first MB line, too.
/* A field POC still at INT_MAX means only one field of the pair has been
 * decoded so far. */
7839         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7840             /* Wait for second field. */
7844         cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7845         /* Derive top_field_first from field pocs. */
7846         cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7848     //FIXME do something with unavailable reference frames
7850         /* Sort B-frames into display order */
7852         if(h->sps.bitstream_restriction_flag
7853            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7854             s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Without bitstream restriction info, strict compliance forces the maximum
 * reordering delay. */
7858         if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7859            && !h->sps.bitstream_restriction_flag){
7860             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* Append the current picture to the delayed queue, keeping it referenced
 * for output even if it is a non-reference picture. */
7865         while(h->delayed_pic[pics]) pics++;
7867         assert(pics <= MAX_DELAYED_PIC_COUNT);
7869         h->delayed_pic[pics++] = cur;
7870         if(cur->reference == 0)
7871             cur->reference = DELAYED_PIC_REF;
/* Find the delayed picture with the smallest POC (display-order candidate). */
7873         out = h->delayed_pic[0];
7875         for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7876             if(h->delayed_pic[i]->poc < out->poc){
7877                 out = h->delayed_pic[i];
/* cross_idr: the queue spans an IDR boundary (POC restarts), so POC
 * comparison against the last output is not meaningful. */
7880         cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7882         out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames when the stream evidently reorders more than assumed. */
7884         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7886         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7888            ((!cross_idr && out->poc > h->outputed_poc + 2)
7889             || cur->pict_type == FF_B_TYPE)))
7892             s->avctx->has_b_frames++;
/* Output (and dequeue) when the queue exceeds the reorder delay. */
7895         if(out_of_order || pics > s->avctx->has_b_frames){
7896             out->reference &= ~DELAYED_PIC_REF;
7897             for(i=out_idx; h->delayed_pic[i]; i++)
7898                 h->delayed_pic[i] = h->delayed_pic[i+1];
7900         if(!out_of_order && pics > s->avctx->has_b_frames){
7901             *data_size = sizeof(AVFrame);
7903                 h->outputed_poc = out->poc;
7904             *pict= *(AVFrame*)out;
7906             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7911     assert(pict->data[0] || !*data_size);
7912     ff_print_debug_info(s, pict);
7913 //printf("out %d\n", (int)pict->data[0]);
7916     /* Return the Picture timestamp as the frame number */
7917     /* we subtract 1 because it is added on utils.c */
7918     avctx->frame_number = s->picture_number - 1;
7920     return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbours: a neighbour is available only if it lies inside the picture
 * and belongs to the same slice (slice_table match).
 * Indices 0..2: top-left, top, top-right; 3: left; 4..5: constants.
 * NOTE(review): the guard for the first MB row (mb_y==0) and the closing
 * brace are not visible in this extract. */
7923 static inline void fill_mb_avail(H264Context *h){
7924     MpegEncContext * const s = &h->s;
7925     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7928         h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7929         h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7930         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7936     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7937     h->mb_avail[4]= 1; //FIXME move out
7938     h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness — presumably the body of a main() compiled only for
 * standalone testing (the function's opening lines and the #define of COUNT
 * are not visible in this extract). It exercises, in order: unsigned and
 * signed Exp-Golomb coding round-trips, the 4x4 (I)DCT, the quantizer, and
 * the NAL escaping/unescaping layer. */
7946 #define SIZE (COUNT*40)
7952 //    int int_temp[10000];
7954     AVCodecContext avctx;
7956     dsputil_init(&dsp, &avctx);
/* Round-trip test: write COUNT unsigned Exp-Golomb codes... */
7958     init_put_bits(&pb, temp, SIZE);
7959     printf("testing unsigned exp golomb\n");
7960     for(i=0; i<COUNT; i++){
7962         set_ue_golomb(&pb, i);
7963         STOP_TIMER("set_ue_golomb");
7965     flush_put_bits(&pb);
/* ...then read them back and verify each value. */
7967     init_get_bits(&gb, temp, 8*SIZE);
7968     for(i=0; i<COUNT; i++){
7971         s= show_bits(&gb, 24);
7974         j= get_ue_golomb(&gb);
7976             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7979         STOP_TIMER("get_ue_golomb");
/* Same round-trip for signed Exp-Golomb, centered around zero. */
7983     init_put_bits(&pb, temp, SIZE);
7984     printf("testing signed exp golomb\n");
7985     for(i=0; i<COUNT; i++){
7987         set_se_golomb(&pb, i - COUNT/2);
7988         STOP_TIMER("set_se_golomb");
7990     flush_put_bits(&pb);
7992     init_get_bits(&gb, temp, 8*SIZE);
7993     for(i=0; i<COUNT; i++){
7996         s= show_bits(&gb, 24);
7999         j= get_se_golomb(&gb);
8000         if(j != i - COUNT/2){
8001             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8004         STOP_TIMER("get_se_golomb");
/* DCT/IDCT round-trip on random 4x4 blocks, accumulating the error. */
8008     printf("testing 4x4 (I)DCT\n");
8011         uint8_t src[16], ref[16];
8012         uint64_t error= 0, max_error=0;
8014         for(i=0; i<COUNT; i++){
8016 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8017             for(j=0; j<16; j++){
8018                 ref[j]= random()%255;
8019                 src[j]= random()%255;
8022             h264_diff_dct_c(block, src, ref, 4);
/* presumably an approximate dequantization step before the IDCT. */
8025             for(j=0; j<16; j++){
8026 //                printf("%d ", block[j]);
8027                 block[j]= block[j]*4;
8028                 if(j&1) block[j]= (block[j]*4 + 2)/5;
8029                 if(j&4) block[j]= (block[j]*4 + 2)/5;
8033             s->dsp.h264_idct_add(ref, block, 4);
8034 /*            for(j=0; j<16; j++){
8035                 printf("%d ", ref[j]);
8039             for(j=0; j<16; j++){
8040                 int diff= FFABS(src[j] - ref[j]);
8043                 max_error= FFMAX(max_error, diff);
8046         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* Quantizer sweep over all 52 QP values on random data. */
8047         printf("testing quantizer\n");
8048         for(qp=0; qp<52; qp++){
8050                 src1_block[i]= src2_block[i]= random()%255;
/* NAL layer: encode a random (zero-free-ish) bitstream, inject zeros,
 * escape it with encode_nal(), unescape with decode_nal(), and verify
 * length, consumed bytes and content all match. */
8053         printf("Testing NAL layer\n");
8055         uint8_t bitstream[COUNT];
8056         uint8_t nal[COUNT*2];
8058         memset(&h, 0, sizeof(H264Context));
8060         for(i=0; i<COUNT; i++){
8068             for(j=0; j<COUNT; j++){
8069                 bitstream[j]= (random() % 255) + 1;
8072             for(j=0; j<zeros; j++){
8073                 int pos= random() % COUNT;
8074                 while(bitstream[pos] == 0){
8083             nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8085                 printf("encoding failed\n");
8089             out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8093             if(out_length != COUNT){
8094                 printf("incorrect length %d %d\n", out_length, COUNT);
8098             if(consumed != nal_length){
8099                 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8103             if(memcmp(bitstream, out, COUNT)){
8104                 printf("mismatch\n");
8110     printf("Testing RBSP\n");
/* Codec close callback: free the per-context RBSP unescape buffers and all
 * per-picture/MB tables, then (presumably, via lines not visible in this
 * extract) close the underlying MpegEncContext and return 0. */
8118 static av_cold int decode_end(AVCodecContext *avctx)
8120     H264Context *h = avctx->priv_data;
8121     MpegEncContext *s = &h->s;
8123     av_freep(&h->rbsp_buffer[0]);
8124     av_freep(&h->rbsp_buffer[1]);
8125     free_tables(h); //FIXME cleanup init stuff perhaps
8128 //    memset(h, 0, sizeof(H264Context));
/* Public codec registration entry. NOTE(review): several initializer fields
 * (name, type, id, init/close/decode callbacks) are not visible in this
 * extract. */
8134 AVCodec h264_decoder = {
8138     sizeof(H264Context),
8143     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8145     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),