2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
// Packs two 16-bit halves (a, b) into one 32-bit word; the WORDS_BIGENDIAN
// branch swaps the operand order so the pair has the same in-memory layout
// regardless of host endianness.
// NOTE(review): the #else/#endif and closing brace are not visible in this
// fragment (source line numbers jump 67 -> 69) — confirm against full file.
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
// Lookup table: qp % 6 for every legal H.264 QP value (0..51); avoids a
// runtime modulo. NOTE(review): closing "};" not visible in this fragment.
73 const uint8_t ff_rem6[52]={
74 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
// Lookup table: qp / 6 for every legal H.264 QP value (0..51); avoids a
// runtime division. NOTE(review): closing "};" not visible in this fragment.
77 const uint8_t ff_div6[52]={
78 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
// Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
// non-zero-count, motion vectors, reference indices, mvd, direct flags,
// cbp) from already-decoded neighbouring macroblocks, including the MBAFF
// frame/field neighbour selection rules.
// @param h           H.264 decoder context
// @param mb_type     type of the current macroblock
// @param for_deblock nonzero when caches are filled only for the deblocking
//                    filter (see FIXME below: intra/nnz parts could be skipped)
// NOTE(review): this is a sampled fragment — the embedded line numbers jump,
// so many braces, else branches and loop headers are not visible here.
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
// topleft_partition stays -1 except for one MBAFF case below where the
// top-left MV must be taken from the middle of the neighbour MB.
88 int topleft_partition= -1;
// FIELD_PICTURE doubles the stride so "top" skips the interleaved field row.
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
// Default (non-MBAFF) neighbour positions.
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
// --- MBAFF neighbour derivation: frame/field flags of the MB pairs decide
// which physical macroblock of each neighbouring pair is the true neighbour.
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// NOTE(review): the conditions that these ?: arms belong to are not visible
// in this fragment; each adjusts a neighbour xy by one MB row when the
// frame/field pairing requires it.
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
// Publish the resolved neighbour positions for later use (e.g. deblocking).
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
// Deblocking path: a neighbour is usable when its slice_table entry is a
// real slice number (< 255); otherwise its type is treated as 0 (unavailable).
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// Unpack the 16 per-4x4-block luma nnz flags stored as a packed uint16.
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
// Copy this MB's own motion data back into the cache row by row.
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
// *0x0101 replicates each packed ref index into two adjacent cache bytes.
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
// List not used by this MB: zero MVs, mark refs as LIST_NOT_USED.
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// Decoding path: a neighbour is usable only if it belongs to the same slice.
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// --- Intra prediction: compute sample-availability bitmasks, then clear
// bits for each unavailable (or constrained-intra-excluded) neighbour.
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
// Import neighbouring intra4x4 prediction modes into the cache; a missing
// or excluded neighbour contributes a fallback value ("pred", set in lines
// not visible in this fragment).
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
295 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
// --- Non-zero-count cache: import top neighbour's nnz (luma + chroma DC
// rows), or the CABAC/CAVLC default (0 vs 64) when the top MB is missing.
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Same for the two left neighbours (left_block[] maps MBAFF block order).
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// --- CBP of neighbours (used by CABAC context derivation / deblocking).
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// --- Inter/direct: fill MV and reference caches from the four neighbours.
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
// Top neighbour: bottom row of its MVs / refs, or zeros + availability marker.
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left neighbours (two, for MBAFF): rightmost column of their MVs / refs.
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
// Top-left neighbour: bottom-right partition, except the MBAFF case where
// topleft_partition selects the middle (see above).
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Top-right neighbour: bottom-left partition.
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Initialise cache cells outside the current MB that prediction may read.
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
449 /* XXX beurk, Load mvd */
// --- MVD cache (CABAC only): same neighbour layout as the MV cache.
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// --- B-slice direct-mode flags from the neighbours.
484 if(h->slice_type_nos == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// --- MBAFF frame<->field MV rescaling: MAP_F2F is applied to every cached
// neighbour cell; the two definitions below halve or double the vertical MV
// component (and adjust the ref index) when the neighbour's frame/field mode
// differs from the current MB's.
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// Frame MB reading field neighbours: double ref (two fields per frame),
// halve vertical MV.
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
// Field MB reading frame neighbours: inverse mapping.
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
// Count of 8x8-DCT neighbours, used by CABAC transform-size context.
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Copies the bottom row and right column of the intra4x4 prediction-mode
// cache back into the per-MB intra4x4_pred_mode table, where later
// macroblocks will read them as their top/left neighbours.
// NOTE(review): the [7] assignment and closing brace are not visible in this
// fragment (line numbers jump past 562).
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Remaps each cached intra4x4 mode through the top[]/left[] tables when the
// corresponding neighbour samples are unavailable; a resulting -1 means the
// bitstream requested a mode needing missing samples -> error is logged.
// Returns 0 on success, negative on invalid mode (return statements are in
// lines not visible in this fragment).
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Top row unavailable: substitute modes that do not read top samples.
573 if(!(h->top_samples_available&0x8000)){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Left column unavailable: substitute modes that do not read left samples.
585 if(!(h->left_samples_available&0x8000)){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Same idea as check_intra4x4_pred_mode but for a single 16x16/chroma intra
// mode: remaps `mode` through top[]/left[] when neighbour samples are
// missing, logging and failing on impossible requests. Returns the
// (possibly remapped) mode, or negative on error (returns not visible here).
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if(!(h->left_samples_available&0x8000)){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
// Standard H.264 intra4x4 mode prediction: the minimum of the left and top
// neighbours' modes; DC_PRED if either neighbour is unavailable (negative).
// @param n  4x4 block index (scan8 order)
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
// NOTE(review): the non-DC return path is not visible in this fragment.
643 if(min<0) return DC_PRED;
// Writes the right-column / bottom-row non-zero-count cache entries back to
// the per-MB table (neighbour access for later MBs), plus a packed 16-bit
// luma nnz bitmap at offset [14] used by the deblocking filter.
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
// Luma edge blocks.
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// Chroma blocks (Cb then Cr).
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
667 // store all luma nnzs, for deblocking
// One bit per 4x4 luma block: set iff that block has any nonzero coeff.
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non-zero coefficients.
677 * @param n block index
// CAVLC nC prediction: average of left and top neighbour nnz counts.
// Values >= 64 encode the "unavailable" default set in fill_caches; the
// averaging (visible below as (i+1)>>1 rounding) only applies under 64.
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
// NOTE(review): the line computing i (presumably left+top) is not visible
// in this fragment — confirm against the full file.
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Returns the reference index of the diagonal (top-right, or fallback
// top-left) neighbour used for median MV prediction, and sets *C to its
// motion vector. Contains special-case handling for MBAFF, where neighbour
// MVs must be fetched from the picture-level tables and rescaled between
// frame and field coordinates.
// @param C          out: pointer to the chosen neighbour's MV
// @param i          cache index of the current block (scan8 order)
// @param part_width partition width in 4x4 units
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Scratch cache slot scan8[0]-2 holds the synthesised MBAFF neighbour MV.
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV fetches an MV/ref at 4x4 coords (x4,y4) from the picture,
// applying MV_OP to the vertical MV and REF_OP to the ref index to convert
// between frame and field scaling.
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
// Field MB reading a frame neighbour: double vertical MV, halve ref.
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Top-right missing: fall back to the left-column neighbour diagonals.
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF path: use the cached top-right, else fall back to top-left.
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
// Median MV prediction over the left (A), top (B) and diagonal (C)
// neighbours, with the single-match and all-unavailable special cases.
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
// Count how many neighbours reference the same picture as this partition.
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
// Exactly one neighbour matches: take its MV directly.
781 }else if(match_count==1){
785 }else if(top_ref==ref){
// No match: use left alone if it is the only available neighbour,
// otherwise fall back to the median.
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
// 16x8 partitions use a directional shortcut: the top partition prefers the
// top neighbour's MV, the bottom partition prefers the left neighbour's;
// otherwise fall through to the generic median prediction.
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Top partition: check the top neighbour first.
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// Bottom partition: check the left neighbour first.
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Fallback: full median prediction.
837 pred_motion(h, n, 4, list, ref, mx, my);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
// 8x16 partitions: the left partition prefers the left neighbour's MV, the
// right partition prefers the diagonal neighbour's; otherwise fall through
// to the generic median prediction.
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Left partition: check the left neighbour first.
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Right partition: check the diagonal (top-right) neighbour first.
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
// Fallback: full median prediction.
874 pred_motion(h, n, 2, list, ref, mx, my);
// P-skip MV prediction: the skip MV is (0,0) when either neighbour is
// unavailable or is a zero-MV ref-0 block; otherwise it is the normal
// median prediction for the whole 16x16 MB.
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
// Neighbours usable: normal 16x16 median prediction with ref 0, list 0.
891 pred_motion(h, 0, 4, 0, 0, mx, my);
// Precomputes the temporal-direct distance scale factors (one per list-0
// reference) from POC distances, per the tb/td/tx formula of the H.264
// temporal direct mode; also duplicates them per field.
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
// Equal POCs (or long-term, see FIXME): neutral scale factor.
904 h->dist_scale_factor[i] = 256;
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
// Field variant: each frame ref becomes two field refs with the same factor.
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
// Stores the current picture's reference counts/POCs and, for temporal
// direct mode, builds map_col_to_list0: for each reference of the colocated
// picture (ref_list[1][0]), the index of the list-0 reference with the same
// POC (0 if none is found — bogus placeholder for missing frames).
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
// Record this picture's own ref lists so future pictures can map to them.
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// The colocated mapping is only needed for temporal direct B-slices.
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
// Field variant: frame index j maps to the two field indices 2j, 2j+1.
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives direct-mode motion for a B macroblock (spatial or temporal,
 * per h->direct_spatial_mv_pred), filling h->mv_cache / h->ref_cache
 * and rewriting *mb_type / h->sub_mb_type[] to the inferred partition.
 * NOTE(review): this excerpt is elided — several lines (else branches,
 * closing braces) are missing between the visible statements.
 */
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// mb_type/motion/ref data of the co-located MB in the first list-1 reference
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// Pick the partition size implied by the co-located MB.
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// --- spatial direct prediction -------------------------------------------
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned compare so PART_NOT_AVAILABLE/negative refs lose to valid ones
1004 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1009 if(ref[0] < 0 && ref[1] < 0){
1010 ref[0] = ref[1] = 0;
1011 mv[0][0] = mv[0][1] =
1012 mv[1][0] = mv[1][1] = 0;
1014 for(list=0; list<2; list++){
1016 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1018 mv[list][0] = mv[list][1] = 0;
// Drop the unused prediction list when only one reference is valid.
1024 *mb_type &= ~MB_TYPE_L1;
1025 sub_mb_type &= ~MB_TYPE_L1;
1026 }else if(ref[0] < 0){
1028 *mb_type &= ~MB_TYPE_L0;
1029 sub_mb_type &= ~MB_TYPE_L0;
// Current MB and co-located MB have different field/frame coding:
// re-point l1ref*/l1mv* at the matching half of the MB pair.
1032 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1033 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1034 int mb_types_col[2];
1035 int b8_stride = h->b8_stride;
1036 int b4_stride = h->b_stride;
1038 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1040 if(IS_INTERLACED(*mb_type)){
1041 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1042 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1044 l1ref0 -= 2*b8_stride;
1045 l1ref1 -= 2*b8_stride;
1046 l1mv0 -= 4*b4_stride;
1047 l1mv1 -= 4*b4_stride;
// choose the co-located field whose POC is closest to the current picture
1052 int cur_poc = s->current_picture_ptr->poc;
1053 int *col_poc = h->ref_list[1]->field_poc;
1054 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1055 int dy = 2*col_parity - (s->mb_y&1);
1057 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1058 l1ref0 += dy*b8_stride;
1059 l1ref1 += dy*b8_stride;
1060 l1mv0 += 2*dy*b4_stride;
1061 l1mv1 += 2*dy*b4_stride;
// per-8x8 spatial direct (mixed field/frame case)
1065 for(i8=0; i8<4; i8++){
1068 int xy8 = x8+y8*b8_stride;
1069 int xy4 = 3*x8+y8*b4_stride;
1072 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1074 h->sub_mb_type[i8] = sub_mb_type;
1076 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1077 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// zero the MVs if the co-located block is "moving slowly" (|mv| <= 1, ref 0)
1078 if(!IS_INTRA(mb_types_col[y8])
1079 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1080 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1082 a= pack16to32(mv[0][0],mv[0][1]);
1084 b= pack16to32(mv[1][0],mv[1][1]);
1086 a= pack16to32(mv[0][0],mv[0][1]);
1087 b= pack16to32(mv[1][0],mv[1][1]);
1089 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1090 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// whole-MB spatial direct
1092 }else if(IS_16X16(*mb_type)){
1095 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1096 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1097 if(!IS_INTRA(mb_type_col)
1098 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1099 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <= 33 had a divergent direct-mode bug; keep bitexact with them
1100 && (h->x264_build>33 || !h->x264_build)))){
1102 a= pack16to32(mv[0][0],mv[0][1]);
1104 b= pack16to32(mv[1][0],mv[1][1]);
1106 a= pack16to32(mv[0][0],mv[0][1]);
1107 b= pack16to32(mv[1][0],mv[1][1]);
1109 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1110 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// per-8x8 spatial direct (same field/frame coding as co-located MB)
1112 for(i8=0; i8<4; i8++){
1113 const int x8 = i8&1;
1114 const int y8 = i8>>1;
1116 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1118 h->sub_mb_type[i8] = sub_mb_type;
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1122 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1123 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1126 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1127 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1128 && (h->x264_build>33 || !h->x264_build)))){
1129 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1130 if(IS_SUB_8X8(sub_mb_type)){
1131 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1132 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1134 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
// 4x4 sub-blocks: test each co-located 4x4 MV individually
1139 for(i4=0; i4<4; i4++){
1140 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1141 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1143 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1145 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// --- temporal direct prediction ------------------------------------------
1151 }else{ /* direct temporal mv pred */
1152 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1153 const int *dist_scale_factor = h->dist_scale_factor;
1156 if(IS_INTERLACED(*mb_type)){
1157 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1158 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1159 dist_scale_factor = h->dist_scale_factor_field;
1161 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1162 /* FIXME assumes direct_8x8_inference == 1 */
1163 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1164 int mb_types_col[2];
1167 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1168 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1169 | (*mb_type & MB_TYPE_INTERLACED);
1170 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1172 if(IS_INTERLACED(*mb_type)){
1173 /* frame to field scaling */
1174 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1175 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1177 l1ref0 -= 2*h->b8_stride;
1178 l1ref1 -= 2*h->b8_stride;
1179 l1mv0 -= 4*h->b_stride;
1180 l1mv1 -= 4*h->b_stride;
1184 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1185 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1187 *mb_type |= MB_TYPE_16x8;
1189 *mb_type |= MB_TYPE_8x8;
1191 /* field to frame scaling */
1192 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1193 * but in MBAFF, top and bottom POC are equal */
1194 int dy = (s->mb_y&1) ? 1 : 2;
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1197 l1ref0 += dy*h->b8_stride;
1198 l1ref1 += dy*h->b8_stride;
1199 l1mv0 += 2*dy*h->b_stride;
1200 l1mv1 += 2*dy*h->b_stride;
1203 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1205 *mb_type |= MB_TYPE_16x16;
1207 *mb_type |= MB_TYPE_8x8;
// per-8x8 temporal direct with field<->frame MV rescaling (y_shift elided)
1210 for(i8=0; i8<4; i8++){
1211 const int x8 = i8&1;
1212 const int y8 = i8>>1;
1214 const int16_t (*l1mv)[2]= l1mv0;
1216 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 h->sub_mb_type[i8] = sub_mb_type;
1220 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1221 if(IS_INTRA(mb_types_col[y8])){
1222 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1223 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1224 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1230 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1232 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1235 scale = dist_scale_factor[ref0];
1236 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1239 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1240 int my_col = (mv_col[1]<<y_shift)/2;
// temporal scaling: mv = (dist_scale_factor * mv_col + 128) >> 8
1241 int mx = (scale * mv_col[0] + 128) >> 8;
1242 int my = (scale * my_col + 128) >> 8;
1243 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1244 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col)){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1261 : map_col_to_list0[1][l1ref1[0]];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
// list1 MV is the scaled MV minus the co-located MV
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col)){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*h->b8_stride];
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (mv_cache/ref_cache, and mvd_cache and
 * direct flags when CABAC is used) back into the frame-wide tables of
 * the current picture. Some loop headers are elided in this excerpt.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
// 4x4-block and 8x8-block indices of this MB in the frame-wide arrays
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// mark list0 references unused when the MB does not predict from list0
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
// copy two 4x4 MVs (8 bytes) per row from the cache into the picture
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
// skipped MBs have no MV deltas; zero the mvd table directly
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// one reference index per 8x8 block
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// CABAC B-slices also need the per-8x8 direct flags for context modelling
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
// NAL header: 1 forbidden bit, 2 bits nal_ref_idc, 5 bits nal_unit_type
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
// Scan for the first 0x000003 emulation-prevention sequence; zeros are
// checked two bytes at a time for speed.
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
// Fast path: no escape sequence found, return the input buffer directly.
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
// 0x000003 -> drop the 0x03 emulation-prevention byte
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
// v holds the final byte containing the rbsp_stop_one_bit (body elided here)
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * IDCT transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
// offsets of the 4x4 luma DC coefficients inside the 16x16 block layout
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
// horizontal pass of the 4x4 Hadamard transform (loop header elided)
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical pass + dequantization, written back in place
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * DCT transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
// same DC-coefficient layout as h264_luma_dc_dequant_idct_c above
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// horizontal Hadamard pass (loop header elided in this excerpt)
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical pass; forward transform halves instead of dequantizing
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
// 2x2 Hadamard inverse transform + dequantization of the chroma DC
// coefficients (the butterfly computing e is elided in this excerpt).
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
// the 2x2 DCs live at 16-element strides inside the coefficient block
1540 const int stride= 16*2;
1541 const int xStride= 16;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
// write back dequantized butterfly outputs; e is (a-d)+... per elided lines
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
// Forward 2x2 Hadamard transform of the chroma DC coefficients
// (encoder-side counterpart of chroma_dc_dequant_idct_c; butterfly
// lines are elided in this excerpt).
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
// t selects the Cb/Cr table; the tables come from the active PPS.
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
1590 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1591 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
// Encoder-side quantization of a transform block; returns the index of the
// last non-zero coefficient. separate_dc applies a different shift to the
// DC coefficient (several branch/loop lines are elided in this excerpt).
1592 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1594 const int * const quant_table= quant_coeff[qscale];
// intra rounding bias 1/3, inter 1/6 (in QUANT_SHIFT fixed point)
1595 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
// threshold1/2: single unsigned compare covers both +/- small levels
1596 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597 const unsigned int threshold2= (threshold1<<1);
// DC with reduced shift (luma DC path, presumably — elided context)
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607 int level= block[0]*quant_coeff[qscale+18][0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1613 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1616 // last_non_zero = i;
// DC with increased shift (alternate DC path)
1621 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623 const unsigned int dc_threshold2= (dc_threshold1<<1);
1625 int level= block[0]*quant_table[0];
1626 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1628 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1631 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1634 // last_non_zero = i;
// AC coefficients in scan order (loop header elided)
1647 const int j= scantable[i];
1648 int level= block[j]*quant_table[j];
1650 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1651 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652 if(((unsigned)(level+threshold1))>threshold2){
1654 level= (bias + level)>>QUANT_SHIFT;
1657 level= (bias - level)>>QUANT_SHIFT;
1666 return last_non_zero;
/**
 * Motion compensation for one partition in one direction (one list):
 * quarter-pel luma and eighth-pel chroma interpolation, with edge
 * emulation when the MV points outside the picture.
 */
1669 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671 int src_x_offset, int src_y_offset,
1672 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673 MpegEncContext * const s = &h->s;
// mx/my in quarter-pel units, including the partition's position offset
1674 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1675 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
// sub-pel phase selects one of the 16 qpel interpolation functions
1676 const int luma_xy= (mx&3) + ((my&3)<<2);
1677 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678 uint8_t * src_cb, * src_cr;
1679 int extra_width= h->emu_edge_width;
1680 int extra_height= h->emu_edge_height;
1682 const int full_mx= mx>>2;
1683 const int full_my= my>>2;
1684 const int pic_width = 16*s->mb_width;
1685 const int pic_height = 16*s->mb_height >> MB_FIELD;
1687 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
// sub-pel interpolation reads 3 extra pixels around the block
1690 if(mx&7) extra_width -= 3;
1691 if(my&7) extra_height -= 3;
// MV reaches outside the padded picture: interpolate from an emulated edge
1693 if( full_mx < 0-extra_width
1694 || full_my < 0-extra_height
1695 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1696 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1697 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1702 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1704 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1707 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1710 // chroma offset when predicting from a field of opposite parity
1711 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1712 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1714 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1719 src_cb= s->edge_emu_buffer;
1721 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1724 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1725 src_cr= s->edge_emu_buffer;
1727 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: predict from list0
 * with "put" ops, then (for bidirectional blocks) average in the list1
 * prediction with "avg" ops.
 */
1730 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1731 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732 int x_offset, int y_offset,
1733 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735 int list0, int list1){
1736 MpegEncContext * const s = &h->s;
// start with the non-averaging ops; switched to avg after the first list
1737 qpel_mc_func *qpix_op= qpix_put;
1738 h264_chroma_mc_func chroma_op= chroma_put;
// x/y offsets are in chroma-sized (8-pel) units; luma uses twice that
1740 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1741 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1742 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1743 x_offset += 8*s->mb_x;
1744 y_offset += 8*(s->mb_y >> MB_FIELD);
1747 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1748 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750 qpix_op, chroma_op);
// second prediction averages into the first (qpix_op switch is elided)
1753 chroma_op= chroma_avg;
1757 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1758 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition: bidirectional blocks
 * use biweight averaging of both predictions (implicit weights when
 * h->use_weight == 2), unidirectional blocks apply explicit weights.
 */
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1773 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1774 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1775 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1776 x_offset += 8*s->mb_x;
1777 y_offset += 8*(s->mb_y >> MB_FIELD);
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
// second prediction goes into the scratchpad, then biweighted into dest
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: weights sum to 64, denom is fixed at 5
1795 if(h->use_weight == 2){
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
1798 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit bidirectional weighting with per-list weights and offsets
1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// unidirectional: predict then apply explicit weight in place
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation.
 * Weighted MC is used for explicit weighting (use_weight==1) and for
 * implicit weighting whenever the weight differs from the plain 32/32
 * average (which mc_part_std handles more cheaply).
 */
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
// weight_op/weight_avg index 0 = 16-wide luma, 3 = 8-wide chroma size
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1849 static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
// integer-pel position of the current MB's MV, shifted one MB to the right
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// chroma planes are contiguous, so one prefetch covers Cb and Cr
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock, dispatching
 * to mc_part() per partition according to the MB partitioning
 * (16x16 / 16x8 / 8x16 / 8x8 with 8x8, 8x4, 4x8 or 4x4 sub-partitions).
 */
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869 MpegEncContext * const s = &h->s;
1870 const int mb_xy= h->mb_xy;
1871 const int mb_type= s->current_picture.mb_type[mb_xy];
1873 assert(IS_INTER(mb_type));
// prefetch list0 reference pixels before doing the actual MC
1875 prefetch_motion(h, 0);
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880 &weight_op[0], &weight_avg[0],
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
// two 16x8 halves, top (n=0) and bottom (n=8)
1882 }else if(IS_16X8(mb_type)){
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885 &weight_op[1], &weight_avg[1],
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// two 8x16 halves, left (n=0) and right (n=4)
1891 }else if(IS_8X16(mb_type)){
1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894 &weight_op[2], &weight_avg[2],
1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x8 partitioning: each quadrant has its own sub_mb_type (loop elided)
1903 assert(IS_8X8(mb_type));
1906 const int sub_mb_type= h->sub_mb_type[i];
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914 &weight_op[3], &weight_avg[3],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919 &weight_op[4], &weight_avg[4],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
1926 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928 &weight_op[5], &weight_avg[5],
1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936 assert(IS_SUB_4X4(sub_mb_type));
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset + (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942 &weight_op[6], &weight_avg[6],
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
// prefetch list1 after MC so the next MB's bidirectional read is warm
1949 prefetch_motion(h, 1);
/**
 * Builds all static CAVLC VLC tables (coeff_token, total_zeros, run)
 * once; guarded by the "done" flag so repeated init calls are no-ops.
 */
1952 static av_cold void decode_init_vlc(void){
1953 static int done = 0;
1959 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1960 &chroma_dc_coeff_token_len [0], 1, 1,
1961 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// four coeff_token tables, selected by the nC (neighbour count) class
1964 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1965 &coeff_token_len [i][0], 1, 1,
1966 &coeff_token_bits[i][0], 1, 1, 1);
1970 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1971 &chroma_dc_total_zeros_len [i][0], 1, 1,
1972 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
// one total_zeros table per possible total_coeff value (1..15)
1974 for(i=0; i<15; i++){
1975 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1976 &total_zeros_len [i][0], 1, 1,
1977 &total_zeros_bits[i][0], 1, 1, 1);
1981 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1982 &run_len [i][0], 1, 1,
1983 &run_bits[i][0], 1, 1, 1);
// run_before with zeros_left > 6 uses the larger run7 table
1985 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1986 &run_len [6][0], 1, 1,
1987 &run_bits[6][0], 1, 1, 1);
/**
 * Frees all per-context dynamically allocated tables, the stored
 * SPS/PPS buffers, and per-thread-context scratch buffers.
 */
1991 static void free_tables(H264Context *h){
1994 av_freep(&h->intra4x4_pred_mode);
1995 av_freep(&h->chroma_pred_mode_table);
1996 av_freep(&h->cbp_table);
1997 av_freep(&h->mvd_table[0]);
1998 av_freep(&h->mvd_table[1]);
1999 av_freep(&h->direct_table);
2000 av_freep(&h->non_zero_count);
2001 av_freep(&h->slice_table_base);
// slice_table points into slice_table_base, so only NULL it here
2002 h->slice_table= NULL;
2004 av_freep(&h->mb2b_xy);
2005 av_freep(&h->mb2b8_xy);
2007 for(i = 0; i < MAX_SPS_COUNT; i++)
2008 av_freep(h->sps_buffers + i);
2010 for(i = 0; i < MAX_PPS_COUNT; i++)
2011 av_freep(h->pps_buffers + i);
// each slice-threading context owns its own border/scratch buffers
2013 for(i = 0; i < h->s.avctx->thread_count; i++) {
2014 hx = h->thread_context[i];
2016 av_freep(&hx->top_borders[1]);
2017 av_freep(&hx->top_borders[0]);
2018 av_freep(&hx->s.obmc_scratchpad);
/* Precompute 8x8 dequantization coefficients for all 52 QP values,
 * combining the base dequant table with the PPS 8x8 scaling matrices.
 * If an optimized (non-C) IDCT is in use the output is stored transposed
 * to match that IDCT's expected coefficient order. */
2022 static void init_dequant8_coeff_table(H264Context *h){
2024 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2025 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2026 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2028 for(i=0; i<2; i++ ){
// if both scaling matrices are identical, share one buffer instead of recomputing
2029 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2030 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2034 for(q=0; q<52; q++){
// q = 6*div6[q] + rem6[q]; shift applies the QP/6 scaling, idx selects the base row
2035 int shift = ff_div6[q];
2036 int idx = ff_rem6[q];
// transpose remaps x from row-major to column-major within the 8x8 block
2038 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2039 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2040 h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute 4x4 dequantization coefficients for all 52 QP values and all
 * six scaling-matrix lists (intra/inter Y, Cb, Cr). Lists with identical
 * scaling matrices share one buffer; output may be transposed to match a
 * non-C IDCT's coefficient order. */
2045 static void init_dequant4_coeff_table(H264Context *h){
2047 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2048 for(i=0; i<6; i++ ){
2049 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier list's buffer when its scaling matrix is identical
2051 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2052 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2059 for(q=0; q<52; q++){
// +2 compensates for the 4x4 IDCT's fixed-point scaling
2060 int shift = ff_div6[q] + 2;
2061 int idx = ff_rem6[q];
// transpose remaps x within the 4x4 block for optimized IDCTs
2063 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2064 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2065 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS/SPS. With lossless
 * transform bypass, QP 0 coefficients are forced to the identity scale
 * (1<<6) so dequantization becomes a no-op at that QP. */
2070 static void init_dequant_tables(H264Context *h){
2072 init_dequant4_coeff_table(h);
// 8x8 tables are only needed when the PPS enables 8x8 transforms
2073 if(h->pps.transform_8x8_mode)
2074 init_dequant8_coeff_table(h);
2075 if(h->sps.transform_bypass){
// identity scale: the IDCT stages shift down by 6, so 1<<6 means "unchanged"
2078 h->dequant4_coeff[i][0][x] = 1<<6;
2079 if(h->pps.transform_8x8_mode)
2082 h->dequant8_coeff[i][0][x] = 1<<6;
2089 * needs width/height
/* Allocate all per-picture-geometry tables; needs s->mb_width/mb_height
 * to be known. Returns 0 on success; CHECKED_ALLOCZ jumps to an (elided)
 * fail label on allocation failure, where free_tables() cleans up.
 * big_mb_num includes one extra row of macroblocks for edge handling. */
2091 static int alloc_tables(H264Context *h){
2092 MpegEncContext * const s = &h->s;
2093 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2096 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2098 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2099 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2102 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2104 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2105 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset into the base so out-of-frame
// neighbours (above/left of mb 0) read the -1 border instead of garbage
2107 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2108 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2110 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2111 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// map macroblock address -> index into the 4x4 (b) and 8x8 (b8) motion grids
2112 for(y=0; y<s->mb_height; y++){
2113 for(x=0; x<s->mb_width; x++){
2114 const int mb_xy= x + y*s->mb_stride;
2115 const int b_xy = 4*x + 4*y*h->b_stride;
2116 const int b8_xy= 2*x + 2*y*h->b8_stride;
2118 h->mb2b_xy [mb_xy]= b_xy;
2119 h->mb2b8_xy[mb_xy]= b8_xy;
// scratchpad is sized from linesize, so it is allocated later in frame_start()
2123 s->obmc_scratchpad = NULL;
2125 if(!h->dequant4_coeff[0])
2126 init_dequant_tables(h);
2135 * Mimic alloc_tables(), but for every context thread.
/* Share the read-mostly tables of alloc_tables() with a slice-thread
 * context: pointers are copied, not duplicated, so only the owning
 * context may free them (see free_tables). Per-thread buffers
 * (obmc_scratchpad) are reset so each thread allocates its own. */
2137 static void clone_tables(H264Context *dst, H264Context *src){
2138 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2139 dst->non_zero_count = src->non_zero_count;
2140 dst->slice_table = src->slice_table;
2141 dst->cbp_table = src->cbp_table;
2142 dst->mb2b_xy = src->mb2b_xy;
2143 dst->mb2b8_xy = src->mb2b8_xy;
2144 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2145 dst->mvd_table[0] = src->mvd_table[0];
2146 dst->mvd_table[1] = src->mvd_table[1];
2147 dst->direct_table = src->direct_table;
// must be allocated per thread in frame_start(), not shared
2149 dst->s.obmc_scratchpad = NULL;
2150 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2155 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate per-thread buffers that are NOT shared between slice threads:
 * the two top-border rows (16 luma + 8+8 chroma bytes per macroblock).
 * Returns 0 on success; on failure returns -1 and relies on a later
 * free_tables() call for cleanup. */
2157 static int context_init(H264Context *h){
2158 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2159 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163 return -1; // free_tables will clean up for us
/* Initialization shared by the H.264 decoder and the SVQ3 decoder:
 * copies geometry from the AVCodecContext, sets up intra prediction,
 * and seeds flat (all-16) scaling matrices as the pre-SPS/PPS default. */
2166 static av_cold void common_init(H264Context *h){
2167 MpegEncContext * const s = &h->s;
2169 s->width = s->avctx->width;
2170 s->height = s->avctx->height;
2171 s->codec_id= s->avctx->codec->id;
2173 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 = dequant tables not yet built for any PPS
2175 h->dequant_coeff_pps= -1;
2176 s->unrestricted_mv=1;
2177 s->decode=1; //FIXME
// value 16 == flat matrix, i.e. no scaling-list weighting until one is parsed
2179 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2180 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec.init callback: set up the MpegEncContext defaults, pick the
 * output pixel format, and detect AVC-style (length-prefixed) extradata
 * by its leading configurationVersion byte (1). */
2183 static av_cold int decode_init(AVCodecContext *avctx){
2184 H264Context *h= avctx->priv_data;
2185 MpegEncContext * const s = &h->s;
2187 MPV_decode_defaults(s);
2192 s->out_format = FMT_H264;
2193 s->workaround_bugs= avctx->workaround_bugs;
2196 // s->decode_mb= ff_h263_decode_mb;
2197 s->quarter_sample = 1;
// SVQ3 uses full-range (JPEG) YUV; H.264 proper uses limited range here
2200 if(avctx->codec_id == CODEC_ID_SVQ3)
2201 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2203 avctx->pix_fmt= PIX_FMT_YUV420P;
// avcC extradata starts with configurationVersion == 1; Annex-B does not
2207 if(avctx->extradata_size > 0 && avctx->extradata &&
2208 *(char *)avctx->extradata == 1){
// context 0 is the main context itself
2215 h->thread_context[0] = h;
/* Begin decoding a new frame: acquire a picture via MPV_frame_start,
 * start error resilience, precompute per-block destination offsets for
 * both frame (4*linesize) and field (8*linesize) decoding, and allocate
 * per-thread scratchpads now that linesize is known. */
2219 static int frame_start(H264Context *h){
2220 MpegEncContext * const s = &h->s;
2223 if(MPV_frame_start(s, s->avctx) < 0)
2225 ff_er_frame_start(s);
2227 * MPV_frame_start uses pict_type to derive key_frame.
2228 * This is incorrect for H.264; IDR markings must be used.
2229 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2230 * See decode_nal_units().
2232 s->current_picture_ptr->key_frame= 0;
2234 assert(s->linesize && s->uvlinesize);
// block_offset[0..15]: luma 4x4 blocks, frame order; [24..39]: same for field
// decoding where vertical steps are doubled (8*linesize)
2236 for(i=0; i<16; i++){
2237 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2238 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma offsets: Cb and Cr share the same geometry, hence the paired stores
2241 h->block_offset[16+i]=
2242 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243 h->block_offset[24+16+i]=
2244 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247 /* can't be in alloc_tables because linesize isn't known there.
2248 * FIXME: redo bipred weight to not require extra buffer? */
2249 for(i = 0; i < s->avctx->thread_count; i++)
2250 if(!h->thread_context[i]->s.obmc_scratchpad)
2251 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2253 /* some macroblocks will be accessed before they're available */
2254 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2255 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2257 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2259 // We mark the current picture as non-reference after allocating it, so
2260 // that if we break out due to an error it can be released automatically
2261 // in the next MPV_frame_start().
2262 // SVQ3 as well as most other codecs have only last/next/current and thus
2263 // get released even with set reference, besides SVQ3 and others do not
2264 // mark frames as reference later "naturally".
2265 if(s->codec_id != CODEC_ID_SVQ3)
2266 s->current_picture_ptr->reference= 0;
// POCs are filled in per field later; INT_MAX marks "not yet decoded"
2268 s->current_picture_ptr->field_poc[0]=
2269 s->current_picture_ptr->field_poc[1]= INT_MAX;
/* Save the bottom row and right column of the just-decoded macroblock into
 * h->top_borders / h->left_border so the deblocking filter of neighbouring
 * macroblocks can still see the pre-filter pixels. 'simple' skips the
 * gray-only fast path check for chroma. */
2274 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2275 MpegEncContext * const s = &h->s;
2279 src_cb -= uvlinesize;
2280 src_cr -= uvlinesize;
2282 // There are two lines saved, the line above the the top macroblock of a pair,
2283 // and the line above the bottom macroblock
// left_border[0] is the corner pixel, taken from the previously saved top row
2284 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2285 for(i=1; i<17; i++){
2286 h->left_border[i]= src_y[15+i* linesize];
// save the macroblock's bottom luma row (two 8-byte copies)
2289 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2290 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// chroma borders only when chroma is actually decoded (not gray-only)
2292 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2293 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2294 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2296 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2297 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
// chroma bottom rows: Cb at offset 16, Cr at offset 24 in the border buffer
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved pre-filter border pixels with
 * the picture, so intra prediction sees unfiltered neighbours while the
 * final picture keeps the deblocked ones. With deblocking_filter==2 the
 * borders are only exchanged across slice boundaries within the same slice. */
2304 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2305 MpegEncContext * const s = &h->s;
2312 if(h->deblocking_filter == 2) {
// mode 2: filter inside slices only, so compare slice ids of the neighbours
2314 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2315 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2317 deblock_left = (s->mb_x > 0);
2318 deblock_top = (s->mb_y > 0);
// step back one row/column so index 0 addresses the neighbour border pixels
2321 src_y -= linesize + 1;
2322 src_cb -= uvlinesize + 1;
2323 src_cr -= uvlinesize + 1;
2325 #define XCHG(a,b,t,xchg)\
// left luma column: start at 1 when the top row must not be touched
2332 for(i = !deblock_top; i<17; i++){
2333 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// top luma row; the second/third XCHG always swap (xchg forced to 1) because
// those pixels are not needed for this macroblock's prediction anymore
2338 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2339 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2340 if(s->mb_x+1 < s->mb_width){
2341 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
// chroma borders, skipped in gray-only decoding
2345 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2347 for(i = !deblock_top; i<9; i++){
2348 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2349 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2353 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border: saves TWO bottom rows (one per field
 * of the macroblock pair) into top_borders[0]/[1] and a 34-entry left
 * column (2x16 luma + corners) into left_border. */
2359 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2360 MpegEncContext * const s = &h->s;
2363 src_y -= 2 * linesize;
2364 src_cb -= 2 * uvlinesize;
2365 src_cr -= 2 * uvlinesize;
2367 // There are two lines saved, the line above the the top macroblock of a pair,
2368 // and the line above the bottom macroblock
// corner pixels come from the previously saved top rows of both fields
2369 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2370 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2371 for(i=2; i<34; i++){
2372 h->left_border[i]= src_y[15+i* linesize];
// two bottom luma rows: row 32 -> borders[0], row 33 -> borders[1]
2375 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2376 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2377 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2378 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2380 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
// chroma corners: Cb block at +16, Cr block at +24 inside the border rows
2381 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2382 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2383 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2384 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2385 for(i=2; i<18; i++){
2386 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2387 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2389 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2390 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2391 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2392 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border: swaps/restores TWO rows of saved border
 * pixels (one per field of the macroblock pair) plus a 34-entry left luma
 * column, so intra prediction of the pair uses unfiltered neighbours. */
2396 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2397 MpegEncContext * const s = &h->s;
// mb_y > 1 because the pair above occupies two macroblock rows
2400 int deblock_left = (s->mb_x > 0);
2401 int deblock_top = (s->mb_y > 1);
2403 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2405 src_y -= 2 * linesize + 1;
2406 src_cb -= 2 * uvlinesize + 1;
2407 src_cr -= 2 * uvlinesize + 1;
2409 #define XCHG(a,b,t,xchg)\
// skip the first two (top) entries when the row above must stay untouched
2416 for(i = (!deblock_top)<<1; i<34; i++){
2417 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// top rows of both fields; trailing XCHGs force-swap (xchg=1) the parts no
// longer needed for this pair's own prediction
2422 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2423 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2424 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2425 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2426 if(s->mb_x+1 < s->mb_width){
2427 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2428 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
// chroma halves, skipped when decoding luma only (gray flag)
2432 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2434 for(i = (!deblock_top) << 1; i<18; i++){
2435 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2436 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2440 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2441 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2442 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2443 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT-add for luma and chroma, and deblocking. 'simple' is a
 * compile-time flag (always_inline) that removes the MBAFF / PCM / SVQ3 /
 * gray / encoder paths, producing a fast H.264-only variant.
 * NOTE(review): this extraction elides many closing braces and else
 * branches; comments describe only the visible statements. */
2448 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2449 MpegEncContext * const s = &h->s;
2450 const int mb_x= s->mb_x;
2451 const int mb_y= s->mb_y;
2452 const int mb_xy= h->mb_xy;
2453 const int mb_type= s->current_picture.mb_type[mb_xy];
2454 uint8_t *dest_y, *dest_cb, *dest_cr;
2455 int linesize, uvlinesize /*dct_offset*/;
2457 int *block_offset = &h->block_offset[0];
2458 const unsigned int bottom = mb_y & 1;
2459 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2460 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2461 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination pointers for this macroblock in the current picture planes
2463 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2464 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2465 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2467 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2468 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double the strides and use the field block offsets
2470 if (!simple && MB_FIELD) {
2471 linesize = h->mb_linesize = s->linesize * 2;
2472 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2473 block_offset = &h->block_offset[24];
2474 if(mb_y&1){ //FIXME move out of this function?
2475 dest_y -= s->linesize*15;
2476 dest_cb-= s->uvlinesize*7;
2477 dest_cr-= s->uvlinesize*7;
// retag the ref cache so field parity is encoded in the reference index
2481 for(list=0; list<h->list_count; list++){
2482 if(!USES_LIST(mb_type, list))
2484 if(IS_16X16(mb_type)){
2485 int8_t *ref = &h->ref_cache[list][scan8[0]];
2486 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2488 for(i=0; i<16; i+=4){
2489 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2490 int ref = h->ref_cache[list][scan8[i]];
2492 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2498 linesize = h->mb_linesize = s->linesize;
2499 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2500 // dct_offset = s->linesize * 16;
// choose the residual add functions once per macroblock
2503 if(transform_bypass){
2505 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2506 }else if(IS_8x8DCT(mb_type)){
2507 idct_dc_add = s->dsp.h264_idct8_dc_add;
2508 idct_add = s->dsp.h264_idct8_add;
2510 idct_dc_add = s->dsp.h264_idct_dc_add;
2511 idct_add = s->dsp.h264_idct_add;
// MBAFF + intra: temporarily expose unfiltered neighbour pixels of the pair
2514 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2515 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2516 int mbt_y = mb_y&~1;
2517 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2518 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2519 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2520 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM: raw samples were parsed into h->mb; copy them straight to the picture
2523 if (!simple && IS_INTRA_PCM(mb_type)) {
2526 // The pixels are stored in h->mb array in the same order as levels,
2527 // copy them in output in the correct order.
2528 for(i=0; i<16; i++) {
2529 for (y=0; y<4; y++) {
2530 for (x=0; x<4; x++) {
2531 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2535 for(i=16; i<16+4; i++) {
2536 for (y=0; y<4; y++) {
2537 for (x=0; x<4; x++) {
2538 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2542 for(i=20; i<20+4; i++) {
2543 for (y=0; y<4; y++) {
2544 for (x=0; x<4; x++) {
2545 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// intra macroblocks: prediction (needs unfiltered borders) then residual
2550 if(IS_INTRA(mb_type)){
2551 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2552 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2554 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2555 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2556 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2559 if(IS_INTRA4x4(mb_type)){
2560 if(simple || !s->encoding){
// 8x8 intra: predict and add residual per 8x8 block (step 4 in scan order)
2561 if(IS_8x8DCT(mb_type)){
2562 for(i=0; i<16; i+=4){
2563 uint8_t * const ptr= dest_y + block_offset[i];
2564 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2565 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2566 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2567 (h->topright_samples_available<<i)&0x4000, linesize);
// nnz==1 with only the DC coefficient set: cheaper DC-only add
2569 if(nnz == 1 && h->mb[i*16])
2570 idct_dc_add(ptr, h->mb + i*16, linesize);
2572 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 intra: per-block prediction; fabricate top-right samples when absent
2576 for(i=0; i<16; i++){
2577 uint8_t * const ptr= dest_y + block_offset[i];
2579 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2582 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2583 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2584 assert(mb_y || linesize <= block_offset[i]);
2585 if(!topright_avail){
// replicate the rightmost available top pixel four times (0x01010101)
2586 tr= ptr[3 - linesize]*0x01010101;
2587 topright= (uint8_t*) &tr;
2589 topright= ptr + 4 - linesize;
2593 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2594 nnz = h->non_zero_count_cache[ scan8[i] ];
2597 if(nnz == 1 && h->mb[i*16])
2598 idct_dc_add(ptr, h->mb + i*16, linesize);
2600 idct_add(ptr, h->mb + i*16, linesize);
// non-H.264 (SVQ3) residual path
2602 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// 16x16 intra: one full-macroblock prediction plus luma DC transform
2607 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2609 if(!transform_bypass)
2610 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2612 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2614 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2615 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter macroblocks: motion compensation
2617 hl_motion(h, dest_y, dest_cb, dest_cr,
2618 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2619 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2620 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// luma residual for everything except intra4x4 (already added above)
2624 if(!IS_INTRA4x4(mb_type)){
2626 if(IS_INTRA16x16(mb_type)){
2627 for(i=0; i<16; i++){
2628 if(h->non_zero_count_cache[ scan8[i] ])
2629 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2630 else if(h->mb[i*16])
2631 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// inter: step by 4 for 8x8 transform, 1 for 4x4
2634 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2635 for(i=0; i<16; i+=di){
2636 int nnz = h->non_zero_count_cache[ scan8[i] ];
2638 if(nnz==1 && h->mb[i*16])
2639 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2641 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2646 for(i=0; i<16; i++){
2647 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2648 uint8_t * const ptr= dest_y + block_offset[i];
2649 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual: blocks 16..19 -> Cb, 20..23 -> Cr (selected by (i&4)>>2)
2655 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2656 uint8_t *dest[2] = {dest_cb, dest_cr};
2657 if(transform_bypass){
2658 idct_add = idct_dc_add = s->dsp.add_pixels4;
2660 idct_add = s->dsp.h264_idct_add;
2661 idct_dc_add = s->dsp.h264_idct_dc_add;
// 2x2 chroma DC transform with its own QP and dequant list per plane
2662 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2663 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2666 for(i=16; i<16+8; i++){
2667 if(h->non_zero_count_cache[ scan8[i] ])
2668 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2669 else if(h->mb[i*16])
2670 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2673 for(i=16; i<16+8; i++){
2674 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2675 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2676 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking: MBAFF filters the whole pair once the bottom MB is done
2682 if(h->deblocking_filter) {
2683 if (!simple && FRAME_MBAFF) {
2684 //FIXME try deblocking one mb at a time?
2685 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2686 const int mb_y = s->mb_y - 1;
2687 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2688 const int mb_xy= mb_x + mb_y*s->mb_stride;
2689 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2690 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// wait for the bottom macroblock of the pair before filtering
2691 if (!bottom) return;
2692 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2693 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2694 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2696 if(IS_INTRA(mb_type_top | mb_type_bottom))
2697 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2699 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// temporarily point the context at the top macroblock, filter, then restore
2702 s->mb_y--; h->mb_xy -= s->mb_stride;
2703 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2704 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2705 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2706 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2707 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2709 s->mb_y++; h->mb_xy += s->mb_stride;
2710 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2711 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2712 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2713 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2714 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// non-MBAFF: back up borders and use the fast single-MB filter
2716 tprintf(h->s.avctx, "call filter_mb\n");
2717 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2718 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2719 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2720 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2721 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2727 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast path: instantiates hl_decode_mb_internal with simple=1, compiling
// out the MBAFF/PCM/SVQ3/gray/encoder branches.
2729 static void hl_decode_mb_simple(H264Context *h){
2730 hl_decode_mb_internal(h, 1);
2734 * Process a macroblock; this handles edge cases, such as interlacing.
// Slow path: simple=0 keeps all edge-case handling (interlacing, PCM, ...).
// av_noinline keeps this large instantiation out of hl_decode_mb itself.
2736 static void av_noinline hl_decode_mb_complex(H264Context *h){
2737 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant,
 * based on features of the current macroblock and build configuration. */
2740 static void hl_decode_mb(H264Context *h){
2741 MpegEncContext * const s = &h->s;
2742 const int mb_xy= h->mb_xy;
2743 const int mb_type= s->current_picture.mb_type[mb_xy];
// anything beyond plain progressive H.264 decoding forces the complex path;
// ENABLE_SMALL trades the duplicated fast path for code size
2744 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2745 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2747 if(ENABLE_H264_ENCODER && !s->decode)
2751 hl_decode_mb_complex(h);
2752 else hl_decode_mb_simple(h);
/* Convert a frame Picture (in place) into a single-field view: for the
 * bottom field, advance the data pointers by one line; double all
 * linesizes so stepping one "row" skips the opposite field's lines.
 * pic->reference is (redundantly) set to the field parity each iteration. */
2755 static void pic_as_field(Picture *pic, const int parity){
2757 for (i = 0; i < 4; ++i) {
2758 if (parity == PICT_BOTTOM_FIELD)
2759 pic->data[i] += pic->linesize[i];
2760 pic->reference = parity;
2761 pic->linesize[i] *= 2;
/* Copy src into dest as a single field of the given parity, if src is a
 * reference for that parity. Returns 1 when a field was emitted, 0 when
 * src has no matching field. id_add adjusts pic_id (used to distinguish
 * same- vs opposite-parity fields in the output list).
 * NOTE(review): the conditional guarding the copy is elided here. */
2765 static int split_field_copy(Picture *dest, Picture *src,
2766 int parity, int id_add){
2767 int match = !!(src->reference & parity);
2771 pic_as_field(dest, parity);
2773 dest->pic_id += id_add;
2780 * Split one reference list into field parts, interleaving by parity
2781 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2782 * set to look at the actual start of data for that field.
2784 * @param dest output list
2785 * @param dest_len maximum number of fields to put in dest
2786 * @param src the source reference list containing fields and/or field pairs
2787 * (aka short_ref/long_ref, or
2788 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2789 * @param src_len number of Picture's in source (pairs and unmatched fields)
2790 * @param parity the parity of the picture being decoded/needing
2791 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2792 * @return number of fields placed in dest
/* Interleave one half (short-term or long-term) of a frame reference list
 * into a field list, alternating same-parity and opposite-parity fields as
 * per H.264 8.2.4.2.5; same-parity fields are preferred whenever available.
 * Returns the number of fields written (the loop counter; the return
 * statement itself is elided in this extraction). */
2794 static int split_field_half_ref_list(Picture *dest, int dest_len,
2795 Picture *src, int src_len, int parity){
2796 int same_parity = 1;
2802 for (out_i = 0; out_i < dest_len; out_i += field_output) {
// try a same-parity field first; if src[same_i] has none, retry same parity
2803 if (same_parity && same_i < src_len) {
2804 field_output = split_field_copy(dest + out_i, src + same_i,
2806 same_parity = !field_output;
// then a field of the opposite parity, with pic_id left unchanged (id_add=0)
2809 } else if (opp_i < src_len) {
2810 field_output = split_field_copy(dest + out_i, src + opp_i,
2811 PICT_FRAME - parity, 0);
2812 same_parity = field_output;
2824 * Split the reference frame list into a reference field list.
2825 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2826 * The input list contains both reference field pairs and
2827 * unmatched reference fields; it is ordered as spec describes
2828 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2829 * unmatched field pairs are also present. Conceptually this is equivalent
2830 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2832 * @param dest output reference list where ordered fields are to be placed
2833 * @param dest_len max number of fields to place at dest
2834 * @param src source reference list, as described above
2835 * @param src_len number of pictures (pairs and unmatched fields) in src
2836 * @param parity parity of field being currently decoded
2837 * (one of PICT_{TOP,BOTTOM}_FIELD)
2838 * @param long_i index into src array that holds first long reference picture,
2839 * or src_len if no long refs present.
/* Split a combined (short-term then long-term) frame reference list into a
 * field reference list: short-term entries (src[0..long_i)) are interleaved
 * first, then long-term entries (src[long_i..src_len)). Returns the total
 * number of fields placed in dest. */
2841 static int split_field_ref_list(Picture *dest, int dest_len,
2842 Picture *src, int src_len,
2843 int parity, int long_i){
2845 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2849 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2850 src_len - long_i, parity);
2855 * fills the default_ref_list.
/* Build the default (pre-reordering) reference lists per H.264 8.2.4.2.
 * For B slices, short-term refs are sorted by POC relative to the current
 * picture (L0: past then future, L1: future then past); for P slices they
 * keep decode order. For field pictures the frame lists are built into
 * temporary arrays and then split into field lists by parity.
 * NOTE(review): several lines (loop headers, else branches, the trailing
 * return) are elided in this extraction. */
2857 static int fill_default_ref_list(H264Context *h){
2858 MpegEncContext * const s = &h->s;
2860 int smallest_poc_greater_than_current = -1;
2862 Picture sorted_short_ref[32];
2863 Picture field_entry_list[2][32];
2864 Picture *frame_list[2];
// field decode: build frame lists into temporaries, split by parity later;
// structure_sel relaxes the "must be a full frame" reference check below
2866 if (FIELD_PICTURE) {
2867 structure_sel = PICT_FRAME;
2868 frame_list[0] = field_entry_list[0];
2869 frame_list[1] = field_entry_list[1];
2872 frame_list[0] = h->default_ref_list[0];
2873 frame_list[1] = h->default_ref_list[1];
2876 if(h->slice_type_nos==FF_B_TYPE){
2883 /* sort frame according to POC in B slice */
// selection sort: repeatedly pick the smallest POC above the previous pick
2884 for(out_i=0; out_i<h->short_ref_count; out_i++){
2886 int best_poc=INT_MAX;
2888 for(i=0; i<h->short_ref_count; i++){
2889 const int poc= h->short_ref[i]->poc;
2890 if(poc > limit && poc < best_poc){
2896 assert(best_i != INT_MIN);
2899 sorted_short_ref[out_i]= *h->short_ref[best_i];
2900 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where "future" pictures start in the sorted array
2901 if (-1 == smallest_poc_greater_than_current) {
2902 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2903 smallest_poc_greater_than_current = out_i;
2908 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2910 // find the largest POC
// L0 walks past pictures first (step +1), L1 walks future first (step -1)
2911 for(list=0; list<2; list++){
2914 int step= list ? -1 : 1;
2916 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
// when one direction is exhausted, reverse and continue from the pivot
2918 while(j<0 || j>= h->short_ref_count){
2919 if(j != -99 && step == (list ? -1 : 1))
2922 j= smallest_poc_greater_than_current + (step>>1);
// for fields (structure_sel==PICT_FRAME) any field reference qualifies
2924 sel = sorted_short_ref[j].reference | structure_sel;
2925 if(sel != PICT_FRAME) continue;
2926 frame_list[list][index ]= sorted_short_ref[j];
2927 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2929 short_len[list] = index;
// long-term refs follow, pic_id = long-term index
2931 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2933 if(h->long_ref[i] == NULL) continue;
2934 sel = h->long_ref[i]->reference | structure_sel;
2935 if(sel != PICT_FRAME) continue;
2937 frame_list[ list ][index ]= *h->long_ref[i];
2938 frame_list[ list ][index++].pic_id= i;
2943 for(list=0; list<2; list++){
2945 len[list] = split_field_ref_list(h->default_ref_list[list],
2949 s->picture_structure,
2952 // swap the two first elements of L1 when L0 and L1 are identical
2953 if(list && len[0] > 1 && len[0] == len[1])
2954 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2956 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
// clear unused tail entries so stale pointers are never dereferenced
2960 if(len[list] < h->ref_count[ list ])
2961 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// P/SP slices: single list, short-term refs in decode order then long-term
2968 for(i=0; i<h->short_ref_count; i++){
2970 sel = h->short_ref[i]->reference | structure_sel;
2971 if(sel != PICT_FRAME) continue;
2972 frame_list[0][index ]= *h->short_ref[i];
2973 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2976 for(i = 0; i < 16; i++){
2978 if(h->long_ref[i] == NULL) continue;
2979 sel = h->long_ref[i]->reference | structure_sel;
2980 if(sel != PICT_FRAME) continue;
2981 frame_list[0][index ]= *h->long_ref[i];
2982 frame_list[0][index++].pic_id= i;
2986 index = split_field_ref_list(h->default_ref_list[0],
2987 h->ref_count[0], frame_list[0],
2988 index, s->picture_structure,
2991 if(index < h->ref_count[0])
2992 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2995 for (i=0; i<h->ref_count[0]; i++) {
2996 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2998 if(h->slice_type_nos==FF_B_TYPE){
2999 for (i=0; i<h->ref_count[1]; i++) {
3000 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3007 static void print_short_term(H264Context *h);
3008 static void print_long_term(H264Context *h);
3011 * Extract structure information about the picture described by pic_num in
3012 * the current decoding context (frame or field). Note that pic_num is
3013 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3014 * @param pic_num picture number for which to extract structure information
3015 * @param structure one of PICT_XXX describing structure of picture
3017 * @return frame number (short term) or long term index of picture
3018 * described by pic_num
/**
 * Extract the frame number (short term) or long term index, and the
 * picture structure, encoded in a pic_num for the current decoding context.
 * NOTE(review): this excerpt omits some original lines (numbering gaps:
 * 3022, 3024-3025, 3028+), including the condition guarding the
 * opposite-field branch and the return statement.
 */
3020 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3021 MpegEncContext * const s = &h->s;
/* default: the picture has the same structure as the current picture */
3023 *structure = s->picture_structure;
3026 /* opposite field */
3027 *structure ^= PICT_FRAME;
/**
 * Decode the ref_pic_list_reordering() syntax of a slice header and apply
 * the reordering to h->ref_list, starting from h->default_ref_list.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including several closing braces, break statements and error returns;
 * the visible control flow is incomplete.
 */
3034 static int decode_ref_pic_list_reordering(H264Context *h){
3035 MpegEncContext * const s = &h->s;
3036 int list, index, pic_structure;
3038 print_short_term(h);
3040 if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before function
3042 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering (if present) edits it in place */
3043 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3045 if(get_bits1(&s->gb)){ /* ref_pic_list_reordering_flag */
3046 int pred= h->curr_pic_num;
3048 for(index=0; ; index++){
3049 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3050 unsigned int pic_id;
3052 Picture *ref = NULL;
/* idc 3 terminates the reordering loop for this list */
3054 if(reordering_of_pic_nums_idc==3)
3057 if(index >= h->ref_count[list]){
3058 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3062 if(reordering_of_pic_nums_idc<3){
3063 if(reordering_of_pic_nums_idc<2){
/* short-term reorder: idc 0 subtracts, idc 1 adds abs_diff_pic_num */
3064 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3067 if(abs_diff_pic_num > h->max_pic_num){
3068 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3072 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3073 else pred+= abs_diff_pic_num;
/* wrap the predicted pic num; max_pic_num is a power of two */
3074 pred &= h->max_pic_num - 1;
3076 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search the short-term list, most recent first */
3078 for(i= h->short_ref_count-1; i>=0; i--){
3079 ref = h->short_ref[i];
3080 assert(ref->reference);
3081 assert(!ref->long_ref);
3082 if(ref->data[0] != NULL &&
3083 ref->frame_num == frame_num &&
3084 (ref->reference & pic_structure) &&
3085 ref->long_ref == 0) // ignore non-existing pictures by testing data[0] pointer
/* long-term reorder: pic_id identifies the long-term index */
3092 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3094 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3097 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3100 ref = h->long_ref[long_idx];
3101 assert(!(ref && !ref->reference));
3102 if(ref && (ref->reference & pic_structure)){
3103 ref->pic_id= pic_id;
3104 assert(ref->long_ref);
3112 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3113 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* find an existing copy of ref later in the list, then shift entries
 * down so ref can be inserted at the current index */
3115 for(i=index; i+1<h->ref_count[list]; i++){
3116 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3119 for(; i > index; i--){
3120 h->ref_list[list][i]= h->ref_list[list][i-1];
3122 h->ref_list[list][index]= *ref;
/* field decoding: reduce the inserted frame to the referenced field */
3124 pic_as_field(&h->ref_list[list][index], pic_structure);
3128 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* fill any remaining holes with the current picture to avoid
 * dereferencing NULL data pointers later */
3134 for(list=0; list<h->list_count; list++){
3135 for(index= 0; index < h->ref_count[list]; index++){
3136 if(!h->ref_list[list][index].data[0])
3137 h->ref_list[list][index]= s->current_picture;
/* temporal direct mode needs the distance scale factors */
3141 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3142 direct_dist_scale_factor(h);
3143 direct_ref_list_init(h);
/**
 * Build the per-field reference entries used by MBAFF decoding: for each
 * frame in ref_list[list][i], synthesize a top field at index 16+2*i and a
 * bottom field at 16+2*i+1, and duplicate the corresponding weights/offsets.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * e.g. the inner loop headers copying data[] and the field[0]=*frame setup.
 */
3147 static void fill_mbaff_ref_list(H264Context *h){
3149 for(list=0; list<2; list++){ //FIXME try list_count
3150 for(i=0; i<h->ref_count[list]; i++){
3151 Picture *frame = &h->ref_list[list][i];
3152 Picture *field = &h->ref_list[list][16+2*i];
/* a field has double the line stride of the interleaved frame */
3155 field[0].linesize[j] <<= 1;
3156 field[0].reference = PICT_TOP_FIELD;
3157 field[1] = field[0];
/* bottom field starts one frame line below the top field */
3159 field[1].data[j] += frame->linesize[j];
3160 field[1].reference = PICT_BOTTOM_FIELD;
/* both synthesized fields reuse the frame's explicit weights/offsets */
3162 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3163 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3165 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3166 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are duplicated along both table axes */
3170 for(j=0; j<h->ref_count[1]; j++){
3171 for(i=0; i<h->ref_count[0]; i++)
3172 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3173 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3174 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the pred_weight_table() slice-header syntax: explicit per-reference
 * luma/chroma weights and offsets for weighted prediction. Sets
 * h->use_weight / h->use_weight_chroma when any weight differs from the
 * default (1 << log2_weight_denom) or any offset is nonzero.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including variable declarations, the j loops over chroma planes, and
 * several else branches / closing braces.
 */
3178 static int pred_weight_table(H264Context *h){
3179 MpegEncContext * const s = &h->s;
3181 int luma_def, chroma_def;
3184 h->use_weight_chroma= 0;
3185 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3186 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* the neutral weight corresponding to the chosen denominator */
3187 luma_def = 1<<h->luma_log2_weight_denom;
3188 chroma_def = 1<<h->chroma_log2_weight_denom;
3190 for(list=0; list<2; list++){
3191 for(i=0; i<h->ref_count[list]; i++){
3192 int luma_weight_flag, chroma_weight_flag;
3194 luma_weight_flag= get_bits1(&s->gb);
3195 if(luma_weight_flag){
3196 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3197 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* any non-neutral luma weight/offset enables weighted prediction */
3198 if( h->luma_weight[list][i] != luma_def
3199 || h->luma_offset[list][i] != 0)
/* flag absent: fall back to the neutral weight */
3202 h->luma_weight[list][i]= luma_def;
3203 h->luma_offset[list][i]= 0;
3206 chroma_weight_flag= get_bits1(&s->gb);
3207 if(chroma_weight_flag){
3210 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3211 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3212 if( h->chroma_weight[list][i][j] != chroma_def
3213 || h->chroma_offset[list][i][j] != 0)
3214 h->use_weight_chroma= 1;
3219 h->chroma_weight[list][i][j]= chroma_def;
3220 h->chroma_offset[list][i][j]= 0;
/* only B slices carry a second (L1) weight table */
3224 if(h->slice_type_nos != FF_B_TYPE) break;
3226 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Fill h->implicit_weight for weighted_bipred_idc==2: weights derived from
 * the POC distances between the current picture and each L0/L1 reference
 * pair (ref: H.264 8.4.2.3.2). Falls back to equal weights (32/32) when the
 * distance-scale factor is out of range or td is zero.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including the early-return body for the symmetric single-ref case and
 * several braces.
 */
3230 static void implicit_weight_table(H264Context *h){
3231 MpegEncContext * const s = &h->s;
3233 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric reference pair: implicit weighting is a no-op */
3235 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3236 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3238 h->use_weight_chroma= 0;
3243 h->use_weight_chroma= 2;
3244 h->luma_log2_weight_denom= 5;
3245 h->chroma_log2_weight_denom= 5;
3247 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3248 int poc0 = h->ref_list[0][ref0].poc;
3249 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3250 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb clipping and tx/dist_scale_factor follow the spec formulas */
3251 int td = av_clip(poc1 - poc0, -128, 127);
3253 int tb = av_clip(cur_poc - poc0, -128, 127);
3254 int tx = (16384 + (FFABS(td) >> 1)) / td;
3255 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factor: use the neutral 32/32 split */
3256 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3257 h->implicit_weight[ref0][ref1] = 32;
3259 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3261 h->implicit_weight[ref0][ref1] = 32;
3267 * Mark a picture as no longer needed for reference. The refmask
3268 * argument allows unreferencing of individual fields or the whole frame.
3269 * If the picture becomes entirely unreferenced, but is being held for
3270 * display purposes, it is marked as such.
3271 * @param refmask mask of fields to unreference; the mask is bitwise
3272 * anded with the reference marking of pic
3273 * @return non-zero if pic becomes entirely unreferenced (except possibly
3274 * for display purposes) zero if one of the fields remains in
/**
 * Clear the given field bits from pic->reference (see the doc comment
 * above). If the picture is still queued for output it is retagged as
 * DELAYED_PIC_REF so the buffer is kept alive until displayed.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including the declarations, return statements and closing braces.
 */
3277 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* a field remains referenced: picture is not fully released */
3279 if (pic->reference &= refmask) {
3282 for(i = 0; h->delayed_pic[i]; i++)
3283 if(pic == h->delayed_pic[i]){
/* keep the buffer alive purely for delayed output */
3284 pic->reference=DELAYED_PIC_REF;
3292 * instantaneous decoder refresh.
/**
 * Perform an instantaneous decoder refresh: drop all short- and long-term
 * references and reset the frame-number prediction state.
 * NOTE(review): this excerpt omits some original lines (numbering gaps,
 * e.g. closing braces).
 */
3294 static void idr(H264Context *h){
/* release every long-term reference slot */
3297 for(i=0; i<16; i++){
3298 if (h->long_ref[i] != NULL) {
3299 unreference_pic(h, h->long_ref[i], 0);
3300 h->long_ref[i]= NULL;
3303 h->long_ref_count=0;
/* release every short-term reference */
3305 for(i=0; i<h->short_ref_count; i++){
3306 unreference_pic(h, h->short_ref[i], 0);
3307 h->short_ref[i]= NULL;
3309 h->short_ref_count=0;
3310 h->prev_frame_num= 0;
3311 h->prev_frame_num_offset= 0;
3316 /* forget old pics after a seek */
/* Forget all buffered pictures after a seek: clear the delayed-output
 * queue, drop the current picture's reference marking, reset field state
 * and flush the underlying MPEG context.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * e.g. the call releasing references (likely idr()) and closing braces. */
3317 static void flush_dpb(AVCodecContext *avctx){
3318 H264Context *h= avctx->priv_data;
3320 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3321 if(h->delayed_pic[i])
3322 h->delayed_pic[i]->reference= 0;
3323 h->delayed_pic[i]= NULL;
/* INT_MIN so any real POC compares as newer than "nothing output yet" */
3325 h->outputed_poc= INT_MIN;
3327 if(h->s.current_picture_ptr)
3328 h->s.current_picture_ptr->reference= 0;
3329 h->s.first_field= 0;
3330 ff_mpeg_flush(avctx);
3334 * Find a Picture in the short term reference list by frame number.
3335 * @param frame_num frame number to search for
3336 * @param idx the index into h->short_ref where returned picture is found
3337 * undefined if no picture found.
3338 * @return pointer to the found picture, or NULL if no pic with the provided
3339 * frame number is found
/**
 * Linear search of h->short_ref for a picture with the given frame_num
 * (see the doc comment above for the *idx / return contract).
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including the *idx store, the returns and closing braces.
 */
3341 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3342 MpegEncContext * const s = &h->s;
3345 for(i=0; i<h->short_ref_count; i++){
3346 Picture *pic= h->short_ref[i];
3347 if(s->avctx->debug&FF_DEBUG_MMCO)
3348 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3349 if(pic->frame_num == frame_num) {
3358 * Remove a picture from the short term reference list by its index in
3359 * that list. This does no checking on the provided index; it is assumed
3360 * to be valid. Other list entries are shifted down.
3361 * @param i index into h->short_ref of picture to remove.
/* Remove entry i from the short-term list (see the doc comment above);
 * the remaining entries are shifted down to keep the list packed.
 * NOTE(review): the closing brace is outside this excerpt. */
3363 static void remove_short_at_index(H264Context *h, int i){
3364 assert(i >= 0 && i < h->short_ref_count);
3365 h->short_ref[i]= NULL;
/* compact the list only if entries remain after the removed one */
3366 if (--h->short_ref_count)
3367 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3372 * @return the removed picture or NULL if an error occurs
/**
 * Find the short-term reference with the given frame_num and remove it
 * from the list; returns the removed picture (NULL on failure, per the
 * doc comment above).
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including the NULL check around the removal and the return.
 */
3374 static Picture * remove_short(H264Context *h, int frame_num){
3375 MpegEncContext * const s = &h->s;
3379 if(s->avctx->debug&FF_DEBUG_MMCO)
3380 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3382 pic = find_short(h, frame_num, &i);
3384 remove_short_at_index(h, i);
3390 * Remove a picture from the long term reference list by its index in
3391 * that list. This does no checking on the provided index; it is assumed
3392 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3393 * @param i index into h->long_ref of picture to remove.
/* Clear long-term slot i (slots are sparse, so no compaction — see the
 * doc comment above). NOTE(review): closing brace is outside this excerpt. */
3395 static void remove_long_at_index(H264Context *h, int i){
3396 h->long_ref[i]= NULL;
3397 h->long_ref_count--;
3402 * @return the removed picture or NULL if an error occurs
/**
 * Remove and return the long-term reference at index i (NULL if the slot
 * is empty, per the doc comment above).
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including the guard around the removal and the return.
 */
3404 static Picture * remove_long(H264Context *h, int i){
3407 pic= h->long_ref[i];
3409 remove_long_at_index(h, i);
3415 * print short term list
/* Dump the short-term reference list when FF_DEBUG_MMCO is enabled.
 * NOTE(review): declarations/closing braces are outside this excerpt. */
3417 static void print_short_term(H264Context *h) {
3419 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3420 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3421 for(i=0; i<h->short_ref_count; i++){
3422 Picture *pic= h->short_ref[i];
3423 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3429 * print long term list
/* Dump the long-term reference slots when FF_DEBUG_MMCO is enabled.
 * NOTE(review): the NULL-slot guard and closing braces are outside this
 * excerpt (all 16 slots are scanned; empty slots must be skipped). */
3431 static void print_long_term(H264Context *h) {
3433 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3434 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3435 for(i = 0; i < 16; i++){
3436 Picture *pic= h->long_ref[i];
3438 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3445 * Executes the reference picture marking (memory management control operations).
/**
 * Execute the decoded reference picture marking (MMCO) operations for the
 * current picture (ref: H.264 8.2.5), then handle the implicit sliding
 * window / second-field bookkeeping and enforce sps.ref_frame_count.
 * NOTE(review): this excerpt omits many original lines (numbering gaps):
 * case labels (e.g. MMCO_LONG), break statements, several guards and
 * closing braces; the visible switch structure is incomplete.
 */
3447 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3448 MpegEncContext * const s = &h->s;
3450 int current_ref_assigned=0;
3453 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3454 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3456 for(i=0; i<mmco_count; i++){
3457 int structure, frame_num;
3458 if(s->avctx->debug&FF_DEBUG_MMCO)
3459 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3461 switch(mmco[i].opcode){
/* mark a short-term reference as unused (possibly one field only) */
3462 case MMCO_SHORT2UNUSED:
3463 if(s->avctx->debug&FF_DEBUG_MMCO)
3464 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3465 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3466 pic = find_short(h, frame_num, &j);
/* structure^PICT_FRAME == mask of fields that stay referenced */
3468 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3469 remove_short_at_index(h, j);
3470 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3471 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* move a short-term reference to a long-term slot */
3473 case MMCO_SHORT2LONG:
3474 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3475 h->long_ref[mmco[i].long_arg]->frame_num ==
3476 mmco[i].short_pic_num / 2) {
3477 /* do nothing, we've already moved this field pair. */
3479 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* evict whatever occupied the target long-term slot */
3481 pic= remove_long(h, mmco[i].long_arg);
3482 if(pic) unreference_pic(h, pic, 0);
3484 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3485 if (h->long_ref[ mmco[i].long_arg ]){
3486 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3487 h->long_ref_count++;
/* mark a long-term reference as unused (possibly one field only) */
3491 case MMCO_LONG2UNUSED:
3492 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3493 pic = h->long_ref[j];
3495 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3496 remove_long_at_index(h, j);
3497 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3498 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* assign the current picture to a long-term slot */
3501 // Comment below left from previous code as it is an interesting note.
3502 /* First field in pair is in short term list or
3503 * at a different long term index.
3504 * This is not allowed; see 7.4.3, notes 2 and 3.
3505 * Report the problem and keep the pair where it is,
3506 * and mark this field valid.
3509 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3510 pic= remove_long(h, mmco[i].long_arg);
3511 if(pic) unreference_pic(h, pic, 0);
3513 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3514 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3515 h->long_ref_count++;
3518 s->current_picture_ptr->reference |= s->picture_structure;
3519 current_ref_assigned=1;
/* shrink max long-term index: drop all slots >= the new max */
3521 case MMCO_SET_MAX_LONG:
3522 assert(mmco[i].long_arg <= 16);
3523 // just remove the long term which index is greater than new max
3524 for(j = mmco[i].long_arg; j<16; j++){
3525 pic = remove_long(h, j);
3526 if (pic) unreference_pic(h, pic, 0);
/* MMCO reset: drop everything and restart POC/frame_num numbering */
3530 while(h->short_ref_count){
3531 pic= remove_short(h, h->short_ref[0]->frame_num);
3532 if(pic) unreference_pic(h, pic, 0);
3534 for(j = 0; j < 16; j++) {
3535 pic= remove_long(h, j);
3536 if(pic) unreference_pic(h, pic, 0);
3538 s->current_picture_ptr->poc=
3539 s->current_picture_ptr->field_poc[0]=
3540 s->current_picture_ptr->field_poc[1]=
3544 s->current_picture_ptr->frame_num= 0;
3550 if (!current_ref_assigned && FIELD_PICTURE &&
3551 !s->first_field && s->current_picture_ptr->reference) {
3553 /* Second field of complementary field pair; the first field of
3554 * which is already referenced. If short referenced, it
3555 * should be first entry in short_ref. If not, it must exist
3556 * in long_ref; trying to put it on the short list here is an
3557 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3559 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3560 /* Just mark the second field valid */
3561 s->current_picture_ptr->reference = PICT_FRAME;
3562 } else if (s->current_picture_ptr->long_ref) {
3563 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3564 "assignment for second field "
3565 "in complementary field pair "
3566 "(first field is long term)\n");
3569 * First field in reference, but not in any sensible place on our
3570 * reference lists. This shouldn't happen unless reference
3571 * handling somewhere else is wrong.
3575 current_ref_assigned = 1;
/* default (sliding window): prepend the current picture to short_ref */
3578 if(!current_ref_assigned){
3579 pic= remove_short(h, s->current_picture_ptr->frame_num);
3581 unreference_pic(h, pic, 0);
3582 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3585 if(h->short_ref_count)
3586 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3588 h->short_ref[0]= s->current_picture_ptr;
3589 h->short_ref[0]->long_ref=0;
3590 h->short_ref_count++;
3591 s->current_picture_ptr->reference |= s->picture_structure;
/* enforce the DPB reference-frame limit from the SPS */
3594 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3596 /* We have too many reference frames, probably due to corrupted
3597 * stream. Need to discard one frame. Prevents overrun of the
3598 * short_ref and long_ref buffers.
3600 av_log(h->s.avctx, AV_LOG_ERROR,
3601 "number of reference frames exceeds max (probably "
3602 "corrupt input), discarding one\n");
/* prefer dropping a long-term ref only when no short-term ones exist */
3604 if (h->long_ref_count && !h->short_ref_count) {
3605 for (i = 0; i < 16; ++i)
3610 pic = h->long_ref[i];
3611 remove_long_at_index(h, i);
3613 pic = h->short_ref[h->short_ref_count - 1];
3614 remove_short_at_index(h, h->short_ref_count - 1);
3616 unreference_pic(h, pic, 0);
3619 print_short_term(h);
/**
 * Parse the dec_ref_pic_marking() slice-header syntax into h->mmco[]:
 * IDR flags, adaptive MMCO operations, or (implicitly) the sliding-window
 * operation synthesized at the end.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including the mmco_count assignments, error returns and closing braces.
 */
3624 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3625 MpegEncContext * const s = &h->s;
3629 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* get_bits1()-1 maps flag {0,1} to {-1,0} */
3630 s->broken_link= get_bits1(gb) -1;
3631 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3632 if(h->mmco[0].long_arg == -1)
3635 h->mmco[0].opcode= MMCO_LONG;
3639 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3640 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3641 MMCOOpcode opcode= get_ue_golomb(gb);
3643 h->mmco[i].opcode= opcode;
3644 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, wrapped */
3645 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3646 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3647 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3651 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3652 unsigned int long_arg= get_ue_golomb(gb);
/* LONG2UNUSED on a field picture may address up to 32 field indices */
3653 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3654 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3657 h->mmco[i].long_arg= long_arg;
3660 if(opcode > (unsigned)MMCO_LONG){
3661 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3664 if(opcode == MMCO_END)
/* sliding window: implicitly unreference the oldest short-term ref
 * once the DPB reference count would exceed sps.ref_frame_count */
3669 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3671 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3672 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3673 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3674 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* field coding: unreference both fields of the oldest frame */
3676 if (FIELD_PICTURE) {
3677 h->mmco[0].short_pic_num *= 2;
3678 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3679 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) of the current picture for the
 * three POC types of H.264 §8.2.1 and store the per-field and frame POCs
 * on the current Picture.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including field_poc[0] assignments for type 0, parts of the type-1
 * derivation and the whole type-2 branch body.
 */
3690 static int init_poc(H264Context *h){
3691 MpegEncContext * const s = &h->s;
3692 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3694 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture: advance the offset */
3696 h->frame_num_offset= h->prev_frame_num_offset;
3697 if(h->frame_num < h->prev_frame_num)
3698 h->frame_num_offset += max_frame_num;
3700 if(h->sps.poc_type==0){
3701 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect poc_lsb wrap-around in either direction (spec 8-3) */
3703 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3704 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3705 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3706 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3708 h->poc_msb = h->prev_poc_msb;
3709 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3711 field_poc[1] = h->poc_msb + h->poc_lsb;
3712 if(s->picture_structure == PICT_FRAME)
3713 field_poc[1] += h->delta_poc_bottom;
3714 }else if(h->sps.poc_type==1){
3715 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3718 if(h->sps.poc_cycle_length != 0)
3719 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures are counted one earlier in the cycle */
3723 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3726 expected_delta_per_poc_cycle = 0;
3727 for(i=0; i < h->sps.poc_cycle_length; i++)
3728 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3730 if(abs_frame_num > 0){
3731 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3732 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3734 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3735 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3736 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3740 if(h->nal_ref_idc == 0)
3741 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3743 field_poc[0] = expectedpoc + h->delta_poc[0];
3744 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3746 if(s->picture_structure == PICT_FRAME)
3747 field_poc[1] += h->delta_poc[1];
/* poc_type==2: POC derived directly from frame_num */
3749 int poc= 2*(h->frame_num_offset + h->frame_num);
/* store only the field POC(s) actually coded by this picture */
3758 if(s->picture_structure != PICT_BOTTOM_FIELD)
3759 s->current_picture_ptr->field_poc[0]= field_poc[0];
3760 if(s->picture_structure != PICT_TOP_FIELD)
3761 s->current_picture_ptr->field_poc[1]= field_poc[1];
3762 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3769 * initialize scan tables
/**
 * Initialize the zigzag/field scan tables, permuting them when the IDCT
 * implementation in use expects a different coefficient order than the
 * C reference IDCT, and select the q0 (transform-bypass) variants.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * e.g. the else keywords, #undef T lines and closing braces.
 */
3771 static void init_scan_tables(H264Context *h){
3772 MpegEncContext * const s = &h->s;
/* C IDCT: coefficient order matches the reference tables directly */
3774 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3775 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3776 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
/* optimized IDCT: swap the 2-bit row/column halves of each 4x4 index */
3778 for(i=0; i<16; i++){
3779 #define T(x) (x>>2) | ((x<<2) & 0xF)
3780 h->zigzag_scan[i] = T(zigzag_scan[i]);
3781 h-> field_scan[i] = T( field_scan[i]);
3785 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3786 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3787 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3788 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3789 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
/* 8x8 variant: swap the 3-bit row/column halves of each index */
3791 for(i=0; i<64; i++){
3792 #define T(x) (x>>3) | ((x&7)<<3)
3793 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3794 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3795 h->field_scan8x8[i] = T(field_scan8x8[i]);
3796 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* transform bypass (qp 0) always uses the unpermuted reference order */
3800 if(h->sps.transform_bypass){ //FIXME same ugly
3801 h->zigzag_scan_q0 = zigzag_scan;
3802 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3803 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3804 h->field_scan_q0 = field_scan;
3805 h->field_scan8x8_q0 = field_scan8x8;
3806 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3808 h->zigzag_scan_q0 = h->zigzag_scan;
3809 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3810 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3811 h->field_scan_q0 = h->field_scan;
3812 h->field_scan8x8_q0 = h->field_scan8x8;
3813 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3818 * Replicates H264 "master" context to thread contexts.
/**
 * Copy the per-frame decoding state from the master H264 context to a
 * worker-thread context so slices can be decoded in parallel: current
 * picture pointers, strides, POC/frame_num prediction state, reference
 * lists and dequant tables. Shallow copies only — no ownership transfer.
 * NOTE(review): this excerpt omits some original lines (numbering gaps),
 * including the opening/closing braces.
 */
3820 static void clone_slice(H264Context *dst, H264Context *src)
3822 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3823 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3824 dst->s.current_picture = src->s.current_picture;
3825 dst->s.linesize = src->s.linesize;
3826 dst->s.uvlinesize = src->s.uvlinesize;
3827 dst->s.first_field = src->s.first_field;
/* POC / frame_num prediction state needed by init_poc on the worker */
3829 dst->prev_poc_msb = src->prev_poc_msb;
3830 dst->prev_poc_lsb = src->prev_poc_lsb;
3831 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3832 dst->prev_frame_num = src->prev_frame_num;
3833 dst->short_ref_count = src->short_ref_count;
/* pointer tables: entries still point at pictures owned by the master */
3835 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3836 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3837 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3838 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3840 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3841 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3845 * decodes a slice header.
3846 * This will also call MPV_common_init() and frame_start() as needed.
3848 * @param h h264context
3849 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3851 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3853 static int decode_slice_header(H264Context *h, H264Context *h0){
3854 MpegEncContext * const s = &h->s;
3855 MpegEncContext * const s0 = &h0->s;
3856 unsigned int first_mb_in_slice;
3857 unsigned int pps_id;
3858 int num_ref_idx_active_override_flag;
3859 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3860 unsigned int slice_type, tmp, i, j;
3861 int default_ref_list_done = 0;
3862 int last_pic_structure;
3864 s->dropable= h->nal_ref_idc == 0;
3866 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3867 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3868 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3870 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3871 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3874 first_mb_in_slice= get_ue_golomb(&s->gb);
3876 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3877 h0->current_slice = 0;
3878 if (!s0->first_field)
3879 s->current_picture_ptr= NULL;
3882 slice_type= get_ue_golomb(&s->gb);
3884 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3889 h->slice_type_fixed=1;
3891 h->slice_type_fixed=0;
3893 slice_type= slice_type_map[ slice_type ];
3894 if (slice_type == FF_I_TYPE
3895 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3896 default_ref_list_done = 1;
3898 h->slice_type= slice_type;
3899 h->slice_type_nos= slice_type & 3;
3901 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3902 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3903 av_log(h->s.avctx, AV_LOG_ERROR,
3904 "B picture before any references, skipping\n");
3908 pps_id= get_ue_golomb(&s->gb);
3909 if(pps_id>=MAX_PPS_COUNT){
3910 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3913 if(!h0->pps_buffers[pps_id]) {
3914 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3917 h->pps= *h0->pps_buffers[pps_id];
3919 if(!h0->sps_buffers[h->pps.sps_id]) {
3920 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3923 h->sps = *h0->sps_buffers[h->pps.sps_id];
3925 if(h == h0 && h->dequant_coeff_pps != pps_id){
3926 h->dequant_coeff_pps = pps_id;
3927 init_dequant_tables(h);
3930 s->mb_width= h->sps.mb_width;
3931 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3933 h->b_stride= s->mb_width*4;
3934 h->b8_stride= s->mb_width*2;
3936 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3937 if(h->sps.frame_mbs_only_flag)
3938 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3940 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3942 if (s->context_initialized
3943 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3945 return -1; // width / height changed during parallelized decoding
3949 if (!s->context_initialized) {
3951 return -1; // we cant (re-)initialize context during parallel decoding
3952 if (MPV_common_init(s) < 0)
3956 init_scan_tables(h);
3959 for(i = 1; i < s->avctx->thread_count; i++) {
3961 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3962 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3963 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3966 init_scan_tables(c);
3970 for(i = 0; i < s->avctx->thread_count; i++)
3971 if(context_init(h->thread_context[i]) < 0)
3974 s->avctx->width = s->width;
3975 s->avctx->height = s->height;
3976 s->avctx->sample_aspect_ratio= h->sps.sar;
3977 if(!s->avctx->sample_aspect_ratio.den)
3978 s->avctx->sample_aspect_ratio.den = 1;
3980 if(h->sps.timing_info_present_flag){
3981 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3982 if(h->x264_build > 0 && h->x264_build < 44)
3983 s->avctx->time_base.den *= 2;
3984 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3985 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3989 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3992 h->mb_aff_frame = 0;
3993 last_pic_structure = s0->picture_structure;
3994 if(h->sps.frame_mbs_only_flag){
3995 s->picture_structure= PICT_FRAME;
3997 if(get_bits1(&s->gb)) { //field_pic_flag
3998 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4000 s->picture_structure= PICT_FRAME;
4001 h->mb_aff_frame = h->sps.mb_aff;
4005 if(h0->current_slice == 0){
4006 while(h->frame_num != h->prev_frame_num &&
4007 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
4008 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
4010 h->prev_frame_num++;
4011 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
4012 s->current_picture_ptr->frame_num= h->prev_frame_num;
4013 execute_ref_pic_marking(h, NULL, 0);
4016 /* See if we have a decoded first field looking for a pair... */
4017 if (s0->first_field) {
4018 assert(s0->current_picture_ptr);
4019 assert(s0->current_picture_ptr->data[0]);
4020 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4022 /* figure out if we have a complementary field pair */
4023 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4025 * Previous field is unmatched. Don't display it, but let it
4026 * remain for reference if marked as such.
4028 s0->current_picture_ptr = NULL;
4029 s0->first_field = FIELD_PICTURE;
4032 if (h->nal_ref_idc &&
4033 s0->current_picture_ptr->reference &&
4034 s0->current_picture_ptr->frame_num != h->frame_num) {
4036 * This and previous field were reference, but had
4037 * different frame_nums. Consider this field first in
4038 * pair. Throw away previous field except for reference
4041 s0->first_field = 1;
4042 s0->current_picture_ptr = NULL;
4045 /* Second field in complementary pair */
4046 s0->first_field = 0;
4051 /* Frame or first field in a potentially complementary pair */
4052 assert(!s0->current_picture_ptr);
4053 s0->first_field = FIELD_PICTURE;
4056 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4057 s0->first_field = 0;
4064 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4066 assert(s->mb_num == s->mb_width * s->mb_height);
4067 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4068 first_mb_in_slice >= s->mb_num){
4069 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4072 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4073 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4074 if (s->picture_structure == PICT_BOTTOM_FIELD)
4075 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4076 assert(s->mb_y < s->mb_height);
4078 if(s->picture_structure==PICT_FRAME){
4079 h->curr_pic_num= h->frame_num;
4080 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4082 h->curr_pic_num= 2*h->frame_num + 1;
4083 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4086 if(h->nal_unit_type == NAL_IDR_SLICE){
4087 get_ue_golomb(&s->gb); /* idr_pic_id */
4090 if(h->sps.poc_type==0){
4091 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4093 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4094 h->delta_poc_bottom= get_se_golomb(&s->gb);
4098 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4099 h->delta_poc[0]= get_se_golomb(&s->gb);
4101 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4102 h->delta_poc[1]= get_se_golomb(&s->gb);
4107 if(h->pps.redundant_pic_cnt_present){
4108 h->redundant_pic_count= get_ue_golomb(&s->gb);
4111 //set defaults, might be overridden a few lines later
4112 h->ref_count[0]= h->pps.ref_count[0];
4113 h->ref_count[1]= h->pps.ref_count[1];
4115 if(h->slice_type_nos != FF_I_TYPE){
4116 if(h->slice_type_nos == FF_B_TYPE){
4117 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4119 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4121 if(num_ref_idx_active_override_flag){
4122 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4123 if(h->slice_type_nos==FF_B_TYPE)
4124 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4126 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4127 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4128 h->ref_count[0]= h->ref_count[1]= 1;
4132 if(h->slice_type_nos == FF_B_TYPE)
4139 if(!default_ref_list_done){
4140 fill_default_ref_list(h);
4143 if(decode_ref_pic_list_reordering(h) < 0)
4146 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4147 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4148 pred_weight_table(h);
4149 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4150 implicit_weight_table(h);
4155 decode_ref_pic_marking(h0, &s->gb);
4158 fill_mbaff_ref_list(h);
4160 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4161 tmp = get_ue_golomb(&s->gb);
4163 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4166 h->cabac_init_idc= tmp;
4169 h->last_qscale_diff = 0;
4170 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4172 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4176 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4177 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4178 //FIXME qscale / qp ... stuff
4179 if(h->slice_type == FF_SP_TYPE){
4180 get_bits1(&s->gb); /* sp_for_switch_flag */
4182 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4183 get_se_golomb(&s->gb); /* slice_qs_delta */
4186 h->deblocking_filter = 1;
4187 h->slice_alpha_c0_offset = 0;
4188 h->slice_beta_offset = 0;
4189 if( h->pps.deblocking_filter_parameters_present ) {
4190 tmp= get_ue_golomb(&s->gb);
4192 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4195 h->deblocking_filter= tmp;
4196 if(h->deblocking_filter < 2)
4197 h->deblocking_filter^= 1; // 1<->0
4199 if( h->deblocking_filter ) {
4200 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4201 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4205 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4206 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4207 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4208 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4209 h->deblocking_filter= 0;
4211 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4212 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4213 /* Cheat slightly for speed:
4214 Do not bother to deblock across slices. */
4215 h->deblocking_filter = 2;
4217 h0->max_contexts = 1;
4218 if(!h0->single_decode_warning) {
4219 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4220 h0->single_decode_warning = 1;
4223 return 1; // deblocking switched inside frame
4228 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4229 slice_group_change_cycle= get_bits(&s->gb, ?);
4232 h0->last_slice_type = slice_type;
4233 h->slice_num = ++h0->current_slice;
4236 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4240 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4241 +(h->ref_list[j][i].reference&3);
4244 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4245 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4247 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4248 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4250 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4252 av_get_pict_type_char(h->slice_type),
4253 pps_id, h->frame_num,
4254 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4255 h->ref_count[0], h->ref_count[1],
4257 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4259 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4260 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the unary level_prefix element of a CAVLC coefficient level:
 * the number of zero bits preceding the first set bit in the stream.
 * NOTE(review): the return statement is elided in this excerpt; from the
 * debug trace it presumably returns log-1 — confirm against full source.
 */
4270 static inline int get_level_prefix(GetBitContext *gb){
4274 OPEN_READER(re, gb);
4275 UPDATE_CACHE(re, gb);
4276 buf=GET_CACHE(re, gb);
// av_log2 of the cache gives the position of the leading one bit;
// log = number of bits consumed up to and including that bit.
4278 log= 32 - av_log2(buf);
4280 print_bin(buf>>(32-log), log);
4281 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix bits and write the reader state back.
4284 LAST_SKIP_BITS(re, gb, log);
4285 CLOSE_READER(re, gb);
/**
 * Decides whether the 8x8 transform may be used for the current MB by
 * inspecting h->sub_mb_type[]: a sub-partition smaller than 8x8, or a
 * direct sub-partition without direct_8x8_inference, forbids it.
 * (Loop header and return are elided in this excerpt.)
 */
4290 static inline int get_dct8x8_allowed(H264Context *h){
4293 if(!IS_SUB_8X8(h->sub_mb_type[i])
4294 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4301 * decodes a residual block.
4302 * @param n block index
4303 * @param scantable scantable
4304 * @param max_coeff number of coefficients in the block
4305 * @return <0 if an error occurred
// CAVLC residual decoding (H.264 spec section 9.2): coeff_token selects
// (total_coeff, trailing_ones); then levels, total_zeros and run_before
// are parsed and the coefficients are placed via the scan table.
4307 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4308 MpegEncContext * const s = &h->s;
// Maps the predicted non-zero count (nC) to one of the 4 coeff_token VLC tables.
4309 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4311 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4313 //FIXME put trailing_onex into the context
// Chroma DC uses its own VLC; luma DC and AC blocks pick a table from the
// predicted non-zero count of neighbouring blocks.
4315 if(n == CHROMA_DC_BLOCK_INDEX){
4316 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4317 total_coeff= coeff_token>>2;
4319 if(n == LUMA_DC_BLOCK_INDEX){
4320 total_coeff= pred_non_zero_count(h, 0);
4321 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4322 total_coeff= coeff_token>>2;
4324 total_coeff= pred_non_zero_count(h, n);
4325 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4326 total_coeff= coeff_token>>2;
4327 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4331 //FIXME set last_non_zero?
// Reject obviously corrupt streams before the level loop.
4335 if(total_coeff > (unsigned)max_coeff) {
4336 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// Low two bits of coeff_token carry the trailing-ones count; each trailing
// one is stored as +/-1 with a single sign bit from the stream.
4340 trailing_ones= coeff_token&3;
4341 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4342 assert(total_coeff<=16);
4344 for(i=0; i<trailing_ones; i++){
4345 level[i]= 1 - 2*get_bits1(gb);
// Decode the first non-trailing level; its suffix length is 0 or 1
// depending on total_coeff/trailing_ones.
4349 int level_code, mask;
4350 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4351 int prefix= get_level_prefix(gb);
4353 //first coefficient has suffix_length equal to 0 or 1
4354 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4356 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4358 level_code= (prefix<<suffix_length); //part
4359 }else if(prefix==14){
4361 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4363 level_code= prefix + get_bits(gb, 4); //part
4365 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4366 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4368 level_code += (1<<(prefix-3))-4096;
4371 if(trailing_ones < 3) level_code += 2;
// Convert level_code to a signed level: even codes are positive, odd negative.
4376 mask= -(level_code&1);
4377 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4380 //remaining coefficients have suffix_length > 0
4381 for(;i<total_coeff;i++) {
// suffix_limit[k]: once |level| exceeds this, suffix_length grows (elided here).
4382 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4383 prefix = get_level_prefix(gb);
4385 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4387 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4389 level_code += (1<<(prefix-3))-4096;
4391 mask= -(level_code&1);
4392 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4393 if(level_code > suffix_limit[suffix_length])
// total_zeros: how many zero coefficients precede the last non-zero one.
// When the block is full (total_coeff == max_coeff) there are none to read.
4398 if(total_coeff == max_coeff)
4401 if(n == CHROMA_DC_BLOCK_INDEX)
4402 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4404 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Placement: two nearly identical loops — the first (qmul==NULL path,
// presumably; confirm against full source) stores raw levels, the second
// applies dequantisation (level * qmul[j] + 32) >> 6.
4407 coeff_num = zeros_left + total_coeff - 1;
4408 j = scantable[coeff_num];
4410 block[j] = level[0];
4411 for(i=1;i<total_coeff;i++) {
4414 else if(zeros_left < 7){
4415 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4417 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4419 zeros_left -= run_before;
4420 coeff_num -= 1 + run_before;
4421 j= scantable[ coeff_num ];
4426 block[j] = (level[0] * qmul[j] + 32)>>6;
4427 for(i=1;i<total_coeff;i++) {
4430 else if(zeros_left < 7){
4431 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4433 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4435 zeros_left -= run_before;
4436 coeff_num -= 1 + run_before;
4437 j= scantable[ coeff_num ];
4439 block[j]= (level[i] * qmul[j] + 32)>>6;
// Sanity check on the run accounting (error path).
4444 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair from
 * a neighbour: prefer the left MB if it belongs to this slice, otherwise
 * the MB above; the flag is set iff that neighbour is interlaced.
 */
4451 static void predict_field_decoding_flag(H264Context *h){
4452 MpegEncContext * const s = &h->s;
4453 const int mb_xy= h->mb_xy;
4454 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4455 ? s->current_picture.mb_type[mb_xy-1]
4456 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4457 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4459 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4463  * decodes a P_SKIP or B_SKIP macroblock
4465 static void decode_mb_skip(H264Context *h){
4466 MpegEncContext * const s = &h->s;
4467 const int mb_xy= h->mb_xy;
// A skipped MB carries no residual: clear the non-zero-count bookkeeping.
4470 memset(h->non_zero_count[mb_xy], 0, 16);
4471 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4474 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion comes from direct-mode prediction.
4476 if( h->slice_type_nos == FF_B_TYPE )
4478 // just for fill_caches. pred_direct_motion will set the real mb_type
4479 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4481 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4482 pred_direct_motion(h, &mb_type);
4483 mb_type|= MB_TYPE_SKIP;
// P_SKIP: a single 16x16 partition with ref 0 and the P-skip predicted MV.
4488 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4490 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4491 pred_pskip_motion(h, &mx, &my);
4492 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4493 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit motion and per-MB state for later deblocking / neighbour prediction.
4496 write_back_motion(h, mb_type);
4497 s->current_picture.mb_type[mb_xy]= mb_type;
4498 s->current_picture.qscale_table[mb_xy]= s->qscale;
4499 h->slice_table[ mb_xy ]= h->slice_num;
4500 h->prev_mb_skipped= 1;
4504  * decodes a macroblock
4505 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Full CAVLC macroblock decode: skip handling, mb_type, intra prediction
// modes or inter motion, CBP, delta-QP and residual blocks.
4507 static int decode_mb_cavlc(H264Context *h){
4508 MpegEncContext * const s = &h->s;
4510 int partition_count;
4511 unsigned int mb_type, cbp;
4512 int dct8x8_allowed= h->pps.transform_8x8_mode;
4514 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4516 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4518 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4519 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run handling (P/B slices only) ---
4521 if(h->slice_type_nos != FF_I_TYPE){
4522 if(s->mb_skip_run==-1)
4523 s->mb_skip_run= get_ue_golomb(&s->gb);
4525 if (s->mb_skip_run--) {
// In MBAFF frames the field flag for a skipped top MB must be read or predicted.
4526 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4527 if(s->mb_skip_run==0)
4528 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4530 predict_field_decoding_flag(h);
4537 if( (s->mb_y&1) == 0 )
4538 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4540 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4542 h->prev_mb_skipped= 0;
// --- mb_type: map the ue(v) code through the per-slice-type tables ---
4544 mb_type= get_ue_golomb(&s->gb);
4545 if(h->slice_type_nos == FF_B_TYPE){
4547 partition_count= b_mb_type_info[mb_type].partition_count;
4548 mb_type= b_mb_type_info[mb_type].type;
4551 goto decode_intra_mb;
4553 }else if(h->slice_type_nos == FF_P_TYPE){
4555 partition_count= p_mb_type_info[mb_type].partition_count;
4556 mb_type= p_mb_type_info[mb_type].type;
4559 goto decode_intra_mb;
4562 assert(h->slice_type_nos == FF_I_TYPE);
4563 if(h->slice_type == FF_SI_TYPE && mb_type)
4567 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4571 cbp= i_mb_type_info[mb_type].cbp;
4572 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4573 mb_type= i_mb_type_info[mb_type].type;
4577 mb_type |= MB_TYPE_INTERLACED;
4579 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw byte-aligned samples, no prediction or transform ---
4581 if(IS_INTRA_PCM(mb_type)){
4584 // We assume these blocks are very rare so we do not optimize it.
4585 align_get_bits(&s->gb);
4587 // The pixels are stored in the same order as levels in h->mb array.
4588 for(y=0; y<16; y++){
4589 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4590 for(x=0; x<16; x++){
4591 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4592 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4596 const int index= 256 + 4*(y&3) + 32*(y>>2);
4598 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4599 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4603 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4605 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4606 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4610 // In deblocking, the quantizer is 0
4611 s->current_picture.qscale_table[mb_xy]= 0;
4612 // All coeffs are present
4613 memset(h->non_zero_count[mb_xy], 16, 16);
4615 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs address references per field: double the ref counts
// here and halve them again at the end of the function.
4620 h->ref_count[0] <<= 1;
4621 h->ref_count[1] <<= 1;
4624 fill_caches(h, mb_type, 0);
// --- Intra: read prediction modes ---
4627 if(IS_INTRA(mb_type)){
4629 // init_top_left_availability(h);
4630 if(IS_INTRA4x4(mb_type)){
4633 if(dct8x8_allowed && get_bits1(&s->gb)){
4634 mb_type |= MB_TYPE_8x8DCT;
4638 // fill_intra4x4_pred_table(h);
4639 for(i=0; i<16; i+=di){
4640 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag == 0: read rem mode, skipping the predicted one.
4642 if(!get_bits1(&s->gb)){
4643 const int rem_mode= get_bits(&s->gb, 3);
4644 mode = rem_mode + (rem_mode >= mode);
4648 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4650 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4652 write_back_intra_pred_mode(h);
4653 if( check_intra4x4_pred_mode(h) < 0)
4656 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4657 if(h->intra16x16_pred_mode < 0)
4661 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4664 h->chroma_pred_mode= pred_mode;
// --- Inter, 8x8 partitions: sub_mb_types, refs, then one MV per sub-partition ---
4665 }else if(partition_count==4){
4666 int i, j, sub_partition_count[4], list, ref[2][4];
4668 if(h->slice_type_nos == FF_B_TYPE){
4670 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4671 if(h->sub_mb_type[i] >=13){
4672 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4675 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4676 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4678 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4679 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4680 pred_direct_motion(h, &mb_type);
4681 h->ref_cache[0][scan8[4]] =
4682 h->ref_cache[1][scan8[4]] =
4683 h->ref_cache[0][scan8[12]] =
4684 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4687 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4689 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4690 if(h->sub_mb_type[i] >=4){
4691 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4694 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4695 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list per 8x8 block (te(v)-coded, bounded by ref_count).
4699 for(list=0; list<h->list_count; list++){
4700 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4702 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4703 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4704 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4706 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4718 dct8x8_allowed = get_dct8x8_allowed(h);
4720 for(list=0; list<h->list_count; list++){
4722 if(IS_DIRECT(h->sub_mb_type[i])) {
4723 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4726 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4727 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4729 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4730 const int sub_mb_type= h->sub_mb_type[i];
4731 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4732 for(j=0; j<sub_partition_count[i]; j++){
4734 const int index= 4*i + block_width*j;
4735 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4736 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4737 mx += get_se_golomb(&s->gb);
4738 my += get_se_golomb(&s->gb);
4739 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV into all 4x4 cache cells the sub-partition covers.
4741 if(IS_SUB_8X8(sub_mb_type)){
4743 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4745 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4746 }else if(IS_SUB_8X4(sub_mb_type)){
4747 mv_cache[ 1 ][0]= mx;
4748 mv_cache[ 1 ][1]= my;
4749 }else if(IS_SUB_4X8(sub_mb_type)){
4750 mv_cache[ 8 ][0]= mx;
4751 mv_cache[ 8 ][1]= my;
4753 mv_cache[ 0 ][0]= mx;
4754 mv_cache[ 0 ][1]= my;
4757 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- Inter, direct 16x16 ---
4763 }else if(IS_DIRECT(mb_type)){
4764 pred_direct_motion(h, &mb_type);
4765 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- Inter, 16x16 / 16x8 / 8x16: refs then MVs per partition ---
4767 int list, mx, my, i;
4768 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4769 if(IS_16X16(mb_type)){
4770 for(list=0; list<h->list_count; list++){
4772 if(IS_DIR(mb_type, 0, list)){
4773 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4774 if(val >= h->ref_count[list]){
4775 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4779 val= LIST_NOT_USED&0xFF;
4780 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4782 for(list=0; list<h->list_count; list++){
4784 if(IS_DIR(mb_type, 0, list)){
4785 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4786 mx += get_se_golomb(&s->gb);
4787 my += get_se_golomb(&s->gb);
4788 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4790 val= pack16to32(mx,my);
4793 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4796 else if(IS_16X8(mb_type)){
4797 for(list=0; list<h->list_count; list++){
4800 if(IS_DIR(mb_type, i, list)){
4801 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4802 if(val >= h->ref_count[list]){
4803 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4807 val= LIST_NOT_USED&0xFF;
4808 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4811 for(list=0; list<h->list_count; list++){
4814 if(IS_DIR(mb_type, i, list)){
4815 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4816 mx += get_se_golomb(&s->gb);
4817 my += get_se_golomb(&s->gb);
4818 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4820 val= pack16to32(mx,my);
4823 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4827 assert(IS_8X16(mb_type));
4828 for(list=0; list<h->list_count; list++){
4831 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4832 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4833 if(val >= h->ref_count[list]){
4834 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4838 val= LIST_NOT_USED&0xFF;
4839 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4842 for(list=0; list<h->list_count; list++){
4845 if(IS_DIR(mb_type, i, list)){
4846 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4847 mx += get_se_golomb(&s->gb);
4848 my += get_se_golomb(&s->gb);
4849 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4851 val= pack16to32(mx,my);
4854 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4860 if(IS_INTER(mb_type))
4861 write_back_motion(h, mb_type);
// --- Coded block pattern (except I16x16, whose CBP comes from mb_type) ---
4863 if(!IS_INTRA16x16(mb_type)){
4864 cbp= get_ue_golomb(&s->gb);
4866 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4870 if(IS_INTRA4x4(mb_type))
4871 cbp= golomb_to_intra4x4_cbp[cbp];
4873 cbp= golomb_to_inter_cbp[cbp];
4877 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4878 if(get_bits1(&s->gb)){
4879 mb_type |= MB_TYPE_8x8DCT;
4880 h->cbp_table[mb_xy]= cbp;
4883 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residuals: pick scan tables, read delta-QP, then luma and chroma blocks ---
4885 if(cbp || IS_INTRA16x16(mb_type)){
4886 int i8x8, i4x4, chroma_idx;
4888 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4889 const uint8_t *scan, *scan8x8, *dc_scan;
4891 // fill_non_zero_count_cache(h);
4893 if(IS_INTERLACED(mb_type)){
4894 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4895 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4896 dc_scan= luma_dc_field_scan;
4898 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4899 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4900 dc_scan= luma_dc_zigzag_scan;
4903 dquant= get_se_golomb(&s->gb);
4905 if( dquant > 25 || dquant < -26 ){
4906 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4910 s->qscale += dquant;
4911 if(((unsigned)s->qscale) > 51){
4912 if(s->qscale<0) s->qscale+= 52;
4913 else s->qscale-= 52;
4916 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4917 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4918 if(IS_INTRA16x16(mb_type)){
4919 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4920 return -1; //FIXME continue if partitioned and other return -1 too
4923 assert((cbp&15) == 0 || (cbp&15) == 15);
4926 for(i8x8=0; i8x8<4; i8x8++){
4927 for(i4x4=0; i4x4<4; i4x4++){
4928 const int index= i4x4 + 4*i8x8;
// I16x16 AC blocks: 15 coefficients, DC handled separately above.
4929 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4935 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4938 for(i8x8=0; i8x8<4; i8x8++){
4939 if(cbp & (1<<i8x8)){
4940 if(IS_8x8DCT(mb_type)){
4941 DCTELEM *buf = &h->mb[64*i8x8];
4943 for(i4x4=0; i4x4<4; i4x4++){
4944 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4945 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4948 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4949 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4951 for(i4x4=0; i4x4<4; i4x4++){
4952 const int index= i4x4 + 4*i8x8;
4954 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4960 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4961 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (cbp bit 4+) then chroma AC (cbp bit 5).
4967 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4968 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4974 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4975 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4976 for(i4x4=0; i4x4<4; i4x4++){
4977 const int index= 16 + 4*chroma_idx + i4x4;
4978 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4984 uint8_t * const nnz= &h->non_zero_count_cache[0];
4985 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4986 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4989 uint8_t * const nnz= &h->non_zero_count_cache[0];
4990 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4991 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4992 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4994 s->current_picture.qscale_table[mb_xy]= s->qscale;
4995 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling done earlier.
4998 h->ref_count[0] >>= 1;
4999 h->ref_count[1] >>= 1;
/**
 * CABAC-decodes mb_field_decoding_flag for an MBAFF pair. The context
 * (0..2) counts how many of the left / above same-slice neighbours are
 * interlaced; states 70..72 are used.
 */
5005 static int decode_cabac_field_decoding_flag(H264Context *h) {
5006 MpegEncContext * const s = &h->s;
5007 const int mb_x = s->mb_x;
// Address the top MB of the pair (even row).
5008 const int mb_y = s->mb_y & ~1;
5009 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5010 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5012 unsigned int ctx = 0;
5014 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5017 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5021 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * CABAC-decodes an intra mb_type starting at cabac_state[ctx_base].
 * Returns 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I_16x16
 * variant (cbp_luma, cbp_chroma and prediction mode folded in).
 * In intra slices the first-bin context depends on the neighbours'
 * intra-4x4-ness; otherwise a fixed context is used.
 */
5024 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5025 uint8_t *state= &h->cabac_state[ctx_base];
5029 MpegEncContext * const s = &h->s;
5030 const int mba_xy = h->left_mb_xy[0];
5031 const int mbb_xy = h->top_mb_xy;
5033 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5035 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5037 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5038 return 0; /* I4x4 */
5041 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5042 return 0; /* I4x4 */
// Terminate bin distinguishes I_PCM from I_16x16.
5045 if( get_cabac_terminate( &h->cabac ) )
5046 return 25; /* PCM */
5048 mb_type = 1; /* I16x16 */
5049 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5050 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5051 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5052 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5053 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * CABAC-decodes mb_type for the current slice type: dispatches to the
 * intra decoder for I slices, reads the P tree (states 14..17) for P
 * slices, and the B tree (states 27+ctx..27+5) for B slices.
 */
5057 static int decode_cabac_mb_type( H264Context *h ) {
5058 MpegEncContext * const s = &h->s;
5060 if( h->slice_type_nos == FF_I_TYPE ) {
5061 return decode_cabac_intra_mb_type(h, 3, 1);
5062 } else if( h->slice_type_nos == FF_P_TYPE ) {
5063 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5065 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5066 /* P_L0_D16x16, P_8x8 */
5067 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5069 /* P_L0_D8x16, P_L0_D16x8 */
5070 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// Intra MB inside a P slice: intra types start at offset 5.
5073 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5075 } else if( h->slice_type_nos == FF_B_TYPE ) {
5076 const int mba_xy = h->left_mb_xy[0];
5077 const int mbb_xy = h->top_mb_xy;
// Context for the first bin counts non-direct same-slice neighbours.
5081 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5083 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5086 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5087 return 0; /* B_Direct_16x16 */
5089 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5090 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5093 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5094 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5095 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5096 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5098 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5099 else if( bits == 13 ) {
// Intra MB inside a B slice: intra types start at offset 23.
5100 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5101 } else if( bits == 14 )
5102 return 11; /* B_L1_L0_8x16 */
5103 else if( bits == 15 )
5104 return 22; /* B_8x8 */
5106 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5107 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5109 /* TODO SI/SP frames? */
/**
 * CABAC-decodes mb_skip_flag. The context (states 11..13 for P, 24+ for B
 * via the +13 offset) counts non-skipped same-slice left/above neighbours;
 * in MBAFF frames the neighbour addresses are adjusted for field pairing.
 */
5114 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5115 MpegEncContext * const s = &h->s;
5119 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5120 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// If the left pair's field/frame mode mismatches ours, use its bottom MB.
5123 && h->slice_table[mba_xy] == h->slice_num
5124 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
5125 mba_xy += s->mb_stride;
5127 mbb_xy = mb_xy - s->mb_stride;
5129 && h->slice_table[mbb_xy] == h->slice_num
5130 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5131 mbb_xy -= s->mb_stride;
5133 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5135 int mb_xy = h->mb_xy;
5137 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5140 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5142 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5145 if( h->slice_type_nos == FF_B_TYPE )
5147 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * CABAC-decodes an intra 4x4 prediction mode. State 68 is the
 * "use predicted mode" flag; otherwise 3 bins (state 69) form rem_mode,
 * skipping the predicted mode's value.
 */
5150 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5153 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5156 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5157 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5158 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5160 if( mode >= pred_mode )
/**
 * CABAC-decodes intra_chroma_pred_mode (states 64..67): a context-coded
 * first bin (neighbours with non-zero chroma mode) followed by up to two
 * truncated-unary bins on state 64+3.
 */
5166 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5167 const int mba_xy = h->left_mb_xy[0];
5168 const int mbb_xy = h->top_mb_xy;
5172 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5173 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5176 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5179 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5182 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5184 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * CABAC-decodes the 4-bit luma CBP (one bin per 8x8 block, states 73..76).
 * Each bin's context is derived from the corresponding bits of the left
 * and top neighbours' CBPs (or of the bits already decoded in this MB).
 */
5190 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5191 int cbp_b, cbp_a, ctx, cbp = 0;
// -1 (all bits set) when the neighbour is outside this slice.
5193 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5194 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5196 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5197 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5198 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5199 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5200 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5201 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5202 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5203 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * CABAC-decodes the chroma CBP (0 = none, 1 = DC only, 2 = DC+AC) using
 * states 77+ctx, with contexts taken from the neighbours' chroma CBP bits.
 */
5206 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5210 cbp_a = (h->left_cbp>>4)&0x03;
5211 cbp_b = (h-> top_cbp>>4)&0x03;
5214 if( cbp_a > 0 ) ctx++;
5215 if( cbp_b > 0 ) ctx += 2;
5216 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: DC-only vs DC+AC, context from neighbours having AC.
5220 if( cbp_a == 2 ) ctx++;
5221 if( cbp_b == 2 ) ctx += 2;
5222 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * CABAC-decodes mb_qp_delta (states 60..63): unary-coded magnitude whose
 * first-bin context depends on whether the previous MB changed QP; the
 * value alternates sign by parity. Capped to avoid an unbounded loop on
 * corrupt input.
 */
5224 static int decode_cabac_mb_dqp( H264Context *h) {
5228 if( h->last_qscale_diff != 0 )
5231 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5237 if(val > 102) //prevent infinite loop
// Odd val maps to a negative delta (elided branch presumably returns +val/2).
5244 return -(val + 1)/2;
/**
 * CABAC-decodes a P-slice sub_mb_type (states 21..23); returns are in the
 * elided lines of this excerpt.
 */
5246 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5247 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5249 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5251 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * CABAC-decodes a B-slice sub_mb_type (states 36..39): 0 = B_Direct_8x8,
 * 1..2 = B_L0/L1_8x8, larger values built from further bins on state 39.
 */
5255 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5257 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5258 return 0; /* B_Direct_8x8 */
5259 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5260 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5262 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5263 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5264 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5267 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5268 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; context (399..401) is the number of
 * neighbouring MBs already using the 8x8 transform. */
5272 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5273 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for block n of the given list: unary code over ctx
 * states 54+, with the first-bin context derived from the left/top
 * neighbours' reference indices.  Returns 0 on overflow (see FIXME). */
5276 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5277 int refa = h->ref_cache[list][scan8[n] - 1];
5278 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices, direct-predicted neighbours do not count toward the context */
5282 if( h->slice_type_nos == FF_B_TYPE) {
5283 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5285 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5294 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity bound on the unary code; real ref counts are far below 32 */
5300 if(ref >= 32 /*h->ref_list[list]*/){
5301 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5302 return 0; //FIXME we should return -1 and check the return everywhere
/* Decode one motion-vector-difference component (l==0: x, ctx base 40;
 * l==1: y, ctx base 47).  First-bin context depends on the sum of the
 * neighbours' |mvd|; magnitude is unary up to 9, then Exp-Golomb bypass,
 * and the sign is a bypass bin. */
5308 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5309 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5310 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5311 int ctxbase = (l == 0) ? 40 : 47;
5316 else if( amvd > 32 )
5321 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix: at most 9 context-coded bins */
5326 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* Exp-Golomb suffix in bypass mode for large magnitudes */
5334 while( get_cabac_bypass( &h->cabac ) ) {
5338 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5343 if( get_cabac_bypass( &h->cabac ) )
/* final bypass bin selects the sign */
5347 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Compute the context increment for the coded_block_flag of block idx in
 * category cat: ctx = f(left nz, top nz) + 4*cat.  DC categories read the
 * neighbours' cached cbp bits; AC/4x4 categories read non_zero_count_cache. */
5350 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* luma DC: bit 8 of the neighbour cbp caches */
5356 nza = h->left_cbp&0x100;
5357 nzb = h-> top_cbp&0x100;
/* chroma DC: per-component bits 6..7 of the neighbour cbp caches */
5359 nza = (h->left_cbp>>(6+idx))&0x01;
5360 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC: neighbours via the scan8 cache layout (left = -1, top = -8) */
5364 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5365 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5367 assert(cat == 1 || cat == 2);
5368 nza = h->non_zero_count_cache[scan8[idx] - 1];
5369 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5379 return ctx + 4 * cat;
/* Context offsets for the last_significant_coeff_flag of an 8x8 block,
 * indexed by scan position (63 entries; the 64th coeff needs no flag).
 * Byte-aligned (align 1) because it is read bytewise, including from the
 * x86 asm significance decoder. */
5382 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5383 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5384 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5385 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5386 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one residual block with CABAC: coded_block_flag, significance map,
 * then levels/signs in reverse scan order, with optional dequantization via
 * qmul.  'cat' selects the block category (see table below); is_dc is a
 * compile-time flag so the DC/AC paths specialize.  The CABAC state is
 * optionally copied to the stack (CABAC_ON_STACK) to help register
 * allocation in this hot loop. */
5389 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5390 static const int significant_coeff_flag_offset[2][6] = {
5391 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5392 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5394 static const int last_coeff_flag_offset[2][6] = {
5395 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5396 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5398 static const int coeff_abs_level_m1_offset[6] = {
5399 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position significance contexts for 8x8 blocks; [0] frame, [1] field */
5401 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5402 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5403 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5404 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5405 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5406 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5407 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5408 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5409 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5411 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5412 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5413 * map node ctx => cabac ctx for level=1 */
5414 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5415 /* map node ctx => cabac ctx for level>1 */
5416 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5417 static const uint8_t coeff_abs_level_transition[2][8] = {
5418 /* update node ctx after decoding a level=1 */
5419 { 1, 2, 3, 3, 4, 5, 6, 7 },
5420 /* update node ctx after decoding a level>1 */
5421 { 4, 4, 4, 4, 5, 6, 7, 7 }
5427 int coeff_count = 0;
5430 uint8_t *significant_coeff_ctx_base;
5431 uint8_t *last_coeff_ctx_base;
5432 uint8_t *abs_level_m1_ctx_base;
5435 #define CABAC_ON_STACK
5437 #ifdef CABAC_ON_STACK
/* work on a local copy of the CABAC context for this hot loop */
5440 cc.range = h->cabac.range;
5441 cc.low = h->cabac.low;
5442 cc.bytestream= h->cabac.bytestream;
5444 #define CC &h->cabac
5448 /* cat: 0-> DC 16x16 n = 0
5449 * 1-> AC 16x16 n = luma4x4idx
5450 * 2-> Luma4x4 n = luma4x4idx
5451 * 3-> DC Chroma n = iCbCr
5452 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5453 * 5-> Luma8x8 n = 4 * luma8x8idx
5456 /* read coded block flag */
5457 if( is_dc || cat != 5 ) {
5458 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* no coefficients: record zero nnz and bail out early */
5461 h->non_zero_count_cache[scan8[16+n]] = 0;
5463 h->non_zero_count_cache[scan8[n]] = 0;
5466 #ifdef CABAC_ON_STACK
/* write the local CABAC copy back before returning */
5467 h->cabac.range = cc.range ;
5468 h->cabac.low = cc.low ;
5469 h->cabac.bytestream= cc.bytestream;
5475 significant_coeff_ctx_base = h->cabac_state
5476 + significant_coeff_flag_offset[MB_FIELD][cat];
5477 last_coeff_ctx_base = h->cabac_state
5478 + last_coeff_flag_offset[MB_FIELD][cat];
5479 abs_level_m1_ctx_base = h->cabac_state
5480 + coeff_abs_level_m1_offset[cat];
5482 if( !is_dc && cat == 5 ) {
5483 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5484 for(last= 0; last < coefs; last++) { \
5485 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5486 if( get_cabac( CC, sig_ctx )) { \
5487 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5488 index[coeff_count++] = last; \
5489 if( get_cabac( CC, last_ctx ) ) { \
5495 if( last == max_coeff -1 ) {\
5496 index[coeff_count++] = last;\
5498 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5499 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
/* hand-written x86 significance-map decoders when available */
5500 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5502 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5504 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5506 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5509 assert(coeff_count > 0);
/* record nonzero status in cbp_table / non_zero_count_cache per category */
5513 h->cbp_table[h->mb_xy] |= 0x100;
5515 h->cbp_table[h->mb_xy] |= 0x40 << n;
5518 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5520 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5522 assert( cat == 1 || cat == 2 );
5523 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode levels in reverse scan order; node_ctx tracks the level context FSM */
5527 while( coeff_count-- ) {
5528 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5530 int j= scantable[index[coeff_count]];
5532 if( get_cabac( CC, ctx ) == 0 ) {
5533 node_ctx = coeff_abs_level_transition[0][node_ctx];
5535 block[j] = get_cabac_bypass_sign( CC, -1);
/* dequantize: qmul is Q6 fixed point, hence +32 and >>6 */
5537 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5541 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5542 node_ctx = coeff_abs_level_transition[1][node_ctx];
5544 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5548 if( coeff_abs >= 15 ) {
/* Exp-Golomb bypass suffix for magnitudes >= 15 */
5550 while( get_cabac_bypass( CC ) ) {
5556 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5562 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5564 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5568 #ifdef CABAC_ON_STACK
5569 h->cabac.range = cc.range ;
5570 h->cabac.low = cc.low ;
5571 h->cabac.bytestream= cc.bytestream;
/* Non-CONFIG_SMALL builds instantiate two specializations of
 * decode_cabac_residual_internal (is_dc constant-folded) and dispatch;
 * CONFIG_SMALL builds use a single shared instantiation. */
5576 #ifndef CONFIG_SMALL
/* DC specialization: cat 0 (luma DC) or 3 (chroma DC) */
5577 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5578 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* AC / 4x4 / 8x8 specialization */
5581 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5582 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5586 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5588 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5590 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5591 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Compute h->top_mb_xy / h->left_mb_xy[0] for the current macroblock.
 * The simple frame-coded case is mb_xy - stride / mb_xy - 1; MBAFF and
 * field pictures adjust for macroblock pairs with mixed frame/field
 * coding (the visible code below is the MBAFF correction path). */
5595 static inline void compute_mb_neighbors(H264Context *h)
5597 MpegEncContext * const s = &h->s;
5598 const int mb_xy = h->mb_xy;
5599 h->top_mb_xy = mb_xy - s->mb_stride;
5600 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are located per macroblock *pair* */
5602 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5603 const int top_pair_xy = pair_xy - s->mb_stride;
5604 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5605 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5606 const int curr_mb_frame_flag = !MB_FIELD;
5607 const int bottom = (s->mb_y & 1);
5609 ? !curr_mb_frame_flag // bottom macroblock
5610 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5612 h->top_mb_xy -= s->mb_stride;
5614 if (left_mb_frame_flag != curr_mb_frame_flag) {
5615 h->left_mb_xy[0] = pair_xy - 1;
5617 } else if (FIELD_PICTURE) {
5618 h->top_mb_xy -= s->mb_stride;
5624 * decodes a macroblock
5625 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5627 static int decode_mb_cabac(H264Context *h) {
5628 MpegEncContext * const s = &h->s;
5630 int mb_type, partition_count, cbp = 0;
5631 int dct8x8_allowed= h->pps.transform_8x8_mode;
5633 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5635 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5637 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip handling (P/B slices only; I slices have no mb_skip_flag) --- */
5638 if( h->slice_type_nos != FF_I_TYPE ) {
5640 /* a skipped mb needs the aff flag from the following mb */
5641 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5642 predict_field_decoding_flag(h);
5643 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5644 skip = h->next_mb_skipped;
5646 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5647 /* read skip flags */
5649 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5650 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5651 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5652 if(h->next_mb_skipped)
5653 predict_field_decoding_flag(h);
5655 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset per-MB CABAC-related state and return */
5660 h->cbp_table[mb_xy] = 0;
5661 h->chroma_pred_mode_table[mb_xy] = 0;
5662 h->last_qscale_diff = 0;
5669 if( (s->mb_y&1) == 0 )
5671 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5673 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5675 h->prev_mb_skipped = 0;
/* --- macroblock type --- */
5677 compute_mb_neighbors(h);
5678 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5679 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5683 if( h->slice_type_nos == FF_B_TYPE ) {
5685 partition_count= b_mb_type_info[mb_type].partition_count;
5686 mb_type= b_mb_type_info[mb_type].type;
5689 goto decode_intra_mb;
5691 } else if( h->slice_type_nos == FF_P_TYPE ) {
5693 partition_count= p_mb_type_info[mb_type].partition_count;
5694 mb_type= p_mb_type_info[mb_type].type;
5697 goto decode_intra_mb;
5700 if(h->slice_type == FF_SI_TYPE && mb_type)
5702 assert(h->slice_type_nos == FF_I_TYPE);
5704 partition_count = 0;
5705 cbp= i_mb_type_info[mb_type].cbp;
5706 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5707 mb_type= i_mb_type_info[mb_type].type;
5710 mb_type |= MB_TYPE_INTERLACED;
5712 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow, bypassing CABAC entirely --- */
5714 if(IS_INTRA_PCM(mb_type)) {
5718 // We assume these blocks are very rare so we do not optimize it.
5719 // FIXME The two following lines get the bitstream position in the cabac
5720 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5721 ptr= h->cabac.bytestream;
5722 if(h->cabac.low&0x1) ptr--;
5724 if(h->cabac.low&0x1FF) ptr--;
5727 // The pixels are stored in the same order as levels in h->mb array.
5728 for(y=0; y<16; y++){
5729 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5730 for(x=0; x<16; x++){
5731 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5732 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5736 const int index= 256 + 4*(y&3) + 32*(y>>2);
5738 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5739 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5743 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5745 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5746 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC engine after the raw PCM bytes */
5750 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5752 // All blocks are present
5753 h->cbp_table[mb_xy] = 0x1ef;
5754 h->chroma_pred_mode_table[mb_xy] = 0;
5755 // In deblocking, the quantizer is 0
5756 s->current_picture.qscale_table[mb_xy]= 0;
5757 // All coeffs are present
5758 memset(h->non_zero_count[mb_xy], 16, 16);
5759 s->current_picture.mb_type[mb_xy]= mb_type;
5760 h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled while decoding a field MB of a pair */
5765 h->ref_count[0] <<= 1;
5766 h->ref_count[1] <<= 1;
5769 fill_caches(h, mb_type, 0);
/* --- prediction modes / motion info, by MB class --- */
5771 if( IS_INTRA( mb_type ) ) {
5773 if( IS_INTRA4x4( mb_type ) ) {
5774 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5775 mb_type |= MB_TYPE_8x8DCT;
5776 for( i = 0; i < 16; i+=4 ) {
5777 int pred = pred_intra_mode( h, i );
5778 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5779 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5782 for( i = 0; i < 16; i++ ) {
5783 int pred = pred_intra_mode( h, i );
5784 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5786 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5789 write_back_intra_pred_mode(h);
5790 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5792 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5793 if( h->intra16x16_pred_mode < 0 ) return -1;
5795 h->chroma_pred_mode_table[mb_xy] =
5796 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5798 pred_mode= check_intra_pred_mode( h, pred_mode );
5799 if( pred_mode < 0 ) return -1;
5800 h->chroma_pred_mode= pred_mode;
/* 8x8 partitions: decode sub_mb_types, refs, then MVs per sub-partition */
5801 } else if( partition_count == 4 ) {
5802 int i, j, sub_partition_count[4], list, ref[2][4];
5804 if( h->slice_type_nos == FF_B_TYPE ) {
5805 for( i = 0; i < 4; i++ ) {
5806 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5807 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5808 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5810 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5811 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5812 pred_direct_motion(h, &mb_type);
5813 h->ref_cache[0][scan8[4]] =
5814 h->ref_cache[1][scan8[4]] =
5815 h->ref_cache[0][scan8[12]] =
5816 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5817 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5818 for( i = 0; i < 4; i++ )
5819 if( IS_DIRECT(h->sub_mb_type[i]) )
5820 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5824 for( i = 0; i < 4; i++ ) {
5825 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5826 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5827 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5831 for( list = 0; list < h->list_count; list++ ) {
5832 for( i = 0; i < 4; i++ ) {
5833 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5834 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5835 if( h->ref_count[list] > 1 )
5836 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5842 h->ref_cache[list][ scan8[4*i]+1 ]=
5843 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5848 dct8x8_allowed = get_dct8x8_allowed(h);
5850 for(list=0; list<h->list_count; list++){
5852 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5853 if(IS_DIRECT(h->sub_mb_type[i])){
5854 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5858 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5859 const int sub_mb_type= h->sub_mb_type[i];
5860 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5861 for(j=0; j<sub_partition_count[i]; j++){
5864 const int index= 4*i + block_width*j;
5865 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5866 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5867 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5869 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5870 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5871 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV/MVD into every 4x4 cell the sub-partition covers */
5873 if(IS_SUB_8X8(sub_mb_type)){
5875 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5877 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5880 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5882 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5883 }else if(IS_SUB_8X4(sub_mb_type)){
5884 mv_cache[ 1 ][0]= mx;
5885 mv_cache[ 1 ][1]= my;
5887 mvd_cache[ 1 ][0]= mx - mpx;
5888 mvd_cache[ 1 ][1]= my - mpy;
5889 }else if(IS_SUB_4X8(sub_mb_type)){
5890 mv_cache[ 8 ][0]= mx;
5891 mv_cache[ 8 ][1]= my;
5893 mvd_cache[ 8 ][0]= mx - mpx;
5894 mvd_cache[ 8 ][1]= my - mpy;
5896 mv_cache[ 0 ][0]= mx;
5897 mv_cache[ 0 ][1]= my;
5899 mvd_cache[ 0 ][0]= mx - mpx;
5900 mvd_cache[ 0 ][1]= my - mpy;
5903 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5904 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5905 p[0] = p[1] = p[8] = p[9] = 0;
5906 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5910 } else if( IS_DIRECT(mb_type) ) {
5911 pred_direct_motion(h, &mb_type);
5912 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5913 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5914 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16 / 16x8 / 8x16 inter partitions */
5916 int list, mx, my, i, mpx, mpy;
5917 if(IS_16X16(mb_type)){
5918 for(list=0; list<h->list_count; list++){
5919 if(IS_DIR(mb_type, 0, list)){
5920 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5921 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5923 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5925 for(list=0; list<h->list_count; list++){
5926 if(IS_DIR(mb_type, 0, list)){
5927 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5929 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5930 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5931 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5933 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5934 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5936 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5939 else if(IS_16X8(mb_type)){
5940 for(list=0; list<h->list_count; list++){
5942 if(IS_DIR(mb_type, i, list)){
5943 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5944 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5946 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5949 for(list=0; list<h->list_count; list++){
5951 if(IS_DIR(mb_type, i, list)){
5952 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5953 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5954 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5955 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5957 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5958 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5960 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5961 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5966 assert(IS_8X16(mb_type));
5967 for(list=0; list<h->list_count; list++){
5969 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5970 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5971 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5973 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5976 for(list=0; list<h->list_count; list++){
5978 if(IS_DIR(mb_type, i, list)){
5979 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5980 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5981 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5983 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5984 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5985 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5987 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5988 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5995 if( IS_INTER( mb_type ) ) {
5996 h->chroma_pred_mode_table[mb_xy] = 0;
5997 write_back_motion( h, mb_type );
/* --- cbp (explicit unless INTRA16x16, which carries it in mb_type) --- */
6000 if( !IS_INTRA16x16( mb_type ) ) {
6001 cbp = decode_cabac_mb_cbp_luma( h );
6002 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6005 h->cbp_table[mb_xy] = h->cbp = cbp;
6007 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6008 if( decode_cabac_mb_transform_size( h ) )
6009 mb_type |= MB_TYPE_8x8DCT;
6011 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals --- */
6013 if( cbp || IS_INTRA16x16( mb_type ) ) {
6014 const uint8_t *scan, *scan8x8, *dc_scan;
6015 const uint32_t *qmul;
6018 if(IS_INTERLACED(mb_type)){
6019 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6020 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6021 dc_scan= luma_dc_field_scan;
6023 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6024 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6025 dc_scan= luma_dc_zigzag_scan;
6028 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6029 if( dqp == INT_MIN ){
6030 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into 0..51 as the spec's modular arithmetic requires */
6034 if(((unsigned)s->qscale) > 51){
6035 if(s->qscale<0) s->qscale+= 52;
6036 else s->qscale-= 52;
6038 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6039 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6041 if( IS_INTRA16x16( mb_type ) ) {
6043 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6044 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6047 qmul = h->dequant4_coeff[0][s->qscale];
6048 for( i = 0; i < 16; i++ ) {
6049 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6050 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6053 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6057 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6058 if( cbp & (1<<i8x8) ) {
6059 if( IS_8x8DCT(mb_type) ) {
6060 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6061 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6063 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6064 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6065 const int index = 4*i8x8 + i4x4;
6066 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6068 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6069 //STOP_TIMER("decode_residual")
6073 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6074 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6081 for( c = 0; c < 2; c++ ) {
6082 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6083 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6089 for( c = 0; c < 2; c++ ) {
6090 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6091 for( i = 0; i < 4; i++ ) {
6092 const int index = 16 + 4 * c + i;
6093 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6094 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6098 uint8_t * const nnz= &h->non_zero_count_cache[0];
6099 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6100 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6103 uint8_t * const nnz= &h->non_zero_count_cache[0];
6104 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6105 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6106 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6107 h->last_qscale_diff = 0;
6110 s->current_picture.qscale_table[mb_xy]= s->qscale;
6111 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling done above */
6114 h->ref_count[0] >>= 1;
6115 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels tall).  bS < 4 uses the
 * clipped-delta filter via the DSP function; bS == 4 (intra edge) uses
 * the strong filter implemented inline below. */
6122 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6124 const int index_a = qp + h->slice_alpha_c0_offset;
6125 const int alpha = (alpha_table+52)[index_a];
6126 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 marks "no filtering" for the DSP routine */
6131 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6132 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6134 /* 16px edge length, because bS=4 is triggered by being at
6135 * the edge of an intra MB, so all 4 bS are the same */
6136 for( d = 0; d < 16; d++ ) {
6137 const int p0 = pix[-1];
6138 const int p1 = pix[-2];
6139 const int p2 = pix[-3];
6141 const int q0 = pix[0];
6142 const int q1 = pix[1];
6143 const int q2 = pix[2];
/* spec's edge-activity thresholds: only filter across small gradients */
6145 if( FFABS( p0 - q0 ) < alpha &&
6146 FFABS( p1 - p0 ) < beta &&
6147 FFABS( q1 - q0 ) < beta ) {
6149 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6150 if( FFABS( p2 - p0 ) < beta)
6152 const int p3 = pix[-4];
6154 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6155 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6156 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6159 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6161 if( FFABS( q2 - q0 ) < beta)
6163 const int q3 = pix[3];
6165 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6166 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6167 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6170 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6174 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6175 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6177 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge; chroma always delegates to the DSP
 * routines (normal filter for bS < 4, intra variant otherwise). */
6183 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6185 const int index_a = qp + h->slice_alpha_c0_offset;
6186 const int alpha = (alpha_table+52)[index_a];
6187 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1; 0 marks "no filtering" */
6192 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6193 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6195 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock a vertical luma edge in an MBAFF frame: each of the 16 rows may
 * have its own bS and qp (bS[8], qp[2]), so the filter runs per-row in C
 * rather than via the DSP routines. */
6199 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6201 for( i = 0; i < 16; i++, pix += stride) {
6207 int bS_index = (i >> 1);
6210 bS_index |= (i & 1);
6213 if( bS[bS_index] == 0 ) {
/* qp of the MB the row belongs to (field vs frame row interleaving) */
6217 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6218 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6219 alpha = (alpha_table+52)[index_a];
6220 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: normal clipped-delta filter */
6222 if( bS[bS_index] < 4 ) {
6223 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6224 const int p0 = pix[-1];
6225 const int p1 = pix[-2];
6226 const int p2 = pix[-3];
6227 const int q0 = pix[0];
6228 const int q1 = pix[1];
6229 const int q2 = pix[2];
6231 if( FFABS( p0 - q0 ) < alpha &&
6232 FFABS( p1 - p0 ) < beta &&
6233 FFABS( q1 - q0 ) < beta ) {
6237 if( FFABS( p2 - p0 ) < beta ) {
6238 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6241 if( FFABS( q2 - q0 ) < beta ) {
6242 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6246 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6247 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6248 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6249 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra) filter */
6252 const int p0 = pix[-1];
6253 const int p1 = pix[-2];
6254 const int p2 = pix[-3];
6256 const int q0 = pix[0];
6257 const int q1 = pix[1];
6258 const int q2 = pix[2];
6260 if( FFABS( p0 - q0 ) < alpha &&
6261 FFABS( p1 - p0 ) < beta &&
6262 FFABS( q1 - q0 ) < beta ) {
6264 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6265 if( FFABS( p2 - p0 ) < beta)
6267 const int p3 = pix[-4];
6269 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6270 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6271 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6274 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6276 if( FFABS( q2 - q0 ) < beta)
6278 const int q3 = pix[3];
6280 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6281 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6282 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6285 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6289 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6290 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6292 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock a vertical chroma edge in an MBAFF frame, per-row (8 rows),
 * analogous to filter_mb_mbaff_edgev but with the 2-tap chroma filter. */
6297 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6299 for( i = 0; i < 8; i++, pix += stride) {
6307 if( bS[bS_index] == 0 ) {
6311 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6312 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6313 alpha = (alpha_table+52)[index_a];
6314 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: normal filter, chroma tc = tc0 + 1 */
6316 if( bS[bS_index] < 4 ) {
6317 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6318 const int p0 = pix[-1];
6319 const int p1 = pix[-2];
6320 const int q0 = pix[0];
6321 const int q1 = pix[1];
6323 if( FFABS( p0 - q0 ) < alpha &&
6324 FFABS( p1 - p0 ) < beta &&
6325 FFABS( q1 - q0 ) < beta ) {
6326 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6328 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6329 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6330 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (p0/q0 only) */
6333 const int p0 = pix[-1];
6334 const int p1 = pix[-2];
6335 const int q0 = pix[0];
6336 const int q1 = pix[1];
6338 if( FFABS( p0 - q0 ) < alpha &&
6339 FFABS( p1 - p0 ) < beta &&
6340 FFABS( q1 - q0 ) < beta ) {
6342 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6343 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6344 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge; mirror of filter_mb_edgev with the
 * neighbourhood addressed via strides instead of +/-1 offsets. */
6350 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6352 const int index_a = qp + h->slice_alpha_c0_offset;
6353 const int alpha = (alpha_table+52)[index_a];
6354 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6355 const int pix_next = stride;
/* tc[i] = -1 marks "no filtering" for the DSP routine */
6360 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6361 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6363 /* 16px edge length, see filter_mb_edgev */
6364 for( d = 0; d < 16; d++ ) {
6365 const int p0 = pix[-1*pix_next];
6366 const int p1 = pix[-2*pix_next];
6367 const int p2 = pix[-3*pix_next];
6368 const int q0 = pix[0];
6369 const int q1 = pix[1*pix_next];
6370 const int q2 = pix[2*pix_next];
6372 if( FFABS( p0 - q0 ) < alpha &&
6373 FFABS( p1 - p0 ) < beta &&
6374 FFABS( q1 - q0 ) < beta ) {
6376 const int p3 = pix[-4*pix_next];
6377 const int q3 = pix[ 3*pix_next];
6379 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6380 if( FFABS( p2 - p0 ) < beta) {
6382 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6383 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6384 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6387 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6389 if( FFABS( q2 - q0 ) < beta) {
6391 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6392 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6393 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6396 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6400 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6401 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6403 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblock one horizontal chroma edge via the DSP routines.
 * Chroma uses tc0 + 1 (0 meaning "skip"); the intra variant is called
 * when no per-group tc values are needed.
 * NOTE(review): some original source lines are elided in this excerpt.
 */
6410 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6412 const int index_a = qp + h->slice_alpha_c0_offset;
6413 const int alpha = (alpha_table+52)[index_a];
6414 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6419 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6420 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6422 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking of one macroblock.
 * Falls back to the full filter_mb() for picture-border macroblocks,
 * per-plane chroma QP differences, a missing DSP strength function, or
 * slice-boundary handling with deblocking_filter == 2.  Otherwise it
 * computes boundary strengths in bulk and filters all edges.
 * NOTE(review): some original source lines are elided in this excerpt.
 */
6426 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6427 MpegEncContext * const s = &h->s;
6428 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6430 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* cases the fast path cannot handle -> use the full filter */
6434 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6436 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6437 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6438 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6441 assert(!FRAME_MBAFF);
/* average QP with the left/top neighbours for the cross-MB edges */
6443 mb_type = s->current_picture.mb_type[mb_xy];
6444 qp = s->current_picture.qscale_table[mb_xy];
6445 qp0 = s->current_picture.qscale_table[mb_xy-1];
6446 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6447 qpc = get_chroma_qp( h, 0, qp );
6448 qpc0 = get_chroma_qp( h, 0, qp0 );
6449 qpc1 = get_chroma_qp( h, 0, qp1 );
6450 qp0 = (qp + qp0 + 1) >> 1;
6451 qp1 = (qp + qp1 + 1) >> 1;
6452 qpc0 = (qpc + qpc0 + 1) >> 1;
6453 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this threshold the filter is a no-op for every edge */
6454 qp_thresh = 15 - h->slice_alpha_c0_offset;
6455 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6456 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed boundary strengths (4 on MB edges, 3 inside;
 * horizontal MB edge drops to 3 in field pictures) */
6459 if( IS_INTRA(mb_type) ) {
6460 int16_t bS4[4] = {4,4,4,4};
6461 int16_t bS3[4] = {3,3,3,3};
6462 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6463 if( IS_8x8DCT(mb_type) ) {
6464 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6465 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6466 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6467 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6469 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6470 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6471 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6472 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6473 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6474 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6475 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6476 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6478 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6479 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6480 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6481 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6482 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6483 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6484 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6485 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: boundary strengths computed in bulk (bSv aliases bS as
 * four 16-bit values per 64-bit word) */
6488 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6489 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6491 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6493 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6495 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6496 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6497 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6498 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6500 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6501 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6502 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6503 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* intra neighbours force maximum strength on the shared MB edge */
6505 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6506 bSv[0][0] = 0x0004000400040004ULL;
6507 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6508 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* apply filters: dir 0 = vertical edges, dir 1 = horizontal edges;
 * edge 0 uses the neighbour-averaged QP, inner edges the MB's own QP */
6510 #define FILTER(hv,dir,edge)\
6511 if(bSv[dir][edge]) {\
6512 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6514 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6515 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6521 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Full (reference) deblocking of one macroblock.
 * Handles the MBAFF special cases (mixed frame/field pairs, the doubled
 * first vertical edge with 8 boundary strengths, and the twice-filtered
 * top edge), computes per-edge boundary strengths bS[] from intra/NNZ/
 * ref+mv differences, then applies the edge filters in both directions.
 * NOTE(review): some original source lines are elided in this excerpt.
 */
6540 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6541 MpegEncContext * const s = &h->s;
6542 const int mb_xy= mb_x + mb_y*s->mb_stride;
6543 const int mb_type = s->current_picture.mb_type[mb_xy];
6544 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6545 int first_vertical_edge_done = 0;
6548 //for sufficiently low qp, filtering wouldn't do anything
6549 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6551 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6552 int qp = s->current_picture.qscale_table[mb_xy];
6554 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6555 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6560 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6561 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6562 int top_type, left_type[2];
6563 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6564 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6565 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the NNZ cache from the coded-block pattern for 8x8 DCT MBs */
6567 if(IS_8x8DCT(top_type)){
6568 h->non_zero_count_cache[4+8*0]=
6569 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6570 h->non_zero_count_cache[6+8*0]=
6571 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6573 if(IS_8x8DCT(left_type[0])){
6574 h->non_zero_count_cache[3+8*1]=
6575 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6577 if(IS_8x8DCT(left_type[1])){
6578 h->non_zero_count_cache[3+8*3]=
6579 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6582 if(IS_8x8DCT(mb_type)){
6583 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6584 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6586 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6587 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6589 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6590 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6592 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6593 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6598 // left mb is in picture
6599 && h->slice_table[mb_xy-1] != 255
6600 // and current and left pair do not have the same interlaced type
6601 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6602 // and left mb is in the same slice if deblocking_filter == 2
6603 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6604 /* First vertical edge is different in MBAFF frames
6605 * There are 8 different bS to compute and 2 different Qp
6607 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6608 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6613 int mb_qp, mbn0_qp, mbn1_qp;
6615 first_vertical_edge_done = 1;
6617 if( IS_INTRA(mb_type) )
6618 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6620 for( i = 0; i < 8; i++ ) {
6621 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6623 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6625 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6626 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6627 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* average the QPs of the MB and each of the two left neighbours,
 * per plane (luma, Cb, Cr) */
6634 mb_qp = s->current_picture.qscale_table[mb_xy];
6635 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6636 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6637 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6638 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6639 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6640 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6641 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6642 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6643 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6644 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6645 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6646 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6649 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6650 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6651 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6652 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6653 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6655 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6656 for( dir = 0; dir < 2; dir++ )
6659 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6660 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6661 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6662 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
6663 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6665 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6666 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6667 // how often to recheck mv-based bS when iterating between edges
6668 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6669 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6670 // how often to recheck mv-based bS when iterating along each edge
6671 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6673 if (first_vertical_edge_done) {
6675 first_vertical_edge_done = 0;
6678 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6681 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6682 && !IS_INTERLACED(mb_type)
6683 && IS_INTERLACED(mbm_type)
6685 // This is a special case in the norm where the filtering must
6686 // be done twice (one each of the field) even if we are in a
6687 // frame macroblock.
6689 static const int nnz_idx[4] = {4,5,6,3};
6690 unsigned int tmp_linesize = 2 * linesize;
6691 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6692 int mbn_xy = mb_xy - 2 * s->mb_stride;
6697 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6698 if( IS_INTRA(mb_type) ||
6699 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6700 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6702 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6703 for( i = 0; i < 4; i++ ) {
6704 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6705 mbn_nnz[nnz_idx[i]] != 0 )
6711 // Do not use s->qscale as luma quantizer because it has not the same
6712 // value in IPCM macroblocks.
6713 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6714 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6715 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6716 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6717 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6718 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6719 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6720 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* regular edge loop: edge 0 borders the neighbour MB, edges 1..3 are
 * internal */
6727 for( edge = start; edge < edges; edge++ ) {
6728 /* mbn_xy: neighbor macroblock */
6729 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6730 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6731 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 DCT leaves no coefficients on odd internal edges -> skip */
6735 if( (edge&1) && IS_8x8DCT(mb_type) )
6738 if( IS_INTRA(mb_type) ||
6739 IS_INTRA(mbn_type) ) {
6742 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6743 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6752 bS[0] = bS[1] = bS[2] = bS[3] = value;
6757 if( edge & mask_edge ) {
6758 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6761 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6762 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole-edge shortcut: same partition on both sides, compare refs/mvs
 * once for the edge instead of per 4x4 block */
6765 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6766 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6767 int bn_idx= b_idx - (dir ? 8:1);
6770 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6771 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6772 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6773 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the cross-list (L0 vs L1) pairing */
6776 if(h->slice_type_nos == FF_B_TYPE && v){
6778 for( l = 0; !v && l < 2; l++ ) {
6780 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6781 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6782 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6786 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4-block boundary strength computation */
6792 for( i = 0; i < 4; i++ ) {
6793 int x = dir == 0 ? edge : i;
6794 int y = dir == 0 ? i : edge;
6795 int b_idx= 8 + 4 + x + 8*y;
6796 int bn_idx= b_idx - (dir ? 8:1);
6798 if( h->non_zero_count_cache[b_idx] != 0 ||
6799 h->non_zero_count_cache[bn_idx] != 0 ) {
6805 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6806 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6807 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6808 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6814 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6816 for( l = 0; l < 2; l++ ) {
6818 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6819 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6820 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6829 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6834 // Do not use s->qscale as luma quantizer because it has not the same
6835 // value in IPCM macroblocks.
6836 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6837 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6838 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6839 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* chroma is filtered only on even edges (half resolution) */
6841 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6842 if( (edge&1) == 0 ) {
6843 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6844 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6845 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6846 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6849 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6850 if( (edge&1) == 0 ) {
6851 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6852 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6853 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6854 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decode the macroblocks of one slice.
 * Dispatches to the CABAC or CAVLC macroblock decoder depending on
 * pps.cabac, advancing s->mb_x/s->mb_y and reporting decoded regions to
 * the error concealment layer via ff_er_add_slice().
 * @return negative on error; success paths are elided in this excerpt.
 * NOTE(review): some original source lines are elided in this excerpt.
 */
6861 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6862 MpegEncContext * const s = &h->s;
6863 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6867 if( h->pps.cabac ) {
/* CABAC: byte-align, hand the remaining bytes to the arithmetic
 * decoder, and derive the initial context states from the slice QP */
6871 align_get_bits( &s->gb );
6874 ff_init_cabac_states( &h->cabac);
6875 ff_init_cabac_decoder( &h->cabac,
6876 s->gb.buffer + get_bits_count(&s->gb)/8,
6877 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6878 /* calculate pre-state */
6879 for( i= 0; i < 460; i++ ) {
6881 if( h->slice_type_nos == FF_I_TYPE )
6882 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6884 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* state = 2*|pre-64| + MPS flag */
6887 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6889 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6894 int ret = decode_mb_cabac(h);
6896 //STOP_TIMER("decode_mb_cabac")
6898 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well */
6900 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6903 if(ret>=0) ret = decode_mb_cabac(h);
6905 if(ret>=0) hl_decode_mb(h);
6908 eos = get_cabac_terminate( &h->cabac );
/* a small overread past the end is tolerated (CABAC lookahead) */
6910 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6911 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6912 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6916 if( ++s->mb_x >= s->mb_width ) {
6918 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6920 if(FIELD_OR_MBAFF_PICTURE) {
6925 if( eos || s->mb_y >= s->mb_height ) {
6926 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6927 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop */
6934 int ret = decode_mb_cavlc(h);
6936 if(ret>=0) hl_decode_mb(h);
6938 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6940 ret = decode_mb_cavlc(h);
6942 if(ret>=0) hl_decode_mb(h);
6947 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6948 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6953 if(++s->mb_x >= s->mb_width){
6955 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6957 if(FIELD_OR_MBAFF_PICTURE) {
6960 if(s->mb_y >= s->mb_height){
6961 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6963 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6964 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6968 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* slice ends when the bitstream is exhausted and no skip run remains */
6975 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6976 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6977 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6978 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6982 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* data-partitioned decode loop (this region matches disabled/dead code
 * in the original; it passes s->gb by value to get_bits_count) */
6991 for(;s->mb_y < s->mb_height; s->mb_y++){
6992 for(;s->mb_x < s->mb_width; s->mb_x++){
6993 int ret= decode_mb(h);
6998 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6999 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7004 if(++s->mb_x >= s->mb_width){
7006 if(++s->mb_y >= s->mb_height){
7007 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7008 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7012 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): repaired mojibake '?' characters in this condition to
 * match the sibling checks at 7007/7020 */
7019 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7020 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7021 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7025 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7032 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7035 return -1; //not reached
/**
 * Parse an SEI unregistered-user-data payload.
 * Reads up to sizeof(user_data)-1 bytes, detects an x264 version banner
 * to set h->x264_build, and skips any remaining payload bytes.
 * NOTE(review): user_data is passed to sscanf/av_log as a C string, but
 * the terminating '\0' store is not visible in this excerpt — confirm
 * the buffer is NUL-terminated after the read loop.
 */
7038 static int decode_unregistered_user_data(H264Context *h, int size){
7039 MpegEncContext * const s = &h->s;
7040 uint8_t user_data[16+256];
7046 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7047 user_data[i]= get_bits(&s->gb, 8);
/* the first 16 bytes are the UUID; the text payload starts at +16 */
7051 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7052 if(e==1 && build>=0)
7053 h->x264_build= build;
7055 if(s->avctx->debug & FF_DEBUG_BUGS)
7056 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond what was buffered */
7059 skip_bits(&s->gb, 8);
/**
 * Parse the SEI messages of a NAL unit.
 * Each message's type and size are coded as runs of 0xFF bytes plus a
 * final byte; unknown payload types are skipped.
 * NOTE(review): some original source lines are elided in this excerpt.
 */
7064 static int decode_sei(H264Context *h){
7065 MpegEncContext * const s = &h->s;
7067 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum of 255s terminated by a byte < 255 */
7072 type+= show_bits(&s->gb, 8);
7073 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same coding as payloadType */
7077 size+= show_bits(&s->gb, 8);
7078 }while(get_bits(&s->gb, 8) == 255);
7082 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled payload types are skipped wholesale */
7086 skip_bits(&s->gb, 8*size);
7089 //FIXME check bits here
7090 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters.
 * All fields are read and discarded except as bitstream positioning;
 * nothing is stored in the SPS here.
 */
7096 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7097 MpegEncContext * const s = &h->s;
7099 cpb_count = get_ue_golomb(&s->gb) + 1;
7100 get_bits(&s->gb, 4); /* bit_rate_scale */
7101 get_bits(&s->gb, 4); /* cpb_size_scale */
7102 for(i=0; i<cpb_count; i++){
7103 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7104 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7105 get_bits1(&s->gb); /* cbr_flag */
7107 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7108 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7109 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7110 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parse the VUI (video usability information) appended to an SPS.
 * Stores sample aspect ratio, timing info, and bitstream restrictions
 * in *sps; most other fields are read and discarded.
 * NOTE(review): some original source lines are elided in this excerpt.
 */
7113 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7114 MpegEncContext * const s = &h->s;
7115 int aspect_ratio_info_present_flag;
7116 unsigned int aspect_ratio_idc;
7117 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7119 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7121 if( aspect_ratio_info_present_flag ) {
7122 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den; other idc values index a
 * fixed table of common aspect ratios */
7123 if( aspect_ratio_idc == EXTENDED_SAR ) {
7124 sps->sar.num= get_bits(&s->gb, 16);
7125 sps->sar.den= get_bits(&s->gb, 16);
7126 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7127 sps->sar= pixel_aspect[aspect_ratio_idc];
7129 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7136 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7138 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7139 get_bits1(&s->gb); /* overscan_appropriate_flag */
7142 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7143 get_bits(&s->gb, 3); /* video_format */
7144 get_bits1(&s->gb); /* video_full_range_flag */
7145 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7146 get_bits(&s->gb, 8); /* colour_primaries */
7147 get_bits(&s->gb, 8); /* transfer_characteristics */
7148 get_bits(&s->gb, 8); /* matrix_coefficients */
7152 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7153 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7154 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7157 sps->timing_info_present_flag = get_bits1(&s->gb);
7158 if(sps->timing_info_present_flag){
7159 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7160 sps->time_scale = get_bits_long(&s->gb, 32);
7161 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7164 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7165 if(nal_hrd_parameters_present_flag)
7166 decode_hrd_parameters(h, sps);
7167 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7168 if(vcl_hrd_parameters_present_flag)
7169 decode_hrd_parameters(h, sps);
7170 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7171 get_bits1(&s->gb); /* low_delay_hrd_flag */
7172 get_bits1(&s->gb); /* pic_struct_present_flag */
7174 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7175 if(sps->bitstream_restriction_flag){
7176 unsigned int num_reorder_frames;
7177 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7178 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7179 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7180 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7181 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7182 num_reorder_frames= get_ue_golomb(&s->gb);
7183 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* reject values beyond the 16-frame DPB limit */
7185 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7186 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7190 sps->num_reorder_frames= num_reorder_frames;
/**
 * Parse one scaling list (16 or 64 entries) into factors[].
 * If the list is absent, fallback_list is copied; if the first delta
 * yields 0, the JVT default list is used; otherwise values are delta
 * coded in zigzag order, with 0 meaning "repeat the last value".
 */
7196 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7197 const uint8_t *jvt_list, const uint8_t *fallback_list){
7198 MpegEncContext * const s = &h->s;
7199 int i, last = 8, next = 8;
7200 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7201 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7202 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7204 for(i=0;i<size;i++){
7206 next = (last + get_se_golomb(&s->gb)) & 0xff;
7207 if(!i && !next){ /* matrix not written, we use the preset one */
7208 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7211 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Each list falls back either to the SPS matrices (for a PPS when the
 * SPS carried matrices), to the JVT defaults, or to the previously
 * decoded list of the same prediction class.
 */
7215 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7216 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7217 MpegEncContext * const s = &h->s;
7218 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7219 const uint8_t *fallback[4] = {
7220 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7221 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7222 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7223 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7225 if(get_bits1(&s->gb)){ /* scaling_matrix_present_flag */
7226 sps->scaling_matrix_present |= is_sps;
7227 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7228 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7229 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7230 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7231 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7232 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only for SPS or when the PPS enables 8x8 transform */
7233 if(is_sps || pps->transform_8x8_mode){
7234 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7235 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7237 } else if(fallback_sps) {
7238 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7239 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7244 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Validates 'id' against 'max' and zero-allocates vec[id] on first use;
 * 'name' is used only for error messages.
 * NOTE(review): the return-type line and body braces are elided in this
 * excerpt. */
7247 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7248 const size_t size, const char *name)
7251 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7256 vec[id] = av_mallocz(size);
7258 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Decode a sequence parameter set (SPS) NAL unit from s->gb.
 * The parsed fields are stored into h->sps_buffers[sps_id], which is
 * (re)allocated via alloc_parameter_set().
 * NOTE(review): this listing is elided — error-return lines and some closing
 * braces are not visible; only the visible lines were reviewed/edited.
 * @return 0 on success, a negative value on error (elided return paths)
 */
7263 static inline int decode_seq_parameter_set(H264Context *h){
7264 MpegEncContext * const s = &h->s;
7265 int profile_idc, level_idc;
7266 unsigned int sps_id, tmp, mb_width, mb_height;
7270 profile_idc= get_bits(&s->gb, 8);
7271 get_bits1(&s->gb); //constraint_set0_flag
7272 get_bits1(&s->gb); //constraint_set1_flag
7273 get_bits1(&s->gb); //constraint_set2_flag
7274 get_bits1(&s->gb); //constraint_set3_flag
7275 get_bits(&s->gb, 4); // reserved
7276 level_idc= get_bits(&s->gb, 8);
7277 sps_id= get_ue_golomb(&s->gb);
7279 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7283 sps->profile_idc= profile_idc;
7284 sps->level_idc= level_idc;
7286 if(sps->profile_idc >= 100){ //high profile
7287 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7288 get_bits1(&s->gb); //residual_color_transform_flag
7289 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7290 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7291 sps->transform_bypass = get_bits1(&s->gb);
7292 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7294 sps->scaling_matrix_present = 0;
7296 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7297 sps->poc_type= get_ue_golomb(&s->gb);
7299 if(sps->poc_type == 0){ //FIXME #define
7300 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7301 } else if(sps->poc_type == 1){//FIXME #define
7302 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7303 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7304 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7305 tmp= get_ue_golomb(&s->gb);
// bound-check poc_cycle_length before using it as an array length below
7307 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7308 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7311 sps->poc_cycle_length= tmp;
7313 for(i=0; i<sps->poc_cycle_length; i++)
7314 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7315 }else if(sps->poc_type != 2){
7316 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7320 tmp= get_ue_golomb(&s->gb);
7321 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7322 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7325 sps->ref_frame_count= tmp;
7326 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7327 mb_width= get_ue_golomb(&s->gb) + 1;
7328 mb_height= get_ue_golomb(&s->gb) + 1;
// reject dimensions that would overflow 16*mb_width / 16*mb_height
7329 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7330 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7331 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7334 sps->mb_width = mb_width;
7335 sps->mb_height= mb_height;
7337 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7338 if(!sps->frame_mbs_only_flag)
7339 sps->mb_aff= get_bits1(&s->gb);
7343 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7345 #ifndef ALLOW_INTERLACE
7347 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7349 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7350 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7352 sps->crop= get_bits1(&s->gb);
7354 sps->crop_left = get_ue_golomb(&s->gb);
7355 sps->crop_right = get_ue_golomb(&s->gb);
7356 sps->crop_top = get_ue_golomb(&s->gb);
7357 sps->crop_bottom= get_ue_golomb(&s->gb);
7358 if(sps->crop_left || sps->crop_top){
7359 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
// FIX: use the SPS being parsed (sps), not the previously active h->sps,
// when deciding how much bottom cropping is permitted (field coding halves
// the allowed crop; the old code consulted the wrong SPS).
7361 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7362 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7368 sps->crop_bottom= 0;
7371 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7372 if( sps->vui_parameters_present_flag )
7373 decode_vui_parameters(h, sps);
7375 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7376 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7377 sps_id, sps->profile_idc, sps->level_idc,
7379 sps->ref_frame_count,
7380 sps->mb_width, sps->mb_height,
7381 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7382 sps->direct_8x8_inference_flag ? "8B8" : "",
7383 sps->crop_left, sps->crop_right,
7384 sps->crop_top, sps->crop_bottom,
7385 sps->vui_parameters_present_flag ? "VUI" : ""
/* Precompute the luma-QP -> chroma-QP mapping for one chroma plane.
 * t selects which of the two per-PPS tables to fill; index is the
 * chroma_qp_index_offset applied (clipped to the valid 0..51 QP range)
 * before the file-scope chroma_qp[] lookup table is consulted.
 * NOTE(review): the return-type line is elided from this view. */
7392 build_qp_table(PPS *pps, int t, int index)
7395 for(i = 0; i < 52; i++)
7396 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) NAL unit from s->gb.
 * The parsed fields are stored into h->pps_buffers[pps_id], allocated via
 * alloc_parameter_set(); bit_length is the RBSP length in bits and is used
 * to detect the optional trailing (high-profile) syntax elements.
 * NOTE(review): this listing is elided — error-return lines and some closing
 * braces are not visible; only the visible lines were reviewed/edited.
 * @return 0 on success, a negative value on error (elided return paths)
 */
7399 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7400 MpegEncContext * const s = &h->s;
7401 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7404 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7408 tmp= get_ue_golomb(&s->gb);
7409 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7410 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7415 pps->cabac= get_bits1(&s->gb);
7416 pps->pic_order_present= get_bits1(&s->gb);
7417 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7418 if(pps->slice_group_count > 1 ){
7419 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7420 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7421 switch(pps->mb_slice_group_map_type){
7424 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7425 | run_length[ i ] |1 |ue(v) |
7430 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7432 | top_left_mb[ i ] |1 |ue(v) |
7433 | bottom_right_mb[ i ] |1 |ue(v) |
7441 | slice_group_change_direction_flag |1 |u(1) |
7442 | slice_group_change_rate_minus1 |1 |ue(v) |
7447 | slice_group_id_cnt_minus1 |1 |ue(v) |
7448 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7450 | slice_group_id[ i ] |1 |u(v) |
7455 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7456 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// clamp to sane defaults on reference-count overflow instead of trusting the stream
7457 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7458 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7459 pps->ref_count[0]= pps->ref_count[1]= 1;
7463 pps->weighted_pred= get_bits1(&s->gb);
7464 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7465 pps->init_qp= get_se_golomb(&s->gb) + 26;
7466 pps->init_qs= get_se_golomb(&s->gb) + 26;
7467 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7468 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7469 pps->constrained_intra_pred= get_bits1(&s->gb);
7470 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7472 pps->transform_8x8_mode= 0;
7473 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7474 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7475 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// optional high-profile trailer: 8x8 transform flag, scaling lists,
// second chroma qp offset — present only if bits remain in the RBSP
7477 if(get_bits_count(&s->gb) < bit_length){
7478 pps->transform_8x8_mode= get_bits1(&s->gb);
7479 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7480 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7482 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7485 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7486 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7487 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
// FIX: record the flag in the PPS being parsed, not in h->pps (the
// currently active copy) — parsing a PPS must not mutate the active state.
7488 pps->chroma_qp_diff= 1;
7490 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7491 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7492 pps_id, pps->sps_id,
7493 pps->cabac ? "CABAC" : "CAVLC",
7494 pps->slice_group_count,
7495 pps->ref_count[0], pps->ref_count[1],
7496 pps->weighted_pred ? "weighted" : "",
7497 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7498 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7499 pps->constrained_intra_pred ? "CONSTR" : "",
7500 pps->redundant_pic_cnt_present ? "REDU" : "",
7501 pps->transform_8x8_mode ? "8x8DCT" : ""
7509 * Call decode_slice() for each context.
7511 * @param h h264 master context
7512 * @param context_count number of contexts to execute
7514 static void execute_decode_slices(H264Context *h, int context_count){
7515 MpegEncContext * const s = &h->s;
7516 AVCodecContext * const avctx= s->avctx;
/* single-context fast path: decode directly on the master context */
7520 if(context_count == 1) {
7521 decode_slice(avctx, h);
/* multi-context path: propagate error-resilience settings to each slave
 * context and reset its error counter before dispatching in parallel */
7523 for(i = 1; i < context_count; i++) {
7524 hx = h->thread_context[i];
7525 hx->s.error_resilience = avctx->error_resilience;
7526 hx->s.error_count = 0;
7529 avctx->execute(avctx, (void *)decode_slice,
7530 (void **)h->thread_context, NULL, context_count);
7532 /* pull back stuff from slices to master context */
7533 hx = h->thread_context[context_count - 1];
7534 s->mb_x = hx->s.mb_x;
7535 s->mb_y = hx->s.mb_y;
7536 s->dropable = hx->s.dropable;
7537 s->picture_structure = hx->s.picture_structure;
/* accumulate per-slave error counts into the master context */
7538 for(i = 1; i < context_count; i++)
7539 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split buf into NAL units (length-prefixed for AVC/mp4, start-code
 * delimited for Annex B), unescape each one via decode_nal(), and dispatch
 * it by nal_unit_type (slice headers/data, SPS, PPS, SEI, ...).  Slices are
 * queued across h->max_contexts thread contexts and flushed through
 * execute_decode_slices().
 * NOTE(review): this listing is elided — error-return lines, variable
 * declarations and some braces are not visible; only visible lines edited.
 * @return number of bytes consumed, or a negative value on error
 */
7544 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7545 MpegEncContext * const s = &h->s;
7546 AVCodecContext * const avctx= s->avctx;
7548 H264Context *hx; ///< thread context
7549 int context_count = 0;
7551 h->max_contexts = avctx->thread_count;
7554 for(i=0; i<50; i++){
7555 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7558 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7559 h->current_slice = 0;
7560 if (!s->first_field)
7561 s->current_picture_ptr= NULL;
7573 if(buf_index >= buf_size) break;
// AVC/mp4 framing: read the big-endian NAL length prefix
7575 for(i = 0; i < h->nal_length_size; i++)
7576 nalsize = (nalsize << 8) | buf[buf_index++];
7577 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7582 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7587 // start code prefix search
7588 for(; buf_index + 3 < buf_size; buf_index++){
7589 // This should always succeed in the first iteration.
7590 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7594 if(buf_index+3 >= buf_size) break;
7599 hx = h->thread_context[context_count];
7601 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7602 if (ptr==NULL || dst_length < 0){
// FIX: test dst_length before indexing — the old operand order read
// ptr[-1] (out of bounds) when dst_length was 0. Strips trailing zero bytes.
7605 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7607 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7609 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7610 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7613 if (h->is_avc && (nalsize != consumed)){
7614 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7618 buf_index += consumed;
// skip non-reference NALs when the caller requested frame dropping
7620 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7621 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7626 switch(hx->nal_unit_type){
7628 if (h->nal_unit_type != NAL_IDR_SLICE) {
7629 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7632 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7634 init_get_bits(&hx->s.gb, ptr, bit_length);
7636 hx->inter_gb_ptr= &hx->s.gb;
7637 hx->s.data_partitioning = 0;
7639 if((err = decode_slice_header(hx, h)))
7642 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// only schedule the slice for decoding if it survives all skip filters
7643 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7644 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7645 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7646 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7647 && avctx->skip_frame < AVDISCARD_ALL)
7651 init_get_bits(&hx->s.gb, ptr, bit_length);
7653 hx->inter_gb_ptr= NULL;
7654 hx->s.data_partitioning = 1;
7656 err = decode_slice_header(hx, h);
7659 init_get_bits(&hx->intra_gb, ptr, bit_length);
7660 hx->intra_gb_ptr= &hx->intra_gb;
7663 init_get_bits(&hx->inter_gb, ptr, bit_length);
7664 hx->inter_gb_ptr= &hx->inter_gb;
7666 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7667 && s->context_initialized
7669 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7670 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7671 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7672 && avctx->skip_frame < AVDISCARD_ALL)
7676 init_get_bits(&s->gb, ptr, bit_length);
7680 init_get_bits(&s->gb, ptr, bit_length);
7681 decode_seq_parameter_set(h);
7683 if(s->flags& CODEC_FLAG_LOW_DELAY)
7686 if(avctx->has_b_frames < 2)
7687 avctx->has_b_frames= !s->low_delay;
7690 init_get_bits(&s->gb, ptr, bit_length);
7692 decode_picture_parameter_set(h, bit_length);
7696 case NAL_END_SEQUENCE:
7697 case NAL_END_STREAM:
7698 case NAL_FILLER_DATA:
7700 case NAL_AUXILIARY_SLICE:
7703 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// flush the batch once all thread contexts have a slice queued
7706 if(context_count == h->max_contexts) {
7707 execute_decode_slices(h, context_count);
7712 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7714 /* Slice could not be decoded in parallel mode, copy down
7715 * NAL unit stuff to context 0 and restart. Note that
7716 * rbsp_buffer is not transferred, but since we no longer
7717 * run in parallel mode this should not be an issue. */
7718 h->nal_unit_type = hx->nal_unit_type;
7719 h->nal_ref_idc = hx->nal_ref_idc;
7725 execute_decode_slices(h, context_count);
7730 * returns the number of bytes consumed for building the current frame
7732 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7733 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7734 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* NOTE(review): the return statement is elided from this view; presumably
 * the adjusted pos is returned — confirm against the full source. */
/* Top-level AVCodec decode callback: parse one input packet into NAL units,
 * decode them, then reorder and emit at most one finished frame via *pict.
 * On buf_size==0 (end of stream) the delayed-picture buffer is drained.
 * NOTE(review): this listing is elided — declarations, error returns and
 * some braces are missing from view; comments below cover visible lines. */
7739 static int decode_frame(AVCodecContext *avctx,
7740 void *data, int *data_size,
7741 const uint8_t *buf, int buf_size)
7743 H264Context *h = avctx->priv_data;
7744 MpegEncContext *s = &h->s;
7745 AVFrame *pict = data;
7748 s->flags= avctx->flags;
7749 s->flags2= avctx->flags2;
7751 /* end of stream, output what is still in the buffers */
7752 if (buf_size == 0) {
7756 //FIXME factorize this with the output code below
7757 out = h->delayed_pic[0];
/* pick the delayed picture with the smallest POC for output */
7759 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7760 if(h->delayed_pic[i]->poc < out->poc){
7761 out = h->delayed_pic[i];
/* compact the delayed-picture array over the emitted slot */
7765 for(i=out_idx; h->delayed_pic[i]; i++)
7766 h->delayed_pic[i] = h->delayed_pic[i+1];
7769 *data_size = sizeof(AVFrame);
7770 *pict= *(AVFrame*)out;
/* first packet of an AVC/mp4 stream: parse the avcC extradata box */
7776 if(h->is_avc && !h->got_avcC) {
7777 int i, cnt, nalsize;
7778 unsigned char *p = avctx->extradata;
7779 if(avctx->extradata_size < 7) {
7780 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7784 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7787 /* sps and pps in the avcC always have length coded with 2 bytes,
7788 so put a fake nal_length_size = 2 while parsing them */
7789 h->nal_length_size = 2;
7790 // Decode sps from avcC
7791 cnt = *(p+5) & 0x1f; // Number of sps
7793 for (i = 0; i < cnt; i++) {
7794 nalsize = AV_RB16(p) + 2;
7795 if(decode_nal_units(h, p, nalsize) < 0) {
7796 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7801 // Decode pps from avcC
7802 cnt = *(p++); // Number of pps
7803 for (i = 0; i < cnt; i++) {
7804 nalsize = AV_RB16(p) + 2;
7805 if(decode_nal_units(h, p, nalsize) != nalsize) {
7806 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7811 // Now store right nal length size, that will be use to parse all other nals
7812 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7813 // Do not reparse avcC
/* Annex B streams may carry SPS/PPS in plain extradata: parse it once */
7817 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7818 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7822 buf_index=decode_nal_units(h, buf, buf_size);
7826 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7827 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7828 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* frame is complete (or chunked decoding finished the last MB row) */
7832 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7833 Picture *out = s->current_picture_ptr;
7834 Picture *cur = s->current_picture_ptr;
7835 int i, pics, cross_idr, out_of_order, out_idx;
7839 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7840 s->current_picture_ptr->pict_type= s->pict_type;
7843 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/* carry POC state forward for decoding the next picture */
7844 h->prev_poc_msb= h->poc_msb;
7845 h->prev_poc_lsb= h->poc_lsb;
7847 h->prev_frame_num_offset= h->frame_num_offset;
7848 h->prev_frame_num= h->frame_num;
7851 * FIXME: Error handling code does not seem to support interlaced
7852 * when slices span multiple rows
7853 * The ff_er_add_slice calls don't work right for bottom
7854 * fields; they cause massive erroneous error concealing
7855 * Error marking covers both fields (top and bottom).
7856 * This causes a mismatched s->error_count
7857 * and a bad error table. Further, the error count goes to
7858 * INT_MAX when called for bottom field, because mb_y is
7859 * past end by one (callers fault) and resync_mb_y != 0
7860 * causes problems for the first MB line, too.
7867 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7868 /* Wait for second field. */
7872 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7873 /* Derive top_field_first from field pocs. */
7874 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7876 //FIXME do something with unavailable reference frames
7878 /* Sort B-frames into display order */
7880 if(h->sps.bitstream_restriction_flag
7881 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7882 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* without bitstream restrictions, strict compliance forces max delay */
7886 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7887 && !h->sps.bitstream_restriction_flag){
7888 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7893 while(h->delayed_pic[pics]) pics++;
7895 assert(pics <= MAX_DELAYED_PIC_COUNT);
7897 h->delayed_pic[pics++] = cur;
/* keep the picture alive while it sits in the reorder buffer */
7898 if(cur->reference == 0)
7899 cur->reference = DELAYED_PIC_REF;
7901 out = h->delayed_pic[0];
7903 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7904 if(h->delayed_pic[i]->poc < out->poc){
7905 out = h->delayed_pic[i];
7908 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7910 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7912 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
/* grow the reorder delay when out-of-order output is detected */
7914 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7916 ((!cross_idr && out->poc > h->outputed_poc + 2)
7917 || cur->pict_type == FF_B_TYPE)))
7920 s->avctx->has_b_frames++;
7923 if(out_of_order || pics > s->avctx->has_b_frames){
7924 out->reference &= ~DELAYED_PIC_REF;
7925 for(i=out_idx; h->delayed_pic[i]; i++)
7926 h->delayed_pic[i] = h->delayed_pic[i+1];
7928 if(!out_of_order && pics > s->avctx->has_b_frames){
7929 *data_size = sizeof(AVFrame);
7931 h->outputed_poc = out->poc;
7932 *pict= *(AVFrame*)out;
7934 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7939 assert(pict->data[0] || !*data_size);
7940 ff_print_debug_info(s, pict);
7941 //printf("out %d\n", (int)pict->data[0]);
7944 /* Return the Picture timestamp as the frame number */
7945 /* we subtract 1 because it is added on utils.c */
7946 avctx->frame_number = s->picture_number - 1;
7948 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbour-macroblock availability flags for the
 * current MB: a neighbour counts as available only if it exists inside the
 * picture AND belongs to the same slice (slice_table match).
 * Index layout derived from the offsets used below:
 *   [0] top-left, [1] top, [2] top-right, [3] left, [4]/[5] fixed (FIXME).
 * NOTE(review): lines are elided between [2] and [3] in this view. */
7951 static inline void fill_mb_avail(H264Context *h){
7952 MpegEncContext * const s = &h->s;
7953 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7956 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7957 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7958 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7964 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7965 h->mb_avail[4]= 1; //FIXME move out
7966 h->mb_avail[5]= 0; //FIXME move out
7974 #define SIZE (COUNT*40)
7980 // int int_temp[10000];
7982 AVCodecContext avctx;
7984 dsputil_init(&dsp, &avctx);
7986 init_put_bits(&pb, temp, SIZE);
7987 printf("testing unsigned exp golomb\n");
7988 for(i=0; i<COUNT; i++){
7990 set_ue_golomb(&pb, i);
7991 STOP_TIMER("set_ue_golomb");
7993 flush_put_bits(&pb);
7995 init_get_bits(&gb, temp, 8*SIZE);
7996 for(i=0; i<COUNT; i++){
7999 s= show_bits(&gb, 24);
8002 j= get_ue_golomb(&gb);
8004 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8007 STOP_TIMER("get_ue_golomb");
8011 init_put_bits(&pb, temp, SIZE);
8012 printf("testing signed exp golomb\n");
8013 for(i=0; i<COUNT; i++){
8015 set_se_golomb(&pb, i - COUNT/2);
8016 STOP_TIMER("set_se_golomb");
8018 flush_put_bits(&pb);
8020 init_get_bits(&gb, temp, 8*SIZE);
8021 for(i=0; i<COUNT; i++){
8024 s= show_bits(&gb, 24);
8027 j= get_se_golomb(&gb);
8028 if(j != i - COUNT/2){
8029 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8032 STOP_TIMER("get_se_golomb");
8036 printf("testing 4x4 (I)DCT\n");
8039 uint8_t src[16], ref[16];
8040 uint64_t error= 0, max_error=0;
8042 for(i=0; i<COUNT; i++){
8044 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8045 for(j=0; j<16; j++){
8046 ref[j]= random()%255;
8047 src[j]= random()%255;
8050 h264_diff_dct_c(block, src, ref, 4);
8053 for(j=0; j<16; j++){
8054 // printf("%d ", block[j]);
8055 block[j]= block[j]*4;
8056 if(j&1) block[j]= (block[j]*4 + 2)/5;
8057 if(j&4) block[j]= (block[j]*4 + 2)/5;
8061 s->dsp.h264_idct_add(ref, block, 4);
8062 /* for(j=0; j<16; j++){
8063 printf("%d ", ref[j]);
8067 for(j=0; j<16; j++){
8068 int diff= FFABS(src[j] - ref[j]);
8071 max_error= FFMAX(max_error, diff);
8074 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8075 printf("testing quantizer\n");
8076 for(qp=0; qp<52; qp++){
8078 src1_block[i]= src2_block[i]= random()%255;
8081 printf("Testing NAL layer\n");
8083 uint8_t bitstream[COUNT];
8084 uint8_t nal[COUNT*2];
8086 memset(&h, 0, sizeof(H264Context));
8088 for(i=0; i<COUNT; i++){
8096 for(j=0; j<COUNT; j++){
8097 bitstream[j]= (random() % 255) + 1;
8100 for(j=0; j<zeros; j++){
8101 int pos= random() % COUNT;
8102 while(bitstream[pos] == 0){
8111 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8113 printf("encoding failed\n");
8117 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8121 if(out_length != COUNT){
8122 printf("incorrect length %d %d\n", out_length, COUNT);
8126 if(consumed != nal_length){
8127 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8131 if(memcmp(bitstream, out, COUNT)){
8132 printf("mismatch\n");
8138 printf("Testing RBSP\n");
/* AVCodec close callback: release the per-context RBSP unescape buffers and
 * the dynamically allocated decoder tables.
 * NOTE(review): the return statement and any MpegEncContext teardown are
 * elided from this view. */
static av_cold int decode_end(AVCodecContext *avctx)
8148 H264Context *h = avctx->priv_data;
8149 MpegEncContext *s = &h->s;
8151 av_freep(&h->rbsp_buffer[0]);
8152 av_freep(&h->rbsp_buffer[1]);
8153 free_tables(h); //FIXME cleanup init stuff perhaps
8156 // memset(h, 0, sizeof(H264Context));
8162 AVCodec h264_decoder = {
8166 sizeof(H264Context),
8171 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8173 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),