2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC code tables (coefficient tokens, total-zeros, run-before), with
 * separate variants for the 2x2 chroma DC block. */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations: SVQ3 IDCT helpers and the per-MB in-loop
 * deblocking filters defined later in this file. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
65 static av_always_inline uint32_t pack16to32(int a, int b){
/* Packs two 16-bit values into one uint32_t such that, regardless of host
 * endianness, 'a' occupies the bytes that come first in memory (the operand
 * order is swapped on big-endian hosts). */
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
/* qp % 6 lookup table for QP values 0..51 — avoids a runtime modulo. */
73 const uint8_t ff_rem6[52]={
74 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* qp / 6 lookup table for QP values 0..51 — avoids a runtime division. */
77 const uint8_t ff_div6[52]={
78 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero-count cache, mv/ref/mvd caches, direct-mode cache, CBP) from the
 * stored per-picture tables, resolving the top/left/topleft/topright
 * neighbour MB addresses — including the MBAFF field/frame remapping.
 * @param mb_type    type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the deblocking
 *                    filter rather than for decoding (allows shortcuts). */
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
88 int topleft_partition= -1;
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour addresses; the MBAFF branch below
 * overrides them per field/frame pairing. */
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbour MB addresses depend on whether the current and each
 * neighbouring MB pair is coded frame or field, and on whether this is the
 * top or bottom MB of its pair. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
/* Publish the resolved neighbour addresses for use elsewhere
 * (e.g. the deblocking filter). */
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbour counts as available unless its
 * slice_table entry is the "unavailable" marker (255). */
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
/* Luma nnz flags for deblocking were packed into a 16-bit word at
 * non_zero_count[mb_xy][14] by write_back_non_zero_count(). */
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
/* Replicate each 8x8 block's ref index over its four 4x4 cells
 * (pack16to32(...)*0x0101 duplicates the two bytes). */
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decoding path: a neighbour is available only if it belongs to the
 * same slice as the current macroblock. */
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra sample-availability bitmasks; cleared per neighbour below when a
 * neighbour is missing or excluded by constrained_intra_pred. */
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
/* Cache the neighbours' 4x4 intra prediction modes; 'pred' (set in an
 * elided line above each else-branch) marks unavailable neighbours. */
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
295 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Non-zero-count cache: top neighbour row, or the 0/64 filler (CABAC
 * inter uses 0, otherwise 64) when the neighbour is unavailable. */
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Left-neighbour nnz values, per field half when MBAFF. */
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of the top and left neighbours (for CABAC context derivation). */
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion-vector / reference-index caches for inter prediction. */
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
/* Top row: copy the neighbour's bottom b-row of MVs and its bottom
 * two 8x8 ref indices into the cache row above the current MB. */
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left column: two entries per field half (MBAFF uses both i=0,1). */
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* Topleft corner: topleft_partition (set in the MBAFF block above)
 * selects the middle-of-MB partition instead of the bottom-right one. */
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Mark the interior padding cells of the 8-wide cache unavailable. */
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
449 /* XXX beurk, Load mvd */
/* MV-difference cache (CABAC only): same layout as the mv cache. */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B-slices: cache the neighbours' direct-mode flags per 8x8 block. */
484 if(h->slice_type_nos == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame/field scaling: MAP_F2F rescales cached neighbour refs and
 * vertical MV/MVD components when neighbour and current MB differ in
 * frame/field coding (frame->field halves MVy, field->frame doubles it). */
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Writes the current MB's 4x4 intra prediction modes from the right and
 * bottom edges of the prediction cache back into the per-picture
 * h->intra4x4_pred_mode table, so following MBs can read them as their
 * top/left neighbours. */
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Returns 0 on success; a mode that requires unavailable samples and has no
 * fallback (table entry -1) is an error (error path elided here). */
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
/* Remap tables indexed by prediction mode: -1 = invalid with the missing
 * neighbour, 0 = keep, otherwise the replacement DC variant. */
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
573 if(!(h->top_samples_available&0x8000)){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
585 if(!(h->left_samples_available&0x8000)){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for the 16x16 luma / 8x8 chroma
 * prediction mode; returns the (possibly remapped) mode, negative on error. */
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if(!(h->left_samples_available&0x8000)){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
/* Prediction is the minimum of the left and top neighbour modes; a negative
 * cached value marks an unavailable neighbour, forcing DC_PRED. */
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/* Writes the current MB's non-zero coefficient counts from the cache back
 * into the per-picture h->non_zero_count table (right/bottom edges for the
 * luma block, plus the chroma entries). */
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
667 // store all luma nnzs, for deblocking
/* Pack one bit per 4x4 luma block into a 16-bit word at index 14/15;
 * fill_caches() reads this back on the MBAFF deblocking path. */
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non-zero coefficients.
677 * @param n block index
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
/* i holds left+top (summation elided here); values >= 64 encode
 * "neighbour unavailable", so only real counts are averaged (rounded up). */
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" (top-right diagonal) motion vector predictor for block i,
 * falling back to the top-left neighbour when top-right is unavailable.
 * Stores a pointer to the MV in *C and returns its reference index.
 * Contains special-case MBAFF handling that rescales field/frame MVs. */
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch cache slot scan8[0]-2 receives the synthesized MV so *C can
 * uniformly point into mv_cache. */
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: reads the MV at 4x4 coords (X4,Y4) from the current
 * picture, rescales the vertical component by MV_OP (frame<->field)
 * and the ref index by REF_OP, then returns that ref index. */
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF common case: use top-right if available, else top-left. */
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
/* Standard H.264 median MV prediction from neighbours A (left), B (top)
 * and C (diagonal, via fetch_diagonal_mv). */
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
781 }else if(match_count==1){
/* Exactly one neighbour uses this ref: take its MV directly. */
785 }else if(top_ref==ref){
/* No neighbour matches: median of all three, unless only the left
 * neighbour is available at all (then take A). */
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
/* 16x8 partitions prefer a single directional neighbour (top for the upper
 * partition, left for the lower) when its ref matches; otherwise falls back
 * to the generic median prediction. */
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Fallback: generic median prediction. */
837 pred_motion(h, n, 4, list, ref, mx, my);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
/* 8x16 partitions prefer a single directional neighbour (left for the left
 * partition, top-right diagonal for the right) when its ref matches;
 * otherwise falls back to the generic median prediction. */
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
/* Fallback: generic median prediction. */
874 pred_motion(h, n, 2, list, ref, mx, my);
/* P-Skip MV prediction: the MV is (0,0) when either the top or left
 * neighbour is unavailable, or when either one uses ref 0 with a zero MV;
 * otherwise the normal median prediction applies. */
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
891 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct-mode distance scale factors (one per list-0
 * reference) from POC distances: tb/td scaled to the 8.8-ish fixed point
 * range used by MV scaling, clipped to [-1024, 1023]. td==0 yields the
 * neutral factor 256. Also mirrors the factors into the per-field table. */
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
904 h->dist_scale_factor[i] = 256;
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Initializes the temporal-direct-mode reference mapping: records the
 * current picture's ref counts/POCs, and builds map_col_to_list0, which
 * maps each reference of the co-located (list-1[0]) picture to the current
 * list-0 reference with the same POC (0 when no match exists). Skipped for
 * non-B slices and for spatial direct prediction. */
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
/* Field variant: each frame ref j expands to field refs 2j / 2j+1. */
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1004 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1009 if(ref[0] < 0 && ref[1] < 0){
1010 ref[0] = ref[1] = 0;
1011 mv[0][0] = mv[0][1] =
1012 mv[1][0] = mv[1][1] = 0;
1014 for(list=0; list<2; list++){
1016 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1018 mv[list][0] = mv[list][1] = 0;
1024 *mb_type &= ~MB_TYPE_L1;
1025 sub_mb_type &= ~MB_TYPE_L1;
1026 }else if(ref[0] < 0){
1028 *mb_type &= ~MB_TYPE_L0;
1029 sub_mb_type &= ~MB_TYPE_L0;
1032 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1033 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1034 int mb_types_col[2];
1035 int b8_stride = h->b8_stride;
1036 int b4_stride = h->b_stride;
1038 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1040 if(IS_INTERLACED(*mb_type)){
1041 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1042 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1044 l1ref0 -= 2*b8_stride;
1045 l1ref1 -= 2*b8_stride;
1046 l1mv0 -= 4*b4_stride;
1047 l1mv1 -= 4*b4_stride;
1052 int cur_poc = s->current_picture_ptr->poc;
1053 int *col_poc = h->ref_list[1]->field_poc;
1054 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1055 int dy = 2*col_parity - (s->mb_y&1);
1057 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1058 l1ref0 += dy*b8_stride;
1059 l1ref1 += dy*b8_stride;
1060 l1mv0 += 2*dy*b4_stride;
1061 l1mv1 += 2*dy*b4_stride;
1065 for(i8=0; i8<4; i8++){
1068 int xy8 = x8+y8*b8_stride;
1069 int xy4 = 3*x8+y8*b4_stride;
1072 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1074 h->sub_mb_type[i8] = sub_mb_type;
1076 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1077 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1078 if(!IS_INTRA(mb_types_col[y8])
1079 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1080 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1082 a= pack16to32(mv[0][0],mv[0][1]);
1084 b= pack16to32(mv[1][0],mv[1][1]);
1086 a= pack16to32(mv[0][0],mv[0][1]);
1087 b= pack16to32(mv[1][0],mv[1][1]);
1089 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1090 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1092 }else if(IS_16X16(*mb_type)){
1095 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1096 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1097 if(!IS_INTRA(mb_type_col)
1098 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1099 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1100 && (h->x264_build>33 || !h->x264_build)))){
1102 a= pack16to32(mv[0][0],mv[0][1]);
1104 b= pack16to32(mv[1][0],mv[1][1]);
1106 a= pack16to32(mv[0][0],mv[0][1]);
1107 b= pack16to32(mv[1][0],mv[1][1]);
1109 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1110 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1112 for(i8=0; i8<4; i8++){
1113 const int x8 = i8&1;
1114 const int y8 = i8>>1;
1116 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1118 h->sub_mb_type[i8] = sub_mb_type;
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1122 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1123 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1126 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1127 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1128 && (h->x264_build>33 || !h->x264_build)))){
1129 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1130 if(IS_SUB_8X8(sub_mb_type)){
1131 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1132 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1134 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1139 for(i4=0; i4<4; i4++){
1140 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1141 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1143 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1145 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1151 }else{ /* direct temporal mv pred */
1152 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1153 const int *dist_scale_factor = h->dist_scale_factor;
1156 if(IS_INTERLACED(*mb_type)){
1157 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1158 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1159 dist_scale_factor = h->dist_scale_factor_field;
1161 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1162 /* FIXME assumes direct_8x8_inference == 1 */
1163 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1164 int mb_types_col[2];
1167 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1168 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1169 | (*mb_type & MB_TYPE_INTERLACED);
1170 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1172 if(IS_INTERLACED(*mb_type)){
1173 /* frame to field scaling */
1174 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1175 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1177 l1ref0 -= 2*h->b8_stride;
1178 l1ref1 -= 2*h->b8_stride;
1179 l1mv0 -= 4*h->b_stride;
1180 l1mv1 -= 4*h->b_stride;
1184 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1185 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1187 *mb_type |= MB_TYPE_16x8;
1189 *mb_type |= MB_TYPE_8x8;
1191 /* field to frame scaling */
1192 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1193 * but in MBAFF, top and bottom POC are equal */
1194 int dy = (s->mb_y&1) ? 1 : 2;
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1197 l1ref0 += dy*h->b8_stride;
1198 l1ref1 += dy*h->b8_stride;
1199 l1mv0 += 2*dy*h->b_stride;
1200 l1mv1 += 2*dy*h->b_stride;
1203 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1205 *mb_type |= MB_TYPE_16x16;
1207 *mb_type |= MB_TYPE_8x8;
1210 for(i8=0; i8<4; i8++){
1211 const int x8 = i8&1;
1212 const int y8 = i8>>1;
1214 const int16_t (*l1mv)[2]= l1mv0;
1216 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 h->sub_mb_type[i8] = sub_mb_type;
1220 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1221 if(IS_INTRA(mb_types_col[y8])){
1222 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1223 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1224 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1230 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1232 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1235 scale = dist_scale_factor[ref0];
1236 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1239 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1240 int my_col = (mv_col[1]<<y_shift)/2;
1241 int mx = (scale * mv_col[0] + 128) >> 8;
1242 int my = (scale * my_col + 128) >> 8;
1243 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1244 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col)){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1261 : map_col_to_list0[1][l1ref1[0]];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col)){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*h->b8_stride];
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (MVs, reference indices, CABAC mvd and
 * direct flags) back into the frame-wide tables of the current picture.
 * NOTE(review): listing is elided; loop headers / closing braces missing here.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* an unused list still needs its ref_index marked so the loop filter
 * sees consistent data */
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two uint64 halves) per row from the 8x8-strided cache */
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct flags are only needed for CABAC B-slices */
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
/* first byte: 1 forbidden bit, 2 bits nal_ref_idc, 5 bits nal_unit_type */
1381 //    src[0]&0x80;                //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
/* scan for 00 00 0x patterns: either an emulation_prevention byte (0x03)
 * or a start code that terminates this NAL unit */
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
/* fast path: no escape sequences found, return the input in place */
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
/* v holds the final byte; the rbsp_stop_one_bit is located in it —
 * body elided in this listing */
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * IDCT transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
/* the 16 luma DC coefficients are scattered in the 16x16 block at these
 * offsets (one per 4x4 sub-block) */
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
/* horizontal pass: 4-point Hadamard butterflies into temp[] */
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass plus dequantization; stores elided in this listing */
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * DCT transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 //    const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
/* same scatter layout as the dequant/IDCT counterpart above */
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* forward 4x4 Hadamard, horizontal pass */
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass; results are halved (>>1) as per the encoder-side DC
 * transform */
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 chroma DC inverse transform + dequantization.
 * NOTE(review): the intermediate butterfly (e = a-b etc.) is elided in
 * this listing. */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
/* the 4 chroma DC values sit at the corners of a 2x2 grid in the block */
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* forward 2x2 chroma DC transform (encoder side); butterfly elided in
 * this listing */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
1590 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1591 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Quantizes a 4x4 block (encoder side); returns the index of the last
 * non-zero coefficient. The separate_dc paths use a different shift for
 * the DC coefficient. Listing elided: loop headers/else-branches missing. */
1592 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1594 const int * const quant_table= quant_coeff[qscale];
/* rounding bias: 1/3 for intra, 1/6 for inter (standard H.264 heuristic) */
1595 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold trick: a single unsigned compare detects |level| small
 * enough to quantize to zero */
1596 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597 const unsigned int threshold2= (threshold1<<1);
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
/* luma DC: quant_coeff table is offset by 18 — TODO confirm table layout */
1607 int level= block[0]*quant_coeff[qscale+18][0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1613 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1616 // last_non_zero = i;
1621 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623 const unsigned int dc_threshold2= (dc_threshold1<<1);
1625 int level= block[0]*quant_table[0];
1626 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1628 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1631 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1634 // last_non_zero = i;
/* AC coefficients in scan order */
1647 const int j= scantable[i];
1648 int level= block[j]*quant_table[j];
1650 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
1651 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652 if(((unsigned)(level+threshold1))>threshold2){
1654 level= (bias + level)>>QUANT_SHIFT;
1657 level= (bias - level)>>QUANT_SHIFT;
1666 return last_non_zero;
/**
 * Motion compensation for one partition in one direction (one list):
 * quarter-pel luma and eighth-pel chroma interpolation from picture `pic`,
 * with edge emulation when the MV points outside the picture.
 * NOTE(review): listing elided; some guards/braces not visible here.
 */
1669 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671 int src_x_offset, int src_y_offset,
1672 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673 MpegEncContext * const s = &h->s;
/* mx/my are in quarter-pel units; offsets arrive in full pels * 8 */
1674 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1675 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1676 const int luma_xy= (mx&3) + ((my&3)<<2);
1677 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678 uint8_t * src_cb, * src_cr;
1679 int extra_width= h->emu_edge_width;
1680 int extra_height= h->emu_edge_height;
1682 const int full_mx= mx>>2;
1683 const int full_my= my>>2;
1684 const int pic_width  = 16*s->mb_width;
1685 const int pic_height = 16*s->mb_height >> MB_FIELD;
1687 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* sub-pel filtering reads 2 extra pixels on each side */
1690 if(mx&7) extra_width -= 3;
1691 if(my&7) extra_height-= 3;
1693 if(   full_mx < 0-extra_width
1694 || full_my < 0-extra_height
1695 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1696 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1697 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1702 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1704 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1707 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1710 // chroma offset when predicting from a field of opposite parity
1711 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1712 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1714 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1719 src_cb= s->edge_emu_buffer;
1721 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1724 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1725 src_cr= s->edge_emu_buffer;
1727 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: puts the list-0
 * prediction, then (if bi-predicted) averages in the list-1 prediction
 * by switching to the _avg operators.
 */
1730 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1731 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732 int x_offset, int y_offset,
1733 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735 int list0, int list1){
1736 MpegEncContext * const s = &h->s;
1737 qpel_mc_func *qpix_op=  qpix_put;
1738 h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to the partition; offsets are in chroma pels */
1740 dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1741 dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1742 dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1743 x_offset += 8*s->mb_x;
1744 y_offset += 8*(s->mb_y >> MB_FIELD);
1747 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1748 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750 qpix_op, chroma_op);
/* second direction averages on top of the first */
1753 chroma_op= chroma_avg;
1757 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1758 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760 qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for one partition.
 * Bi-directional: both predictions are generated (list 1 into a scratch
 * buffer) and combined with explicit or implicit weights.
 * Uni-directional: the single prediction is scaled/offset in place.
 */
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1773 dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1774 dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1775 dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1776 x_offset += 8*s->mb_x;
1777 y_offset += 8*(s->mb_y >> MB_FIELD);
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
/* use_weight==2 means implicit weighted prediction (weights sum to 64) */
1795 if(h->use_weight == 2){
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
1798 luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit bi-weights and offsets signalled in the slice header */
1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional weighted prediction */
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition either to the weighted or the standard MC path.
 * Implicit weighting (use_weight==2) with equal weights (32/32) is just an
 * average, so it can use the cheaper standard path.
 */
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1849 static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
/* approximate target address from the MB's first MV, shifted 4 MBs
 * ahead horizontally (+64 bytes) */
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous, one prefetch covers both via the
 * src[2]-src[1] stride trick */
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock, dispatching
 * per-partition to mc_part() according to the MB / sub-MB partitioning.
 * NOTE(review): listing elided; the 8x8 loop header and some braces are
 * not visible here.
 */
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869 MpegEncContext * const s = &h->s;
1870 const int mb_xy= h->mb_xy;
1871 const int mb_type= s->current_picture.mb_type[mb_xy];
1873 assert(IS_INTER(mb_type));
1875 prefetch_motion(h, 0);
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880 &weight_op[0], &weight_avg[0],
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1882 }else if(IS_16X8(mb_type)){
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885 &weight_op[1], &weight_avg[1],
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1891 }else if(IS_8X16(mb_type)){
1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894 &weight_op[2], &weight_avg[2],
1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 partitioning: each 8x8 block can be further split */
1903 assert(IS_8X8(mb_type));
1906 const int sub_mb_type= h->sub_mb_type[i];
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914 &weight_op[3], &weight_avg[3],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919 &weight_op[4], &weight_avg[4],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
1926 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928 &weight_op[5], &weight_avg[5],
1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936 assert(IS_SUB_4X4(sub_mb_type));
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset +   (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942 &weight_op[6], &weight_avg[6],
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949 prefetch_motion(h, 1);
/**
 * Builds the static CAVLC VLC tables (coeff_token, total_zeros, run).
 * Guarded by a `done` flag so the tables are only initialized once.
 * NOTE(review): listing elided; loop headers/guard braces not visible.
 */
1952 static av_cold void decode_init_vlc(void){
1953 static int done = 0;
1959 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1960 &chroma_dc_coeff_token_len [0], 1, 1,
1961 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1964 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1965 &coeff_token_len [i][0], 1, 1,
1966 &coeff_token_bits[i][0], 1, 1, 1);
1970 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1971 &chroma_dc_total_zeros_len [i][0], 1, 1,
1972 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1974 for(i=0; i<15; i++){
1975 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1976 &total_zeros_len [i][0], 1, 1,
1977 &total_zeros_bits[i][0], 1, 1, 1);
1981 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1982 &run_len [i][0], 1, 1,
1983 &run_bits[i][0], 1, 1, 1);
/* run lengths >= 7 share a single dedicated table */
1985 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1986 &run_len [6][0], 1, 1,
1987 &run_bits[6][0], 1, 1, 1);
/**
 * Frees all per-context allocated tables and per-thread scratch buffers.
 * Safe on partially-initialized contexts: av_freep() tolerates NULL and
 * nulls the pointers.
 */
1991 static void free_tables(H264Context *h){
1994 av_freep(&h->intra4x4_pred_mode);
1995 av_freep(&h->chroma_pred_mode_table);
1996 av_freep(&h->cbp_table);
1997 av_freep(&h->mvd_table[0]);
1998 av_freep(&h->mvd_table[1]);
1999 av_freep(&h->direct_table);
2000 av_freep(&h->non_zero_count);
2001 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base, so just clear it */
2002 h->slice_table= NULL;
2004 av_freep(&h->mb2b_xy);
2005 av_freep(&h->mb2b8_xy);
2007 for(i = 0; i < MAX_SPS_COUNT; i++)
2008 av_freep(h->sps_buffers + i);
2010 for(i = 0; i < MAX_PPS_COUNT; i++)
2011 av_freep(h->pps_buffers + i);
/* per-thread contexts own their own top borders and scratchpad */
2013 for(i = 0; i < h->s.avctx->thread_count; i++) {
2014 hx = h->thread_context[i];
2016 av_freep(&hx->top_borders[1]);
2017 av_freep(&hx->top_borders[0]);
2018 av_freep(&hx->s.obmc_scratchpad);
/* Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If the two 8x8 scaling matrices are identical the
 * second table aliases the first instead of being recomputed. The entries
 * are stored transposed when a non-C IDCT is in use (it expects the other
 * coefficient order). */
2022 static void init_dequant8_coeff_table(H264Context *h){
2024 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2025 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2026 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2028 for(i=0; i<2; i++ ){
// share table 0 when both scaling matrices are the same
2029 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2030 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2034 for(q=0; q<52; q++){
// qp = 6*shift + idx: shift scales by powers of two, idx selects base coeffs
2035 int shift = ff_div6[q];
2036 int idx = ff_rem6[q];
2038 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2039 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2040 h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the 4x4 dequantization tables (6 matrices: intra/inter x
 * Y/Cb/Cr) for all 52 QP values from the PPS scaling matrices. Matrices
 * with identical scaling lists share one buffer. As with the 8x8 case,
 * entries are transposed for non-C IDCT implementations. */
2045 static void init_dequant4_coeff_table(H264Context *h){
2047 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2048 for(i=0; i<6; i++ ){
2049 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier buffer if this scaling list duplicates a previous one
2051 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2052 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2059 for(q=0; q<52; q++){
// +2 compensates for the 4x4 transform's smaller normalization
2060 int shift = ff_div6[q] + 2;
2061 int idx = ff_rem6[q];
2063 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2064 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2065 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS. 8x8 tables are only
 * needed when the PPS enables 8x8 transforms. With lossless transform
 * bypass, qp==0 entries are forced to the identity scale (1<<6) so the
 * dequant step becomes a no-op for bypassed blocks. */
2070 static void init_dequant_tables(H264Context *h){
2072 init_dequant4_coeff_table(h);
2073 if(h->pps.transform_8x8_mode)
2074 init_dequant8_coeff_table(h);
2075 if(h->sps.transform_bypass){
2078 h->dequant4_coeff[i][0][x] = 1<<6;
2079 if(h->pps.transform_8x8_mode)
2082 h->dequant8_coeff[i][0][x] = 1<<6;
2089 * needs width/height
/* Allocate the per-picture-geometry tables shared by all slice threads
 * (prediction-mode, non-zero-count, slice, cbp, mvd, direct tables and
 * the mb->b/b8 index maps). Requires s->mb_stride/mb_height to be set.
 * Returns 0 on success; CHECKED_ALLOCZ jumps to the (not visible here)
 * fail path on OOM. */
2091 static int alloc_tables(H264Context *h){
2092 MpegEncContext * const s = &h->s;
// +1 row so the row above the picture can be addressed without bounds checks
2093 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2096 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2098 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2099 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2102 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2104 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2105 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset so negative neighbors stay valid
2107 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2108 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2110 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2111 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// precompute macroblock index -> 4x4 block / 8x8 block index maps
2112 for(y=0; y<s->mb_height; y++){
2113 for(x=0; x<s->mb_width; x++){
2114 const int mb_xy= x + y*s->mb_stride;
2115 const int b_xy = 4*x + 4*y*h->b_stride;
2116 const int b8_xy= 2*x + 2*y*h->b8_stride;
2118 h->mb2b_xy [mb_xy]= b_xy;
2119 h->mb2b8_xy[mb_xy]= b8_xy;
// scratchpad is allocated later in frame_start() once linesize is known
2123 s->obmc_scratchpad = NULL;
2125 if(!h->dequant4_coeff[0])
2126 init_dequant_tables(h);
2135 * Mimic alloc_tables(), but for every context thread.
/* Share the tables allocated by alloc_tables() with another slice-thread
 * context: only pointers are copied, no data is duplicated, so src must
 * outlive dst. Per-thread state (obmc_scratchpad, prediction function
 * table) is initialized separately. */
2137 static void clone_tables(H264Context *dst, H264Context *src){
2138 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2139 dst->non_zero_count = src->non_zero_count;
2140 dst->slice_table = src->slice_table;
2141 dst->cbp_table = src->cbp_table;
2142 dst->mb2b_xy = src->mb2b_xy;
2143 dst->mb2b8_xy = src->mb2b8_xy;
2144 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2145 dst->mvd_table[0] = src->mvd_table[0];
2146 dst->mvd_table[1] = src->mvd_table[1];
2147 dst->direct_table = src->direct_table;
// scratchpad is per-thread; allocated lazily in frame_start()
2149 dst->s.obmc_scratchpad = NULL;
2150 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2155 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the per-thread top-border buffers (16 luma + 8+8 chroma bytes
 * per macroblock column) used by the deblocking/border-exchange code.
 * Returns 0 on success, -1 on allocation failure (cleanup is deferred to
 * free_tables()). */
2157 static int context_init(H264Context *h){
2158 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2159 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163 return -1; // free_tables will clean up for us
/* One-time initialization shared by decoder (and encoder) paths: copies
 * dimensions/codec id from the AVCodecContext, sets up intra prediction
 * functions, and installs flat (all-16) default scaling matrices so
 * dequant works before any PPS with explicit matrices is parsed. */
2166 static av_cold void common_init(H264Context *h){
2167 MpegEncContext * const s = &h->s;
2169 s->width = s->avctx->width;
2170 s->height = s->avctx->height;
2171 s->codec_id= s->avctx->codec->id;
2173 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 = "no PPS seen yet"; forces dequant table rebuild on first PPS
2175 h->dequant_coeff_pps= -1;
2176 s->unrestricted_mv=1;
2177 s->decode=1; //FIXME
// default scaling matrices: all 16 == flat (no frequency weighting)
2179 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2180 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up MpegEncContext defaults, H.264-specific
 * decoding parameters and the output pixel format, and detect AVC-style
 * (length-prefixed, extradata starts with version byte 1) bitstreams. */
2183 static av_cold int decode_init(AVCodecContext *avctx){
2184 H264Context *h= avctx->priv_data;
2185 MpegEncContext * const s = &h->s;
2187 MPV_decode_defaults(s);
2192 s->out_format = FMT_H264;
2193 s->workaround_bugs= avctx->workaround_bugs;
2196 // s->decode_mb= ff_h263_decode_mb;
2197 s->quarter_sample = 1;
// SVQ3 uses full-range (JPEG) chroma; plain H.264 uses limited range
2200 if(avctx->codec_id == CODEC_ID_SVQ3)
2201 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2203 avctx->pix_fmt= PIX_FMT_YUV420P;
// extradata beginning with 1 => avcC header (AVC/MP4 length-prefixed NALs)
2207 if(avctx->extradata_size > 0 && avctx->extradata &&
2208 *(char *)avctx->extradata == 1){
// thread context 0 is the main context itself
2215 h->thread_context[0] = h;
/* Begin decoding a new frame: start the MPV frame and error resilience,
 * reset key_frame (IDR flags are ORed in later per slice), precompute the
 * per-block destination offsets for frame and field (MBAFF) layouts, and
 * lazily allocate the per-thread scratch buffers that need linesize. */
2219 static int frame_start(H264Context *h){
2220 MpegEncContext * const s = &h->s;
2223 if(MPV_frame_start(s, s->avctx) < 0)
2225 ff_er_frame_start(s);
2227 * MPV_frame_start uses pict_type to derive key_frame.
2228 * This is incorrect for H.264; IDR markings must be used.
2229 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2230 * See decode_nal_units().
2232 s->current_picture_ptr->key_frame= 0;
2234 assert(s->linesize && s->uvlinesize);
// block_offset[0..15]: frame-mode luma offsets; [24..39]: field-mode (2x stride)
2236 for(i=0; i<16; i++){
2237 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2238 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma offsets: [16..23] frame mode, [40..47] field mode
2241 h->block_offset[16+i]=
2242 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243 h->block_offset[24+16+i]=
2244 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247 /* can't be in alloc_tables because linesize isn't known there.
2248 * FIXME: redo bipred weight to not require extra buffer? */
2249 for(i = 0; i < s->avctx->thread_count; i++)
2250 if(!h->thread_context[i]->s.obmc_scratchpad)
2251 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2253 /* some macroblocks will be accessed before they're available */
2254 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2255 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2257 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2259 // We mark the current picture as non-reference after allocating it, so
2260 // that if we break out due to an error it can be released automatically
2261 // in the next MPV_frame_start().
2262 // SVQ3 as well as most other codecs have only last/next/current and thus
2263 // get released even with set reference, besides SVQ3 and others do not
2264 // mark frames as reference later "naturally".
2265 if(s->codec_id != CODEC_ID_SVQ3)
2266 s->current_picture_ptr->reference= 0;
/* Save the right column (into left_border) and bottom row (into
 * top_borders[0]) of the just-decoded macroblock so the deblocking filter
 * of the neighbors can still read the pre-filter samples. Chroma is
 * skipped in gray-only decoding unless "simple". */
2270 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2271 MpegEncContext * const s = &h->s;
2275 src_cb -= uvlinesize;
2276 src_cr -= uvlinesize;
2278 // There are two lines saved, the line above the top macroblock of a pair,
2279 // and the line above the bottom macroblock
// left_border[0] is the corner sample (from the previously saved top row)
2280 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2281 for(i=1; i<17; i++){
2282 h->left_border[i]= src_y[15+i* linesize];
// save the bottom luma row (16 bytes) as two 64-bit stores
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2286 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2288 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
// chroma corners and right columns; Cb at +17, Cr at +17+9 in left_border
2289 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2290 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2292 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2293 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2295 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2296 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (or copy, depending on the XCHG macro's xchg flag) the saved
 * border samples with the picture edges around the current macroblock,
 * so intra prediction sees the unfiltered neighbors while deblocked data
 * stays in the frame. Called with xchg=1 before prediction and xchg=0
 * after, to restore. */
2300 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2301 MpegEncContext * const s = &h->s;
// deblocking_filter==2: slice-boundary-aware filtering - only treat
// neighbors in the same slice as available
2308 if(h->deblocking_filter == 2) {
2310 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2311 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2313 deblock_left = (s->mb_x > 0);
2314 deblock_top = (s->mb_y > 0);
// step back one row/column to address the border samples directly
2317 src_y -= linesize + 1;
2318 src_cb -= uvlinesize + 1;
2319 src_cr -= uvlinesize + 1;
2321 #define XCHG(a,b,t,xchg)\
// left luma column (17 samples incl. corner; skip corner if no top neighbor)
2328 for(i = !deblock_top; i<17; i++){
2329 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// top luma row; the +9 half and the top-right block are always exchanged
2334 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2335 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2336 if(s->mb_x+1 < s->mb_width){
2337 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2341 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2343 for(i = !deblock_top; i<9; i++){
2344 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2345 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2349 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2350 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock pair (32 luma rows), keeping two bottom rows - one in
 * top_borders[0] and one in top_borders[1] - plus both right columns. */
2355 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2356 MpegEncContext * const s = &h->s;
2359 src_y -= 2 * linesize;
2360 src_cb -= 2 * uvlinesize;
2361 src_cr -= 2 * uvlinesize;
2363 // There are two lines saved, the line above the top macroblock of a pair,
2364 // and the line above the bottom macroblock
// two corner samples, then the 32-row luma right column
2365 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2366 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2367 for(i=2; i<34; i++){
2368 h->left_border[i]= src_y[15+i* linesize];
// bottom two luma rows of the pair (rows 32 and 33 relative to src_y)
2371 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2372 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2373 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2374 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2376 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
// chroma corners + right columns; Cb block at +34, Cr block at +34+18
2377 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2378 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2379 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2380 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2381 for(i=2; i<18; i++){
2382 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2383 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2385 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2386 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2387 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2388 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border(): exchange the saved borders of a
 * macroblock pair with the frame edges, so intra prediction of the pair
 * sees unfiltered neighbor samples. xchg selects swap vs. one-way copy
 * in the XCHG macro. */
2392 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2393 MpegEncContext * const s = &h->s;
// top neighbor is a pair too, hence mb_y > 1 rather than > 0
2396 int deblock_left = (s->mb_x > 0);
2397 int deblock_top = (s->mb_y > 1);
2399 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2401 src_y -= 2 * linesize + 1;
2402 src_cb -= 2 * uvlinesize + 1;
2403 src_cr -= 2 * uvlinesize + 1;
2405 #define XCHG(a,b,t,xchg)\
// left luma column of the pair (34 entries; skip the two corners w/o top)
2412 for(i = (!deblock_top)<<1; i<34; i++){
2413 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// two top luma rows (one per field parity) plus top-right blocks
2418 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2419 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2420 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2421 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2422 if(s->mb_x+1 < s->mb_width){
2423 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2424 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2428 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2430 for(i = (!deblock_top) << 1; i<18; i++){
2431 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2432 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2437 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2439 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT-add for luma and chroma, and deblocking. "simple" is a
 * compile-time flag (the function is force-inlined) selecting the fast
 * path without MBAFF/field, PCM, gray, SVQ3 or encoder handling. */
2444 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2445 MpegEncContext * const s = &h->s;
2446 const int mb_x= s->mb_x;
2447 const int mb_y= s->mb_y;
2448 const int mb_xy= h->mb_xy;
2449 const int mb_type= s->current_picture.mb_type[mb_xy];
2450 uint8_t *dest_y, *dest_cb, *dest_cr;
2451 int linesize, uvlinesize /*dct_offset*/;
2453 int *block_offset = &h->block_offset[0];
2454 const unsigned int bottom = mb_y & 1;
// transform_bypass: lossless path (qp==0 + SPS flag); is_h264 excludes SVQ3
2455 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2456 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2457 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2459 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2460 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2461 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2463 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2464 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double strides, use the field block-offset table,
// and for the bottom field rewind dest to the second picture line
2466 if (!simple && MB_FIELD) {
2467 linesize = h->mb_linesize = s->linesize * 2;
2468 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2469 block_offset = &h->block_offset[24];
2470 if(mb_y&1){ //FIXME move out of this function?
2471 dest_y -= s->linesize*15;
2472 dest_cb-= s->uvlinesize*7;
2473 dest_cr-= s->uvlinesize*7;
// retag ref indices in the cache with the field parity for MC
2477 for(list=0; list<h->list_count; list++){
2478 if(!USES_LIST(mb_type, list))
2480 if(IS_16X16(mb_type)){
2481 int8_t *ref = &h->ref_cache[list][scan8[0]];
2482 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2484 for(i=0; i<16; i+=4){
2485 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2486 int ref = h->ref_cache[list][scan8[i]];
2488 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2494 linesize = h->mb_linesize = s->linesize;
2495 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2496 // dct_offset = s->linesize * 16;
// pick the residual-add functions: bypass = plain pixel add,
// otherwise 8x8 or 4x4 IDCT depending on the transform size
2499 if(transform_bypass){
2501 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2502 }else if(IS_8x8DCT(mb_type)){
2503 idct_dc_add = s->dsp.h264_idct8_dc_add;
2504 idct_add = s->dsp.h264_idct8_add;
2506 idct_dc_add = s->dsp.h264_idct_dc_add;
2507 idct_add = s->dsp.h264_idct_add;
// MBAFF intra: borrow the pair borders before predicting
2510 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2511 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2512 int mbt_y = mb_y&~1;
2513 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2514 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2515 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2516 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2519 if (!simple && IS_INTRA_PCM(mb_type)) {
2522 // The pixels are stored in h->mb array in the same order as levels,
2523 // copy them in output in the correct order.
2524 for(i=0; i<16; i++) {
2525 for (y=0; y<4; y++) {
2526 for (x=0; x<4; x++) {
2527 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2531 for(i=16; i<16+4; i++) {
2532 for (y=0; y<4; y++) {
2533 for (x=0; x<4; x++) {
2534 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2538 for(i=20; i<20+4; i++) {
2539 for (y=0; y<4; y++) {
2540 for (x=0; x<4; x++) {
2541 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2546 if(IS_INTRA(mb_type)){
// swap in unfiltered borders so prediction uses pre-deblocking neighbors
2547 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2548 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2550 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2551 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2552 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2555 if(IS_INTRA4x4(mb_type)){
2556 if(simple || !s->encoding){
2557 if(IS_8x8DCT(mb_type)){
// intra 8x8: predict and add residual per 8x8 block
2558 for(i=0; i<16; i+=4){
2559 uint8_t * const ptr= dest_y + block_offset[i];
2560 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2561 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2562 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2563 (h->topright_samples_available<<i)&0x4000, linesize);
// nnz==1 with only the DC coeff set: cheaper DC-only add
2565 if(nnz == 1 && h->mb[i*16])
2566 idct_dc_add(ptr, h->mb + i*16, linesize);
2568 idct_add(ptr, h->mb + i*16, linesize);
// intra 4x4: predict and add residual per 4x4 block
2572 for(i=0; i<16; i++){
2573 uint8_t * const ptr= dest_y + block_offset[i];
2575 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these modes need top-right samples; if unavailable, replicate
// the last available top sample across a 4-byte fake row
2578 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2579 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2580 assert(mb_y || linesize <= block_offset[i]);
2581 if(!topright_avail){
2582 tr= ptr[3 - linesize]*0x01010101;
2583 topright= (uint8_t*) &tr;
2585 topright= ptr + 4 - linesize;
2589 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2590 nnz = h->non_zero_count_cache[ scan8[i] ];
2593 if(nnz == 1 && h->mb[i*16])
2594 idct_dc_add(ptr, h->mb + i*16, linesize);
2596 idct_add(ptr, h->mb + i*16, linesize);
2598 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra 16x16: full-MB prediction, then dequant+IDCT the luma DC plane
2603 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2605 if(!transform_bypass)
2606 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2608 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// restore the deblocked borders after intra prediction
2610 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2611 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter macroblock: motion compensation
2613 hl_motion(h, dest_y, dest_cb, dest_cr,
2614 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2615 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2616 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add luma residuals (intra4x4 already did its own IDCT-add above)
2620 if(!IS_INTRA4x4(mb_type)){
2622 if(IS_INTRA16x16(mb_type)){
2623 for(i=0; i<16; i++){
2624 if(h->non_zero_count_cache[ scan8[i] ])
2625 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2626 else if(h->mb[i*16])
2627 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2630 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2631 for(i=0; i<16; i+=di){
2632 int nnz = h->non_zero_count_cache[ scan8[i] ];
2634 if(nnz==1 && h->mb[i*16])
2635 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2637 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// SVQ3 residual path
2642 for(i=0; i<16; i++){
2643 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2644 uint8_t * const ptr= dest_y + block_offset[i];
2645 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residuals (skipped for gray-only decoding)
2651 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2652 uint8_t *dest[2] = {dest_cb, dest_cr};
2653 if(transform_bypass){
2654 idct_add = idct_dc_add = s->dsp.add_pixels4;
2656 idct_add = s->dsp.h264_idct_add;
2657 idct_dc_add = s->dsp.h264_idct_dc_add;
// dequant+transform chroma DC (2x2) for both planes before AC add
2658 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2659 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2662 for(i=16; i<16+8; i++){
2663 if(h->non_zero_count_cache[ scan8[i] ])
2664 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2665 else if(h->mb[i*16])
2666 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2669 for(i=16; i<16+8; i++){
2670 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2671 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2672 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2678 if(h->deblocking_filter) {
2679 if (!simple && FRAME_MBAFF) {
2680 //FIXME try deblocking one mb at a time?
2681 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
// deblock a whole MB pair at once, once its bottom MB is decoded
2682 const int mb_y = s->mb_y - 1;
2683 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2684 const int mb_xy= mb_x + mb_y*s->mb_stride;
2685 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2686 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2687 if (!bottom) return;
2688 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2689 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2690 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2692 if(IS_INTRA(mb_type_top | mb_type_bottom))
2693 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2695 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// temporarily rewind position to filter the top MB of the pair
2698 s->mb_y--; h->mb_xy -= s->mb_stride;
2699 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2700 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2701 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2702 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2703 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
// then restore and filter the bottom MB
2705 s->mb_y++; h->mb_xy += s->mb_stride;
2706 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2707 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2708 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2709 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2710 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// non-MBAFF: back up borders and run the fast per-MB filter
2712 tprintf(h->s.avctx, "call filter_mb\n");
2713 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2714 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2715 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2716 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2717 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2723 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast path: hl_decode_mb_internal with simple=1 (no MBAFF/PCM/gray/SVQ3),
// letting the always-inline body fold away the uncommon-case branches.
2725 static void hl_decode_mb_simple(H264Context *h){
2726 hl_decode_mb_internal(h, 1);
2730 * Process a macroblock; this handles edge cases, such as interlacing.
// Slow path: simple=0 keeps all edge-case handling (interlacing, PCM,
// gray, SVQ3). av_noinline keeps this cold copy out of the hot caller.
2732 static void av_noinline hl_decode_mb_complex(H264Context *h){
2733 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant
 * based on the current macroblock's features and build configuration. */
2736 static void hl_decode_mb(H264Context *h){
2737 MpegEncContext * const s = &h->s;
2738 const int mb_xy= h->mb_xy;
2739 const int mb_type= s->current_picture.mb_type[mb_xy];
// any of these features requires the full (complex) decode path
2740 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2741 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2743 if(ENABLE_H264_ENCODER && !s->decode)
2747 hl_decode_mb_complex(h);
2748 else hl_decode_mb_simple(h);
/* Convert a frame Picture, in place, into a single-field view of the
 * given parity: for the bottom field the data pointers are advanced one
 * line, and all linesizes are doubled so only that field's lines are
 * addressed. Also marks pic->reference with the field parity. */
2751 static void pic_as_field(Picture *pic, const int parity){
2753 for (i = 0; i < 4; ++i) {
2754 if (parity == PICT_BOTTOM_FIELD)
2755 pic->data[i] += pic->linesize[i];
2756 pic->reference = parity;
2757 pic->linesize[i] *= 2;
/* Copy src into dest as a field of the given parity, if src is usable as
 * a reference for that parity. id_add is added to the pic_id of the copy
 * (used to distinguish long-term ids). Returns 1 if a field was emitted,
 * 0 if src had no matching-parity reference. */
2761 static int split_field_copy(Picture *dest, Picture *src,
2762 int parity, int id_add){
2763 int match = !!(src->reference & parity);
2767 pic_as_field(dest, parity);
2769 dest->pic_id += id_add;
2776 * Split one reference list into field parts, interleaving by parity
2777 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2778 * set to look at the actual start of data for that field.
2780 * @param dest output list
2781 * @param dest_len maximum number of fields to put in dest
2782 * @param src the source reference list containing fields and/or field pairs
2783 * (aka short_ref/long_ref, or
2784 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2785 * @param src_len number of Picture's in source (pairs and unmatched fields)
2786 * @param parity the parity of the picture being decoded/needing
2787 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2788 * @return number of fields placed in dest
/* Interleave one half (short-term or long-term) of a frame reference list
 * into fields, alternating same-parity and opposite-parity entries per
 * H.264 8.2.4.2.5. same_i/opp_i walk src independently for the two
 * parities; field_output records whether the last attempt emitted a
 * field, which drives the parity alternation. Returns fields written. */
2790 static int split_field_half_ref_list(Picture *dest, int dest_len,
2791 Picture *src, int src_len, int parity){
2792 int same_parity = 1;
2798 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2799 if (same_parity && same_i < src_len) {
2800 field_output = split_field_copy(dest + out_i, src + same_i,
// only flip to the opposite parity once a same-parity field was emitted
2802 same_parity = !field_output;
2805 } else if (opp_i < src_len) {
2806 field_output = split_field_copy(dest + out_i, src + opp_i,
2807 PICT_FRAME - parity, 0);
2808 same_parity = field_output;
2820 * Split the reference frame list into a reference field list.
2821 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2822 * The input list contains both reference field pairs and
2823 * unmatched reference fields; it is ordered as spec describes
2824 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2825 * unmatched field pairs are also present. Conceptually this is equivalent
2826 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2828 * @param dest output reference list where ordered fields are to be placed
2829 * @param dest_len max number of fields to place at dest
2830 * @param src source reference list, as described above
2831 * @param src_len number of pictures (pairs and unmatched fields) in src
2832 * @param parity parity of field being currently decoded
2833 * (one of PICT_{TOP,BOTTOM}_FIELD)
2834 * @param long_i index into src array that holds first long reference picture,
2835 * or src_len if no long refs present.
/* Split a combined (short-term then long-term) frame reference list into
 * an ordered field list: the short-term prefix [0, long_i) and long-term
 * suffix [long_i, src_len) are interleaved independently, then
 * concatenated. Returns the total number of fields placed in dest. */
2837 static int split_field_ref_list(Picture *dest, int dest_len,
2838 Picture *src, int src_len,
2839 int parity, int long_i){
2841 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2845 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2846 src_len - long_i, parity);
2851 * fills the default_ref_list.
/* Build the default reference picture lists (RefPicList0/1) for the
 * current slice, per H.264 8.2.4.2: for B slices, short-term refs are
 * sorted by POC relative to the current picture (L0 descending before,
 * L1 ascending after), then long-term refs are appended; for P slices
 * the existing short-term order (most recent first) is used. For field
 * pictures the frame lists are additionally split into field lists. */
2853 static int fill_default_ref_list(H264Context *h){
2854 MpegEncContext * const s = &h->s;
2856 int smallest_poc_greater_than_current = -1;
2858 Picture sorted_short_ref[32];
2859 Picture field_entry_list[2][32];
2860 Picture *frame_list[2];
// field decoding: build frame lists in a temp buffer, split to fields later
2862 if (FIELD_PICTURE) {
2863 structure_sel = PICT_FRAME;
2864 frame_list[0] = field_entry_list[0];
2865 frame_list[1] = field_entry_list[1];
2868 frame_list[0] = h->default_ref_list[0];
2869 frame_list[1] = h->default_ref_list[1];
2872 if(h->slice_type_nos==FF_B_TYPE){
2879 /* sort frame according to POC in B slice */
// selection sort of short refs by ascending POC (O(n^2), n <= 32)
2880 for(out_i=0; out_i<h->short_ref_count; out_i++){
2882 int best_poc=INT_MAX;
2884 for(i=0; i<h->short_ref_count; i++){
2885 const int poc= h->short_ref[i]->poc;
2886 if(poc > limit && poc < best_poc){
2892 assert(best_i != INT_MIN);
2895 sorted_short_ref[out_i]= *h->short_ref[best_i];
2896 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where the POCs cross the current picture's POC; this is
// the pivot between "before" and "after" refs for L0/L1 ordering
2897 if (-1 == smallest_poc_greater_than_current) {
2898 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2899 smallest_poc_greater_than_current = out_i;
2904 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2906 // find the largest POC
// L0 walks down from the pivot (past refs first), L1 walks up (future first)
2907 for(list=0; list<2; list++){
2910 int step= list ? -1 : 1;
2912 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
// when one direction is exhausted, wrap around to the other side
2914 while(j<0 || j>= h->short_ref_count){
2915 if(j != -99 && step == (list ? -1 : 1))
2918 j= smallest_poc_greater_than_current + (step>>1);
// only use refs whose structure matches what this picture needs
2920 sel = sorted_short_ref[j].reference | structure_sel;
2921 if(sel != PICT_FRAME) continue;
2922 frame_list[list][index ]= sorted_short_ref[j];
2923 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2925 short_len[list] = index;
// append long-term refs, identified by their long-term index
2927 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2929 if(h->long_ref[i] == NULL) continue;
2930 sel = h->long_ref[i]->reference | structure_sel;
2931 if(sel != PICT_FRAME) continue;
2933 frame_list[ list ][index ]= *h->long_ref[i];
2934 frame_list[ list ][index++].pic_id= i;
2939 for(list=0; list<2; list++){
2941 len[list] = split_field_ref_list(h->default_ref_list[list],
2945 s->picture_structure,
2948 // swap the two first elements of L1 when L0 and L1 are identical
2949 if(list && len[0] > 1 && len[0] == len[1])
2950 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2952 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
// zero the unused tail so stale entries are never referenced
2956 if(len[list] < h->ref_count[ list ])
2957 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// P/SP slices: single list, short-term refs in existing order then long-term
2964 for(i=0; i<h->short_ref_count; i++){
2966 sel = h->short_ref[i]->reference | structure_sel;
2967 if(sel != PICT_FRAME) continue;
2968 frame_list[0][index ]= *h->short_ref[i];
2969 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2972 for(i = 0; i < 16; i++){
2974 if(h->long_ref[i] == NULL) continue;
2975 sel = h->long_ref[i]->reference | structure_sel;
2976 if(sel != PICT_FRAME) continue;
2977 frame_list[0][index ]= *h->long_ref[i];
2978 frame_list[0][index++].pic_id= i;
2982 index = split_field_ref_list(h->default_ref_list[0],
2983 h->ref_count[0], frame_list[0],
2984 index, s->picture_structure,
2987 if(index < h->ref_count[0])
2988 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2991 for (i=0; i<h->ref_count[0]; i++) {
2992 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2994 if(h->slice_type_nos==FF_B_TYPE){
2995 for (i=0; i<h->ref_count[1]; i++) {
2996 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3003 static void print_short_term(H264Context *h);
3004 static void print_long_term(H264Context *h);
3007 * Extract structure information about the picture described by pic_num in
3008 * the current decoding context (frame or field). Note that pic_num is
3009 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3010 * @param pic_num picture number for which to extract structure information
3011 * @param structure one of PICT_XXX describing structure of picture
3013 * @return frame number (short term) or long term index of picture
3014 * described by pic_num
/* NOTE(review): lines are elided in this extract; the guard selecting the
 * "opposite field" branch and the function's return are not visible here. */
3016 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3017 MpegEncContext * const s = &h->s;
/* default: assume the same structure as the picture currently being decoded */
3019 *structure = s->picture_structure;
3022 /* opposite field */
/* toggling PICT_FRAME flips a top-field selection to bottom and vice versa */
3023 *structure ^= PICT_FRAME;
/**
 * Parses the ref_pic_list_reordering() slice-header syntax and permutes
 * h->ref_list accordingly.  (Bitstream lines are elided in this extract;
 * break/return statements between the visible lines are not shown.)
 */
3030 static int decode_ref_pic_list_reordering(H264Context *h){
3031 MpegEncContext * const s = &h->s;
3032 int list, index, pic_structure;
3034 print_short_term(h);
3036 if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before function
3038 for(list=0; list<h->list_count; list++){
/* start from the default list; the reordering commands edit it in place */
3039 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0 / _l1 */
3041 if(get_bits1(&s->gb)){
3042 int pred= h->curr_pic_num;
3044 for(index=0; ; index++){
3045 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3046 unsigned int pic_id;
3048 Picture *ref = NULL;
/* idc == 3 terminates the reordering command list */
3050 if(reordering_of_pic_nums_idc==3)
3053 if(index >= h->ref_count[list]){
3054 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3058 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term picture addressed via a signed pic_num delta */
3059 if(reordering_of_pic_nums_idc<2){
3060 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3063 if(abs_diff_pic_num > h->max_pic_num){
3064 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts the delta, idc 1 adds it; wrap modulo max_pic_num */
3068 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3069 else pred+= abs_diff_pic_num;
3070 pred &= h->max_pic_num - 1;
3072 frame_num = pic_num_extract(h, pred, &pic_structure);
/* scan the short-term list for a matching, existing reference */
3074 for(i= h->short_ref_count-1; i>=0; i--){
3075 ref = h->short_ref[i];
3076 assert(ref->reference);
3077 assert(!ref->long_ref);
3078 if(ref->data[0] != NULL &&
3079 ref->frame_num == frame_num &&
3080 (ref->reference & pic_structure) &&
3081 ref->long_ref == 0) // ignore non-existing pictures by testing data[0] pointer
/* idc 2: long-term picture addressed via long_term_pic_idx */
3088 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3090 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3093 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3096 ref = h->long_ref[long_idx];
3097 assert(!(ref && !ref->reference));
3098 if(ref && (ref->reference & pic_structure)){
3099 ref->pic_id= pic_id;
3100 assert(ref->long_ref);
/* no usable reference found: report and zero the slot */
3108 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3109 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* locate any duplicate of ref later in the list, then shift entries up
 * and insert the selected reference at position 'index' */
3111 for(i=index; i+1<h->ref_count[list]; i++){
3112 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3115 for(; i > index; i--){
3116 h->ref_list[list][i]= h->ref_list[list][i-1];
3118 h->ref_list[list][index]= *ref;
/* field decoding: reduce the frame entry to the single addressed field */
3120 pic_as_field(&h->ref_list[list][index], pic_structure);
3124 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* replace any still-empty slots with the current picture so no list
 * entry is left with a NULL data[0] pointer */
3130 for(list=0; list<h->list_count; list++){
3131 for(index= 0; index < h->ref_count[list]; index++){
3132 if(!h->ref_list[list][index].data[0])
3133 h->ref_list[list][index]= s->current_picture;
/* temporal direct mode in B slices needs the POC distance scale factors */
3137 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3138 direct_dist_scale_factor(h);
3139 direct_ref_list_init(h);
/**
 * For MBAFF decoding: derive per-field reference entries from each frame in
 * ref_list.  Field variants are stored at index 16+2*i (top) and 16+2*i+1
 * (bottom); the associated weighted-prediction tables are duplicated to the
 * same indices.
 */
3143 static void fill_mbaff_ref_list(H264Context *h){
3145 for(list=0; list<2; list++){ //FIXME try list_count
3146 for(i=0; i<h->ref_count[list]; i++){
3147 Picture *frame = &h->ref_list[list][i];
3148 Picture *field = &h->ref_list[list][16+2*i];
/* doubling the linesize makes the copy address every other line = one field */
3151 field[0].linesize[j] <<= 1;
3152 field[0].reference = PICT_TOP_FIELD;
3153 field[1] = field[0];
/* bottom field starts one (frame) line below the top field */
3155 field[1].data[j] += frame->linesize[j];
3156 field[1].reference = PICT_BOTTOM_FIELD;
/* replicate explicit luma/chroma weights to both field entries */
3158 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3159 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3161 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3162 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* replicate implicit weights across both axes of the [l1][l0] table */
3166 for(j=0; j<h->ref_count[1]; j++){
3167 for(i=0; i<h->ref_count[0]; i++)
3168 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3169 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3170 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the pred_weight_table() slice-header syntax: explicit luma/chroma
 * weights and offsets per reference, for one or both lists.
 */
3174 static int pred_weight_table(H264Context *h){
3175 MpegEncContext * const s = &h->s;
3177 int luma_def, chroma_def;
3180 h->use_weight_chroma= 0;
3181 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3182 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* default weight == 1.0 expressed in the denominator's fixed point */
3183 luma_def = 1<<h->luma_log2_weight_denom;
3184 chroma_def = 1<<h->chroma_log2_weight_denom;
3186 for(list=0; list<2; list++){
3187 for(i=0; i<h->ref_count[list]; i++){
3188 int luma_weight_flag, chroma_weight_flag;
3190 luma_weight_flag= get_bits1(&s->gb);
3191 if(luma_weight_flag){
3192 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3193 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only non-default weights actually enable weighted prediction */
3194 if( h->luma_weight[list][i] != luma_def
3195 || h->luma_offset[list][i] != 0)
/* flag absent: fall back to the neutral weight/offset */
3198 h->luma_weight[list][i]= luma_def;
3199 h->luma_offset[list][i]= 0;
3202 chroma_weight_flag= get_bits1(&s->gb);
3203 if(chroma_weight_flag){
3206 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3207 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3208 if( h->chroma_weight[list][i][j] != chroma_def
3209 || h->chroma_offset[list][i][j] != 0)
3210 h->use_weight_chroma= 1;
3215 h->chroma_weight[list][i][j]= chroma_def;
3216 h->chroma_offset[list][i][j]= 0;
/* P slices carry weights for list 0 only */
3220 if(h->slice_type_nos != FF_B_TYPE) break;
3222 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit weighted-prediction table for B slices
 * (weighted_bipred_idc == 2): weights derived from POC distances.
 */
3226 static void implicit_weight_table(H264Context *h){
3227 MpegEncContext * const s = &h->s;
3229 int cur_poc = s->current_picture_ptr->poc;
/* single, temporally symmetric reference pair => implicit weights would be
 * the neutral 32/32; skip building the table */
3231 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3232 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3234 h->use_weight_chroma= 0;
3239 h->use_weight_chroma= 2;
3240 h->luma_log2_weight_denom= 5;
3241 h->chroma_log2_weight_denom= 5;
3243 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3244 int poc0 = h->ref_list[0][ref0].poc;
3245 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3246 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb: clipped POC distances; tx approximates 16384/td */
3247 int td = av_clip(poc1 - poc0, -128, 127);
3249 int tb = av_clip(cur_poc - poc0, -128, 127);
3250 int tx = (16384 + (FFABS(td) >> 1)) / td;
3251 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range factors fall back to the neutral weight 32 */
3252 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3253 h->implicit_weight[ref0][ref1] = 32;
3255 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3257 h->implicit_weight[ref0][ref1] = 32;
3263 * Mark a picture as no longer needed for reference. The refmask
3264 * argument allows unreferencing of individual fields or the whole frame.
3265 * If the picture becomes entirely unreferenced, but is being held for
3266 * display purposes, it is marked as such.
3267 * @param refmask mask of fields to unreference; the mask is bitwise
3268 * anded with the reference marking of pic
3269 * @return non-zero if pic becomes entirely unreferenced (except possibly
3270 * for display purposes) zero if one of the fields remains in
3273 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* clear the masked field bits; non-zero result means a field is still used */
3275 if (pic->reference &= refmask) {
/* fully unreferenced but queued for display: keep it alive with the
 * special DELAYED_PIC_REF marker */
3278 for(i = 0; h->delayed_pic[i]; i++)
3279 if(pic == h->delayed_pic[i]){
3280 pic->reference=DELAYED_PIC_REF;
3288 * instantaneous decoder refresh.
/* IDR: drop every long-term and short-term reference and reset the
 * frame_num bookkeeping, as required at an instantaneous decoder refresh. */
3290 static void idr(H264Context *h){
3293 for(i=0; i<16; i++){
3294 if (h->long_ref[i] != NULL) {
/* refmask 0 => fully unreference (may become DELAYED_PIC_REF) */
3295 unreference_pic(h, h->long_ref[i], 0);
3296 h->long_ref[i]= NULL;
3299 h->long_ref_count=0;
3301 for(i=0; i<h->short_ref_count; i++){
3302 unreference_pic(h, h->short_ref[i], 0);
3303 h->short_ref[i]= NULL;
3305 h->short_ref_count=0;
3306 h->prev_frame_num= 0;
3307 h->prev_frame_num_offset= 0;
3312 /* forget old pics after a seek */
3313 static void flush_dpb(AVCodecContext *avctx){
3314 H264Context *h= avctx->priv_data;
/* release all pictures held only for delayed output */
3316 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3317 if(h->delayed_pic[i])
3318 h->delayed_pic[i]->reference= 0;
3319 h->delayed_pic[i]= NULL;
/* reset output-order tracking so the next stream starts clean */
3321 h->outputed_poc= INT_MIN;
3323 if(h->s.current_picture_ptr)
3324 h->s.current_picture_ptr->reference= 0;
3325 h->s.first_field= 0;
/* let the generic mpegvideo layer flush its own state too */
3326 ff_mpeg_flush(avctx);
3330 * Find a Picture in the short term reference list by frame number.
3331 * @param frame_num frame number to search for
3332 * @param idx the index into h->short_ref where returned picture is found
3333 * undefined if no picture found.
3334 * @return pointer to the found picture, or NULL if no pic with the provided
3335 * frame number is found
3337 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3338 MpegEncContext * const s = &h->s;
/* linear scan of the short-term list; it is small (<=16 entries) */
3341 for(i=0; i<h->short_ref_count; i++){
3342 Picture *pic= h->short_ref[i];
3343 if(s->avctx->debug&FF_DEBUG_MMCO)
3344 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3345 if(pic->frame_num == frame_num) {
3354 * Remove a picture from the short term reference list by its index in
3355 * that list. This does no checking on the provided index; it is assumed
3356 * to be valid. Other list entries are shifted down.
3357 * @param i index into h->short_ref of picture to remove.
3359 static void remove_short_at_index(H264Context *h, int i){
3360 assert(i >= 0 && i < h->short_ref_count);
3361 h->short_ref[i]= NULL;
/* compact the list: shift the tail down over the removed slot */
3362 if (--h->short_ref_count)
3363 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3368 * @return the removed picture or NULL if an error occurs
/* Remove the short-term reference with the given frame_num, if present. */
3370 static Picture * remove_short(H264Context *h, int frame_num){
3371 MpegEncContext * const s = &h->s;
3375 if(s->avctx->debug&FF_DEBUG_MMCO)
3376 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
/* i is set by find_short only when a picture is found */
3378 pic = find_short(h, frame_num, &i);
3380 remove_short_at_index(h, i);
3386 * Remove a picture from the long term reference list by its index in
3387 * that list. This does no checking on the provided index; it is assumed
3388 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3389 * @param i index into h->long_ref of picture to remove.
3391 static void remove_long_at_index(H264Context *h, int i){
/* long_ref is a sparse array: just clear the slot, no compaction */
3392 h->long_ref[i]= NULL;
3393 h->long_ref_count--;
3398 * @return the removed picture or NULL if an error occurs
/* Remove and return the long-term reference at index i (NULL if empty). */
3400 static Picture * remove_long(H264Context *h, int i){
3403 pic= h->long_ref[i];
3405 remove_long_at_index(h, i);
3411 * print short term list
3413 static void print_short_term(H264Context *h) {
/* debug aid: only active with -debug mmco */
3415 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3416 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3417 for(i=0; i<h->short_ref_count; i++){
3418 Picture *pic= h->short_ref[i];
3419 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3425 * print long term list
3427 static void print_long_term(H264Context *h) {
/* debug aid: only active with -debug mmco; long_ref is sparse over 16 slots */
3429 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3430 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3431 for(i = 0; i < 16; i++){
3432 Picture *pic= h->long_ref[i];
3434 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3441 * Executes the reference picture marking (memory management control operations).
/**
 * Applies the decoded MMCO commands to the short/long reference lists and
 * performs the default sliding-window marking for the current picture.
 * (Lines are elided in this extract; some braces/breaks are not visible.)
 */
3443 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3444 MpegEncContext * const s = &h->s;
3446 int current_ref_assigned=0;
3449 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3450 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3452 for(i=0; i<mmco_count; i++){
3453 int structure, frame_num, unref_pic;
3454 if(s->avctx->debug&FF_DEBUG_MMCO)
3455 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3457 switch(mmco[i].opcode){
/* mark a short-term picture (or one of its fields) as unused */
3458 case MMCO_SHORT2UNUSED:
3459 if(s->avctx->debug&FF_DEBUG_MMCO)
3460 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3461 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3462 pic = find_short(h, frame_num, &j);
/* structure^PICT_FRAME keeps only the *other* field referenced */
3464 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3465 remove_short_at_index(h, j);
3466 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3467 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* move a short-term picture to a long-term index */
3469 case MMCO_SHORT2LONG:
3470 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3471 h->long_ref[mmco[i].long_arg]->frame_num ==
3472 mmco[i].short_pic_num / 2) {
3473 /* do nothing, we've already moved this field pair. */
/* field pics address individual fields, hence the >>1 to a frame num */
3475 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* evict whatever previously occupied the target long-term slot */
3477 pic= remove_long(h, mmco[i].long_arg);
3478 if(pic) unreference_pic(h, pic, 0);
3480 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3481 if (h->long_ref[ mmco[i].long_arg ]){
3482 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3483 h->long_ref_count++;
/* mark a long-term picture (or one of its fields) as unused */
3487 case MMCO_LONG2UNUSED:
3488 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3489 pic = h->long_ref[j];
3491 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3492 remove_long_at_index(h, j);
3493 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3494 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* assign the current picture a long-term index */
3498 if (FIELD_PICTURE && !s->first_field) {
3499 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3500 /* Just mark second field as referenced */
3502 } else if (s->current_picture_ptr->reference) {
3503 /* First field in pair is in short term list or
3504 * at a different long term index.
3505 * This is not allowed; see 7.4.3, notes 2 and 3.
3506 * Report the problem and keep the pair where it is,
3507 * and mark this field valid.
3509 av_log(h->s.avctx, AV_LOG_ERROR,
3510 "illegal long term reference assignment for second "
3511 "field in complementary field pair (first field is "
3512 "short term or has non-matching long index)\n");
/* normal case: evict the old occupant, install the current picture */
3518 pic= remove_long(h, mmco[i].long_arg);
3519 if(pic) unreference_pic(h, pic, 0);
3521 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3522 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3523 h->long_ref_count++;
3526 s->current_picture_ptr->reference |= s->picture_structure;
3527 current_ref_assigned=1;
/* shrink the maximum long-term index */
3529 case MMCO_SET_MAX_LONG:
3530 assert(mmco[i].long_arg <= 16);
3531 // just remove the long term which index is greater than new max
3532 for(j = mmco[i].long_arg; j<16; j++){
3533 pic = remove_long(h, j);
3534 if (pic) unreference_pic(h, pic, 0);
/* MMCO reset: drop every reference picture */
3538 while(h->short_ref_count){
3539 pic= remove_short(h, h->short_ref[0]->frame_num);
3540 if(pic) unreference_pic(h, pic, 0);
3542 for(j = 0; j < 16; j++) {
3543 pic= remove_long(h, j);
3544 if(pic) unreference_pic(h, pic, 0);
3546 s->current_picture_ptr->poc=
3547 s->current_picture_ptr->field_poc[0]=
3548 s->current_picture_ptr->field_poc[1]=
3552 s->current_picture_ptr->frame_num= 0;
/* default marking: current picture becomes a short-term reference,
 * unless an MMCO already placed it in long_ref */
3558 if (!current_ref_assigned && FIELD_PICTURE &&
3559 !s->first_field && s->current_picture_ptr->reference) {
3561 /* Second field of complementary field pair; the first field of
3562 * which is already referenced. If short referenced, it
3563 * should be first entry in short_ref. If not, it must exist
3564 * in long_ref; trying to put it on the short list here is an
3565 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3567 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3568 /* Just mark the second field valid */
3569 s->current_picture_ptr->reference = PICT_FRAME;
3570 } else if (s->current_picture_ptr->long_ref) {
3571 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3572 "assignment for second field "
3573 "in complementary field pair "
3574 "(first field is long term)\n");
3577 * First field in reference, but not in any sensible place on our
3578 * reference lists. This shouldn't happen unless reference
3579 * handling somewhere else is wrong.
3583 current_ref_assigned = 1;
3586 if(!current_ref_assigned){
/* a short ref with the same frame_num must not already exist */
3587 pic= remove_short(h, s->current_picture_ptr->frame_num);
3589 unreference_pic(h, pic, 0);
3590 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* prepend the current picture to short_ref (newest first) */
3593 if(h->short_ref_count)
3594 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3596 h->short_ref[0]= s->current_picture_ptr;
3597 h->short_ref[0]->long_ref=0;
3598 h->short_ref_count++;
3599 s->current_picture_ptr->reference |= s->picture_structure;
/* enforce the SPS limit on total reference frames (corrupt streams) */
3602 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3604 /* We have too many reference frames, probably due to corrupted
3605 * stream. Need to discard one frame. Prevents overrun of the
3606 * short_ref and long_ref buffers.
3608 av_log(h->s.avctx, AV_LOG_ERROR,
3609 "number of reference frames exceeds max (probably "
3610 "corrupt input), discarding one\n");
3612 if (h->long_ref_count && !h->short_ref_count) {
3613 for (i = 0; i < 16; ++i)
3618 pic = h->long_ref[i];
3619 remove_long_at_index(h, i);
/* otherwise drop the oldest short-term reference */
3621 pic = h->short_ref[h->short_ref_count - 1];
3622 remove_short_at_index(h, h->short_ref_count - 1);
3624 unreference_pic(h, pic, 0);
3627 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[], and
 * synthesizes sliding-window MMCO commands when the buffer is full.
 * (Lines are elided in this extract; some braces/returns are not visible.)
 */
3632 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3633 MpegEncContext * const s = &h->s;
3636 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, stored as 0/-1 */
3637 s->broken_link= get_bits1(gb) -1;
3638 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3639 if(h->mmco[0].long_arg == -1)
/* long_term_reference_flag set: keep the IDR picture as long-term idx 0 */
3642 h->mmco[0].opcode= MMCO_LONG;
3646 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3647 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3648 MMCOOpcode opcode= get_ue_golomb(gb);
3650 h->mmco[i].opcode= opcode;
3651 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, wrapped */
3652 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3653 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3654 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3658 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3659 unsigned int long_arg= get_ue_golomb(gb);
/* field pics allow pic nums up to 31 for LONG2UNUSED; else limit 15 */
3660 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3661 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3664 h->mmco[i].long_arg= long_arg;
3667 if(opcode > (unsigned)MMCO_LONG){
3668 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3671 if(opcode == MMCO_END)
/* implicit sliding window: emit SHORT2UNUSED for the oldest short ref
 * when the DPB reference count has reached the SPS limit */
3676 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3678 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3679 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3680 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3681 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3683 if (FIELD_PICTURE) {
/* in field mode each frame is two pic nums; unreference both fields */
3684 h->mmco[0].short_pic_num *= 2;
3685 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3686 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three SPS poc_type modes and stores it in the current Picture.
 * (Lines are elided in this extract; some else-branches are not visible.)
 */
3697 static int init_poc(H264Context *h){
3698 MpegEncContext * const s = &h->s;
3699 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
/* detect frame_num wrap-around to maintain a monotonic offset */
3702 if(h->frame_num < h->prev_frame_num)
3703 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3705 h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: explicit poc_lsb in the bitstream, msb inferred from wrap */
3707 if(h->sps.poc_type==0){
3708 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3710 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3711 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3712 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3713 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3715 h->poc_msb = h->prev_poc_msb;
3716 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3718 field_poc[1] = h->poc_msb + h->poc_lsb;
3719 if(s->picture_structure == PICT_FRAME)
3720 field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC derived from frame_num and the SPS ref-frame offsets */
3721 }else if(h->sps.poc_type==1){
3722 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3725 if(h->sps.poc_cycle_length != 0)
3726 abs_frame_num = h->frame_num_offset + h->frame_num;
3730 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3733 expected_delta_per_poc_cycle = 0;
3734 for(i=0; i < h->sps.poc_cycle_length; i++)
3735 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3737 if(abs_frame_num > 0){
3738 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3739 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3741 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3742 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3743 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3747 if(h->nal_ref_idc == 0)
3748 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3750 field_poc[0] = expectedpoc + h->delta_poc[0];
3751 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3753 if(s->picture_structure == PICT_FRAME)
3754 field_poc[1] += h->delta_poc[1];
/* poc_type 2: POC is simply 2*frame order, non-refs offset by -1 */
3757 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3758 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* store per-field POCs; frame POC is the smaller of the two fields */
3763 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3764 s->current_picture_ptr->field_poc[0]= field_poc[0];
3765 s->current_picture_ptr->poc = field_poc[0];
3767 if(s->picture_structure != PICT_TOP_FIELD) {
3768 s->current_picture_ptr->field_poc[1]= field_poc[1];
3769 s->current_picture_ptr->poc = field_poc[1];
3771 if(!FIELD_PICTURE || !s->first_field) {
3772 Picture *cur = s->current_picture_ptr;
3773 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3781 * initialize scan tables
/* Builds the zigzag/field scan tables, permuting them when the platform's
 * IDCT uses a transposed coefficient layout (i.e. is not the C reference). */
3783 static void init_scan_tables(H264Context *h){
3784 MpegEncContext * const s = &h->s;
/* C reference IDCT: use the spec's scan order directly */
3786 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3787 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3788 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3790 for(i=0; i<16; i++){
/* T() transposes a 4x4 index (swap row and column bits) */
3791 #define T(x) (x>>2) | ((x<<2) & 0xF)
3792 h->zigzag_scan[i] = T(zigzag_scan[i]);
3793 h-> field_scan[i] = T( field_scan[i]);
3797 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3798 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3799 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3800 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3801 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3803 for(i=0; i<64; i++){
/* T() transposes an 8x8 index (swap row and column bits) */
3804 #define T(x) (x>>3) | ((x&7)<<3)
3805 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3806 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3807 h->field_scan8x8[i] = T(field_scan8x8[i]);
3808 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* _q0 tables are used at qp==0 with transform bypass: they must stay in
 * spec order, untransposed, since no IDCT runs in that mode */
3812 if(h->sps.transform_bypass){ //FIXME same ugly
3813 h->zigzag_scan_q0 = zigzag_scan;
3814 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3815 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3816 h->field_scan_q0 = field_scan;
3817 h->field_scan8x8_q0 = field_scan8x8;
3818 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3820 h->zigzag_scan_q0 = h->zigzag_scan;
3821 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3822 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3823 h->field_scan_q0 = h->field_scan;
3824 h->field_scan8x8_q0 = h->field_scan8x8;
3825 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3830 * Replicates H264 "master" context to thread contexts.
/* Copies the per-picture decoding state a worker thread needs from the
 * master context; shallow copies only, no ownership is transferred. */
3832 static void clone_slice(H264Context *dst, H264Context *src)
3834 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3835 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3836 dst->s.current_picture = src->s.current_picture;
3837 dst->s.linesize = src->s.linesize;
3838 dst->s.uvlinesize = src->s.uvlinesize;
3839 dst->s.first_field = src->s.first_field;
/* POC / frame_num history needed for reference handling */
3841 dst->prev_poc_msb = src->prev_poc_msb;
3842 dst->prev_poc_lsb = src->prev_poc_lsb;
3843 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3844 dst->prev_frame_num = src->prev_frame_num;
3845 dst->short_ref_count = src->short_ref_count;
/* reference lists: arrays of Picture/Picture* copied wholesale */
3847 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3848 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3849 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3850 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
/* dequant tables derived from the active PPS */
3852 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3853 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3857 * decodes a slice header.
3858 * This will also call MPV_common_init() and frame_start() as needed.
3860 * @param h h264context
3861 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3863 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3865 static int decode_slice_header(H264Context *h, H264Context *h0){
3866 MpegEncContext * const s = &h->s;
3867 MpegEncContext * const s0 = &h0->s;
3868 unsigned int first_mb_in_slice;
3869 unsigned int pps_id;
3870 int num_ref_idx_active_override_flag;
3871 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3872 unsigned int slice_type, tmp, i, j;
3873 int default_ref_list_done = 0;
3874 int last_pic_structure;
3876 s->dropable= h->nal_ref_idc == 0;
3878 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3879 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3880 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3882 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3883 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3886 first_mb_in_slice= get_ue_golomb(&s->gb);
3888 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3889 h0->current_slice = 0;
3890 if (!s0->first_field)
3891 s->current_picture_ptr= NULL;
3894 slice_type= get_ue_golomb(&s->gb);
3896 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3901 h->slice_type_fixed=1;
3903 h->slice_type_fixed=0;
3905 slice_type= slice_type_map[ slice_type ];
3906 if (slice_type == FF_I_TYPE
3907 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3908 default_ref_list_done = 1;
3910 h->slice_type= slice_type;
3911 h->slice_type_nos= slice_type & 3;
3913 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3914 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3915 av_log(h->s.avctx, AV_LOG_ERROR,
3916 "B picture before any references, skipping\n");
3920 pps_id= get_ue_golomb(&s->gb);
3921 if(pps_id>=MAX_PPS_COUNT){
3922 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3925 if(!h0->pps_buffers[pps_id]) {
3926 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3929 h->pps= *h0->pps_buffers[pps_id];
3931 if(!h0->sps_buffers[h->pps.sps_id]) {
3932 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3935 h->sps = *h0->sps_buffers[h->pps.sps_id];
3937 if(h == h0 && h->dequant_coeff_pps != pps_id){
3938 h->dequant_coeff_pps = pps_id;
3939 init_dequant_tables(h);
3942 s->mb_width= h->sps.mb_width;
3943 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3945 h->b_stride= s->mb_width*4;
3946 h->b8_stride= s->mb_width*2;
3948 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3949 if(h->sps.frame_mbs_only_flag)
3950 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3952 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3954 if (s->context_initialized
3955 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3957 return -1; // width / height changed during parallelized decoding
3961 if (!s->context_initialized) {
3963 return -1; // we cant (re-)initialize context during parallel decoding
3964 if (MPV_common_init(s) < 0)
3968 init_scan_tables(h);
3971 for(i = 1; i < s->avctx->thread_count; i++) {
3973 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3974 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3975 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3978 init_scan_tables(c);
3982 for(i = 0; i < s->avctx->thread_count; i++)
3983 if(context_init(h->thread_context[i]) < 0)
3986 s->avctx->width = s->width;
3987 s->avctx->height = s->height;
3988 s->avctx->sample_aspect_ratio= h->sps.sar;
3989 if(!s->avctx->sample_aspect_ratio.den)
3990 s->avctx->sample_aspect_ratio.den = 1;
3992 if(h->sps.timing_info_present_flag){
3993 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3994 if(h->x264_build > 0 && h->x264_build < 44)
3995 s->avctx->time_base.den *= 2;
3996 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3997 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4001 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4004 h->mb_aff_frame = 0;
4005 last_pic_structure = s0->picture_structure;
4006 if(h->sps.frame_mbs_only_flag){
4007 s->picture_structure= PICT_FRAME;
4009 if(get_bits1(&s->gb)) { //field_pic_flag
4010 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4012 s->picture_structure= PICT_FRAME;
4013 h->mb_aff_frame = h->sps.mb_aff;
4017 if(h0->current_slice == 0){
4018 while(h->frame_num != h->prev_frame_num &&
4019 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
4020 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
4022 h->prev_frame_num++;
4023 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
4024 s->current_picture_ptr->frame_num= h->prev_frame_num;
4025 execute_ref_pic_marking(h, NULL, 0);
4028 /* See if we have a decoded first field looking for a pair... */
4029 if (s0->first_field) {
4030 assert(s0->current_picture_ptr);
4031 assert(s0->current_picture_ptr->data[0]);
4032 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4034 /* figure out if we have a complementary field pair */
4035 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4037 * Previous field is unmatched. Don't display it, but let it
4038 * remain for reference if marked as such.
4040 s0->current_picture_ptr = NULL;
4041 s0->first_field = FIELD_PICTURE;
4044 if (h->nal_ref_idc &&
4045 s0->current_picture_ptr->reference &&
4046 s0->current_picture_ptr->frame_num != h->frame_num) {
4048 * This and previous field were reference, but had
4049 * different frame_nums. Consider this field first in
4050 * pair. Throw away previous field except for reference
4053 s0->first_field = 1;
4054 s0->current_picture_ptr = NULL;
4057 /* Second field in complementary pair */
4058 s0->first_field = 0;
4063 /* Frame or first field in a potentially complementary pair */
4064 assert(!s0->current_picture_ptr);
4065 s0->first_field = FIELD_PICTURE;
4068 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4069 s0->first_field = 0;
4076 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4078 assert(s->mb_num == s->mb_width * s->mb_height);
4079 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4080 first_mb_in_slice >= s->mb_num){
4081 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4084 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4085 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4086 if (s->picture_structure == PICT_BOTTOM_FIELD)
4087 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4088 assert(s->mb_y < s->mb_height);
4090 if(s->picture_structure==PICT_FRAME){
4091 h->curr_pic_num= h->frame_num;
4092 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4094 h->curr_pic_num= 2*h->frame_num + 1;
4095 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4098 if(h->nal_unit_type == NAL_IDR_SLICE){
4099 get_ue_golomb(&s->gb); /* idr_pic_id */
4102 if(h->sps.poc_type==0){
4103 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4105 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4106 h->delta_poc_bottom= get_se_golomb(&s->gb);
4110 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4111 h->delta_poc[0]= get_se_golomb(&s->gb);
4113 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4114 h->delta_poc[1]= get_se_golomb(&s->gb);
4119 if(h->pps.redundant_pic_cnt_present){
4120 h->redundant_pic_count= get_ue_golomb(&s->gb);
4123 //set defaults, might be overridden a few lines later
4124 h->ref_count[0]= h->pps.ref_count[0];
4125 h->ref_count[1]= h->pps.ref_count[1];
4127 if(h->slice_type_nos != FF_I_TYPE){
4128 if(h->slice_type_nos == FF_B_TYPE){
4129 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4131 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4133 if(num_ref_idx_active_override_flag){
4134 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4135 if(h->slice_type_nos==FF_B_TYPE)
4136 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4138 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4139 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4140 h->ref_count[0]= h->ref_count[1]= 1;
4144 if(h->slice_type_nos == FF_B_TYPE)
4151 if(!default_ref_list_done){
4152 fill_default_ref_list(h);
4155 if(decode_ref_pic_list_reordering(h) < 0)
4158 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4159 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4160 pred_weight_table(h);
4161 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4162 implicit_weight_table(h);
4167 decode_ref_pic_marking(h0, &s->gb);
4170 fill_mbaff_ref_list(h);
4172 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4173 tmp = get_ue_golomb(&s->gb);
4175 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4178 h->cabac_init_idc= tmp;
4181 h->last_qscale_diff = 0;
4182 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4184 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4188 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4189 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4190 //FIXME qscale / qp ... stuff
4191 if(h->slice_type == FF_SP_TYPE){
4192 get_bits1(&s->gb); /* sp_for_switch_flag */
4194 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4195 get_se_golomb(&s->gb); /* slice_qs_delta */
4198 h->deblocking_filter = 1;
4199 h->slice_alpha_c0_offset = 0;
4200 h->slice_beta_offset = 0;
4201 if( h->pps.deblocking_filter_parameters_present ) {
4202 tmp= get_ue_golomb(&s->gb);
4204 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4207 h->deblocking_filter= tmp;
4208 if(h->deblocking_filter < 2)
4209 h->deblocking_filter^= 1; // 1<->0
4211 if( h->deblocking_filter ) {
4212 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4213 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4217 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4218 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4219 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4220 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4221 h->deblocking_filter= 0;
4223 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4224 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4225 /* Cheat slightly for speed:
4226 Do not bother to deblock across slices. */
4227 h->deblocking_filter = 2;
4229 h0->max_contexts = 1;
4230 if(!h0->single_decode_warning) {
4231 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4232 h0->single_decode_warning = 1;
4235 return 1; // deblocking switched inside frame
4240 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4241 slice_group_change_cycle= get_bits(&s->gb, ?);
4244 h0->last_slice_type = slice_type;
4245 h->slice_num = ++h0->current_slice;
4248 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4252 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4253 +(h->ref_list[j][i].reference&3);
4256 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4257 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4259 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4260 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4262 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4264 av_get_pict_type_char(h->slice_type),
4265 pps_id, h->frame_num,
4266 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4267 h->ref_count[0], h->ref_count[1],
4269 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4271 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4272 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the CAVLC level_prefix: a unary code, i.e. the number of leading
 * zero bits before the terminating 1 bit.
 * NOTE(review): this excerpt is lossy — the declarations of buf/log and
 * the final return statement are not visible here.
 */
4282 static inline int get_level_prefix(GetBitContext *gb){
4286 OPEN_READER(re, gb);
4287 UPDATE_CACHE(re, gb);
4288 buf=GET_CACHE(re, gb);
/* bits consumed including the terminating 1 bit */
4290 log= 32 - av_log2(buf);
4292 print_bin(buf>>(32-log), log);
4293 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* advance the bitstream past the unary code */
4296 LAST_SKIP_BITS(re, gb, log);
4297 CLOSE_READER(re, gb);
/**
 * Determines whether the 8x8 DCT may be used for the current macroblock:
 * disallowed if any sub-partition is smaller than 8x8, or (when the SPS
 * lacks direct_8x8_inference) if any sub-MB is DIRECT-predicted.
 * NOTE(review): the surrounding loop header and the return statement are
 * missing from this excerpt.
 */
4302 static inline int get_dct8x8_allowed(H264Context *h){
4305 if(!IS_SUB_8X8(h->sub_mb_type[i])
4306 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
/*
 * CAVLC residual-block decoder (coeff_token, trailing ones, level codes,
 * total_zeros and run_before), per the H.264 spec's residual_block_cavlc
 * syntax.  NOTE(review): this excerpt is lossy — several interior lines
 * (declarations, else-branches, closing braces) are not visible.
 */
4313 * decodes a residual block.
4314 * @param n block index
4315 * @param scantable scantable
4316 * @param max_coeff number of coefficients in the block
4317 * @return <0 if an error occurred
4319 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4320 MpegEncContext * const s = &h->s;
/* maps total_coeff of the neighbors to one of the 4 coeff_token VLC tables */
4321 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4323 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4325 //FIXME put trailing_onex into the context
/* coeff_token: chroma DC uses its own table; luma selects by predicted nnz */
4327 if(n == CHROMA_DC_BLOCK_INDEX){
4328 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4329 total_coeff= coeff_token>>2;
4331 if(n == LUMA_DC_BLOCK_INDEX){
4332 total_coeff= pred_non_zero_count(h, 0);
4333 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4334 total_coeff= coeff_token>>2;
4336 total_coeff= pred_non_zero_count(h, n);
4337 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4338 total_coeff= coeff_token>>2;
4339 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4343 //FIXME set last_non_zero?
/* sanity check against corrupted streams */
4347 if(total_coeff > (unsigned)max_coeff) {
4348 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low 2 bits of coeff_token carry the trailing-ones count */
4352 trailing_ones= coeff_token&3;
4353 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4354 assert(total_coeff<=16);
/* trailing ones: one sign bit each, value is +/-1 */
4356 for(i=0; i<trailing_ones; i++){
4357 level[i]= 1 - 2*get_bits1(gb);
4361 int level_code, mask;
4362 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4363 int prefix= get_level_prefix(gb);
4365 //first coefficient has suffix_length equal to 0 or 1
4366 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4368 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4370 level_code= (prefix<<suffix_length); //part
4371 }else if(prefix==14){
4373 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4375 level_code= prefix + get_bits(gb, 4); //part
4377 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4378 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4380 level_code += (1<<(prefix-3))-4096;
4383 if(trailing_ones < 3) level_code += 2;
/* map unsigned level_code to signed level: even -> positive, odd -> negative */
4388 mask= -(level_code&1);
4389 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4392 //remaining coefficients have suffix_length > 0
4393 for(;i<total_coeff;i++) {
/* thresholds at which suffix_length is incremented */
4394 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4395 prefix = get_level_prefix(gb);
4397 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4399 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4401 level_code += (1<<(prefix-3))-4096;
4403 mask= -(level_code&1);
4404 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4405 if(level_code > suffix_limit[suffix_length])
/* total_zeros: only present when the block is not already full */
4410 if(total_coeff == max_coeff)
4413 if(n == CHROMA_DC_BLOCK_INDEX)
4414 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4416 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* scatter levels into the block at scan positions; two variants below:
   without qmul (DC blocks) and with dequantization (AC blocks) */
4419 coeff_num = zeros_left + total_coeff - 1;
4420 j = scantable[coeff_num];
4422 block[j] = level[0];
4423 for(i=1;i<total_coeff;i++) {
4426 else if(zeros_left < 7){
4427 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4429 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4431 zeros_left -= run_before;
4432 coeff_num -= 1 + run_before;
4433 j= scantable[ coeff_num ];
/* dequantizing variant: (level * qmul + 32) >> 6 */
4438 block[j] = (level[0] * qmul[j] + 32)>>6;
4439 for(i=1;i<total_coeff;i++) {
4442 else if(zeros_left < 7){
4443 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4445 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4447 zeros_left -= run_before;
4448 coeff_num -= 1 + run_before;
4449 j= scantable[ coeff_num ];
4451 block[j]= (level[i] * qmul[j] + 32)>>6;
/* negative zeros_left means the run_before codes overran the block */
4456 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF pair from the left
 * neighbour's mb_type, falling back to the top neighbour when the left
 * one belongs to a different slice.
 * NOTE(review): the final ": 0" arm of the conditional chain and the
 * closing brace are missing from this excerpt.
 */
4463 static void predict_field_decoding_flag(H264Context *h){
4464 MpegEncContext * const s = &h->s;
4465 const int mb_xy= h->mb_xy;
4466 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4467 ? s->current_picture.mb_type[mb_xy-1]
4468 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4469 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4471 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
/*
 * NOTE(review): lossy excerpt — the "/**" opener of this doc comment and
 * several body lines (declarations, braces) are not visible.
 */
4475 * decodes a P_SKIP or B_SKIP macroblock
4477 static void decode_mb_skip(H264Context *h){
4478 MpegEncContext * const s = &h->s;
4479 const int mb_xy= h->mb_xy;
/* a skipped MB has no residual: clear all non-zero-count bookkeeping */
4482 memset(h->non_zero_count[mb_xy], 0, 16);
4483 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4486 mb_type|= MB_TYPE_INTERLACED;
/* B_SKIP: direct-predicted; P_SKIP: motion predicted from neighbours */
4488 if( h->slice_type_nos == FF_B_TYPE )
4490 // just for fill_caches. pred_direct_motion will set the real mb_type
4491 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4493 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4494 pred_direct_motion(h, &mb_type);
4495 mb_type|= MB_TYPE_SKIP;
4500 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4502 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4503 pred_pskip_motion(h, &mx, &my);
4504 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4505 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit per-MB state to the picture-wide tables */
4508 write_back_motion(h, mb_type);
4509 s->current_picture.mb_type[mb_xy]= mb_type;
4510 s->current_picture.qscale_table[mb_xy]= s->qscale;
4511 h->slice_table[ mb_xy ]= h->slice_num;
4512 h->prev_mb_skipped= 1;
/*
 * CAVLC macroblock-layer decoder: skip handling, mb_type, intra prediction
 * modes or inter motion/reference info, coded_block_pattern, mb_qp_delta
 * and residuals.  NOTE(review): lossy excerpt — many interior lines
 * (else-branches, declarations, closing braces) are not visible; comments
 * describe only what the visible lines show.
 */
4516 * decodes a macroblock
4517 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4519 static int decode_mb_cavlc(H264Context *h){
4520 MpegEncContext * const s = &h->s;
4522 int partition_count;
4523 unsigned int mb_type, cbp;
4524 int dct8x8_allowed= h->pps.transform_8x8_mode;
4526 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4528 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4530 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4531 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* ---- skip-run handling (P/B slices only) ---- */
4533 if(h->slice_type_nos != FF_I_TYPE){
4534 if(s->mb_skip_run==-1)
4535 s->mb_skip_run= get_ue_golomb(&s->gb);
4537 if (s->mb_skip_run--) {
/* MBAFF: for the top MB of a pair the field flag must still be resolved */
4538 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4539 if(s->mb_skip_run==0)
4540 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4542 predict_field_decoding_flag(h);
4549 if( (s->mb_y&1) == 0 )
4550 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4552 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4554 h->prev_mb_skipped= 0;
/* ---- mb_type: decoded relative to the slice type ---- */
4556 mb_type= get_ue_golomb(&s->gb);
4557 if(h->slice_type_nos == FF_B_TYPE){
4559 partition_count= b_mb_type_info[mb_type].partition_count;
4560 mb_type= b_mb_type_info[mb_type].type;
4563 goto decode_intra_mb;
4565 }else if(h->slice_type_nos == FF_P_TYPE){
4567 partition_count= p_mb_type_info[mb_type].partition_count;
4568 mb_type= p_mb_type_info[mb_type].type;
4571 goto decode_intra_mb;
4574 assert(h->slice_type_nos == FF_I_TYPE);
4575 if(h->slice_type == FF_SI_TYPE && mb_type)
4579 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4583 cbp= i_mb_type_info[mb_type].cbp;
4584 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4585 mb_type= i_mb_type_info[mb_type].type;
4589 mb_type |= MB_TYPE_INTERLACED;
4591 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- I_PCM: raw samples follow, byte-aligned ---- */
4593 if(IS_INTRA_PCM(mb_type)){
4596 // We assume these blocks are very rare so we do not optimize it.
4597 align_get_bits(&s->gb);
4599 // The pixels are stored in the same order as levels in h->mb array.
4600 for(y=0; y<16; y++){
4601 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4602 for(x=0; x<16; x++){
4603 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4604 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4608 const int index= 256 + 4*(y&3) + 32*(y>>2);
4610 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4611 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4615 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4617 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4618 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4622 // In deblocking, the quantizer is 0
4623 s->current_picture.qscale_table[mb_xy]= 0;
4624 // All coeffs are present
4625 memset(h->non_zero_count[mb_xy], 16, 16);
4627 s->current_picture.mb_type[mb_xy]= mb_type;
/* presumably MBAFF ref-count doubling for field decoding — excerpt hides
   the condition; restored near the end of this function */
4632 h->ref_count[0] <<= 1;
4633 h->ref_count[1] <<= 1;
4636 fill_caches(h, mb_type, 0);
/* ---- intra macroblocks: prediction modes ---- */
4639 if(IS_INTRA(mb_type)){
4641 // init_top_left_availability(h);
4642 if(IS_INTRA4x4(mb_type)){
4645 if(dct8x8_allowed && get_bits1(&s->gb)){
4646 mb_type |= MB_TYPE_8x8DCT;
4650 // fill_intra4x4_pred_table(h);
4651 for(i=0; i<16; i+=di){
4652 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag == 0: 3-bit remainder selects the mode */
4654 if(!get_bits1(&s->gb)){
4655 const int rem_mode= get_bits(&s->gb, 3);
4656 mode = rem_mode + (rem_mode >= mode);
4660 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4662 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4664 write_back_intra_pred_mode(h);
4665 if( check_intra4x4_pred_mode(h) < 0)
4668 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4669 if(h->intra16x16_pred_mode < 0)
4673 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4676 h->chroma_pred_mode= pred_mode;
/* ---- 8x8 partitions: sub_mb_types, refs and MVs per sub-block ---- */
4677 }else if(partition_count==4){
4678 int i, j, sub_partition_count[4], list, ref[2][4];
4680 if(h->slice_type_nos == FF_B_TYPE){
4682 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4683 if(h->sub_mb_type[i] >=13){
4684 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4687 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4688 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4690 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4691 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4692 pred_direct_motion(h, &mb_type);
4693 h->ref_cache[0][scan8[4]] =
4694 h->ref_cache[1][scan8[4]] =
4695 h->ref_cache[0][scan8[12]] =
4696 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4699 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4701 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4702 if(h->sub_mb_type[i] >=4){
4703 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4706 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4707 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per list per 8x8 partition */
4711 for(list=0; list<h->list_count; list++){
4712 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4714 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4715 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4716 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4718 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4730 dct8x8_allowed = get_dct8x8_allowed(h);
4732 for(list=0; list<h->list_count; list++){
4734 if(IS_DIRECT(h->sub_mb_type[i])) {
4735 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4738 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4739 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4741 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4742 const int sub_mb_type= h->sub_mb_type[i];
4743 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4744 for(j=0; j<sub_partition_count[i]; j++){
4746 const int index= 4*i + block_width*j;
4747 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4748 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4749 mx += get_se_golomb(&s->gb);
4750 my += get_se_golomb(&s->gb);
4751 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV to the 4x4 cells covered by the sub-partition */
4753 if(IS_SUB_8X8(sub_mb_type)){
4755 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4757 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4758 }else if(IS_SUB_8X4(sub_mb_type)){
4759 mv_cache[ 1 ][0]= mx;
4760 mv_cache[ 1 ][1]= my;
4761 }else if(IS_SUB_4X8(sub_mb_type)){
4762 mv_cache[ 8 ][0]= mx;
4763 mv_cache[ 8 ][1]= my;
4765 mv_cache[ 0 ][0]= mx;
4766 mv_cache[ 0 ][1]= my;
4769 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4775 }else if(IS_DIRECT(mb_type)){
4776 pred_direct_motion(h, &mb_type);
4777 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16 / 16x8 / 8x16 inter partitions ---- */
4779 int list, mx, my, i;
4780 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4781 if(IS_16X16(mb_type)){
4782 for(list=0; list<h->list_count; list++){
4784 if(IS_DIR(mb_type, 0, list)){
4785 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4786 if(val >= h->ref_count[list]){
4787 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4791 val= LIST_NOT_USED&0xFF;
4792 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4794 for(list=0; list<h->list_count; list++){
4796 if(IS_DIR(mb_type, 0, list)){
4797 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4798 mx += get_se_golomb(&s->gb);
4799 my += get_se_golomb(&s->gb);
4800 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4802 val= pack16to32(mx,my);
4805 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4808 else if(IS_16X8(mb_type)){
4809 for(list=0; list<h->list_count; list++){
4812 if(IS_DIR(mb_type, i, list)){
4813 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4814 if(val >= h->ref_count[list]){
4815 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4819 val= LIST_NOT_USED&0xFF;
4820 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4823 for(list=0; list<h->list_count; list++){
4826 if(IS_DIR(mb_type, i, list)){
4827 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4828 mx += get_se_golomb(&s->gb);
4829 my += get_se_golomb(&s->gb);
4830 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4832 val= pack16to32(mx,my);
4835 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4839 assert(IS_8X16(mb_type));
4840 for(list=0; list<h->list_count; list++){
4843 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4844 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4845 if(val >= h->ref_count[list]){
4846 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4850 val= LIST_NOT_USED&0xFF;
4851 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4854 for(list=0; list<h->list_count; list++){
4857 if(IS_DIR(mb_type, i, list)){
4858 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4859 mx += get_se_golomb(&s->gb);
4860 my += get_se_golomb(&s->gb);
4861 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4863 val= pack16to32(mx,my);
4866 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4872 if(IS_INTER(mb_type))
4873 write_back_motion(h, mb_type);
/* ---- coded_block_pattern (absent for I16x16, which encodes it in mb_type) */
4875 if(!IS_INTRA16x16(mb_type)){
4876 cbp= get_ue_golomb(&s->gb);
4878 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4882 if(IS_INTRA4x4(mb_type))
4883 cbp= golomb_to_intra4x4_cbp[cbp];
4885 cbp= golomb_to_inter_cbp[cbp];
4889 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4890 if(get_bits1(&s->gb)){
4891 mb_type |= MB_TYPE_8x8DCT;
4892 h->cbp_table[mb_xy]= cbp;
4895 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residual decoding ---- */
4897 if(cbp || IS_INTRA16x16(mb_type)){
4898 int i8x8, i4x4, chroma_idx;
4900 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4901 const uint8_t *scan, *scan8x8, *dc_scan;
4903 // fill_non_zero_count_cache(h);
4905 if(IS_INTERLACED(mb_type)){
4906 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4907 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4908 dc_scan= luma_dc_field_scan;
4910 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4911 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4912 dc_scan= luma_dc_zigzag_scan;
4915 dquant= get_se_golomb(&s->gb);
4917 if( dquant > 25 || dquant < -26 ){
4918 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 per the spec */
4922 s->qscale += dquant;
4923 if(((unsigned)s->qscale) > 51){
4924 if(s->qscale<0) s->qscale+= 52;
4925 else s->qscale-= 52;
4928 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4929 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4930 if(IS_INTRA16x16(mb_type)){
4931 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4932 return -1; //FIXME continue if partitioned and other return -1 too
4935 assert((cbp&15) == 0 || (cbp&15) == 15);
/* I16x16 AC blocks: 15 coefficients each (DC handled above) */
4938 for(i8x8=0; i8x8<4; i8x8++){
4939 for(i4x4=0; i4x4<4; i4x4++){
4940 const int index= i4x4 + 4*i8x8;
4941 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4947 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4950 for(i8x8=0; i8x8<4; i8x8++){
4951 if(cbp & (1<<i8x8)){
4952 if(IS_8x8DCT(mb_type)){
4953 DCTELEM *buf = &h->mb[64*i8x8];
4955 for(i4x4=0; i4x4<4; i4x4++){
4956 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4957 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4960 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4961 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4963 for(i4x4=0; i4x4<4; i4x4++){
4964 const int index= i4x4 + 4*i8x8;
4966 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4972 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4973 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (2x2) then chroma AC for both planes */
4979 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4980 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4986 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4987 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4988 for(i4x4=0; i4x4<4; i4x4++){
4989 const int index= 16 + 4*chroma_idx + i4x4;
4990 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4996 uint8_t * const nnz= &h->non_zero_count_cache[0];
4997 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4998 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5001 uint8_t * const nnz= &h->non_zero_count_cache[0];
5002 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5003 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5004 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5006 s->current_picture.qscale_table[mb_xy]= s->qscale;
5007 write_back_non_zero_count(h);
/* undo the ref-count doubling done earlier */
5010 h->ref_count[0] >>= 1;
5011 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag with CABAC: the context (0..2) counts
 * how many of the left/top neighbouring MB pairs are field-coded.
 */
5017 static int decode_cabac_field_decoding_flag(H264Context *h) {
5018 MpegEncContext * const s = &h->s;
5019 const int mb_x = s->mb_x;
/* address of the top MB of the current pair */
5020 const int mb_y = s->mb_y & ~1;
5021 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5022 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5024 unsigned int ctx = 0;
5026 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5029 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5033 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base    base offset into cabac_state for this slice type
 * @param intra_slice nonzero in I slices (uses neighbour-derived context)
 * @return 0 for I4x4, 25 for PCM, otherwise 1..24 encoding the I16x16
 *         variant (cbp_luma, cbp_chroma and prediction mode packed in).
 */
5036 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5037 uint8_t *state= &h->cabac_state[ctx_base];
5041 MpegEncContext * const s = &h->s;
5042 const int mba_xy = h->left_mb_xy[0];
5043 const int mbb_xy = h->top_mb_xy;
/* context: number of available non-I4x4 neighbours */
5045 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5047 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5049 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5050 return 0; /* I4x4 */
5053 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5054 return 0; /* I4x4 */
5057 if( get_cabac_terminate( &h->cabac ) )
5058 return 25; /* PCM */
5060 mb_type = 1; /* I16x16 */
5061 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5062 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5063 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5064 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5065 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on slice type (I/P/B).
 * For B slices the binarization tree yields direct/16x16/partitioned
 * types; intra types in P/B slices are offset past the inter types.
 */
5069 static int decode_cabac_mb_type( H264Context *h ) {
5070 MpegEncContext * const s = &h->s;
5072 if( h->slice_type_nos == FF_I_TYPE ) {
5073 return decode_cabac_intra_mb_type(h, 3, 1);
5074 } else if( h->slice_type_nos == FF_P_TYPE ) {
5075 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5077 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5078 /* P_L0_D16x16, P_8x8 */
5079 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5081 /* P_L0_D8x16, P_L0_D16x8 */
5082 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra mb_type in a P slice: offset by the 5 inter types */
5085 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5087 } else if( h->slice_type_nos == FF_B_TYPE ) {
5088 const int mba_xy = h->left_mb_xy[0];
5089 const int mbb_xy = h->top_mb_xy;
/* context: number of available non-direct neighbours */
5093 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5095 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5098 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5099 return 0; /* B_Direct_16x16 */
5101 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5102 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix distinguishes the remaining B types */
5105 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5106 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5107 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5108 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5110 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5111 else if( bits == 13 ) {
5112 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5113 } else if( bits == 14 )
5114 return 11; /* B_L1_L0_8x16 */
5115 else if( bits == 15 )
5116 return 22; /* B_8x8 */
5118 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5119 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5121 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC.  The context (0..2) counts available
 * non-skipped left/top neighbours; MBAFF needs special neighbour
 * addressing to compare MBs of matching field/frame coding.
 * NOTE(review): several neighbour-setup lines are missing from this
 * excerpt.
 */
5126 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5127 MpegEncContext * const s = &h->s;
5131 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5132 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5135 && h->slice_table[mba_xy] == h->slice_num
5136 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5137 mba_xy += s->mb_stride;
5139 mbb_xy = mb_xy - s->mb_stride;
5141 && h->slice_table[mbb_xy] == h->slice_num
5142 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5143 mbb_xy -= s->mb_stride;
5145 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5147 int mb_xy = h->mb_xy;
5149 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5152 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5154 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
/* B slices use a separate context set, 3 past the P-slice contexts */
5157 if( h->slice_type_nos == FF_B_TYPE )
5159 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra4x4 prediction mode with CABAC: one flag selects the
 * predicted mode; otherwise 3 bins give rem_intra4x4_pred_mode, skipping
 * over the predicted value.
 * NOTE(review): the "return pred_mode" path and closing lines are missing
 * from this excerpt.
 */
5162 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5165 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5168 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5169 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5170 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* remainder skips the predicted mode's slot */
5172 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC using a truncated-unary
 * binarization; context (0..2) counts neighbours with nonzero mode.
 */
5178 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5179 const int mba_xy = h->left_mb_xy[0];
5180 const int mbb_xy = h->top_mb_xy;
5184 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5185 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5188 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5191 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5194 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5196 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC.  Each 8x8 bit
 * is conditioned on the corresponding bits of the left/top neighbours'
 * cbp (or on already-decoded bits of the current cbp).
 * NOTE(review): the "return cbp" line is missing from this excerpt.
 */
5202 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5203 int cbp_b, cbp_a, ctx, cbp = 0;
/* -1 marks an unavailable neighbour (different slice) */
5205 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5206 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5208 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5209 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5210 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5211 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5212 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5213 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5214 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5215 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern (0 = none, 1 = DC only,
 * 2 = DC+AC) with CABAC, contexts derived from the neighbours' chroma
 * cbp bits (stored in bits 4-5 of left_cbp/top_cbp).
 */
5218 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5222 cbp_a = (h->left_cbp>>4)&0x03;
5223 cbp_b = (h-> top_cbp>>4)&0x03;
5226 if( cbp_a > 0 ) ctx++;
5227 if( cbp_b > 0 ) ctx += 2;
5228 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: DC-only vs DC+AC, with its own context pair */
5232 if( cbp_a == 2 ) ctx++;
5233 if( cbp_b == 2 ) ctx += 2;
5234 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC: unary binarization, then maps the
 * unsigned value to a signed delta (odd -> positive, even -> negative).
 * NOTE(review): loop body details and the positive-return path are
 * missing from this excerpt.
 */
5236 static int decode_cabac_mb_dqp( H264Context *h) {
/* first-bin context depends on whether the previous MB had a delta */
5240 if( h->last_qscale_diff != 0 )
5243 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5249 if(val > 102) //prevent infinite loop
5256 return -(val + 1)/2;
/* Decode a P-slice sub_mb_type via a small CABAC binarization tree
 * (contexts 21..23); the leaf returns (missing from this excerpt) select
 * among P_L0_8x8 / P_L0_8x4 / P_L0_4x8 / P_L0_4x4.
 * NOTE(review): return statements and closing brace missing here. */
5258 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5259     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5261     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5263     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode a B-slice sub_mb_type using the prefix tree over contexts 36..39:
 * 0 = B_Direct_8x8; 1/2 = B_L0_8x8/B_L1_8x8; deeper prefixes select the
 * 8x4/4x8/4x4 and bi-predicted variants, with context 39 reused for all
 * suffix bins.
 * NOTE(review): the type initialization and final return are missing from
 * this excerpt; visible tokens kept verbatim. */
5267 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5269     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5270         return 0; /* B_Direct_8x8 */
5271     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5272         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5274     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5275         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5276             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5279     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5280     type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; context 399 + count of neighbouring MBs
 * that already use the 8x8 transform (h->neighbor_transform_size). */
5284 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5285     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for block n of the given list.  The context starts from the
 * left/top neighbour ref indices (in B slices, direct-predicted neighbours
 * are treated as ref 0); the value itself is unary-coded over ctx 54+ctx.
 * The ref >= 32 guard bounds corrupt unary runs — the FIXME notes it should
 * propagate -1 instead of returning 0.
 * NOTE(review): ctx setup, the loop body and final return are missing from
 * this excerpt; visible tokens kept verbatim. */
5288 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5289     int refa = h->ref_cache[list][scan8[n] - 1];
5290     int refb = h->ref_cache[list][scan8[n] - 8];
5294     if( h->slice_type_nos == FF_B_TYPE) {
5295         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5297         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5306     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5312     if(ref >= 32 /*h->ref_list[list]*/){
5313         av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5314         return 0; //FIXME we should return -1 and check the return everywhere
/* Decode one motion-vector-difference component (l: 0=x at ctxbase 40,
 * 1=y at ctxbase 47).  The initial context depends on the sum of the
 * neighbours' |mvd| (amvd); magnitude is unary up to 9 bins, then switches
 * to an exp-Golomb-style bypass suffix, with an overflow log for corrupt
 * streams; the sign is a final bypass bin.
 * NOTE(review): several lines (ctx selection, bypass accumulation, small
 * returns) are missing from this excerpt; visible tokens kept verbatim. */
5320 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5321     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5322                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5323     int ctxbase = (l == 0) ? 40 : 47;
5328     else if( amvd > 32 )
5333     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5338     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5346         while( get_cabac_bypass( &h->cabac ) ) {
5350                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5355             if( get_cabac_bypass( &h->cabac ) )
5359     return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Compute the coded_block_flag context for block category `cat`:
 * the per-category base (4*cat) plus a 0..3 offset built from whether the
 * left (nza) and top (nzb) neighbouring blocks had non-zero coefficients.
 * DC categories read the neighbour info packed into left_cbp/top_cbp
 * (bit 8 for luma DC, bits 6-7 for chroma DC); AC/4x4 categories read the
 * non_zero_count_cache.
 * NOTE(review): the branch structure (if/else, ctx combination, return
 * path) is partially missing from this excerpt; visible tokens verbatim. */
5362 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5368             nza = h->left_cbp&0x100;
5369             nzb = h-> top_cbp&0x100;
5371             nza = (h->left_cbp>>(6+idx))&0x01;
5372             nzb = (h-> top_cbp>>(6+idx))&0x01;
5376             nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5377             nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5379             assert(cat == 1 || cat == 2);
5380             nza = h->non_zero_count_cache[scan8[idx] - 1];
5381             nzb = h->non_zero_count_cache[scan8[idx] - 8];
5391     return ctx + 4 * cat;
/* Per-position context offsets for the last_significant_coeff_flag in 8x8
 * blocks (scan positions 0..62); declared DECLARE_ASM_CONST so inline-asm
 * significance decoders can reference it directly. */
5394 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5395     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5396     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5397     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5398     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Core CABAC residual decoder, always inlined so the is_dc parameter folds
 * to a constant and dead branches drop out.  Pipeline per the spec:
 *   1. coded_block_flag (skipped for 8x8 luma, cat 5, where CBP covers it);
 *      an early zero clears the nnz cache entry and returns.
 *   2. significance map: significant_coeff_flag + last_coeff_flag pairs via
 *      the DECODE_SIGNIFICANCE macro (or x86 asm fast paths), filling
 *      index[] with the positions of non-zero coefficients.
 *   3. levels, decoded back-to-front: a unary level prefix with node-context
 *      state machine (coeff_abs_level1_ctx / coeff_abs_levelgt1_ctx /
 *      coeff_abs_level_transition), bypass exp-Golomb escape for |level|>=15,
 *      bypass sign, and optional dequant via qmul (qmul==NULL for DC cats).
 * The CABAC engine state is copied to a stack-local `cc` (CABAC_ON_STACK)
 * for the duration and written back before every return, so the compiler
 * can keep range/low/bytestream in registers.
 * NOTE(review): this excerpt is missing many original lines (declarations,
 * #else/#endif pairs, closing braces); visible tokens kept verbatim. */
5401 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5402     static const int significant_coeff_flag_offset[2][6] = {
5403       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5404       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5406     static const int last_coeff_flag_offset[2][6] = {
5407       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5408       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5410     static const int coeff_abs_level_m1_offset[6] = {
5411         227+0, 227+10, 227+20, 227+30, 227+39, 426
5413     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5414       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5415         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5416         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5417        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5418       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5419         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5420         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5421         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5423     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5424      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5425      * map node ctx => cabac ctx for level=1 */
5426     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5427     /* map node ctx => cabac ctx for level>1 */
5428     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5429     static const uint8_t coeff_abs_level_transition[2][8] = {
5430     /* update node ctx after decoding a level=1 */
5431         { 1, 2, 3, 3, 4, 5, 6, 7 },
5432     /* update node ctx after decoding a level>1 */
5433         { 4, 4, 4, 4, 5, 6, 7, 7 }
5439     int coeff_count = 0;
5442     uint8_t *significant_coeff_ctx_base;
5443     uint8_t *last_coeff_ctx_base;
5444     uint8_t *abs_level_m1_ctx_base;
5447 #define CABAC_ON_STACK
5449 #ifdef CABAC_ON_STACK
5452     cc.range     = h->cabac.range;
5453     cc.low       = h->cabac.low;
5454     cc.bytestream= h->cabac.bytestream;
5456 #define CC &h->cabac
5460     /* cat: 0-> DC 16x16  n = 0
5461      *      1-> AC 16x16  n = luma4x4idx
5462      *      2-> Luma4x4   n = luma4x4idx
5463      *      3-> DC Chroma n = iCbCr
5464      *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5465      *      5-> Luma8x8   n = 4 * luma8x8idx
5468     /* read coded block flag */
5469     if( is_dc || cat != 5 ) {
5470         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5473                     h->non_zero_count_cache[scan8[16+n]] = 0;
5475                     h->non_zero_count_cache[scan8[n]] = 0;
5478 #ifdef CABAC_ON_STACK
5479             h->cabac.range     = cc.range     ;
5480             h->cabac.low       = cc.low       ;
5481             h->cabac.bytestream= cc.bytestream;
5487     significant_coeff_ctx_base = h->cabac_state
5488         + significant_coeff_flag_offset[MB_FIELD][cat];
5489     last_coeff_ctx_base = h->cabac_state
5490         + last_coeff_flag_offset[MB_FIELD][cat];
5491     abs_level_m1_ctx_base = h->cabac_state
5492         + coeff_abs_level_m1_offset[cat];
5494     if( !is_dc && cat == 5 ) {
5495 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5496         for(last= 0; last < coefs; last++) { \
5497             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5498             if( get_cabac( CC, sig_ctx )) { \
5499                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5500                 index[coeff_count++] = last; \
5501                 if( get_cabac( CC, last_ctx ) ) { \
5507         if( last == max_coeff -1 ) {\
5508             index[coeff_count++] = last;\
5510         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5511 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5512         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5514         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5516         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5518         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5521     assert(coeff_count > 0);
5525             h->cbp_table[h->mb_xy] |= 0x100;
5527             h->cbp_table[h->mb_xy] |= 0x40 << n;
5530             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5532             h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5534             assert( cat == 1 || cat == 2 );
5535             h->non_zero_count_cache[scan8[n]] = coeff_count;
5539     while( coeff_count-- ) {
5540         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5542         int j= scantable[index[coeff_count]];
5544         if( get_cabac( CC, ctx ) == 0 ) {
5545             node_ctx = coeff_abs_level_transition[0][node_ctx];
5547                 block[j] = get_cabac_bypass_sign( CC, -1);
5549                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5553             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5554             node_ctx = coeff_abs_level_transition[1][node_ctx];
5556             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5560             if( coeff_abs >= 15 ) {
5562                 while( get_cabac_bypass( CC ) ) {
5568                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5574                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5576                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5580 #ifdef CABAC_ON_STACK
5581             h->cabac.range     = cc.range     ;
5582             h->cabac.low       = cc.low       ;
5583             h->cabac.bytestream= cc.bytestream;
/* Non-CONFIG_SMALL build: instantiate two non-inline specializations of
 * decode_cabac_residual_internal so the constant is_dc folds away the DC/AC
 * branches in each copy (speed over code size). */
5588 #ifndef CONFIG_SMALL
/* DC specialization: cat 0 (luma DC) and cat 3 (chroma DC); qmul unused. */
5589 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5590     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* AC/4x4/8x8 specialization: all remaining categories, with dequant. */
5593 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5594     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Public residual-decode entry point.  CONFIG_SMALL builds call the inline
 * core directly (one copy, is_dc computed from cat); otherwise dispatch to
 * the two pre-specialized dc/nondc wrappers above.
 * NOTE(review): the #ifdef CONFIG_SMALL / #else / #endif lines are missing
 * from this excerpt — the two call styles below belong to the two branches. */
5598 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5600     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5602     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5603     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Fill h->top_mb_xy / h->left_mb_xy[0] for the current macroblock.
 * The frame-coded defaults (mb_xy - stride, mb_xy - 1) are then corrected
 * for MBAFF: when the current/top/left MB-pairs differ in frame/field
 * coding, the neighbour index moves within the pair, and FIELD_PICTURE
 * skips an extra stride.
 * NOTE(review): the MBAFF guard (likely `if(FRAME_MBAFF)`) and some braces
 * are missing from this excerpt; visible tokens kept verbatim. */
5607 static inline void compute_mb_neighbors(H264Context *h)
5609     MpegEncContext * const s = &h->s;
5610     const int mb_xy = h->mb_xy;
5611     h->top_mb_xy     = mb_xy - s->mb_stride;
5612     h->left_mb_xy[0] = mb_xy - 1;
5614         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5615         const int top_pair_xy      = pair_xy     - s->mb_stride;
5616         const int top_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5617         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5618         const int curr_mb_frame_flag = !MB_FIELD;
5619         const int bottom = (s->mb_y & 1);
5621                 ? !curr_mb_frame_flag // bottom macroblock
5622                 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5624             h->top_mb_xy -= s->mb_stride;
5626         if (left_mb_frame_flag != curr_mb_frame_flag) {
5627             h->left_mb_xy[0] = pair_xy - 1;
5629     } else if (FIELD_PICTURE) {
5630         h->top_mb_xy -= s->mb_stride;
/**
 * decodes a macroblock
 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 *
 * Full CABAC macroblock decode, in bitstream order:
 *   skip flags (with MBAFF pairing), mb_field_decoding_flag, mb_type,
 *   IPCM fast path, intra pred modes or inter refs+MVDs, CBP,
 *   transform_size_8x8_flag, mb_qp_delta, then all residual blocks.
 * NOTE(review): many original lines are elided in this excerpt (closing
 * braces, else-branches, small statements); visible tokens kept verbatim.
 */
5639 static int decode_mb_cabac(H264Context *h) {
5640     MpegEncContext * const s = &h->s;
5642     int mb_type, partition_count, cbp = 0;
5643     int dct8x8_allowed= h->pps.transform_8x8_mode;
5645     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5647     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5649     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     /* ---- skip flag handling (P/B slices only) ---- */
5650     if( h->slice_type_nos != FF_I_TYPE ) {
5652         /* a skipped mb needs the aff flag from the following mb */
5653         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5654             predict_field_decoding_flag(h);
5655         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5656             skip = h->next_mb_skipped;
5658             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5659         /* read skip flags */
5661             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5662                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5663                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5664                 if(h->next_mb_skipped)
5665                     predict_field_decoding_flag(h);
5667                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5672             h->cbp_table[mb_xy] = 0;
5673             h->chroma_pred_mode_table[mb_xy] = 0;
5674             h->last_qscale_diff = 0;
     /* ---- mb_field_decoding_flag for non-skipped MBAFF pairs ---- */
5681         if( (s->mb_y&1) == 0 )
5683                 decode_cabac_field_decoding_flag(h);
5685         h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5687     h->prev_mb_skipped = 0;
     /* ---- mb_type ---- */
5689     compute_mb_neighbors(h);
5690     if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5691         av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
     /* Map the decoded ctxIdx-tree value through the per-slice-type tables. */
5695     if( h->slice_type_nos == FF_B_TYPE ) {
5697             partition_count= b_mb_type_info[mb_type].partition_count;
5698             mb_type=         b_mb_type_info[mb_type].type;
5701             goto decode_intra_mb;
5703     } else if( h->slice_type_nos == FF_P_TYPE ) {
5705             partition_count= p_mb_type_info[mb_type].partition_count;
5706             mb_type=         p_mb_type_info[mb_type].type;
5709             goto decode_intra_mb;
5712         if(h->slice_type == FF_SI_TYPE && mb_type)
5714         assert(h->slice_type_nos == FF_I_TYPE);
5716         partition_count = 0;
5717         cbp= i_mb_type_info[mb_type].cbp;
5718         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5719         mb_type= i_mb_type_info[mb_type].type;
5722         mb_type |= MB_TYPE_INTERLACED;
5724     h->slice_table[ mb_xy ]= h->slice_num;
     /* ---- IPCM: raw samples follow, bypassing CABAC ---- */
5726     if(IS_INTRA_PCM(mb_type)) {
5730         // We assume these blocks are very rare so we do not optimize it.
5731         // FIXME The two following lines get the bitstream position in the cabac
5732         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5733         ptr= h->cabac.bytestream;
5734         if(h->cabac.low&0x1) ptr--;
5736             if(h->cabac.low&0x1FF) ptr--;
5739         // The pixels are stored in the same order as levels in h->mb array.
5740         for(y=0; y<16; y++){
5741             const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5742             for(x=0; x<16; x++){
5743                 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5744                 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5748             const int index= 256 + 4*(y&3) + 32*(y>>2);
5750                 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5751                 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5755             const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5757                 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5758                 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
         /* Restart the arithmetic decoder just past the raw PCM bytes. */
5762         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5764         // All blocks are present
5765         h->cbp_table[mb_xy] = 0x1ef;
5766         h->chroma_pred_mode_table[mb_xy] = 0;
5767         // In deblocking, the quantizer is 0
5768         s->current_picture.qscale_table[mb_xy]= 0;
5769         // All coeffs are present
5770         memset(h->non_zero_count[mb_xy], 16, 16);
5771         s->current_picture.mb_type[mb_xy]= mb_type;
5772         h->last_qscale_diff = 0;
     /* MBAFF field MBs address refs per field: double the counts while
      * decoding this MB (undone near the end of the function). */
5777         h->ref_count[0] <<= 1;
5778         h->ref_count[1] <<= 1;
5781     fill_caches(h, mb_type, 0);
     /* ---- prediction info: intra modes / sub-mb types / refs+MVDs ---- */
5783     if( IS_INTRA( mb_type ) ) {
5785         if( IS_INTRA4x4( mb_type ) ) {
5786             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5787                 mb_type |= MB_TYPE_8x8DCT;
5788                 for( i = 0; i < 16; i+=4 ) {
5789                     int pred = pred_intra_mode( h, i );
5790                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5791                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5794                 for( i = 0; i < 16; i++ ) {
5795                     int pred = pred_intra_mode( h, i );
5796                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5798                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5801             write_back_intra_pred_mode(h);
5802             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5804             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5805             if( h->intra16x16_pred_mode < 0 ) return -1;
5807         h->chroma_pred_mode_table[mb_xy] =
5808         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5810         pred_mode= check_intra_pred_mode( h, pred_mode );
5811         if( pred_mode < 0 ) return -1;
5812         h->chroma_pred_mode= pred_mode;
5813     } else if( partition_count == 4 ) {
5814         int i, j, sub_partition_count[4], list, ref[2][4];
5816         if( h->slice_type_nos == FF_B_TYPE ) {
5817             for( i = 0; i < 4; i++ ) {
5818                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5819                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5820                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5822             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5823                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5824                 pred_direct_motion(h, &mb_type);
5825                 h->ref_cache[0][scan8[4]] =
5826                 h->ref_cache[1][scan8[4]] =
5827                 h->ref_cache[0][scan8[12]] =
5828                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5829                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5830                     for( i = 0; i < 4; i++ )
5831                         if( IS_DIRECT(h->sub_mb_type[i]) )
5832                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5836             for( i = 0; i < 4; i++ ) {
5837                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5838                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5839                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
         /* refs first (spec order), then MVDs below */
5843         for( list = 0; list < h->list_count; list++ ) {
5844                 for( i = 0; i < 4; i++ ) {
5845                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5846                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5847                         if( h->ref_count[list] > 1 )
5848                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5854                     h->ref_cache[list][ scan8[4*i]+1 ]=
5855                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5860             dct8x8_allowed = get_dct8x8_allowed(h);
5862         for(list=0; list<h->list_count; list++){
5864                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
5865                 if(IS_DIRECT(h->sub_mb_type[i])){
5866                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5870                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5871                     const int sub_mb_type= h->sub_mb_type[i];
5872                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5873                     for(j=0; j<sub_partition_count[i]; j++){
5876                         const int index= 4*i + block_width*j;
5877                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5878                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5879                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5881                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5882                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5883                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
                         /* Replicate mv/mvd to the covered 4x4 cache cells,
                          * shape depending on the sub-partition size. */
5885                         if(IS_SUB_8X8(sub_mb_type)){
5887                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5889                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5892                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5894                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5895                         }else if(IS_SUB_8X4(sub_mb_type)){
5896                             mv_cache[ 1 ][0]= mx;
5897                             mv_cache[ 1 ][1]= my;
5899                             mvd_cache[ 1 ][0]= mx - mpx;
5900                             mvd_cache[ 1 ][1]= my - mpy;
5901                         }else if(IS_SUB_4X8(sub_mb_type)){
5902                             mv_cache[ 8 ][0]= mx;
5903                             mv_cache[ 8 ][1]= my;
5905                             mvd_cache[ 8 ][0]= mx - mpx;
5906                             mvd_cache[ 8 ][1]= my - mpy;
5908                         mv_cache[ 0 ][0]= mx;
5909                         mv_cache[ 0 ][1]= my;
5911                         mvd_cache[ 0 ][0]= mx - mpx;
5912                         mvd_cache[ 0 ][1]= my - mpy;
5915                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5916                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5917                     p[0] = p[1] = p[8] = p[9] = 0;
5918                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
5922     } else if( IS_DIRECT(mb_type) ) {
5923         pred_direct_motion(h, &mb_type);
5924         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5925         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5926         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5928         int list, mx, my, i, mpx, mpy;
5929         if(IS_16X16(mb_type)){
5930             for(list=0; list<h->list_count; list++){
5931                 if(IS_DIR(mb_type, 0, list)){
5932                     const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5933                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5935                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5937             for(list=0; list<h->list_count; list++){
5938                 if(IS_DIR(mb_type, 0, list)){
5939                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5941                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5942                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5943                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5945                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5946                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5948                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5951         else if(IS_16X8(mb_type)){
5952             for(list=0; list<h->list_count; list++){
5954                         if(IS_DIR(mb_type, i, list)){
5955                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5956                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5958                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5961             for(list=0; list<h->list_count; list++){
5963                     if(IS_DIR(mb_type, i, list)){
5964                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5965                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5966                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5967                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5969                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5970                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5972                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5973                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5978             assert(IS_8X16(mb_type));
5979             for(list=0; list<h->list_count; list++){
5981                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5982                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5983                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5985                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5988             for(list=0; list<h->list_count; list++){
5990                     if(IS_DIR(mb_type, i, list)){
5991                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5992                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5993                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5995                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5996                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5997                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5999                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6000                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
     /* ---- CBP, transform size, qp delta, residuals ---- */
6007    if( IS_INTER( mb_type ) ) {
6008        h->chroma_pred_mode_table[mb_xy] = 0;
6009        write_back_motion( h, mb_type );
6012    if( !IS_INTRA16x16( mb_type ) ) {
6013        cbp  = decode_cabac_mb_cbp_luma( h );
6014        cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6017    h->cbp_table[mb_xy] = h->cbp = cbp;
6019    if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6020        if( decode_cabac_mb_transform_size( h ) )
6021            mb_type |= MB_TYPE_8x8DCT;
6023    s->current_picture.mb_type[mb_xy]= mb_type;
6025    if( cbp || IS_INTRA16x16( mb_type ) ) {
6026        const uint8_t *scan, *scan8x8, *dc_scan;
6027        const uint32_t *qmul;
6030        if(IS_INTERLACED(mb_type)){
6031            scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6032            scan= s->qscale ? h->field_scan : h->field_scan_q0;
6033            dc_scan= luma_dc_field_scan;
6035            scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6036            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6037            dc_scan= luma_dc_zigzag_scan;
6040        h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6041        if( dqp == INT_MIN ){
6042            av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
        /* qscale wraps modulo 52 per the spec's mb_qp_delta semantics. */
6046        if(((unsigned)s->qscale) > 51){
6047            if(s->qscale<0) s->qscale+= 52;
6048            else            s->qscale-= 52;
6050        h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6051        h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6053        if( IS_INTRA16x16( mb_type ) ) {
6055            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6056            decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6059                qmul = h->dequant4_coeff[0][s->qscale];
6060                for( i = 0; i < 16; i++ ) {
6061                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6062                    decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6065                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6069            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6070                if( cbp & (1<<i8x8) ) {
6071                    if( IS_8x8DCT(mb_type) ) {
6072                        decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6073                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6075                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6076                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6077                            const int index = 4*i8x8 + i4x4;
6078                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6080                            decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6081 //STOP_TIMER("decode_residual")
6085                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6086                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6093            for( c = 0; c < 2; c++ ) {
6094                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6095                decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6101            for( c = 0; c < 2; c++ ) {
6102                qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6103                for( i = 0; i < 4; i++ ) {
6104                    const int index = 16 + 4 * c + i;
6105                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6106                    decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6110            uint8_t * const nnz= &h->non_zero_count_cache[0];
6111            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6112            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6115        uint8_t * const nnz= &h->non_zero_count_cache[0];
6116        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6117        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6118        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6119        h->last_qscale_diff = 0;
6122    s->current_picture.qscale_table[mb_xy]= s->qscale;
6123    write_back_non_zero_count(h);
     /* undo the MBAFF field ref-count doubling from above */
6126        h->ref_count[0] >>= 1;
6127        h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge of a macroblock.
 * bS < 4: delegate to the DSP normal-strength filter with per-4-line tc0
 * (tc[i] = -1 disables a segment).  bS == 4 (intra edge): run the C strong
 * filter over all 16 rows, choosing the 3-tap strong or weak variant per
 * row based on the alpha/beta thresholds (tables biased by +52 to allow
 * negative indices from the slice offsets).
 * NOTE(review): some loop headers / braces are missing from this excerpt;
 * visible tokens kept verbatim. */
6134 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6136     const int index_a = qp + h->slice_alpha_c0_offset;
6137     const int alpha = (alpha_table+52)[index_a];
6138     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6143             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6144         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6146         /* 16px edge length, because bS=4 is triggered by being at
6147          * the edge of an intra MB, so all 4 bS are the same */
6148         for( d = 0; d < 16; d++ ) {
6149             const int p0 = pix[-1];
6150             const int p1 = pix[-2];
6151             const int p2 = pix[-3];
6153             const int q0 = pix[0];
6154             const int q1 = pix[1];
6155             const int q2 = pix[2];
6157             if( FFABS( p0 - q0 ) < alpha &&
6158                 FFABS( p1 - p0 ) < beta &&
6159                 FFABS( q1 - q0 ) < beta ) {
6161                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6162                     if( FFABS( p2 - p0 ) < beta)
6164                         const int p3 = pix[-4];
6166                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6167                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6168                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6171                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6173                     if( FFABS( q2 - q0 ) < beta)
6175                         const int q3 = pix[3];
6177                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6178                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6179                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6182                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6186                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6187                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6189                 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge: normal-strength path uses tc0+1
 * (tc[i] = 0 disables a segment, per the chroma filter convention),
 * bS == 4 uses the intra chroma DSP filter.
 * NOTE(review): the bS<4 vs bS==4 branch lines are missing from this
 * excerpt; visible tokens kept verbatim. */
6195 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6197     const int index_a = qp + h->slice_alpha_c0_offset;
6198     const int alpha = (alpha_table+52)[index_a];
6199     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6204             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6205         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6207         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* MBAFF variant of the vertical luma edge filter: 8 bS values and two qp
 * values (one per field/MB of the pair), so alpha/beta/tc0 are recomputed
 * per row from qp[qp_index].  Row-by-row C implementation of both the
 * normal-strength (clip by tc) and bS==4 strong filters.
 * NOTE(review): bS_index/qp_index setup and several braces are missing from
 * this excerpt; visible tokens kept verbatim. */
6211 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6213     for( i = 0; i < 16; i++, pix += stride) {
6219         int bS_index = (i >> 1);
6222             bS_index |= (i & 1);
6225         if( bS[bS_index] == 0 ) {
6229         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6230         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6231         alpha = (alpha_table+52)[index_a];
6232         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6234         if( bS[bS_index] < 4 ) {
6235             const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6236             const int p0 = pix[-1];
6237             const int p1 = pix[-2];
6238             const int p2 = pix[-3];
6239             const int q0 = pix[0];
6240             const int q1 = pix[1];
6241             const int q2 = pix[2];
6243             if( FFABS( p0 - q0 ) < alpha &&
6244                 FFABS( p1 - p0 ) < beta &&
6245                 FFABS( q1 - q0 ) < beta ) {
                 /* tc grows by one for each of p2/q2 inside beta (spec 8.7.2.3) */
6249                 if( FFABS( p2 - p0 ) < beta ) {
6250                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6253                 if( FFABS( q2 - q0 ) < beta ) {
6254                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6258                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6259                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6260                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6261                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6264             const int p0 = pix[-1];
6265             const int p1 = pix[-2];
6266             const int p2 = pix[-3];
6268             const int q0 = pix[0];
6269             const int q1 = pix[1];
6270             const int q2 = pix[2];
6272             if( FFABS( p0 - q0 ) < alpha &&
6273                 FFABS( p1 - p0 ) < beta &&
6274                 FFABS( q1 - q0 ) < beta ) {
6276                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6277                     if( FFABS( p2 - p0 ) < beta)
6279                         const int p3 = pix[-4];
6281                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6282                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6283                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6286                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6288                     if( FFABS( q2 - q0 ) < beta)
6290                         const int q3 = pix[3];
6292                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6293                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6294                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6297                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6301                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6302                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6304                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* MBAFF variant of the vertical chroma edge filter: 8 rows, per-row qp
 * selection, with the chroma normal filter (tc = tc0+1, p0/q0 only) and the
 * chroma strong filter for bS==4.
 * NOTE(review): bS_index computation and some braces are missing from this
 * excerpt; visible tokens kept verbatim. */
6309 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6311     for( i = 0; i < 8; i++, pix += stride) {
6319         if( bS[bS_index] == 0 ) {
6323         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6324         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6325         alpha = (alpha_table+52)[index_a];
6326         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6328         if( bS[bS_index] < 4 ) {
6329             const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6330             const int p0 = pix[-1];
6331             const int p1 = pix[-2];
6332             const int q0 = pix[0];
6333             const int q1 = pix[1];
6335             if( FFABS( p0 - q0 ) < alpha &&
6336                 FFABS( p1 - p0 ) < beta &&
6337                 FFABS( q1 - q0 ) < beta ) {
6338                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6340                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6341                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6342                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6345             const int p0 = pix[-1];
6346             const int p1 = pix[-2];
6347             const int q0 = pix[0];
6348             const int q1 = pix[1];
6350             if( FFABS( p0 - q0 ) < alpha &&
6351                 FFABS( p1 - p0 ) < beta &&
6352                 FFABS( q1 - q0 ) < beta ) {
6354                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6355                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6356                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/*
 * Deblock one horizontal luma edge (16 pixels wide).  The edge lies
 * between rows pix[-stride] and pix[0]; bS[4] holds per-4-pixel boundary
 * strengths, qp the averaged QP of the two adjacent macroblocks.
 * NOTE(review): elided excerpt — the branch structure around the DSP
 * fast path and the scalar bS==4 path is only partially visible.
 */
6362 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6364 const int index_a = qp + h->slice_alpha_c0_offset;
/* threshold tables are biased by +52 so qp+offset may be negative */
6365 const int alpha = (alpha_table+52)[index_a];
6366 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6367 const int pix_next = stride;
/* bS < 4 path: whole edge handed to the DSP routine; a negative tc[i]
 * presumably marks that 4-pixel group as "do not filter" — TODO confirm
 * against h264_v_loop_filter_luma's contract */
6372 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6373 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6375 /* 16px edge length, see filter_mb_edgev */
6376 for( d = 0; d < 16; d++ ) {
6377 const int p0 = pix[-1*pix_next];
6378 const int p1 = pix[-2*pix_next];
6379 const int p2 = pix[-3*pix_next];
6380 const int q0 = pix[0];
6381 const int q1 = pix[1*pix_next];
6382 const int q2 = pix[2*pix_next];
/* filter only edges that look like blocking artifacts */
6384 if( FFABS( p0 - q0 ) < alpha &&
6385 FFABS( p1 - p0 ) < beta &&
6386 FFABS( q1 - q0 ) < beta ) {
6388 const int p3 = pix[-4*pix_next];
6389 const int q3 = pix[ 3*pix_next];
/* strong (bS==4) filtering: wide smoothing when the edge is flat enough,
 * otherwise a weaker 3-tap fallback on each side independently */
6391 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6392 if( FFABS( p2 - p0 ) < beta) {
6394 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6395 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6396 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6399 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6401 if( FFABS( q2 - q0 ) < beta) {
6403 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6404 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6405 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6408 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6412 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6413 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6415 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/*
 * Deblock one horizontal chroma edge via the DSP routines.  Unlike the
 * luma case above, chroma tc values are biased by +1 and 0 means "skip
 * this group".
 * NOTE(review): elided excerpt — the tc[] loop header and the
 * intra/inter branch selecting between the two DSP calls are missing.
 */
6422 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6424 const int index_a = qp + h->slice_alpha_c0_offset;
/* threshold tables are biased by +52 so qp+offset may be negative */
6425 const int alpha = (alpha_table+52)[index_a];
6426 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6431 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6432 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS==4 (intra) edges use the dedicated strong-filter routine */
6434 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Fast deblocking path for a single macroblock.  Assumes a progressive
 * (non-MBAFF) frame macroblock with uniform chroma QP offsets; any case
 * it cannot handle (picture border, per-plane chroma QP diff, slice
 * boundary with deblocking_filter==2, missing DSP helper) is forwarded
 * to the generic filter_mb().
 * NOTE(review): elided excerpt — several declarations (mb_xy, mb_type)
 * and braces, plus the tail of the 8x8 DCT inter path, are missing.
 */
6438 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6439 MpegEncContext * const s = &h->s;
/* in a bottom-field picture the first row of MBs is row 1's counterpart */
6440 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6442 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* bail out to the full filter for every case this fast path cannot do */
6446 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6448 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6449 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6450 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6453 assert(!FRAME_MBAFF);
6455 mb_type = s->current_picture.mb_type[mb_xy];
/* average current QP with the left (qp0) and top (qp1) neighbours, for
 * the external edges; same for the chroma QPs */
6456 qp = s->current_picture.qscale_table[mb_xy];
6457 qp0 = s->current_picture.qscale_table[mb_xy-1];
6458 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6459 qpc = get_chroma_qp( h, 0, qp );
6460 qpc0 = get_chroma_qp( h, 0, qp0 );
6461 qpc1 = get_chroma_qp( h, 0, qp1 );
6462 qp0 = (qp + qp0 + 1) >> 1;
6463 qp1 = (qp + qp1 + 1) >> 1;
6464 qpc0 = (qpc + qpc0 + 1) >> 1;
6465 qpc1 = (qpc + qpc1 + 1) >> 1;
/* if every relevant QP is below this threshold the filter is a no-op */
6466 qp_thresh = 15 - h->slice_alpha_c0_offset;
6467 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6468 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed boundary strengths — 4 on external edges (3 for the
 * horizontal ones in field pictures), 3 internally */
6471 if( IS_INTRA(mb_type) ) {
6472 int16_t bS4[4] = {4,4,4,4};
6473 int16_t bS3[4] = {3,3,3,3};
6474 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 DCT: only every other internal edge exists */
6475 if( IS_8x8DCT(mb_type) ) {
6476 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6477 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6478 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6479 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6481 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6482 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6483 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6484 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6485 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6486 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6487 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6488 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma has only the external edge and the single middle edge */
6490 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6491 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6492 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6493 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6494 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6495 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6496 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6497 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute boundary strengths, 8-byte aligned so the DSP code
 * can treat each row of 4 int16 bS values as one uint64 */
6500 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6501 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 DCT with all luma 8x8 blocks coded: internal edges are all bS=2 */
6503 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6505 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: which edges can skip the mv-based bS recheck, derived
 * from the partition shapes of this MB and the left neighbour */
6507 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6508 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6509 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6510 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6512 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6513 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6514 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6515 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* external edges against intra neighbours are always strongest */
6517 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6518 bSv[0][0] = 0x0004000400040004ULL;
6519 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6520 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* apply one luma edge plus (for even edges) the matching chroma edges;
 * dir 0 = vertical, 1 = horizontal; edge 0 uses the neighbour-avg QP */
6522 #define FILTER(hv,dir,edge)\
6523 if(bSv[dir][edge]) {\
6524 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6526 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6527 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6533 } else if( IS_8x8DCT(mb_type) ) {
/*
 * Full deblocking filter for one macroblock, covering every case the
 * fast path cannot: MBAFF frame/field mixing, field pictures, per-plane
 * chroma QP offsets, slice-boundary handling, CAVLC+8x8DCT nnz fixup.
 * NOTE(review): elided excerpt — many declarations, braces and some
 * condition headers are missing from this view.
 */
6552 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6553 MpegEncContext * const s = &h->s;
6554 const int mb_xy= mb_x + mb_y*s->mb_stride;
6555 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs compare vertical MVs at half resolution */
6556 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6557 int first_vertical_edge_done = 0;
6560 //for sufficiently low qp, filtering wouldn't do anything
6561 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6563 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6564 int qp = s->current_picture.qscale_table[mb_xy];
6566 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6567 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6572 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6573 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6574 int top_type, left_type[2];
6575 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6576 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6577 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the nnz cache rows/columns from the neighbours' cbp bits */
6579 if(IS_8x8DCT(top_type)){
6580 h->non_zero_count_cache[4+8*0]=
6581 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6582 h->non_zero_count_cache[6+8*0]=
6583 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6585 if(IS_8x8DCT(left_type[0])){
6586 h->non_zero_count_cache[3+8*1]=
6587 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6589 if(IS_8x8DCT(left_type[1])){
6590 h->non_zero_count_cache[3+8*3]=
6591 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* current MB: propagate each 8x8 cbp bit to all four 4x4 cache slots */
6594 if(IS_8x8DCT(mb_type)){
6595 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6596 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6598 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6599 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6601 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6602 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6604 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6605 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
/* MBAFF left-edge special case: the (elided) condition requires that... */
6610 // left mb is in picture
6611 && h->slice_table[mb_xy-1] != 255
6612 // and current and left pair do not have the same interlaced type
6613 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6614 // and left mb is in the same slice if deblocking_filter == 2
6615 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6616 /* First vertical edge is different in MBAFF frames
6617 * There are 8 different bS to compute and 2 different Qp
6619 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6620 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6625 int mb_qp, mbn0_qp, mbn1_qp;
6627 first_vertical_edge_done = 1;
6629 if( IS_INTRA(mb_type) )
6630 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
/* inter: derive one bS per row from the matching neighbour of the pair */
6632 for( i = 0; i < 8; i++ ) {
6633 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6635 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6637 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6638 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6639 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two QP averages (luma, Cb, Cr) — one per neighbouring MB of the pair */
6646 mb_qp = s->current_picture.qscale_table[mb_xy];
6647 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6648 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6649 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6650 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6651 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6652 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6653 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6654 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6655 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6656 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6657 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6658 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6661 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6662 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6663 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6664 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6665 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6667 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6668 for( dir = 0; dir < 2; dir++ )
6671 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6672 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref-index -> frame-number maps for this slice and the neighbour's */
6673 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6674 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
/* start at edge 1 (skip the external edge) when the neighbour is outside the picture */
6675 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6677 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6678 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6679 // how often to recheck mv-based bS when iterating between edges
6680 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6681 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6682 // how often to recheck mv-based bS when iterating along each edge
6683 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the MBAFF left edge was already filtered above */
6685 if (first_vertical_edge_done) {
6687 first_vertical_edge_done = 0;
6690 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* frame MB below a field pair: the top edge must be filtered once per field */
6693 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6694 && !IS_INTERLACED(mb_type)
6695 && IS_INTERLACED(mbm_type)
6697 // This is a special case in the norm where the filtering must
6698 // be done twice (one each of the field) even if we are in a
6699 // frame macroblock.
6701 static const int nnz_idx[4] = {4,5,6,3};
6702 unsigned int tmp_linesize = 2 * linesize;
6703 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6704 int mbn_xy = mb_xy - 2 * s->mb_stride;
6709 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6710 if( IS_INTRA(mb_type) ||
6711 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6712 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6714 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6715 for( i = 0; i < 4; i++ ) {
6716 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6717 mbn_nnz[nnz_idx[i]] != 0 )
6723 // Do not use s->qscale as luma quantizer because it has not the same
6724 // value in IPCM macroblocks.
6725 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6726 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6727 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6728 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6729 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6730 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6731 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6732 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* general per-edge loop: edge 0 borders the neighbour MB, edges 1..3 are internal */
6739 for( edge = start; edge < edges; edge++ ) {
6740 /* mbn_xy: neighbor macroblock */
6741 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6742 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6743 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 DCT leaves no residual on odd internal edges */
6747 if( (edge&1) && IS_8x8DCT(mb_type) )
/* intra on either side: bS 4 on the external edge (3 in field-ish cases), 3 inside */
6750 if( IS_INTRA(mb_type) ||
6751 IS_INTRA(mbn_type) ) {
6754 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6755 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6764 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* partition shape says MVs cannot differ across this edge */
6769 if( edge & mask_edge ) {
6770 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* mixed frame/field pair: bS fixed to 1 per the spec */
6773 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6774 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge shares one partition on both sides: compute bS once */
6777 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6778 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6779 int bn_idx= b_idx - (dir ? 8:1);
6782 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6783 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6784 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6785 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also try matching L0 against the neighbour's L1 and vice versa */
6788 if(h->slice_type_nos == FF_B_TYPE && v){
6790 for( l = 0; !v && l < 2; l++ ) {
6792 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6793 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6794 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6798 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: one bS per 4x4 block along the edge */
6804 for( i = 0; i < 4; i++ ) {
6805 int x = dir == 0 ? edge : i;
6806 int y = dir == 0 ? i : edge;
6807 int b_idx= 8 + 4 + x + 8*y;
6808 int bn_idx= b_idx - (dir ? 8:1);
/* coded residual on either side forces bS 2 */
6810 if( h->non_zero_count_cache[b_idx] != 0 ||
6811 h->non_zero_count_cache[bn_idx] != 0 ) {
6817 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6818 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6819 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6820 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6826 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6828 for( l = 0; l < 2; l++ ) {
6830 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6831 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6832 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all four bS zero: nothing to filter on this edge */
6841 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6846 // Do not use s->qscale as luma quantizer because it has not the same
6847 // value in IPCM macroblocks.
6848 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6849 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6850 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6851 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* apply: luma every edge, chroma only on even edges (half resolution) */
6853 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6854 if( (edge&1) == 0 ) {
6855 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6856 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6857 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6858 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6861 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6862 if( (edge&1) == 0 ) {
6863 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6864 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6865 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6866 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/*
 * Decode all macroblocks of one slice, using either the CABAC or the
 * CAVLC entropy path, and report the decoded region to the error
 * resilience code via ff_er_add_slice().
 * Returns 0 when the slice ends cleanly, -1 on a decode error.
 * NOTE(review): elided excerpt — several loop headers, braces and
 * declarations are missing from this view.
 * Fix applied: stray '?' characters had corrupted the bitstream-exhausted
 * test in the disabled decode_mb loop (original line 7031); restored to
 * match the identical checks on lines 7019/7032.
 */
6873 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6874 MpegEncContext * const s = &h->s;
/* with data partitioning only AC errors/end markers are reported */
6875 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6879 if( h->pps.cabac ) {
/* CABAC data is byte-aligned after the slice header */
6883 align_get_bits( &s->gb );
6886 ff_init_cabac_states( &h->cabac);
6887 ff_init_cabac_decoder( &h->cabac,
6888 s->gb.buffer + get_bits_count(&s->gb)/8,
6889 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6890 /* calculate pre-state */
6891 for( i= 0; i < 460; i++ ) {
6893 if( h->slice_type_nos == FF_I_TYPE )
6894 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6896 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack state as (index<<1)|MPS as expected by the CABAC engine */
6899 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6901 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6906 int ret = decode_mb_cabac(h);
6908 //STOP_TIMER("decode_mb_cabac")
6910 if(ret>=0) hl_decode_mb(h);
/* MBAFF pairs: decode and render the bottom MB of the pair as well */
6912 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6915 if(ret>=0) ret = decode_mb_cabac(h);
6917 if(ret>=0) hl_decode_mb(h);
6920 eos = get_cabac_terminate( &h->cabac );
/* a small overread past the end is tolerated (bitstream padding) */
6922 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6923 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6924 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6928 if( ++s->mb_x >= s->mb_width ) {
6930 ff_draw_horiz_band(s, 16*s->mb_y, 16);
/* field/MBAFF pictures advance two MB rows at a time */
6932 if(FIELD_OR_MBAFF_PICTURE) {
6937 if( eos || s->mb_y >= s->mb_height ) {
6938 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6939 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* (elided '}else{' — CAVLC entropy path follows) */
6946 int ret = decode_mb_cavlc(h);
6948 if(ret>=0) hl_decode_mb(h);
6950 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6952 ret = decode_mb_cavlc(h);
6954 if(ret>=0) hl_decode_mb(h);
6959 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6960 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6965 if(++s->mb_x >= s->mb_width){
6967 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6969 if(FIELD_OR_MBAFF_PICTURE) {
6972 if(s->mb_y >= s->mb_height){
6973 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* clean end only if the bitstream is consumed exactly */
6975 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6976 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6980 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mb_skip_run > 0 means skipped MBs remain even though bits ran out */
6987 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6988 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6989 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6990 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6994 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* (elided: disabled legacy decode_mb loop follows) */
7003 for(;s->mb_y < s->mb_height; s->mb_y++){
7004 for(;s->mb_x < s->mb_width; s->mb_x++){
7005 int ret= decode_mb(h);
7010 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7011 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7016 if(++s->mb_x >= s->mb_width){
7018 if(++s->mb_y >= s->mb_height){
7019 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7020 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7024 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7031 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7032 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7033 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7037 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7044 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7047 return -1; //not reached
/*
 * Parse an "unregistered user data" SEI payload.  The first 16 bytes are
 * a UUID; the text after it is scanned for an x264 version banner so bug
 * workarounds can key off h->x264_build.
 * NOTE(review): elided excerpt — declarations of i/e/build, the skip of
 * payload bytes beyond the buffer, and the return are partly missing.
 */
7050 static int decode_unregistered_user_data(H264Context *h, int size){
7051 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text */
7052 uint8_t user_data[16+256];
7058 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7059 user_data[i]= get_bits(&s->gb, 8);
/* text starts after the 16-byte UUID */
7063 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7064 if(e==1 && build>=0)
7065 h->x264_build= build;
7067 if(s->avctx->debug & FF_DEBUG_BUGS)
7068 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes that did not fit in the local buffer */
7071 skip_bits(&s->gb, 8);
/*
 * Parse an SEI NAL unit.  Each message carries (type, size), both coded
 * as a run of 0xFF escape bytes plus a final byte; only unregistered
 * user data is interpreted, all other payload types are skipped.
 * NOTE(review): elided excerpt — the do{ headers, switch on type and the
 * return are not visible here.
 */
7076 static int decode_sei(H264Context *h){
7077 MpegEncContext * const s = &h->s;
/* stop when fewer than 16 bits remain (minimum type+size pair) */
7079 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* type: accumulate 255 per 0xFF escape byte, then the final byte */
7084 type+= show_bits(&s->gb, 8);
7085 }while(get_bits(&s->gb, 8) == 255);
/* payload size in bytes, same escape coding */
7089 size+= show_bits(&s->gb, 8);
7090 }while(get_bits(&s->gb, 8) == 255);
7094 if(decode_unregistered_user_data(h, size) < 0)
/* unknown/unhandled SEI type: skip over its payload */
7098 skip_bits(&s->gb, 8*size);
7101 //FIXME check bits here
7102 align_get_bits(&s->gb);
/*
 * Parse (and discard) the hypothetical reference decoder parameters of a
 * VUI section; nothing is stored, the fields are only consumed so the
 * bitstream position stays correct.
 * NOTE(review): elided excerpt — declarations of cpb_count/i and the
 * closing brace are not visible.
 */
7108 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7109 MpegEncContext * const s = &h->s;
7111 cpb_count = get_ue_golomb(&s->gb) + 1;
7112 get_bits(&s->gb, 4); /* bit_rate_scale */
7113 get_bits(&s->gb, 4); /* cpb_size_scale */
7114 for(i=0; i<cpb_count; i++){
7115 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7116 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7117 get_bits1(&s->gb); /* cbr_flag */
7119 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7120 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7121 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7122 get_bits(&s->gb, 5); /* time_offset_length */
/*
 * Parse the VUI (Video Usability Information) section of an SPS.  Stores
 * sample aspect ratio, timing info and the bitstream restriction data
 * (num_reorder_frames) into *sps; all purely informative fields are read
 * and discarded.
 * NOTE(review): elided excerpt — some braces, error returns and the
 * final return are not visible.
 */
7125 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7126 MpegEncContext * const s = &h->s;
7127 int aspect_ratio_info_present_flag;
7128 unsigned int aspect_ratio_idc;
7129 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7131 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7133 if( aspect_ratio_info_present_flag ) {
7134 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit numerator/denominator; otherwise table lookup */
7135 if( aspect_ratio_idc == EXTENDED_SAR ) {
7136 sps->sar.num= get_bits(&s->gb, 16);
7137 sps->sar.den= get_bits(&s->gb, 16);
7138 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7139 sps->sar= pixel_aspect[aspect_ratio_idc];
7141 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7148 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7150 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7151 get_bits1(&s->gb); /* overscan_appropriate_flag */
7154 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7155 get_bits(&s->gb, 3); /* video_format */
7156 get_bits1(&s->gb); /* video_full_range_flag */
7157 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7158 get_bits(&s->gb, 8); /* colour_primaries */
7159 get_bits(&s->gb, 8); /* transfer_characteristics */
7160 get_bits(&s->gb, 8); /* matrix_coefficients */
7164 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7165 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7166 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* timing info: frame rate = time_scale / (2 * num_units_in_tick) */
7169 sps->timing_info_present_flag = get_bits1(&s->gb);
7170 if(sps->timing_info_present_flag){
7171 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7172 sps->time_scale = get_bits_long(&s->gb, 32);
7173 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters are parsed only to keep bitstream alignment */
7176 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7177 if(nal_hrd_parameters_present_flag)
7178 decode_hrd_parameters(h, sps);
7179 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7180 if(vcl_hrd_parameters_present_flag)
7181 decode_hrd_parameters(h, sps);
7182 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7183 get_bits1(&s->gb); /* low_delay_hrd_flag */
7184 get_bits1(&s->gb); /* pic_struct_present_flag */
7186 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7187 if(sps->bitstream_restriction_flag){
7188 unsigned int num_reorder_frames;
7189 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7190 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7191 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7192 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7193 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7194 num_reorder_frames= get_ue_golomb(&s->gb);
7195 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity-bound reorder depth before it sizes the delayed-pic buffer */
7197 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7198 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7202 sps->num_reorder_frames= num_reorder_frames;
/*
 * Read one quantization scaling list (size 16 or 64) in zigzag order.
 * A leading absent-flag selects the predicted fallback list; a first
 * delta that makes 'next' zero selects the JVT default list; otherwise
 * deltas update a running 'last' value which repeats once a run ends.
 * NOTE(review): elided excerpt — the else branch around the loop and
 * some braces are not visible.
 */
7208 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7209 const uint8_t *jvt_list, const uint8_t *fallback_list){
7210 MpegEncContext * const s = &h->s;
7211 int i, last = 8, next = 8;
7212 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7213 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7214 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7216 for(i=0;i<size;i++){
/* deltas are signed Golomb, wrapped to 0..255 */
7218 next = (last + get_se_golomb(&s->gb)) & 0xff;
7219 if(!i && !next){ /* matrix not written, we use the preset one */
7220 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 ends the explicit run: keep repeating 'last' */
7223 last = factors[scan[i]] = next ? next : last;
/*
 * Read the full set of scaling matrices for an SPS or PPS.  When a list
 * is absent it falls back either to the SPS matrices (for a PPS) or to
 * the H.264 default lists; Cr/Cb lists fall back to the previous list.
 */
7227 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7228 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7229 MpegEncContext * const s = &h->s;
/* a PPS falls back to SPS matrices when those were transmitted */
7230 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7231 const uint8_t *fallback[4] = {
7232 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7233 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7234 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7235 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7237 if(get_bits1(&s->gb)){
7238 sps->scaling_matrix_present |= is_sps;
7239 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7240 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7241 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7242 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7243 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7244 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 transform */
7245 if(is_sps || pps->transform_8x8_mode){
7246 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7247 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7249 } else if(fallback_sps) {
/* nothing transmitted: copy the SPS matrices wholesale */
7250 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7251 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7256  * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Validates 'id' against 'max', allocates a zeroed structure of 'size'
 * bytes into vec[id] if none exists yet, and returns it ('name' is only
 * used for error messages).  Presumably returns NULL on failure — the
 * return-type line and error returns are elided from this view. */
7259 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7260 const size_t size, const char *name)
7263 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7268 vec[id] = av_mallocz(size);
7270 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Decode a sequence parameter set (SPS) NAL unit from the bitstream
 * reader in h->s.gb and store the parsed fields into an SPS slot
 * obtained via alloc_parameter_set() (indexed by sps_id).
 *
 * NOTE(review): this extract is line-sampled — error-return statements
 * and several closing braces are elided. Code left byte-identical;
 * only comments added.
 */
7275 static inline int decode_seq_parameter_set(H264Context *h){
7276 MpegEncContext * const s = &h->s;
7277 int profile_idc, level_idc;
7278 unsigned int sps_id, tmp, mb_width, mb_height;
// Fixed-length header: profile, four constraint flags, 4 reserved bits, level.
7282 profile_idc= get_bits(&s->gb, 8);
7283 get_bits1(&s->gb); //constraint_set0_flag
7284 get_bits1(&s->gb); //constraint_set1_flag
7285 get_bits1(&s->gb); //constraint_set2_flag
7286 get_bits1(&s->gb); //constraint_set3_flag
7287 get_bits(&s->gb, 4); // reserved
7288 level_idc= get_bits(&s->gb, 8);
7289 sps_id= get_ue_golomb(&s->gb);
// alloc_parameter_set() range-checks sps_id against MAX_SPS_COUNT and
// allocates the slot on first use (failure path elided in this extract).
7291 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7295 sps->profile_idc= profile_idc;
7296 sps->level_idc= level_idc;
// High profile (idc >= 100) carries chroma format, bit depths,
// transform-bypass flag and optional scaling matrices.
7298 if(sps->profile_idc >= 100){ //high profile
7299 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7300 get_bits1(&s->gb); //residual_color_transform_flag
7301 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7302 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7303 sps->transform_bypass = get_bits1(&s->gb);
7304 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7306 sps->scaling_matrix_present = 0;
7308 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7309 sps->poc_type= get_ue_golomb(&s->gb);
// Picture order count: type 0 uses poc_lsb wrapping, type 1 uses
// explicit per-cycle offsets, type 2 needs no extra syntax.
7311 if(sps->poc_type == 0){ //FIXME #define
7312 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7313 } else if(sps->poc_type == 1){//FIXME #define
7314 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7315 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7316 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7317 tmp= get_ue_golomb(&s->gb);
// Bound the cycle length by the fixed offset_for_ref_frame[] capacity.
7319 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7320 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7323 sps->poc_cycle_length= tmp;
7325 for(i=0; i<sps->poc_cycle_length; i++)
7326 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7327 }else if(sps->poc_type != 2){
7328 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
// Reference frame count: limited by both the internal picture pool
// (MAX_PICTURE_COUNT-2) and the spec's maximum of 32.
7332 tmp= get_ue_golomb(&s->gb);
7333 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7334 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7337 sps->ref_frame_count= tmp;
7338 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
// Frame size in macroblocks; guard against int overflow before the
// *16 pixel conversion, then validate via avcodec_check_dimensions().
7339 mb_width= get_ue_golomb(&s->gb) + 1;
7340 mb_height= get_ue_golomb(&s->gb) + 1;
7341 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7342 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7343 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7346 sps->mb_width = mb_width;
7347 sps->mb_height= mb_height;
// Interlace: mb_aff (macroblock-adaptive frame/field) only exists when
// the stream is not frame-macroblocks-only.
7349 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7350 if(!sps->frame_mbs_only_flag)
7351 sps->mb_aff= get_bits1(&s->gb);
7355 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7357 #ifndef ALLOW_INTERLACE
7359 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7361 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7362 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
// Frame cropping rectangle (in chroma-sample units per the spec).
// Only small right/bottom crops are supported; left/top cropping is
// flagged as inexact.
7364 sps->crop= get_bits1(&s->gb);
7366 sps->crop_left = get_ue_golomb(&s->gb);
7367 sps->crop_right = get_ue_golomb(&s->gb);
7368 sps->crop_top = get_ue_golomb(&s->gb);
7369 sps->crop_bottom= get_ue_golomb(&s->gb);
7370 if(sps->crop_left || sps->crop_top){
7371 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
// NOTE(review): checks h->sps (the currently active SPS), not the sps
// being parsed — presumably intentional legacy behavior; verify.
7373 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7374 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7380 sps->crop_bottom= 0;
7383 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7384 if( sps->vui_parameters_present_flag )
7385 decode_vui_parameters(h, sps);
// Optional one-line trace of the parsed SPS when picture-info debug is on.
7387 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7388 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7389 sps_id, sps->profile_idc, sps->level_idc,
7391 sps->ref_frame_count,
7392 sps->mb_width, sps->mb_height,
7393 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7394 sps->direct_8x8_inference_flag ? "8B8" : "",
7395 sps->crop_left, sps->crop_right,
7396 sps->crop_top, sps->crop_bottom,
7397 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Fill one luma-QP -> chroma-QP lookup table for a PPS.
 * @param t     table index (0 or 1, one per chroma_qp_index_offset)
 * @param index the PPS chroma_qp_index_offset applied before the clamp
 * Each of the 52 luma QP values maps through chroma_qp[] after clamping
 * (i + index) into the valid 0..51 range.
 * NOTE(review): the return-type line of this definition is elided in
 * this extract.
 */
7404 build_qp_table(PPS *pps, int t, int index)
7407 for(i = 0; i < 52; i++)
7408 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) NAL unit.
 * @param bit_length length of the RBSP in bits, used to detect the
 *        optional trailing high-profile fields (8x8 transform flag,
 *        scaling matrices, second chroma QP offset).
 *
 * NOTE(review): this extract is line-sampled — error-return statements
 * and several closing braces are elided. Code left byte-identical;
 * only comments added.
 */
7411 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7412 MpegEncContext * const s = &h->s;
7413 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7416 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
// The referenced SPS must already have been decoded.
7420 tmp= get_ue_golomb(&s->gb);
7421 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7422 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7427 pps->cabac= get_bits1(&s->gb);
7428 pps->pic_order_present= get_bits1(&s->gb);
// FMO (flexible macroblock ordering) is parsed but not supported; the
// block-comment table below is the relevant spec syntax, kept for
// reference.
7429 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7430 if(pps->slice_group_count > 1 ){
7431 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7432 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7433 switch(pps->mb_slice_group_map_type){
7436 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7437 | run_length[ i ] |1 |ue(v) |
7442 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7444 | top_left_mb[ i ] |1 |ue(v) |
7445 | bottom_right_mb[ i ] |1 |ue(v) |
7453 | slice_group_change_direction_flag |1 |u(1) |
7454 | slice_group_change_rate_minus1 |1 |ue(v) |
7459 | slice_group_id_cnt_minus1 |1 |ue(v) |
7460 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7462 | slice_group_id[ i ] |1 |u(v) |
// Default reference counts per list; reset to 1 on overflow (max 32).
7467 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7468 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7469 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7470 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7471 pps->ref_count[0]= pps->ref_count[1]= 1;
7475 pps->weighted_pred= get_bits1(&s->gb);
7476 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7477 pps->init_qp= get_se_golomb(&s->gb) + 26;
7478 pps->init_qs= get_se_golomb(&s->gb) + 26;
7479 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7480 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7481 pps->constrained_intra_pred= get_bits1(&s->gb);
7482 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
// Defaults for the optional trailing fields: flat (all-16) scaling
// matrices and no 8x8 transform. dequant tables are invalidated since
// parameter-set contents can change even when the id stays the same.
7484 pps->transform_8x8_mode= 0;
7485 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7486 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7487 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// If bits remain, the high-profile extension fields are present.
7489 if(get_bits_count(&s->gb) < bit_length){
7490 pps->transform_8x8_mode= get_bits1(&s->gb);
7491 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7492 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7494 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
// Precompute both chroma QP lookup tables from the offsets.
7497 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7498 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7499 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7500 h->pps.chroma_qp_diff= 1;
// Optional one-line trace of the parsed PPS when picture-info debug is on.
7502 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7503 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7504 pps_id, pps->sps_id,
7505 pps->cabac ? "CABAC" : "CAVLC",
7506 pps->slice_group_count,
7507 pps->ref_count[0], pps->ref_count[1],
7508 pps->weighted_pred ? "weighted" : "",
7509 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7510 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7511 pps->constrained_intra_pred ? "CONSTR" : "",
7512 pps->redundant_pic_cnt_present ? "REDU" : "",
7513 pps->transform_8x8_mode ? "8x8DCT" : ""
7521 * Call decode_slice() for each context.
7523 * @param h h264 master context
7524 * @param context_count number of contexts to execute
/**
 * Run slice decoding for all queued thread contexts: directly for a
 * single context, via avctx->execute() for multiple ones, then copy
 * position/error state from the last slice context back to the master.
 * NOTE(review): extract is line-sampled; some closing braces elided.
 * Code left byte-identical, only comments added.
 */
7526 static void execute_decode_slices(H264Context *h, int context_count){
7527 MpegEncContext * const s = &h->s;
7528 AVCodecContext * const avctx= s->avctx;
// Fast path: one context decodes inline without the execute() callback.
7532 if(context_count == 1) {
7533 decode_slice(avctx, h);
// Prepare each worker context before parallel execution.
7535 for(i = 1; i < context_count; i++) {
7536 hx = h->thread_context[i];
7537 hx->s.error_resilience = avctx->error_resilience;
7538 hx->s.error_count = 0;
7541 avctx->execute(avctx, (void *)decode_slice,
7542 (void **)h->thread_context, NULL, context_count);
7544 /* pull back stuff from slices to master context */
7545 hx = h->thread_context[context_count - 1];
7546 s->mb_x = hx->s.mb_x;
7547 s->mb_y = hx->s.mb_y;
7548 s->dropable = hx->s.dropable;
7549 s->picture_structure = hx->s.picture_structure;
// Accumulate per-thread error counts into the master context.
7550 for(i = 1; i < context_count; i++)
7551 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units (length-prefixed for AVC/mp4
 * streams, start-code-delimited for Annex B), unescape each one, and
 * dispatch on nal_unit_type (slices, partitions, SEI, SPS, PPS, ...).
 * Queues slice contexts and flushes them through execute_decode_slices()
 * when h->max_contexts are pending.
 *
 * NOTE(review): this extract is line-sampled — loop headers, error
 * returns, and several case labels/braces are elided. Code left
 * byte-identical; only comments added.
 */
7556 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7557 MpegEncContext * const s = &h->s;
7558 AVCodecContext * const avctx= s->avctx;
7560 H264Context *hx; ///< thread context
7561 int context_count = 0;
7563 h->max_contexts = avctx->thread_count;
// Debug dump of the first input bytes (enclosing condition elided here).
7566 for(i=0; i<50; i++){
7567 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Without CHUNKS mode, every call starts a fresh access unit.
7570 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7571 h->current_slice = 0;
7572 if (!s->first_field)
7573 s->current_picture_ptr= NULL;
7585 if(buf_index >= buf_size) break;
// AVC (mp4) framing: read h->nal_length_size big-endian length bytes.
7587 for(i = 0; i < h->nal_length_size; i++)
7588 nalsize = (nalsize << 8) | buf[buf_index++];
7589 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7594 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
// Annex B framing: scan for the 00 00 01 start-code prefix.
7599 // start code prefix search
7600 for(; buf_index + 3 < buf_size; buf_index++){
7601 // This should always succeed in the first iteration.
7602 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7606 if(buf_index+3 >= buf_size) break;
// Each NAL is parsed into the next free thread context.
7611 hx = h->thread_context[context_count];
// decode_nal() strips emulation-prevention bytes and parses the header.
7613 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7614 if (ptr==NULL || dst_length < 0){
// Trim trailing zero bytes, then compute the RBSP length in bits
// excluding the rbsp_stop_bit.
7617 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7619 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7621 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7622 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7625 if (h->is_avc && (nalsize != consumed)){
7626 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7630 buf_index += consumed;
// hurry_up / skip_frame: drop non-reference NALs when asked to.
7632 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7633 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7638 switch(hx->nal_unit_type){
// IDR slice: must not be mixed with non-IDR slices in one picture.
7640 if (h->nal_unit_type != NAL_IDR_SLICE) {
7641 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7644 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Regular (non-partitioned) slice: single bitstream reader for
// intra and inter data.
7646 init_get_bits(&hx->s.gb, ptr, bit_length);
7648 hx->inter_gb_ptr= &hx->s.gb;
7649 hx->s.data_partitioning = 0;
7651 if((err = decode_slice_header(hx, h)))
7654 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// Only count this slice for decoding when it passes the skip filters.
7655 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7656 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7657 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7658 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7659 && avctx->skip_frame < AVDISCARD_ALL)
// Data partitioning: DPA carries the header, DPB/DPC carry intra/inter
// residual data in separate readers.
7663 init_get_bits(&hx->s.gb, ptr, bit_length);
7665 hx->inter_gb_ptr= NULL;
7666 hx->s.data_partitioning = 1;
7668 err = decode_slice_header(hx, h);
7671 init_get_bits(&hx->intra_gb, ptr, bit_length);
7672 hx->intra_gb_ptr= &hx->intra_gb;
7675 init_get_bits(&hx->inter_gb, ptr, bit_length);
7676 hx->inter_gb_ptr= &hx->inter_gb;
// A partitioned slice only becomes decodable once all parts are in.
7678 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7679 && s->context_initialized
7681 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7682 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7683 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7684 && avctx->skip_frame < AVDISCARD_ALL)
// SEI NAL (parse call elided in this extract).
7688 init_get_bits(&s->gb, ptr, bit_length);
7692 init_get_bits(&s->gb, ptr, bit_length);
7693 decode_seq_parameter_set(h);
7695 if(s->flags& CODEC_FLAG_LOW_DELAY)
7698 if(avctx->has_b_frames < 2)
7699 avctx->has_b_frames= !s->low_delay;
7702 init_get_bits(&s->gb, ptr, bit_length);
7704 decode_picture_parameter_set(h, bit_length);
// NAL types that are intentionally ignored.
7708 case NAL_END_SEQUENCE:
7709 case NAL_END_STREAM:
7710 case NAL_FILLER_DATA:
7712 case NAL_AUXILIARY_SLICE:
7715 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush queued slice contexts once the parallel quota is reached.
7718 if(context_count == h->max_contexts) {
7719 execute_decode_slices(h, context_count);
7724 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7726 /* Slice could not be decoded in parallel mode, copy down
7727 * NAL unit stuff to context 0 and restart. Note that
7728 * rbsp_buffer is not transferred, but since we no longer
7729 * run in parallel mode this should not be an issue. */
7730 h->nal_unit_type = hx->nal_unit_type;
7731 h->nal_ref_idc = hx->nal_ref_idc;
// Decode whatever slice contexts remain queued at end of buffer.
7737 execute_decode_slices(h, context_count);
7742 * returns the number of bytes consumed for building the current frame
7744 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
// Never report 0 consumed bytes (callers could loop forever), and if we
// got within 10 bytes of the end, claim the whole buffer was consumed.
7745 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7746 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level decode callback: feed the packet through decode_nal_units(),
 * finish the current picture, and reorder/output delayed frames
 * (B-frame display-order sorting). On buf_size==0, drains the
 * delayed-picture buffer at end of stream.
 *
 * NOTE(review): this extract is line-sampled — returns, braces and some
 * statements are elided. Code left byte-identical; only comments added.
 */
7751 static int decode_frame(AVCodecContext *avctx,
7752 void *data, int *data_size,
7753 const uint8_t *buf, int buf_size)
7755 H264Context *h = avctx->priv_data;
7756 MpegEncContext *s = &h->s;
7757 AVFrame *pict = data;
7760 s->flags= avctx->flags;
7761 s->flags2= avctx->flags2;
7763 /* end of stream, output what is still in the buffers */
7764 if (buf_size == 0) {
7768 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC for output.
7769 out = h->delayed_pic[0];
7771 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7772 if(h->delayed_pic[i]->poc < out->poc){
7773 out = h->delayed_pic[i];
// Compact the delayed-picture list after removing the output slot.
7777 for(i=out_idx; h->delayed_pic[i]; i++)
7778 h->delayed_pic[i] = h->delayed_pic[i+1];
7781 *data_size = sizeof(AVFrame);
7782 *pict= *(AVFrame*)out;
// One-time parsing of the avcC extradata (AVCDecoderConfigurationRecord):
// SPS/PPS with 2-byte length prefixes, then the real nal_length_size.
7788 if(h->is_avc && !h->got_avcC) {
7789 int i, cnt, nalsize;
7790 unsigned char *p = avctx->extradata;
7791 if(avctx->extradata_size < 7) {
7792 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7796 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7799 /* sps and pps in the avcC always have length coded with 2 bytes,
7800 so put a fake nal_length_size = 2 while parsing them */
7801 h->nal_length_size = 2;
7802 // Decode sps from avcC
7803 cnt = *(p+5) & 0x1f; // Number of sps
7805 for (i = 0; i < cnt; i++) {
7806 nalsize = AV_RB16(p) + 2;
7807 if(decode_nal_units(h, p, nalsize) < 0) {
7808 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7813 // Decode pps from avcC
7814 cnt = *(p++); // Number of pps
7815 for (i = 0; i < cnt; i++) {
7816 nalsize = AV_RB16(p) + 2;
7817 if(decode_nal_units(h, p, nalsize) != nalsize) {
7818 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7823 // Now store right nal length size, that will be use to parse all other nals
7824 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7825 // Do not reparse avcC
// Annex B extradata (non-avcC) is decoded once on the first frame.
7829 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7830 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7834 buf_index=decode_nal_units(h, buf, buf_size);
7838 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7839 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7840 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture completed (or CHUNKS mode reached the last MB row): run
// reference-picture marking and the display-order reordering below.
7844 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7845 Picture *out = s->current_picture_ptr;
7846 Picture *cur = s->current_picture_ptr;
7847 int i, pics, cross_idr, out_of_order, out_idx;
7851 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7852 s->current_picture_ptr->pict_type= s->pict_type;
7855 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
// Roll POC/frame_num state forward for the next picture.
7856 h->prev_poc_msb= h->poc_msb;
7857 h->prev_poc_lsb= h->poc_lsb;
7859 h->prev_frame_num_offset= h->frame_num_offset;
7860 h->prev_frame_num= h->frame_num;
7863 * FIXME: Error handling code does not seem to support interlaced
7864 * when slices span multiple rows
7865 * The ff_er_add_slice calls don't work right for bottom
7866 * fields; they cause massive erroneous error concealing
7867 * Error marking covers both fields (top and bottom).
7868 * This causes a mismatched s->error_count
7869 * and a bad error table. Further, the error count goes to
7870 * INT_MAX when called for bottom field, because mb_y is
7871 * past end by one (callers fault) and resync_mb_y != 0
7872 * causes problems for the first MB line, too.
7879 if (s->first_field) {
7880 /* Wait for second field. */
7884 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7885 /* Derive top_field_first from field pocs. */
7886 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7888 //FIXME do something with unavailable reference frames
7890 /* Sort B-frames into display order */
7892 if(h->sps.bitstream_restriction_flag
7893 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7894 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Without bitstream restrictions, strict compliance forces the maximum
// reorder delay.
7898 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7899 && !h->sps.bitstream_restriction_flag){
7900 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7905 while(h->delayed_pic[pics]) pics++;
7907 assert(pics <= MAX_DELAYED_PIC_COUNT);
// Append the current picture; mark it DELAYED_PIC_REF so its buffer is
// held for output even if it is not a reference picture.
7909 h->delayed_pic[pics++] = cur;
7910 if(cur->reference == 0)
7911 cur->reference = DELAYED_PIC_REF;
7913 out = h->delayed_pic[0];
7915 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7916 if(h->delayed_pic[i]->poc < out->poc){
7917 out = h->delayed_pic[i];
// cross_idr: the buffer spans an IDR boundary, so POCs are not
// comparable and out-of-order detection is disabled.
7920 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7922 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames adaptively when output order violations show the
// declared reorder depth was too small.
7924 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7926 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7928 ((!cross_idr && out->poc > h->outputed_poc + 2)
7929 || cur->pict_type == FF_B_TYPE)))
7932 s->avctx->has_b_frames++;
7935 if(out_of_order || pics > s->avctx->has_b_frames){
7936 out->reference &= ~DELAYED_PIC_REF;
7937 for(i=out_idx; h->delayed_pic[i]; i++)
7938 h->delayed_pic[i] = h->delayed_pic[i+1];
7940 if(!out_of_order && pics > s->avctx->has_b_frames){
7941 *data_size = sizeof(AVFrame);
7943 h->outputed_poc = out->poc;
7944 *pict= *(AVFrame*)out;
7946 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7951 assert(pict->data[0] || !*data_size);
7952 ff_print_debug_info(s, pict);
7953 //printf("out %d\n", (int)pict->data[0]);
7956 /* Return the Picture timestamp as the frame number */
7957 /* we subtract 1 because it is added on utils.c */
7958 avctx->frame_number = s->picture_number - 1;
7960 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: a neighbour is available only if it exists inside the
 * frame AND belongs to the same slice (same slice_table entry).
 * Layout: [0]=top-left, [1]=top, [2]=top-right, [3]=left;
 * [4]/[5] are fixed placeholders (see FIXMEs).
 * NOTE(review): the first-row guard branch around indices 0-2 is elided
 * in this extract.
 */
7963 static inline void fill_mb_avail(H264Context *h){
7964 MpegEncContext * const s = &h->s;
7965 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7968 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7969 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7970 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7976 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7977 h->mb_avail[4]= 1; //FIXME move out
7978 h->mb_avail[5]= 0; //FIXME move out
// ---------------------------------------------------------------------
// Self-test harness (compiled under TEST): exercises Exp-Golomb
// read/write round-trips, the 4x4 (I)DCT, the quantizer and the NAL
// escape/unescape layer.
// NOTE(review): the enclosing function header and many lines are elided
// in this extract; code below left byte-identical, only comments added.
// ---------------------------------------------------------------------
7986 #define SIZE (COUNT*40)
7992 // int int_temp[10000];
7994 AVCodecContext avctx;
7996 dsputil_init(&dsp, &avctx);
// --- unsigned Exp-Golomb: write COUNT values, read them back and
// verify each decoded value matches its index.
7998 init_put_bits(&pb, temp, SIZE);
7999 printf("testing unsigned exp golomb\n");
8000 for(i=0; i<COUNT; i++){
8002 set_ue_golomb(&pb, i);
8003 STOP_TIMER("set_ue_golomb");
8005 flush_put_bits(&pb);
8007 init_get_bits(&gb, temp, 8*SIZE);
8008 for(i=0; i<COUNT; i++){
8011 s= show_bits(&gb, 24);
8014 j= get_ue_golomb(&gb);
8016 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8019 STOP_TIMER("get_ue_golomb");
// --- signed Exp-Golomb: same round-trip with values centred on 0.
8023 init_put_bits(&pb, temp, SIZE);
8024 printf("testing signed exp golomb\n");
8025 for(i=0; i<COUNT; i++){
8027 set_se_golomb(&pb, i - COUNT/2);
8028 STOP_TIMER("set_se_golomb");
8030 flush_put_bits(&pb);
8032 init_get_bits(&gb, temp, 8*SIZE);
8033 for(i=0; i<COUNT; i++){
8036 s= show_bits(&gb, 24);
8039 j= get_se_golomb(&gb);
8040 if(j != i - COUNT/2){
8041 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8044 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT: forward-transform random residuals, quantize-like
// scaling, inverse transform, then accumulate reconstruction error.
8048 printf("testing 4x4 (I)DCT\n");
8051 uint8_t src[16], ref[16];
8052 uint64_t error= 0, max_error=0;
8054 for(i=0; i<COUNT; i++){
8056 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8057 for(j=0; j<16; j++){
8058 ref[j]= random()%255;
8059 src[j]= random()%255;
8062 h264_diff_dct_c(block, src, ref, 4);
8065 for(j=0; j<16; j++){
8066 // printf("%d ", block[j]);
8067 block[j]= block[j]*4;
8068 if(j&1) block[j]= (block[j]*4 + 2)/5;
8069 if(j&4) block[j]= (block[j]*4 + 2)/5;
8073 s->dsp.h264_idct_add(ref, block, 4);
8074 /* for(j=0; j<16; j++){
8075 printf("%d ", ref[j]);
8079 for(j=0; j<16; j++){
8080 int diff= FFABS(src[j] - ref[j]);
8083 max_error= FFMAX(max_error, diff);
8086 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer sweep over all 52 QP values on random blocks.
8087 printf("testing quantizer\n");
8088 for(qp=0; qp<52; qp++){
8090 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: encode_nal() must escape the bitstream so decode_nal()
// recovers the exact bytes and consumes the exact encoded length.
8093 printf("Testing NAL layer\n");
8095 uint8_t bitstream[COUNT];
8096 uint8_t nal[COUNT*2];
8098 memset(&h, 0, sizeof(H264Context));
8100 for(i=0; i<COUNT; i++){
8108 for(j=0; j<COUNT; j++){
8109 bitstream[j]= (random() % 255) + 1;
8112 for(j=0; j<zeros; j++){
8113 int pos= random() % COUNT;
8114 while(bitstream[pos] == 0){
8123 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8125 printf("encoding failed\n");
8129 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8133 if(out_length != COUNT){
8134 printf("incorrect length %d %d\n", out_length, COUNT);
8138 if(consumed != nal_length){
8139 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8143 if(memcmp(bitstream, out, COUNT)){
8144 printf("mismatch\n");
8150 printf("Testing RBSP\n");
/**
 * Codec close callback: free the per-context RBSP unescape buffers and
 * the decoder's dynamic tables. Further MpegEncContext teardown lines
 * are elided in this extract.
 */
8158 static av_cold int decode_end(AVCodecContext *avctx)
8160 H264Context *h = avctx->priv_data;
8161 MpegEncContext *s = &h->s;
// av_freep() frees and NULLs the pointers, so a double close is safe.
8163 av_freep(&h->rbsp_buffer[0]);
8164 av_freep(&h->rbsp_buffer[1]);
8165 free_tables(h); //FIXME cleanup init stuff perhaps
8168 // memset(h, 0, sizeof(H264Context));
// Codec registration entry for the H.264 decoder. Positional fields
// (name, type, id, init/close/decode callbacks) are elided in this
// extract; capabilities advertise direct rendering (DR1) and output
// delay (DELAY) for B-frame reordering.
8174 AVCodec h264_decoder = {
8178 sizeof(H264Context),
8183 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8185 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),