2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
// Static VLC lookup tables; the element names (coeff_token, total_zeros,
// run) match the H.264 residual-coding syntax elements. Initialisation is
// not visible in this chunk — presumably done once at decoder init; verify.
41 static VLC coeff_token_vlc[4];
42 static VLC chroma_dc_coeff_token_vlc;
44 static VLC total_zeros_vlc[15];
45 static VLC chroma_dc_total_zeros_vlc[3];
47 static VLC run_vlc[6];
// Forward declarations for SVQ3-variant IDCT helpers and the in-loop
// deblocking filters, defined later in the file.
50 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
51 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
52 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
53 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
55 static av_always_inline uint32_t pack16to32(int a, int b){
56 #ifdef WORDS_BIGENDIAN
57 return (b&0xFFFF) + (a<<16);
59 return (a&0xFFFF) + (b<<16);
/**
 * Lookup table for qp % 6, valid for QP values 0..51.
 * Avoids a runtime division when splitting a quantiser into its
 * mod-6 / div-6 components (used together with ff_div6 below).
 * The closing brace of the initialiser was missing in the visible chunk
 * and has been restored.
 */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/**
 * Lookup table for qp / 6, valid for QP values 0..51.
 * Companion of ff_rem6 above; together they decompose a quantiser
 * without a runtime division.
 * The closing brace of the initialiser was missing in the visible chunk
 * and has been restored.
 */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
74 * @param h height of the rectangle, should be a constant
75 * @param w width of the rectangle, should be a constant
76 * @param size the size of val (1 or 4), should be a constant
// Fills a small (up to 4x4 element) rectangle with val, choosing 16/32/64-bit
// stores from the scaled row width. NOTE(review): this chunk is missing the
// w==2 / w==4 / w==8 / w==16 branch scaffolding, the h==1/h==2 early returns,
// and the closing braces — code below is kept byte-identical to the visible
// lines.
78 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
79 uint8_t *p= (uint8_t*)vp;
80 assert(size==1 || size==4);
// destination must be aligned to the (scaled) row width so the wide stores
// below cannot fault on strict-alignment targets
86 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
87 assert((stride&(w-1))==0);
// 2-byte rows: replicate a byte value across a uint16 when size==1
89 const uint16_t v= size==4 ? val : val*0x0101;
90 *(uint16_t*)(p + 0*stride)= v;
92 *(uint16_t*)(p + 1*stride)= v;
94 *(uint16_t*)(p + 2*stride)= v;
95 *(uint16_t*)(p + 3*stride)= v;
// 4-byte rows: replicate a byte value across a uint32 when size==1
97 const uint32_t v= size==4 ? val : val*0x01010101;
98 *(uint32_t*)(p + 0*stride)= v;
100 *(uint32_t*)(p + 1*stride)= v;
102 *(uint32_t*)(p + 2*stride)= v;
103 *(uint32_t*)(p + 3*stride)= v;
105 //gcc can't optimize 64bit math on x86_32
106 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
// 8-byte rows on 64-bit hosts: duplicate the 32-bit val into both halves
107 const uint64_t v= val*0x0100000001ULL;
108 *(uint64_t*)(p + 0*stride)= v;
110 *(uint64_t*)(p + 1*stride)= v;
112 *(uint64_t*)(p + 2*stride)= v;
113 *(uint64_t*)(p + 3*stride)= v;
// 16-byte rows on 64-bit hosts: two 64-bit stores per row
115 const uint64_t v= val*0x0100000001ULL;
116 *(uint64_t*)(p + 0+0*stride)= v;
117 *(uint64_t*)(p + 8+0*stride)= v;
118 *(uint64_t*)(p + 0+1*stride)= v;
119 *(uint64_t*)(p + 8+1*stride)= v;
121 *(uint64_t*)(p + 0+2*stride)= v;
122 *(uint64_t*)(p + 8+2*stride)= v;
123 *(uint64_t*)(p + 0+3*stride)= v;
124 *(uint64_t*)(p + 8+3*stride)= v;
// 32-bit host fallback: 8-byte rows as two 32-bit stores per row
126 *(uint32_t*)(p + 0+0*stride)= val;
127 *(uint32_t*)(p + 4+0*stride)= val;
129 *(uint32_t*)(p + 0+1*stride)= val;
130 *(uint32_t*)(p + 4+1*stride)= val;
132 *(uint32_t*)(p + 0+2*stride)= val;
133 *(uint32_t*)(p + 4+2*stride)= val;
134 *(uint32_t*)(p + 0+3*stride)= val;
135 *(uint32_t*)(p + 4+3*stride)= val;
// 32-bit host fallback: 16-byte rows as four 32-bit stores per row
137 *(uint32_t*)(p + 0+0*stride)= val;
138 *(uint32_t*)(p + 4+0*stride)= val;
139 *(uint32_t*)(p + 8+0*stride)= val;
140 *(uint32_t*)(p +12+0*stride)= val;
141 *(uint32_t*)(p + 0+1*stride)= val;
142 *(uint32_t*)(p + 4+1*stride)= val;
143 *(uint32_t*)(p + 8+1*stride)= val;
144 *(uint32_t*)(p +12+1*stride)= val;
146 *(uint32_t*)(p + 0+2*stride)= val;
147 *(uint32_t*)(p + 4+2*stride)= val;
148 *(uint32_t*)(p + 8+2*stride)= val;
149 *(uint32_t*)(p +12+2*stride)= val;
150 *(uint32_t*)(p + 0+3*stride)= val;
151 *(uint32_t*)(p + 4+3*stride)= val;
152 *(uint32_t*)(p + 8+3*stride)= val;
153 *(uint32_t*)(p +12+3*stride)= val;
// Fills the per-macroblock neighbour caches (intra4x4 pred modes,
// non-zero-count, motion vectors/refs, mvds, direct flags, cbp) from the
// surrounding macroblocks before decoding (or, with for_deblock, before
// deblocking) the current MB. NOTE(review): many lines of this function are
// missing from the visible chunk (conditions, braces, #if scaffolding);
// the code below is kept byte-identical to what is visible.
160 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
161 MpegEncContext * const s = &h->s;
162 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
163 int topleft_xy, top_xy, topright_xy, left_xy[2];
164 int topleft_type, top_type, topright_type, left_type[2];
168 //FIXME deblocking could skip the intra and nnz parts.
169 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
172 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// default (non-MBAFF) neighbour indices in raster order
174 top_xy = mb_xy - s->mb_stride;
175 topleft_xy = top_xy - 1;
176 topright_xy= top_xy + 1;
177 left_xy[1] = left_xy[0] = mb_xy-1;
// MBAFF: neighbour derivation depends on the frame/field flag of each
// MB pair; pair_xy addresses the top MB of the current pair
187 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
188 const int top_pair_xy = pair_xy - s->mb_stride;
189 const int topleft_pair_xy = top_pair_xy - 1;
190 const int topright_pair_xy = top_pair_xy + 1;
191 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
192 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
193 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
194 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
195 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
196 const int bottom = (s->mb_y & 1);
197 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
199 ? !curr_mb_frame_flag // bottom macroblock
200 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
202 top_xy -= s->mb_stride;
205 ? !curr_mb_frame_flag // bottom macroblock
206 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
208 topleft_xy -= s->mb_stride;
211 ? !curr_mb_frame_flag // bottom macroblock
212 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
214 topright_xy -= s->mb_stride;
// left neighbour pairing when the frame/field flags of current and left
// MB pairs differ
216 if (left_mb_frame_flag != curr_mb_frame_flag) {
217 left_xy[1] = left_xy[0] = pair_xy - 1;
218 if (curr_mb_frame_flag) {
239 left_xy[1] += s->mb_stride;
// publish resolved neighbour indices for later use (e.g. deblocking)
252 h->top_mb_xy = top_xy;
253 h->left_mb_xy[0] = left_xy[0];
254 h->left_mb_xy[1] = left_xy[1];
// for_deblock path: neighbours from other slices are still usable,
// slice_table==255 marks unavailable MBs
258 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
259 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
260 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
// MBAFF deblock fast path: reload the current MB's own nnz bits and
// motion data from the frame-wide tables into the caches
262 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
264 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
266 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
267 for(list=0; list<h->list_count; list++){
268 if(USES_LIST(mb_type,list)){
269 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
270 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
271 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
272 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
278 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
279 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
281 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
282 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
284 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
285 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// decode path: neighbours are only usable if they belong to this slice
290 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
291 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
292 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
293 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
294 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// intra prediction: compute sample-availability bitmasks; each cleared
// bit marks a neighbouring sample group that must not be used
297 if(IS_INTRA(mb_type)){
298 h->topleft_samples_available=
299 h->top_samples_available=
300 h->left_samples_available= 0xFFFF;
301 h->topright_samples_available= 0xEEEA;
303 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
304 h->topleft_samples_available= 0xB3FF;
305 h->top_samples_available= 0x33FF;
306 h->topright_samples_available= 0x26EA;
309 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
310 h->topleft_samples_available&= 0xDF5F;
311 h->left_samples_available&= 0x5F5F;
315 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
316 h->topleft_samples_available&= 0x7FFF;
318 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
319 h->topright_samples_available&= 0xFBFF;
// intra4x4 prediction-mode cache: top row comes from the bottom row of
// the top neighbour, left column from the left neighbour
321 if(IS_INTRA4x4(mb_type)){
322 if(IS_INTRA4x4(top_type)){
323 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
324 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
325 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
326 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
329 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
334 h->intra4x4_pred_mode_cache[4+8*0]=
335 h->intra4x4_pred_mode_cache[5+8*0]=
336 h->intra4x4_pred_mode_cache[6+8*0]=
337 h->intra4x4_pred_mode_cache[7+8*0]= pred;
340 if(IS_INTRA4x4(left_type[i])){
341 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
342 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
345 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
350 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
351 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// non-zero-count cache: neighbour nnz values, or 0/64 defaults when the
// neighbour is unavailable (0 for CABAC inter, 64 otherwise)
366 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
368 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
369 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
370 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
371 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
373 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
374 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
376 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
377 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
380 h->non_zero_count_cache[4+8*0]=
381 h->non_zero_count_cache[5+8*0]=
382 h->non_zero_count_cache[6+8*0]=
383 h->non_zero_count_cache[7+8*0]=
385 h->non_zero_count_cache[1+8*0]=
386 h->non_zero_count_cache[2+8*0]=
388 h->non_zero_count_cache[1+8*3]=
389 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
393 for (i=0; i<2; i++) {
395 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
396 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
397 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
398 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
400 h->non_zero_count_cache[3+8*1 + 2*8*i]=
401 h->non_zero_count_cache[3+8*2 + 2*8*i]=
402 h->non_zero_count_cache[0+8*1 + 8*i]=
403 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CBP context for CABAC
410 h->top_cbp = h->cbp_table[top_xy];
411 } else if(IS_INTRA(mb_type)) {
418 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
419 } else if(IS_INTRA(mb_type)) {
425 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
428 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// motion-vector / reference caches for inter prediction
433 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
435 for(list=0; list<h->list_count; list++){
436 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
437 /*if(!h->mv_cache_clean[list]){
438 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
439 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
440 h->mv_cache_clean[list]= 1;
444 h->mv_cache_clean[list]= 0;
// top neighbour row of MVs/refs
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
449 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
450 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
451 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
452 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
453 h->ref_cache[list][scan8[0] + 0 - 1*8]=
454 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
455 h->ref_cache[list][scan8[0] + 2 - 1*8]=
456 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
458 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
459 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
460 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
461 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
462 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// left neighbour column of MVs/refs (two entries per left MB half)
466 int cache_idx = scan8[0] - 1 + i*2*8;
467 if(USES_LIST(left_type[i], list)){
468 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
469 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
470 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
471 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
472 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
473 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
475 *(uint32_t*)h->mv_cache [list][cache_idx ]=
476 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
477 h->ref_cache[list][cache_idx ]=
478 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
482 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
// topleft / topright single entries
485 if(USES_LIST(topleft_type, list)){
486 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
487 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
488 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
489 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
491 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
492 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
495 if(USES_LIST(topright_type, list)){
496 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
497 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
498 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
499 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
501 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
502 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
505 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// padding entries in the cache that no real block maps to
508 h->ref_cache[list][scan8[5 ]+1] =
509 h->ref_cache[list][scan8[7 ]+1] =
510 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
511 h->ref_cache[list][scan8[4 ]] =
512 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
513 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
514 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
515 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
516 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
517 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
520 /* XXX beurk, Load mvd */
521 if(USES_LIST(top_type, list)){
522 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
523 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
524 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
525 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
526 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
528 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
529 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
530 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
531 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
533 if(USES_LIST(left_type[0], list)){
534 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
535 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
536 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
538 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
539 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
541 if(USES_LIST(left_type[1], list)){
542 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
543 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
544 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
546 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
547 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
549 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
550 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
551 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
552 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
553 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// direct-mode flags for B slices
555 if(h->slice_type == B_TYPE){
556 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
558 if(IS_DIRECT(top_type)){
559 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
560 }else if(IS_8X8(top_type)){
561 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
562 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
563 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
565 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
568 if(IS_DIRECT(left_type[0]))
569 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
570 else if(IS_8X8(left_type[0]))
571 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
573 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
575 if(IS_DIRECT(left_type[1]))
576 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
577 else if(IS_8X8(left_type[1]))
578 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
580 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF: MAP_F2F rescales cached refs/MVs of each neighbour position
// between frame and field coordinates (vertical component halved or
// doubled, ref index shifted accordingly)
586 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
587 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
588 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
589 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
590 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
591 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
592 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
593 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
594 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
595 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// frame->field variant: neighbour stored as frame, current MB is field
597 #define MAP_F2F(idx, mb_type)\
598 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
599 h->ref_cache[list][idx] <<= 1;\
600 h->mv_cache[list][idx][1] /= 2;\
601 h->mvd_cache[list][idx][1] /= 2;\
// field->frame variant: neighbour stored as field, current MB is frame
606 #define MAP_F2F(idx, mb_type)\
607 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
608 h->ref_cache[list][idx] >>= 1;\
609 h->mv_cache[list][idx][1] <<= 1;\
610 h->mvd_cache[list][idx][1] <<= 1;\
// 8x8 DCT context for CABAC transform-size flag
620 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Copies the right column and bottom row of the intra4x4 prediction-mode
// cache back into the per-macroblock table, where later macroblocks read
// their top/left neighbour modes from.
623 static inline void write_back_intra_pred_mode(H264Context *h){
624 MpegEncContext * const s = &h->s;
625 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
627 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
628 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
629 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
630 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
631 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
632 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
633 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
637 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Validates the cached intra4x4 prediction modes against sample
// availability: modes that need unavailable top/left samples are remapped
// via the lookup tables (a -1 entry marks an illegal mode and triggers an
// error log). Returns 0 on success (error return lines not visible here).
639 static inline int check_intra4x4_pred_mode(H264Context *h){
640 MpegEncContext * const s = &h->s;
641 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
642 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
645 if(!(h->top_samples_available&0x8000)){
647 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
649 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
652 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
657 if(!(h->left_samples_available&0x8000)){
659 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
661 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
664 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
670 } //FIXME cleanup like next
673 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Validates a 16x16/chroma intra prediction mode against top/left sample
// availability, remapping DC-style modes through the lookup tables and
// logging an error for illegal combinations. Return statements are not
// visible in this chunk.
675 static inline int check_intra_pred_mode(H264Context *h, int mode){
676 MpegEncContext * const s = &h->s;
677 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
678 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
681 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
685 if(!(h->top_samples_available&0x8000)){
688 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
693 if(!(h->left_samples_available&0x8000)){
696 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
705 * gets the predicted intra4x4 prediction mode.
// Predicts the intra4x4 mode for block n as the minimum of the cached left
// and top neighbour modes; a negative minimum (unavailable neighbour) falls
// back to DC_PRED. The non-DC return path is not visible in this chunk.
707 static inline int pred_intra_mode(H264Context *h, int n){
708 const int index8= scan8[n];
709 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
710 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
711 const int min= FFMIN(left, top);
713 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
715 if(min<0) return DC_PRED;
// Writes the right column / bottom row of the non-zero-count cache back to
// the per-macroblock nnz table, and packs a 16-bit bitmap of all luma nnz
// flags into bytes 14-15 for the deblocking fast path.
719 static inline void write_back_non_zero_count(H264Context *h){
720 MpegEncContext * const s = &h->s;
721 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
723 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
724 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
725 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
726 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
727 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
728 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
729 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// chroma nnz entries
731 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
732 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
733 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
735 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
736 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
737 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
740 // store all luma nnzs, for deblocking
743 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
744 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
749 * gets the predicted number of non zero coefficients.
750 * @param n block index
// Predicts the number of non-zero coefficients for block n from the cached
// left and top neighbour counts (i appears to be their combination; the
// combining line is not visible). Values below 64 are averaged with
// rounding; the result is masked to 5 bits for the trace output.
752 static inline int pred_non_zero_count(H264Context *h, int n){
753 const int index8= scan8[n];
754 const int left= h->non_zero_count_cache[index8 - 1];
755 const int top = h->non_zero_count_cache[index8 - 8];
758 if(i<64) i= (i+1)>>1;
760 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetches the top-right ("diagonal") motion vector C for MV prediction and
// returns its reference index; falls back to the top-left neighbour when the
// top-right block is unavailable. Contains special-case handling for MBAFF,
// where frame/field neighbours need their vertical MV and ref index rescaled
// (done via SET_DIAG_MV). Statement order here is intricate — kept
// byte-identical.
765 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
766 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
767 MpegEncContext *s = &h->s;
769 /* there is no consistent mapping of mvs to neighboring locations that will
770 * make mbaff happy, so we can't move all this logic to fill_caches */
772 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// scan8[0]-2 is used as scratch space for the synthesised C vector
774 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
775 *C = h->mv_cache[list][scan8[0]-2];
778 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
779 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
780 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV loads the MV/ref at 4x4 position (x4,y4) from the frame-wide
// tables, applies MV_OP to the vertical component and REF_OP to the ref
781 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
782 const int x4 = X4, y4 = Y4;\
783 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
784 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
785 return LIST_NOT_USED;\
786 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
787 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
788 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
789 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
791 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
794 if(topright_ref == PART_NOT_AVAILABLE
795 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
796 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
798 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
799 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
802 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
804 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
805 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// non-MBAFF path: use the top-right entry directly, else fall back to
// the top-left neighbour
811 if(topright_ref != PART_NOT_AVAILABLE){
812 *C= h->mv_cache[list][ i - 8 + part_width ];
815 tprintf(s->avctx, "topright MV not available\n");
817 *C= h->mv_cache[list][ i - 8 - 1 ];
818 return h->ref_cache[list][ i - 8 - 1 ];
823 * gets the predicted MV.
824 * @param n the block index
825 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
826 * @param mx the x component of the predicted motion vector
827 * @param my the y component of the predicted motion vector
// Median motion-vector prediction: takes the left (A), top (B) and diagonal
// (C) neighbour MVs and predicts (mx,my). With more than one neighbour using
// the same ref, the component-wise median applies; with exactly one match
// that neighbour's MV is used; the special left-only case is handled below.
829 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
830 const int index8= scan8[n];
831 const int top_ref= h->ref_cache[list][ index8 - 8 ];
832 const int left_ref= h->ref_cache[list][ index8 - 1 ];
833 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
834 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
836 int diagonal_ref, match_count;
838 assert(part_width==1 || part_width==2 || part_width==4);
848 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
849 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
850 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
851 if(match_count > 1){ //most common
852 *mx= mid_pred(A[0], B[0], C[0]);
853 *my= mid_pred(A[1], B[1], C[1]);
854 }else if(match_count==1){
858 }else if(top_ref==ref){
// if only the left neighbour is available, its MV is used directly
866 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
870 *mx= mid_pred(A[0], B[0], C[0]);
871 *my= mid_pred(A[1], B[1], C[1]);
875 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
879 * gets the directionally predicted 16x8 MV.
880 * @param n the block index
881 * @param mx the x component of the predicted motion vector
882 * @param my the y component of the predicted motion vector
// Directional MV prediction for 16x8 partitions: the top partition prefers
// the top neighbour's MV, the bottom partition prefers the left neighbour's
// (when its ref matches); otherwise falls back to median pred_motion.
884 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
886 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
887 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
889 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
897 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
898 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
900 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
910 pred_motion(h, n, 4, list, ref, mx, my);
914 * gets the directionally predicted 8x16 MV.
915 * @param n the block index
916 * @param mx the x component of the predicted motion vector
917 * @param my the y component of the predicted motion vector
// Directional MV prediction for 8x16 partitions: the left partition prefers
// the left neighbour's MV, the right partition prefers the diagonal
// neighbour's (when its ref matches); otherwise falls back to pred_motion.
919 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
921 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
922 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
924 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
935 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
937 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
939 if(diagonal_ref == ref){
947 pred_motion(h, n, 2, list, ref, mx, my);
// MV prediction for P-skip macroblocks: the MV is zero when either the top
// or left neighbour is unavailable, or when either uses ref 0 with a zero
// MV; otherwise the normal 16x16 median prediction applies.
950 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
951 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
952 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
954 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
956 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
957 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
958 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
964 pred_motion(h, 0, 4, 0, 0, mx, my);
// Computes the temporal-direct distance scale factor per list-0 reference
// from POC distances (tb/td clipped to [-128,127]); td==0 uses the neutral
// factor 256. The field variant duplicates each entry for both fields.
969 static inline void direct_dist_scale_factor(H264Context * const h){
970 const int poc = h->s.current_picture_ptr->poc;
971 const int poc1 = h->ref_list[1][0].poc;
973 for(i=0; i<h->ref_count[0]; i++){
974 int poc0 = h->ref_list[0][i].poc;
975 int td = av_clip(poc1 - poc0, -128, 127);
976 if(td == 0 /* FIXME || pic0 is a long-term ref */){
977 h->dist_scale_factor[i] = 256;
979 int tb = av_clip(poc - poc0, -128, 127);
// tx = round(16384/td); combined with tb this yields (tb/td) in Q8
980 int tx = (16384 + (FFABS(td) >> 1)) / td;
981 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
985 for(i=0; i<h->ref_count[0]; i++){
986 h->dist_scale_factor_field[2*i] =
987 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
// Stores the current picture's reference counts/POCs and, for temporal
// direct mode, builds map_col_to_list0: for each reference of the co-located
// picture (ref_list[1][0]), the index of the list-0 reference with the same
// POC (0 when no match is found). The field variant doubles each mapping.
991 static inline void direct_ref_list_init(H264Context * const h){
992 MpegEncContext * const s = &h->s;
993 Picture * const ref1 = &h->ref_list[1][0];
994 Picture * const cur = s->current_picture_ptr;
996 if(cur->pict_type == I_TYPE)
997 cur->ref_count[0] = 0;
998 if(cur->pict_type != B_TYPE)
999 cur->ref_count[1] = 0;
1000 for(list=0; list<2; list++){
1001 cur->ref_count[list] = h->ref_count[list];
1002 for(j=0; j<h->ref_count[list]; j++)
1003 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// spatial direct mode does not need the POC mapping below
1005 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1007 for(list=0; list<2; list++){
1008 for(i=0; i<ref1->ref_count[list]; i++){
1009 const int poc = ref1->ref_poc[list][i];
1010 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1011 for(j=0; j<h->ref_count[list]; j++)
1012 if(h->ref_list[list][j].poc == poc){
1013 h->map_col_to_list0[list][i] = j;
1019 for(list=0; list<2; list++){
1020 for(i=0; i<ref1->ref_count[list]; i++){
1021 j = h->map_col_to_list0[list][i];
1022 h->map_col_to_list0_field[list][2*i] = 2*j;
1023 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives motion vectors and reference indices for B-direct macroblocks
 * (both spatial and temporal direct modes, H.264 8.4.1.1/8.4.1.2), filling
 * h->mv_cache / h->ref_cache and refining *mb_type / h->sub_mb_type.
 *
 * Spatial mode: ref = min of the neighbors' refs per list, MVs from median
 * prediction, with a colocated-is-(near-)static override that zeroes MVs.
 * Temporal mode: scales the colocated list-1 picture's MVs by the POC
 * distance factors from direct_dist_scale_factor(), including frame<->field
 * rescaling when the interlacing of the current MB and the colocated MB
 * differ.
 *
 * NOTE(review): this excerpt elides many original lines (declarations,
 * else-branches, closing braces); comments below describe only the visible
 * code.
 */
1029 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1030 MpegEncContext * const s = &h->s;
1031 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1032 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1033 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* Co-located macroblock data from the first list-1 reference picture. */
1034 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1035 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1036 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1037 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1038 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1039 const int is_b8x8 = IS_8X8(*mb_type);
1040 unsigned int sub_mb_type;
1043 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Choose partitioning from the colocated MB type and direct_8x8_inference. */
1044 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1045 /* FIXME save sub mb types from previous frames (or derive from MVs)
1046 * so we know exactly what block size to use */
1047 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1048 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1049 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1050 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1051 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1053 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1057 *mb_type |= MB_TYPE_DIRECT2;
1059 *mb_type |= MB_TYPE_INTERLACED;
1061 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct mode ---- */
1063 if(h->direct_spatial_mv_pred){
1068 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1070 /* ref = min(neighbors) */
1071 for(list=0; list<2; list++){
1072 int refa = h->ref_cache[list][scan8[0] - 1];
1073 int refb = h->ref_cache[list][scan8[0] - 8];
1074 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* Top-right unavailable: fall back to top-left neighbor (elided condition). */
1076 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1078 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1080 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* No valid neighbor ref in either list -> ref 0 with zero MVs. */
1086 if(ref[0] < 0 && ref[1] < 0){
1087 ref[0] = ref[1] = 0;
1088 mv[0][0] = mv[0][1] =
1089 mv[1][0] = mv[1][1] = 0;
1091 for(list=0; list<2; list++){
1093 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1095 mv[list][0] = mv[list][1] = 0;
/* A list with no valid ref does not participate in prediction. */
1100 *mb_type &= ~MB_TYPE_P0L1;
1101 sub_mb_type &= ~MB_TYPE_P0L1;
1102 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_P0L0;
1104 sub_mb_type &= ~MB_TYPE_P0L0;
1107 if(IS_16X16(*mb_type)){
1110 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* Colocated block is static (ref 0, |mv| <= 1): zero out the MVs.
 * The x264_build check works around a bug in old x264 encoders. */
1112 if(!IS_INTRA(mb_type_col)
1113 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1114 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1115 && (h->x264_build>33 || !h->x264_build)))){
1117 a= pack16to32(mv[0][0],mv[0][1]);
1119 b= pack16to32(mv[1][0],mv[1][1]);
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 b= pack16to32(mv[1][0],mv[1][1]);
1124 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* 8x8 partition path: apply the same logic per 8x8 block. */
1127 for(i8=0; i8<4; i8++){
1128 const int x8 = i8&1;
1129 const int y8 = i8>>1;
1131 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1133 h->sub_mb_type[i8] = sub_mb_type;
1135 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1137 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1138 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Per-8x8 colocated-static check (mirrors the 16x16 case above). */
1141 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1142 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1143 && (h->x264_build>33 || !h->x264_build)))){
1144 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1145 if(IS_SUB_8X8(sub_mb_type)){
1146 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1147 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1149 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1151 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1154 for(i4=0; i4<4; i4++){
1155 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1156 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1158 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1160 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1166 }else{ /* direct temporal mv pred */
1167 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1168 const int *dist_scale_factor = h->dist_scale_factor;
/* Field MBs use the field variants of the colocated map and scale factors. */
1171 if(IS_INTERLACED(*mb_type)){
1172 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1173 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1174 dist_scale_factor = h->dist_scale_factor_field;
/* Frame/field mismatch between current MB and colocated MB: rescale. */
1176 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1177 /* FIXME assumes direct_8x8_inference == 1 */
1178 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1179 int mb_types_col[2];
1182 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1183 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1184 | (*mb_type & MB_TYPE_INTERLACED);
1185 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1187 if(IS_INTERLACED(*mb_type)){
1188 /* frame to field scaling */
1189 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1190 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* Rebase the colocated pointers to the top of the MB pair. */
1192 l1ref0 -= 2*h->b8_stride;
1193 l1ref1 -= 2*h->b8_stride;
1194 l1mv0 -= 4*h->b_stride;
1195 l1mv1 -= 4*h->b_stride;
1199 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1200 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1202 *mb_type |= MB_TYPE_16x8;
1204 *mb_type |= MB_TYPE_8x8;
1206 /* field to frame scaling */
1207 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1208 * but in MBAFF, top and bottom POC are equal */
1209 int dy = (s->mb_y&1) ? 1 : 2;
1211 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1212 l1ref0 += dy*h->b8_stride;
1213 l1ref1 += dy*h->b8_stride;
1214 l1mv0 += 2*dy*h->b_stride;
1215 l1mv1 += 2*dy*h->b_stride;
1218 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1220 *mb_type |= MB_TYPE_16x16;
1222 *mb_type |= MB_TYPE_8x8;
/* Mixed frame/field case: per-8x8 derivation with y_shift rescaling. */
1225 for(i8=0; i8<4; i8++){
1226 const int x8 = i8&1;
1227 const int y8 = i8>>1;
1229 const int16_t (*l1mv)[2]= l1mv0;
1231 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1233 h->sub_mb_type[i8] = sub_mb_type;
/* Temporal direct always uses list-1 ref 0. */
1235 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1236 if(IS_INTRA(mb_types_col[y8])){
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1238 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1239 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1243 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1245 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1247 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1250 scale = dist_scale_factor[ref0];
1251 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* Scale the colocated MV: x directly, y adjusted for frame/field units. */
1254 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1255 int my_col = (mv_col[1]<<y_shift)/2;
1256 int mx = (scale * mv_col[0] + 128) >> 8;
1257 int my = (scale * my_col + 128) >> 8;
1258 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1259 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1266 /* one-to-one mv scaling */
1268 if(IS_16X16(*mb_type)){
1271 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1272 if(IS_INTRA(mb_type_col)){
/* Colocated MB intra: use ref 0 / zero MVs (elided branch). */
1275 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1276 : map_col_to_list0[1][l1ref1[0]];
1277 const int scale = dist_scale_factor[ref0];
1278 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* L0 MV = scaled colocated MV; L1 MV = L0 MV minus colocated MV (8.4.1.2.3). */
1280 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1281 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1283 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1284 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1286 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1290 for(i8=0; i8<4; i8++){
1291 const int x8 = i8&1;
1292 const int y8 = i8>>1;
1294 const int16_t (*l1mv)[2]= l1mv0;
1296 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1298 h->sub_mb_type[i8] = sub_mb_type;
1299 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1300 if(IS_INTRA(mb_type_col)){
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1302 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1303 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1307 ref0 = l1ref0[x8 + y8*h->b8_stride];
1309 ref0 = map_col_to_list0[0][ref0];
1311 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1314 scale = dist_scale_factor[ref0];
1316 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1317 if(IS_SUB_8X8(sub_mb_type)){
1318 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1319 int mx = (scale * mv_col[0] + 128) >> 8;
1320 int my = (scale * mv_col[1] + 128) >> 8;
1321 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1322 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1324 for(i4=0; i4<4; i4++){
1325 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1326 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1327 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1328 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1329 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1330 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data from the decode caches
 * (h->mv_cache / h->ref_cache / h->mvd_cache) back into the frame-wide
 * tables of the current picture. Also stores the per-8x8 direct flags
 * for CABAC B slices.
 * NOTE(review): loop headers and some branches are elided in this excerpt;
 * the 64-bit copies move two 4-byte MVs per statement.
 */
1337 static inline void write_back_motion(H264Context *h, int mb_type){
1338 MpegEncContext * const s = &h->s;
1339 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1340 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* Mark list-0 refs unused so MV prediction of later MBs sees a clean state. */
1343 if(!USES_LIST(mb_type, 0))
1344 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1346 for(list=0; list<h->list_count; list++){
1348 if(!USES_LIST(mb_type, list))
/* Copy the 4x4 MV grid (two uint64 stores per row; elided row loop). */
1352 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1353 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1355 if( h->pps.cabac ) {
1356 if(IS_SKIP(mb_type))
1357 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1360 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1361 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One reference index per 8x8 block. */
1366 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1367 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1368 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1369 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1370 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B slices: persist which 8x8 sub-blocks used direct prediction. */
1374 if(h->slice_type == B_TYPE && h->pps.cabac){
1375 if(IS_8X8(mb_type)){
1376 uint8_t *direct_table = &h->direct_table[b8_xy];
1377 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1378 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1379 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1385 * Decodes a network abstraction layer unit.
1386 * @param consumed is the number of bytes used as input
1387 * @param length is the length of the array
1388 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1389 * @returns decoded bytes, might be src+1 if no escapes
1391 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
/* Parse the one-byte NAL header: ref_idc (bits 5-6) and unit type (bits 0-4). */
1396 // src[0]&0x80; //forbidden bit
1397 h->nal_ref_idc= src[0]>>5;
1398 h->nal_unit_type= src[0]&0x1F;
1402 for(i=0; i<length; i++)
1403 printf("%2X ", src[i]);
/* First pass: scan for the first 00 00 (01|02|03) emulation/startcode pattern. */
1405 for(i=0; i+1<length; i+=2){
1406 if(src[i]) continue;
1407 if(i>0 && src[i-1]==0) i--;
1408 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1410 /* startcode, so we must be past the end */
/* No escape sequence found: return the input buffer directly, no copy. */
1417 if(i>=length-1){ //no escaped 0
1418 *dst_length= length;
1419 *consumed= length+1; //+1 for the header
1423 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1424 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
/* NOTE(review): the realloc result is used unchecked; OOM would yield a
 * NULL dst below — elided lines may or may not guard this. */
1425 dst= h->rbsp_buffer[bufidx];
1431 //printf("decoding esc\n");
1434 //remove escapes (very rare 1:2^22)
1435 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1436 if(src[si+2]==3){ //escape
1441 }else //next start code
1445 dst[di++]= src[si++];
1449 *consumed= si + 1;//+1 for the header
1450 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1455 * identifies the exact end of the bitstream
1456 * @return the length of the trailing, or 0 if damaged
1458 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
/* NOTE(review): the rbsp_stop_one_bit scan is elided in this excerpt. */
1462 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1472 * idct tranforms the 16 dc values and dequantize them.
1473 * @param qp quantization parameter
1475 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1478 int temp[16]; //FIXME check if this is a good idea
/* DC coefficients are scattered through the 16x16 block at these offsets
 * (stride is 16 DCTELEMs per row of 4x4 blocks). */
1479 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1480 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1482 //memset(block, 64, 2*256);
/* Vertical pass of the 4x4 Hadamard transform into temp[]. */
1485 const int offset= y_offset[i];
1486 const int z0= block[offset+stride*0] + block[offset+stride*4];
1487 const int z1= block[offset+stride*0] - block[offset+stride*4];
1488 const int z2= block[offset+stride*1] - block[offset+stride*5];
1489 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Horizontal pass, then dequantize with qmul and round (>>8). */
1498 const int offset= x_offset[i];
1499 const int z0= temp[4*0+i] + temp[4*2+i];
1500 const int z1= temp[4*0+i] - temp[4*2+i];
1501 const int z2= temp[4*1+i] - temp[4*3+i];
1502 const int z3= temp[4*1+i] + temp[4*3+i];
1504 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1505 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1506 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1507 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1513 * dct tranforms the 16 dc values.
1514 * @param qp quantization parameter ??? FIXME
1516 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1517 // const int qmul= dequant_coeff[qp][0];
1519 int temp[16]; //FIXME check if this is a good idea
/* Same scattered DC layout as h264_luma_dc_dequant_idct_c above. */
1520 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1521 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* Vertical Hadamard pass into temp[]. */
1524 const int offset= y_offset[i];
1525 const int z0= block[offset+stride*0] + block[offset+stride*4];
1526 const int z1= block[offset+stride*0] - block[offset+stride*4];
1527 const int z2= block[offset+stride*1] - block[offset+stride*5];
1528 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Horizontal pass; forward transform normalizes with >>1 (no dequant). */
1537 const int offset= x_offset[i];
1538 const int z0= temp[4*0+i] + temp[4*2+i];
1539 const int z1= temp[4*0+i] - temp[4*2+i];
1540 const int z2= temp[4*1+i] - temp[4*3+i];
1541 const int z3= temp[4*1+i] + temp[4*3+i];
1543 block[stride*0 +offset]= (z0 + z3)>>1;
1544 block[stride*2 +offset]= (z1 + z2)>>1;
1545 block[stride*8 +offset]= (z1 - z2)>>1;
1546 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, in place. The four DCs sit at the corners of a
 * 2x2 grid with x stride 16 and y stride 32 within the coeff block.
 * NOTE(review): the butterfly lines computing e from a/b/c/d are elided
 * in this excerpt.
 */
1554 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1555 const int stride= 16*2;
1556 const int xStride= 16;
1559 a= block[stride*0 + xStride*0];
1560 b= block[stride*0 + xStride*1];
1561 c= block[stride*1 + xStride*0];
1562 d= block[stride*1 + xStride*1];
/* Write back dequantized results; >>7 folds in the transform scaling. */
1569 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1570 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1571 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1572 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients, in place
 * (encoder-side counterpart of chroma_dc_dequant_idct_c; same layout).
 * NOTE(review): the intermediate butterfly lines are elided in this excerpt.
 */
1576 static void chroma_dc_dct_c(DCTELEM *block){
1577 const int stride= 16*2;
1578 const int xStride= 16;
1581 a= block[stride*0 + xStride*0];
1582 b= block[stride*0 + xStride*1];
1583 c= block[stride*1 + xStride*0];
1584 d= block[stride*1 + xStride*1];
1591 block[stride*0 + xStride*0]= (a+c);
1592 block[stride*0 + xStride*1]= (e+b);
1593 block[stride*1 + xStride*0]= (a-c);
1594 block[stride*1 + xStride*1]= (e-b);
1599 * gets the chroma qp.
/* Maps a luma qscale to the chroma QP via the PPS lookup table;
 * t selects the Cb/Cr table index. The &0xff guards against
 * out-of-range qscale values indexing past the table. */
1601 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1602 return h->pps.chroma_qp_table[t][qscale & 0xff];
1605 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1606 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block of coefficients (encoder path).
 * Uses the threshold1/threshold2 trick: one unsigned comparison decides
 * whether |level| clears the dead zone in either direction, avoiding a
 * branch for the common all-below-threshold case. intra selects a larger
 * bias (1/3 vs 1/6); separate_dc applies a different shift to block[0].
 * @return index of the last non-zero coefficient.
 * NOTE(review): loop headers, the luma/chroma DC branch selector, and
 * several store/closing lines are elided in this excerpt.
 */
1607 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1609 const int * const quant_table= quant_coeff[qscale];
1610 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1611 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1612 const unsigned int threshold2= (threshold1<<1);
/* Separate-DC path, variant with QUANT_SHIFT-2 (luma DC per visible code). */
1618 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1619 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1620 const unsigned int dc_threshold2= (dc_threshold1<<1);
1622 int level= block[0]*quant_coeff[qscale+18][0];
1623 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1625 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1628 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1631 // last_non_zero = i;
/* Separate-DC path, variant with QUANT_SHIFT+1. */
1636 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1637 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1638 const unsigned int dc_threshold2= (dc_threshold1<<1);
1640 int level= block[0]*quant_table[0];
1641 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1643 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1646 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1649 // last_non_zero = i;
/* AC coefficients in scan order. */
1662 const int j= scantable[i];
1663 int level= block[j]*quant_table[j];
1665 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1666 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1667 if(((unsigned)(level+threshold1))>threshold2){
1669 level= (bias + level)>>QUANT_SHIFT;
1672 level= (bias - level)>>QUANT_SHIFT;
1681 return last_non_zero;
/**
 * Motion compensation for one partition in one direction (list).
 * Computes quarter-pel luma / eighth-pel chroma source positions from the
 * cached MV, falls back to ff_emulated_edge_mc when the reference area
 * extends outside the picture, then applies the luma qpel and chroma MC
 * functions.
 * NOTE(review): several lines (emu flag handling, square/delta second luma
 * call condition) are elided in this excerpt.
 */
1684 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1685 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1686 int src_x_offset, int src_y_offset,
1687 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1688 MpegEncContext * const s = &h->s;
/* MV in quarter-pel units, offset to absolute picture coordinates. */
1689 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1690 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1691 const int luma_xy= (mx&3) + ((my&3)<<2);
1692 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1693 uint8_t * src_cb, * src_cr;
1694 int extra_width= h->emu_edge_width;
1695 int extra_height= h->emu_edge_height;
1697 const int full_mx= mx>>2;
1698 const int full_my= my>>2;
1699 const int pic_width = 16*s->mb_width;
1700 const int pic_height = 16*s->mb_height >> MB_MBAFF;
1702 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
/* Sub-pel filters need 2 extra pixels on each side; shrink the slack. */
1705 if(mx&7) extra_width -= 3;
1706 if(my&7) extra_height -= 3;
/* Reference block (16x16 + filter margin) leaves the picture: emulate edges. */
1708 if( full_mx < 0-extra_width
1709 || full_my < 0-extra_height
1710 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1711 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1712 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1713 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1717 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1719 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1722 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1725 // chroma offset when predicting from a field of opposite parity
1726 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
1727 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1729 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1730 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1733 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1734 src_cb= s->edge_emu_buffer;
1736 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1739 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1740 src_cr= s->edge_emu_buffer;
1742 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted MC for one partition: predicts from list 0 with the "put"
 * functions, then (when bi-predicted) from list 1 with the "avg" functions
 * so the two predictions are averaged into the destination.
 * NOTE(review): the if(list0)/if(list1) guards and the put->avg switch
 * lines are elided in this excerpt.
 */
1745 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1747 int x_offset, int y_offset,
1748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1749 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1750 int list0, int list1){
1751 MpegEncContext * const s = &h->s;
1752 qpel_mc_func *qpix_op= qpix_put;
1753 h264_chroma_mc_func chroma_op= chroma_put;
/* Advance destinations to this partition; offsets are in 8-pel units. */
1755 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1756 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1757 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1758 x_offset += 8*s->mb_x;
1759 y_offset += 8*(s->mb_y >> MB_MBAFF);
1762 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1763 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1764 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1765 qpix_op, chroma_op);
/* Switch to averaging ops so the list-1 prediction blends with list 0. */
1768 chroma_op= chroma_avg;
1772 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1773 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1774 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1775 qpix_op, chroma_op);
/**
 * Weighted MC for one partition. Bi-predicted blocks render list 1 into a
 * scratch buffer and blend with either implicit weights (use_weight == 2)
 * or explicit per-ref luma/chroma weights and offsets. Uni-predicted
 * blocks apply the explicit weight in place.
 * NOTE(review): the list0&&list1 guard and some branch/closing lines are
 * elided in this excerpt.
 */
1779 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1780 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1781 int x_offset, int y_offset,
1782 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1783 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1784 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1785 int list0, int list1){
1786 MpegEncContext * const s = &h->s;
1788 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1789 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1790 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1791 x_offset += 8*s->mb_x;
1792 y_offset += 8*(s->mb_y >> MB_MBAFF);
1795 /* don't optimize for luma-only case, since B-frames usually
1796 * use implicit weights => chroma too. */
1797 uint8_t *tmp_cb = s->obmc_scratchpad;
1798 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1799 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1800 int refn0 = h->ref_cache[0][ scan8[n] ];
1801 int refn1 = h->ref_cache[1][ scan8[n] ];
/* List 0 goes to the real destination, list 1 to the scratchpad. */
1803 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1804 dest_y, dest_cb, dest_cr,
1805 x_offset, y_offset, qpix_put, chroma_put);
1806 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1807 tmp_y, tmp_cb, tmp_cr,
1808 x_offset, y_offset, qpix_put, chroma_put);
/* Implicit weighting: weights sum to 64, log2 denom 5, zero offset. */
1810 if(h->use_weight == 2){
1811 int weight0 = h->implicit_weight[refn0][refn1];
1812 int weight1 = 64 - weight0;
1813 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1814 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1815 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* Explicit bi-directional weights and summed offsets. */
1817 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1818 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1819 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1820 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1821 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1822 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1823 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1825 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Uni-prediction: render directly, then apply the explicit weight. */
1828 int list = list1 ? 1 : 0;
1829 int refn = h->ref_cache[list][ scan8[n] ];
1830 Picture *ref= &h->ref_list[list][refn];
1831 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1832 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1833 qpix_put, chroma_put);
1835 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1836 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1837 if(h->use_weight_chroma){
1838 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1839 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1840 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1841 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard MC. Weighted MC is
 * used for explicit weighting (use_weight == 1) and for implicit
 * weighting when the weight is not the trivial 32/32 split (in which
 * case a plain average is equivalent and cheaper).
 * NOTE(review): the else keyword before the mc_part_std call is elided
 * in this excerpt.
 */
1846 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1847 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1848 int x_offset, int y_offset,
1849 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1850 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1851 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1852 int list0, int list1){
1853 if((h->use_weight==2 && list0 && list1
1854 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1855 || h->use_weight==1)
1856 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1857 x_offset, y_offset, qpix_put, chroma_put,
1858 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1860 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1861 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1864 static inline void prefetch_motion(H264Context *h, int list){
1865 /* fetch pixels for estimated mv 4 macroblocks ahead
1866 * optimized for 64byte cache lines */
1867 MpegEncContext * const s = &h->s;
1868 const int refn = h->ref_cache[list][scan8[0]];
/* NOTE(review): an elided guard presumably skips invalid refn — confirm. */
/* MV converted from quarter-pel to integer pel; +16*mb_x+8 looks ahead. */
1870 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1871 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1872 uint8_t **src= h->ref_list[list][refn].data;
1873 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1874 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb and Cr planes are assumed contiguous: their spacing (src[2]-src[1])
 * is passed as the prefetch stride so one call covers both. */
1875 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1876 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock, dispatching
 * to mc_part per partition shape (16x16, 16x8, 8x16, or per-8x8 sub-types
 * down to 4x4). Prefetches list-0 reference pixels before and list-1
 * after the work.
 * NOTE(review): the 8x8 loop header and some closing braces are elided in
 * this excerpt.
 */
1880 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1881 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1882 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1883 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1884 MpegEncContext * const s = &h->s;
1885 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1886 const int mb_type= s->current_picture.mb_type[mb_xy];
1888 assert(IS_INTER(mb_type));
1890 prefetch_motion(h, 0);
1892 if(IS_16X16(mb_type)){
1893 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1894 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1895 &weight_op[0], &weight_avg[0],
1896 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1897 }else if(IS_16X8(mb_type)){
1898 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1899 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1900 &weight_op[1], &weight_avg[1],
1901 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1902 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1903 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1904 &weight_op[1], &weight_avg[1],
1905 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1906 }else if(IS_8X16(mb_type)){
1907 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1908 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1909 &weight_op[2], &weight_avg[2],
1910 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1911 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1912 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1913 &weight_op[2], &weight_avg[2],
1914 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 mode: each 8x8 block carries its own sub_mb_type. */
1918 assert(IS_8X8(mb_type));
1921 const int sub_mb_type= h->sub_mb_type[i];
1923 int x_offset= (i&1)<<2;
1924 int y_offset= (i&2)<<1;
1926 if(IS_SUB_8X8(sub_mb_type)){
1927 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1928 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1929 &weight_op[3], &weight_avg[3],
1930 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1931 }else if(IS_SUB_8X4(sub_mb_type)){
1932 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1933 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1934 &weight_op[4], &weight_avg[4],
1935 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1937 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1938 &weight_op[4], &weight_avg[4],
1939 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1940 }else if(IS_SUB_4X8(sub_mb_type)){
1941 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1942 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1943 &weight_op[5], &weight_avg[5],
1944 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1945 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1946 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1947 &weight_op[5], &weight_avg[5],
1948 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1951 assert(IS_SUB_4X4(sub_mb_type));
1953 int sub_x_offset= x_offset + 2*(j&1);
1954 int sub_y_offset= y_offset + (j&2);
1955 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1956 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1957 &weight_op[6], &weight_avg[6],
1958 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1964 prefetch_motion(h, 1);
/**
 * One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run VLCs and their chroma-DC variants) from the static length/bits
 * arrays. Guarded by a static flag so repeated calls are no-ops.
 * NOTE(review): the done-flag check, loop headers, and closing braces are
 * elided in this excerpt; the guard is not thread-safe by itself —
 * presumably callers serialize init.
 */
1967 static void decode_init_vlc(void){
1968 static int done = 0;
1974 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1975 &chroma_dc_coeff_token_len [0], 1, 1,
1976 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1979 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1980 &coeff_token_len [i][0], 1, 1,
1981 &coeff_token_bits[i][0], 1, 1, 1);
1985 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1986 &chroma_dc_total_zeros_len [i][0], 1, 1,
1987 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1989 for(i=0; i<15; i++){
1990 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1991 &total_zeros_len [i][0], 1, 1,
1992 &total_zeros_bits[i][0], 1, 1, 1);
1996 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1997 &run_len [i][0], 1, 1,
1998 &run_bits[i][0], 1, 1, 1);
/* run_before for zeros_left > 6 uses a single dedicated table. */
2000 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2001 &run_len [6][0], 1, 1,
2002 &run_bits[6][0], 1, 1, 1);
/**
 * Free all per-decoder tables allocated by alloc_tables()/context_init(),
 * the SPS/PPS buffers, and the per-thread-context scratch buffers.
 * av_freep() NULLs each pointer, so a later alloc_tables() starts clean.
 */
2006 static void free_tables(H264Context *h){
2009 av_freep(&h->intra4x4_pred_mode);
2010 av_freep(&h->chroma_pred_mode_table);
2011 av_freep(&h->cbp_table);
2012 av_freep(&h->mvd_table[0]);
2013 av_freep(&h->mvd_table[1]);
2014 av_freep(&h->direct_table);
2015 av_freep(&h->non_zero_count);
2016 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (see alloc_tables), so only
 * the base is freed and the derived pointer is cleared */
2017 h->slice_table= NULL;
2019 av_freep(&h->mb2b_xy);
2020 av_freep(&h->mb2b8_xy);
2022 for(i = 0; i < MAX_SPS_COUNT; i++)
2023 av_freep(h->sps_buffers + i);
2025 for(i = 0; i < MAX_PPS_COUNT; i++)
2026 av_freep(h->pps_buffers + i);
/* per-thread contexts own their own border/scratch/edge buffers */
2028 for(i = 0; i < h->s.avctx->thread_count; i++) {
2029 hx = h->thread_context[i];
2031 av_freep(&hx->top_borders[1]);
2032 av_freep(&hx->top_borders[0]);
2033 av_freep(&hx->s.obmc_scratchpad);
2034 av_freep(&hx->s.allocated_edge_emu_buffer);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values,
 * combining the standard init values with the PPS scaling matrices.
 * If both 8x8 scaling matrices are identical, table 1 aliases table 0.
 */
2038 static void init_dequant8_coeff_table(H264Context *h){
/* transposed layout is used when a non-C (SIMD) idct8 is active */
2040 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2041 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2042 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2044 for(i=0; i<2; i++ ){
2045 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2046 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2050 for(q=0; q<52; q++){
/* qp = 6*div6[q] + rem6[q]; shift scales by 2^(qp/6), idx selects the base row */
2051 int shift = ff_div6[q];
2052 int idx = ff_rem6[q];
2054 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2055 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2056 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables (6 matrices: intra/inter x
 * Y/Cb/Cr) for all 52 QP values. Matrices with identical PPS scaling
 * lists share one buffer to save memory.
 */
2061 static void init_dequant4_coeff_table(H264Context *h){
/* transposed layout is used when a non-C (SIMD) idct is active */
2063 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2064 for(i=0; i<6; i++ ){
2065 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* alias an earlier buffer when the scaling lists are equal */
2067 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2068 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2075 for(q=0; q<52; q++){
/* +2 keeps extra precision relative to the 8x8 case */
2076 int shift = ff_div6[q] + 2;
2077 int idx = ff_rem6[q];
2079 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2080 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2081 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables; 8x8 tables only when the PPS enables
 * 8x8 transforms. With lossless transform_bypass, QP 0 entries are
 * forced to the identity scale (1<<6) so dequant is a no-op there.
 */
2086 static void init_dequant_tables(H264Context *h){
2088 init_dequant4_coeff_table(h);
2089 if(h->pps.transform_8x8_mode)
2090 init_dequant8_coeff_table(h);
2091 if(h->sps.transform_bypass){
2094 h->dequant4_coeff[i][0][x] = 1<<6;
2095 if(h->pps.transform_8x8_mode)
2098 h->dequant8_coeff[i][0][x] = 1<<6;
2105 * needs width/height
2107 static int alloc_tables(H264Context *h){
2108 MpegEncContext * const s = &h->s;
/* +1 row of macroblocks so neighbor accesses above row 0 stay in bounds */
2109 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2112 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2114 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2115 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2116 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* CABAC-only side tables (mvd, direct, chroma pred mode) */
2118 if( h->pps.cabac ) {
2119 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2120 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2121 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2122 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so out-of-frame neighbors
 * land in valid (sentinel) memory */
2125 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2126 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock index -> motion-vector-block / 8x8-block index maps */
2128 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2129 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2130 for(y=0; y<s->mb_height; y++){
2131 for(x=0; x<s->mb_width; x++){
2132 const int mb_xy= x + y*s->mb_stride;
2133 const int b_xy = 4*x + 4*y*h->b_stride;
2134 const int b8_xy= 2*x + 2*y*h->b8_stride;
2136 h->mb2b_xy [mb_xy]= b_xy;
2137 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2141 s->obmc_scratchpad = NULL;
2143 if(!h->dequant4_coeff[0])
2144 init_dequant_tables(h);
2153 * Mimic alloc_tables(), but for every context thread.
/* Shares (does NOT copy) the table pointers from src; ownership stays
 * with src, so free_tables() must only free them once. obmc_scratchpad
 * stays per-thread and is allocated later in frame_start(). */
2155 static void clone_tables(H264Context *dst, H264Context *src){
2156 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2157 dst->non_zero_count = src->non_zero_count;
2158 dst->slice_table = src->slice_table;
2159 dst->cbp_table = src->cbp_table;
2160 dst->mb2b_xy = src->mb2b_xy;
2161 dst->mb2b8_xy = src->mb2b8_xy;
2162 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2163 dst->mvd_table[0] = src->mvd_table[0];
2164 dst->mvd_table[1] = src->mvd_table[1];
2165 dst->direct_table = src->direct_table;
2167 dst->s.obmc_scratchpad = NULL;
2168 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2173 * Allocate buffers which are not shared amongst multiple threads.
2175 static int context_init(H264Context *h){
2176 MpegEncContext * const s = &h->s;
/* one row of saved top-border samples: 16 luma + 8 Cb + 8 Cr per MB */
2178 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2179 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2181 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2182 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2183 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
2184 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
2187 return -1; // free_tables will clean up for us
/**
 * Initialization shared by the H.264 and SVQ3 entry points: dimensions,
 * prediction function pointers, and flat (all-16) default scaling
 * matrices so dequant works before any SPS/PPS is parsed.
 */
2190 static void common_init(H264Context *h){
2191 MpegEncContext * const s = &h->s;
2193 s->width = s->avctx->width;
2194 s->height = s->avctx->height;
2195 s->codec_id= s->avctx->codec->id;
2197 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS seen yet", forces dequant table rebuild on first PPS */
2199 h->dequant_coeff_pps= -1;
2200 s->unrestricted_mv=1;
2201 s->decode=1; //FIXME
/* 16 is the spec's flat scaling-list value (no scaling) */
2203 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2204 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback for the H.264 decoder.
 * NOTE(review): several lines (common_init call, avcc-extradata branch
 * body, return) are elided in this extraction; confirm against full file.
 */
2207 static int decode_init(AVCodecContext *avctx){
2208 H264Context *h= avctx->priv_data;
2209 MpegEncContext * const s = &h->s;
2211 MPV_decode_defaults(s);
2216 s->out_format = FMT_H264;
2217 s->workaround_bugs= avctx->workaround_bugs;
2220 // s->decode_mb= ff_h263_decode_mb;
2221 s->quarter_sample = 1;
2223 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata starting with byte 1 == avcC (length-prefixed NAL) layout */
2227 if(avctx->extradata_size > 0 && avctx->extradata &&
2228 *(char *)avctx->extradata == 1){
/* slice 0 is decoded on the main context */
2235 h->thread_context[0] = h;
/**
 * Per-frame setup: starts the MPV frame and error resilience, computes
 * the block_offset table (frame layout in [0..23], field/MBAFF layout
 * with doubled stride in [24..47]), and lazily allocates the per-thread
 * obmc scratchpad now that linesize is known.
 */
2239 static int frame_start(H264Context *h){
2240 MpegEncContext * const s = &h->s;
2243 if(MPV_frame_start(s, s->avctx) < 0)
2245 ff_er_frame_start(s);
2247 assert(s->linesize && s->uvlinesize);
/* scan8 maps block index -> cache position; &7 = x, >>3 = y in 4x4 units */
2249 for(i=0; i<16; i++){
2250 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2251 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2254 h->block_offset[16+i]=
2255 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2256 h->block_offset[24+16+i]=
2257 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2260 /* can't be in alloc_tables because linesize isn't known there.
2261 * FIXME: redo bipred weight to not require extra buffer? */
2262 for(i = 0; i < s->avctx->thread_count; i++)
2263 if(!h->thread_context[i]->s.obmc_scratchpad)
2264 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2266 /* some macroblocks will be accessed before they're available */
2267 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2268 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2270 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Save the bottom row and right column of the just-decoded macroblock
 * into top_borders[0]/left_border, so the deblocking filter of the
 * neighbors below/right can read the unfiltered samples later.
 */
2274 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2275 MpegEncContext * const s = &h->s;
2279 src_cb -= uvlinesize;
2280 src_cr -= uvlinesize;
2282 // There are two lines saved, the line above the top macroblock of a pair,
2283 // and the line above the bottom macroblock
2284 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2285 for(i=1; i<17; i++){
2286 h->left_border[i]= src_y[15+i* linesize];
/* save the MB's last luma row (16 bytes as two 64-bit stores) */
2289 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2290 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma is skipped in gray-only decoding */
2292 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2293 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2294 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2296 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2297 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved top/left border samples
 * with the picture, so intra prediction sees unfiltered neighbors while
 * the deblocking filter is enabled. With deblocking_filter==2 the swap
 * only happens across same-slice neighbors.
 */
2304 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2305 MpegEncContext * const s = &h->s;
2312 if(h->deblocking_filter == 2) {
2313 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2314 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2315 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2317 deblock_left = (s->mb_x > 0);
2318 deblock_top = (s->mb_y > 0);
/* step back to the row above / column left of the macroblock */
2321 src_y -= linesize + 1;
2322 src_cb -= uvlinesize + 1;
2323 src_cr -= uvlinesize + 1;
2325 #define XCHG(a,b,t,xchg)\
2332 for(i = !deblock_top; i<17; i++){
2333 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2338 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2339 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right luma samples of the next MB are needed by some pred modes */
2340 if(s->mb_x+1 < s->mb_width){
2341 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2345 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2347 for(i = !deblock_top; i<9; i++){
2348 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2349 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2353 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves the borders for a whole
 * macroblock pair (two rows into top_borders[0]/[1], 2x16 luma + 2x8
 * chroma left-column samples into left_border).
 */
2359 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2360 MpegEncContext * const s = &h->s;
2363 src_y -= 2 * linesize;
2364 src_cb -= 2 * uvlinesize;
2365 src_cr -= 2 * uvlinesize;
2367 // There are two lines saved, the line above the top macroblock of a pair,
2368 // and the line above the bottom macroblock
2369 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2370 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2371 for(i=2; i<34; i++){
2372 h->left_border[i]= src_y[15+i* linesize];
/* last two luma rows of the pair (rows 32 and 33 relative to src_y) */
2375 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2376 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2377 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2378 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2380 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2381 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2382 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2383 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2384 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2385 for(i=2; i<18; i++){
2386 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2387 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2389 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2390 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2391 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2392 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swaps/restores the saved borders
 * of a macroblock pair (both top_borders rows, 34 luma + 2x18 chroma
 * left samples) around intra prediction vs. deblocking.
 */
2396 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2397 MpegEncContext * const s = &h->s;
2400 int deblock_left = (s->mb_x > 0);
/* > 1 because the top neighbor of a pair is two MB rows up */
2401 int deblock_top = (s->mb_y > 1);
2403 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2405 src_y -= 2 * linesize + 1;
2406 src_cb -= 2 * uvlinesize + 1;
2407 src_cr -= 2 * uvlinesize + 1;
2409 #define XCHG(a,b,t,xchg)\
2416 for(i = (!deblock_top)<<1; i<34; i++){
2417 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2422 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2423 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2424 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2425 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2426 if(s->mb_x+1 < s->mb_width){
2427 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2428 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2432 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2434 for(i = (!deblock_top) << 1; i<18; i++){
2435 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2436 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2440 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2441 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2442 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2443 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock: prediction (intra or motion compensation),
 * residual IDCT+add for luma and chroma, then deblocking. `simple`
 * selects a fast path that assumes progressive H.264 (no MBAFF, no PCM,
 * no SVQ3, no gray-only) and is resolved at compile time via
 * av_always_inline from the two thin wrappers below.
 */
2448 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2449 MpegEncContext * const s = &h->s;
2450 const int mb_x= s->mb_x;
2451 const int mb_y= s->mb_y;
2452 const int mb_xy= mb_x + mb_y*s->mb_stride;
2453 const int mb_type= s->current_picture.mb_type[mb_xy];
2454 uint8_t *dest_y, *dest_cb, *dest_cr;
2455 int linesize, uvlinesize /*dct_offset*/;
2457 int *block_offset = &h->block_offset[0];
2458 const unsigned int bottom = mb_y & 1;
2459 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2460 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2461 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2463 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2464 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2465 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2467 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2468 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the stride, use the field block_offset table,
 * and for the bottom field shift dest up into the interleaved lines */
2470 if (!simple && MB_FIELD) {
2471 linesize = h->mb_linesize = s->linesize * 2;
2472 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2473 block_offset = &h->block_offset[24];
2474 if(mb_y&1){ //FIXME move out of this func?
2475 dest_y -= s->linesize*15;
2476 dest_cb-= s->uvlinesize*7;
2477 dest_cr-= s->uvlinesize*7;
2481 for(list=0; list<h->list_count; list++){
2482 if(!USES_LIST(mb_type, list))
2484 if(IS_16X16(mb_type)){
2485 int8_t *ref = &h->ref_cache[list][scan8[0]];
2486 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2488 for(i=0; i<16; i+=4){
2489 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2490 int ref = h->ref_cache[list][scan8[i]];
2492 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
2498 linesize = h->mb_linesize = s->linesize;
2499 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2500 // dct_offset = s->linesize * 16;
/* pick the residual-add functions for this MB's transform size */
2503 if(transform_bypass){
2505 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2506 }else if(IS_8x8DCT(mb_type)){
2507 idct_dc_add = s->dsp.h264_idct8_dc_add;
2508 idct_add = s->dsp.h264_idct8_add;
2510 idct_dc_add = s->dsp.h264_idct_dc_add;
2511 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: temporarily restore unfiltered borders of the MB pair */
2514 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2515 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2516 int mbt_y = mb_y&~1;
2517 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2518 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2519 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2520 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were parsed into h->mb; just copy them out */
2523 if (!simple && IS_INTRA_PCM(mb_type)) {
2526 // The pixels are stored in h->mb array in the same order as levels,
2527 // copy them in output in the correct order.
2528 for(i=0; i<16; i++) {
2529 for (y=0; y<4; y++) {
2530 for (x=0; x<4; x++) {
2531 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2535 for(i=16; i<16+4; i++) {
2536 for (y=0; y<4; y++) {
2537 for (x=0; x<4; x++) {
2538 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2542 for(i=20; i<20+4; i++) {
2543 for (y=0; y<4; y++) {
2544 for (x=0; x<4; x++) {
2545 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* ---- intra prediction path ---- */
2550 if(IS_INTRA(mb_type)){
2551 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2552 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2554 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2555 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2556 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2559 if(IS_INTRA4x4(mb_type)){
2560 if(simple || !s->encoding){
2561 if(IS_8x8DCT(mb_type)){
2562 for(i=0; i<16; i+=4){
2563 uint8_t * const ptr= dest_y + block_offset[i];
2564 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2565 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2566 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2567 (h->topright_samples_available<<i)&0x4000, linesize);
2569 if(nnz == 1 && h->mb[i*16])
2570 idct_dc_add(ptr, h->mb + i*16, linesize);
2572 idct_add(ptr, h->mb + i*16, linesize);
2576 for(i=0; i<16; i++){
2577 uint8_t * const ptr= dest_y + block_offset[i];
2579 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* these modes read top-right samples; replicate the last available
 * top sample when the real top-right block is unavailable */
2582 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2583 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2584 assert(mb_y || linesize <= block_offset[i]);
2585 if(!topright_avail){
2586 tr= ptr[3 - linesize]*0x01010101;
2587 topright= (uint8_t*) &tr;
2589 topright= ptr + 4 - linesize;
2593 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2594 nnz = h->non_zero_count_cache[ scan8[i] ];
2597 if(nnz == 1 && h->mb[i*16])
2598 idct_dc_add(ptr, h->mb + i*16, linesize);
2600 idct_add(ptr, h->mb + i*16, linesize);
2602 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2607 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2609 if(!transform_bypass)
2610 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
2612 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2614 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2615 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* ---- inter path: motion compensation ---- */
2617 hl_motion(h, dest_y, dest_cb, dest_cr,
2618 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2619 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2620 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* ---- luma residual ---- */
2624 if(!IS_INTRA4x4(mb_type)){
2626 if(IS_INTRA16x16(mb_type)){
2627 for(i=0; i<16; i++){
2628 if(h->non_zero_count_cache[ scan8[i] ])
2629 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2630 else if(h->mb[i*16])
2631 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2634 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2635 for(i=0; i<16; i+=di){
2636 int nnz = h->non_zero_count_cache[ scan8[i] ];
2638 if(nnz==1 && h->mb[i*16])
2639 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2641 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2646 for(i=0; i<16; i++){
2647 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2648 uint8_t * const ptr= dest_y + block_offset[i];
2649 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* ---- chroma residual (skipped for gray-only decoding) ---- */
2655 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2656 uint8_t *dest[2] = {dest_cb, dest_cr};
2657 if(transform_bypass){
2658 idct_add = idct_dc_add = s->dsp.add_pixels4;
2660 idct_add = s->dsp.h264_idct_add;
2661 idct_dc_add = s->dsp.h264_idct_dc_add;
2662 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2663 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2666 for(i=16; i<16+8; i++){
2667 if(h->non_zero_count_cache[ scan8[i] ])
2668 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2669 else if(h->mb[i*16])
2670 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2673 for(i=16; i<16+8; i++){
2674 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2675 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2676 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* ---- deblocking ---- */
2682 if(h->deblocking_filter) {
2683 if (!simple && FRAME_MBAFF) {
2684 //FIXME try deblocking one mb at a time?
2685 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
/* MBAFF filters the whole pair once the bottom MB is reconstructed */
2686 const int mb_y = s->mb_y - 1;
2687 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2688 const int mb_xy= mb_x + mb_y*s->mb_stride;
2689 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2690 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2691 if (!bottom) return;
2692 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2693 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2694 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2696 if(IS_INTRA(mb_type_top | mb_type_bottom))
2697 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2699 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2703 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2704 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2705 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2706 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2707 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2710 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2711 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2712 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2713 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2714 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2716 tprintf(h->s.avctx, "call filter_mb\n");
2717 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2718 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2719 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2725 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple=1 is folded away by av_always_inline in hl_decode_mb_internal */
2727 static void hl_decode_mb_simple(H264Context *h){
2728 hl_decode_mb_internal(h, 1);
2732 * Process a macroblock; this handles edge cases, such as interlacing.
/* av_noinline keeps the rarely-taken slow path out of the hot caller */
2734 static void av_noinline hl_decode_mb_complex(H264Context *h){
2735 hl_decode_mb_internal(h, 0);
/**
 * Dispatch reconstruction of the current macroblock to the fast path
 * (progressive H.264) or the complex path (MBAFF/field, PCM, SVQ3,
 * gray-only, or encoding).
 */
2738 static void hl_decode_mb(H264Context *h){
2739 MpegEncContext * const s = &h->s;
2740 const int mb_x= s->mb_x;
2741 const int mb_y= s->mb_y;
2742 const int mb_xy= mb_x + mb_y*s->mb_stride;
2743 const int mb_type= s->current_picture.mb_type[mb_xy];
2744 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2750 hl_decode_mb_complex(h);
2751 else hl_decode_mb_simple(h);
2755 * fills the default_ref_list.
/**
 * Builds the default reference picture lists per H.264 §8.2.4.2:
 * P slices get short-term refs by descending frame_num then long-term
 * refs; B slices get short-term refs ordered by POC distance (L0:
 * past-then-future, L1: future-then-past), falling back across the
 * current POC, then long-term refs. L0/L1 are de-duplicated by swapping
 * L1's first two entries when the lists would be identical.
 * Fixed vs. previous revision: stray empty statements (";;") after the
 * long-term pic_id assignments removed, and the List1 debug tprintf now
 * prints list 1's data pointer instead of list 0's.
 */
2757 static int fill_default_ref_list(H264Context *h){
2758 MpegEncContext * const s = &h->s;
2760 int smallest_poc_greater_than_current = -1;
2761 Picture sorted_short_ref[32];
2763 if(h->slice_type==B_TYPE){
2767 /* sort frame according to poc in B slice */
2768 for(out_i=0; out_i<h->short_ref_count; out_i++){
2770 int best_poc=INT_MAX;
/* selection sort: pick the smallest poc above the previous one */
2772 for(i=0; i<h->short_ref_count; i++){
2773 const int poc= h->short_ref[i]->poc;
2774 if(poc > limit && poc < best_poc){
2780 assert(best_i != INT_MIN);
2783 sorted_short_ref[out_i]= *h->short_ref[best_i];
2784 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
2785 if (-1 == smallest_poc_greater_than_current) {
2786 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2787 smallest_poc_greater_than_current = out_i;
2793 if(s->picture_structure == PICT_FRAME){
2794 if(h->slice_type==B_TYPE){
2796 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2798 // find the largest poc
2799 for(list=0; list<2; list++){
/* L0 walks backward (past refs first), L1 forward (future refs first) */
2802 int step= list ? -1 : 1;
2804 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2805 while(j<0 || j>= h->short_ref_count){
2806 if(j != -99 && step == (list ? -1 : 1))
/* reverse direction once one side of the current POC is exhausted */
2809 j= smallest_poc_greater_than_current + (step>>1);
2811 if(sorted_short_ref[j].reference != 3) continue;
2812 h->default_ref_list[list][index ]= sorted_short_ref[j];
2813 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
/* long-term refs are appended after the short-term ones */
2816 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2817 if(h->long_ref[i] == NULL) continue;
2818 if(h->long_ref[i]->reference != 3) continue;
2820 h->default_ref_list[ list ][index ]= *h->long_ref[i];
2821 h->default_ref_list[ list ][index++].pic_id= i;
2824 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2825 // swap the two first elements of L1 when
2826 // L0 and L1 are identical
2827 Picture temp= h->default_ref_list[1][0];
2828 h->default_ref_list[1][0] = h->default_ref_list[1][1];
2829 h->default_ref_list[1][1] = temp;
2832 if(index < h->ref_count[ list ])
2833 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
/* P/SP slice: short-term refs in marking order, then long-term refs */
2837 for(i=0; i<h->short_ref_count; i++){
2838 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
2839 h->default_ref_list[0][index ]= *h->short_ref[i];
2840 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2842 for(i = 0; i < 16; i++){
2843 if(h->long_ref[i] == NULL) continue;
2844 if(h->long_ref[i]->reference != 3) continue;
2845 h->default_ref_list[0][index ]= *h->long_ref[i];
2846 h->default_ref_list[0][index++].pic_id= i;
2848 if(index < h->ref_count[0])
2849 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2852 if(h->slice_type==B_TYPE){
2854 //FIXME second field blah
2858 for (i=0; i<h->ref_count[0]; i++) {
2859 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2861 if(h->slice_type==B_TYPE){
2862 for (i=0; i<h->ref_count[1]; i++) {
2863 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2870 static void print_short_term(H264Context *h);
2871 static void print_long_term(H264Context *h);
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * starts from the default lists, then for each reordering command
 * (idc 0/1 = short-term by pic_num delta, idc 2 = long-term by index,
 * idc 3 = end) moves the named picture to the current index, shifting
 * the rest down. Missing references are logged and zeroed.
 * Returns 0 on success, -1 on bitstream errors (elided returns).
 */
2873 static int decode_ref_pic_list_reordering(H264Context *h){
2874 MpegEncContext * const s = &h->s;
2877 print_short_term(h);
2879 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
2881 for(list=0; list<h->list_count; list++){
2882 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_lX */
2884 if(get_bits1(&s->gb)){
2885 int pred= h->curr_pic_num;
2887 for(index=0; ; index++){
2888 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2889 unsigned int pic_id;
2891 Picture *ref = NULL;
2893 if(reordering_of_pic_nums_idc==3)
2896 if(index >= h->ref_count[list]){
2897 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2901 if(reordering_of_pic_nums_idc<3){
2902 if(reordering_of_pic_nums_idc<2){
/* short-term: pred is updated by +-abs_diff, modulo max_pic_num */
2903 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2905 if(abs_diff_pic_num >= h->max_pic_num){
2906 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2910 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2911 else pred+= abs_diff_pic_num;
2912 pred &= h->max_pic_num - 1;
2914 for(i= h->short_ref_count-1; i>=0; i--){
2915 ref = h->short_ref[i];
2916 assert(ref->reference == 3);
2917 assert(!ref->long_ref);
2918 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
2922 ref->pic_id= ref->frame_num;
2924 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2926 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2929 ref = h->long_ref[pic_id];
2931 ref->pic_id= pic_id;
2932 assert(ref->reference == 3);
2933 assert(ref->long_ref);
2941 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2942 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* find the old position of ref (if present), then shift entries down
 * and insert ref at the current index */
2944 for(i=index; i+1<h->ref_count[list]; i++){
2945 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2948 for(; i > index; i--){
2949 h->ref_list[list][i]= h->ref_list[list][i-1];
2951 h->ref_list[list][index]= *ref;
2954 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* replace any remaining empty slots with the current picture */
2960 for(list=0; list<h->list_count; list++){
2961 for(index= 0; index < h->ref_count[list]; index++){
2962 if(!h->ref_list[list][index].data[0])
2963 h->ref_list[list][index]= s->current_picture;
2967 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
2968 direct_dist_scale_factor(h);
2969 direct_ref_list_init(h);
/**
 * For MBAFF: derive field reference entries from each frame reference.
 * Entry 16+2*i is the top field, 16+2*i+1 the bottom field of frame i
 * (doubled linesize, bottom shifted by one line), and the per-ref
 * weighted/implicit prediction parameters are duplicated accordingly.
 */
2973 static void fill_mbaff_ref_list(H264Context *h){
2975 for(list=0; list<2; list++){ //FIXME try list_count
2976 for(i=0; i<h->ref_count[list]; i++){
2977 Picture *frame = &h->ref_list[list][i];
2978 Picture *field = &h->ref_list[list][16+2*i];
2981 field[0].linesize[j] <<= 1;
2982 field[1] = field[0];
/* bottom field starts one (frame) line below the top field */
2984 field[1].data[j] += frame->linesize[j];
2986 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2987 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2989 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2990 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are indexed by both lists; copy rows and columns */
2994 for(j=0; j<h->ref_count[1]; j++){
2995 for(i=0; i<h->ref_count[0]; i++)
2996 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2997 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2998 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse pred_weight_table() from the slice header (H.264 explicit
 * weighted prediction): log2 denominators, then per-reference optional
 * luma and chroma weight/offset pairs; absent entries get the default
 * weight (1<<denom) and offset 0. Sets h->use_weight/use_weight_chroma
 * when any entry differs from the defaults. List 1 is parsed only for
 * B slices.
 */
3002 static int pred_weight_table(H264Context *h){
3003 MpegEncContext * const s = &h->s;
3005 int luma_def, chroma_def;
3008 h->use_weight_chroma= 0;
3009 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3010 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* default weight = 1.0 in fixed point, i.e. 1 << denom */
3011 luma_def = 1<<h->luma_log2_weight_denom;
3012 chroma_def = 1<<h->chroma_log2_weight_denom;
3014 for(list=0; list<2; list++){
3015 for(i=0; i<h->ref_count[list]; i++){
3016 int luma_weight_flag, chroma_weight_flag;
3018 luma_weight_flag= get_bits1(&s->gb);
3019 if(luma_weight_flag){
3020 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3021 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3022 if( h->luma_weight[list][i] != luma_def
3023 || h->luma_offset[list][i] != 0)
3026 h->luma_weight[list][i]= luma_def;
3027 h->luma_offset[list][i]= 0;
3030 chroma_weight_flag= get_bits1(&s->gb);
3031 if(chroma_weight_flag){
3034 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3035 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3036 if( h->chroma_weight[list][i][j] != chroma_def
3037 || h->chroma_offset[list][i][j] != 0)
3038 h->use_weight_chroma= 1;
3043 h->chroma_weight[list][i][j]= chroma_def;
3044 h->chroma_offset[list][i][j]= 0;
3048 if(h->slice_type != B_TYPE) break;
3050 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Derives the implicit bipred weight table (weighted_bipred_idc == 2)
 * from POC distances, per the H.264 spec's tx/dist_scale_factor formulas.
 * Falls back to equal 32/32 weights when the scale factor is out of range
 * or the POCs are degenerate.
 * NOTE(review): sampled excerpt — the early-return branch bodies and some
 * closing braces are not visible between the numbered lines. */
3054 static void implicit_weight_table(H264Context *h){
3055 MpegEncContext * const s = &h->s;
3057 int cur_poc = s->current_picture_ptr->poc;
// single symmetric reference pair around the current POC: no weighting needed
3059 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3060 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3062 h->use_weight_chroma= 0;
3067 h->use_weight_chroma= 2;
3068 h->luma_log2_weight_denom= 5;
3069 h->chroma_log2_weight_denom= 5;
3071 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3072 int poc0 = h->ref_list[0][ref0].poc;
3073 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3074 int poc1 = h->ref_list[1][ref1].poc;
// td/tb clipping and tx computation follow H.264 8.4.2.3.2
3075 int td = av_clip(poc1 - poc0, -128, 127);
3077 int tb = av_clip(cur_poc - poc0, -128, 127);
3078 int tx = (16384 + (FFABS(td) >> 1)) / td;
3079 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range scale factor -> default to equal weights
3080 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3081 h->implicit_weight[ref0][ref1] = 32;
3083 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3085 h->implicit_weight[ref0][ref1] = 32;
/* Drops the decoder's reference claim on a picture unless it is still
 * pending output (delayed_output_pic or in the delayed_pic[] queue).
 * NOTE(review): sampled excerpt — the branch bodies that actually clear
 * pic->reference are not visible between the numbered lines. */
3090 static inline void unreference_pic(H264Context *h, Picture *pic){
3093 if(pic == h->delayed_output_pic)
// scan the NULL-terminated delayed-picture queue for this picture
3096 for(i = 0; h->delayed_pic[i]; i++)
3097 if(pic == h->delayed_pic[i]){
3105 * instantaneous decoder refresh.
/* Clears all long-term and short-term reference pictures, as required at
 * an IDR access unit. */
3107 static void idr(H264Context *h){
// release every occupied long-term slot (fixed table of 16 entries)
3110 for(i=0; i<16; i++){
3111 if (h->long_ref[i] != NULL) {
3112 unreference_pic(h, h->long_ref[i]);
3113 h->long_ref[i]= NULL;
3116 h->long_ref_count=0;
// release all short-term references as well
3118 for(i=0; i<h->short_ref_count; i++){
3119 unreference_pic(h, h->short_ref[i]);
3120 h->short_ref[i]= NULL;
3122 h->short_ref_count=0;
3125 /* forget old pics after a seek */
/* AVCodecContext.flush callback: drops all delayed/output-pending pictures
 * and the current picture so decoding can restart cleanly after a seek. */
3126 static void flush_dpb(AVCodecContext *avctx){
3127 H264Context *h= avctx->priv_data;
3129 for(i=0; i<16; i++) {
3130 if(h->delayed_pic[i])
3131 h->delayed_pic[i]->reference= 0;
3132 h->delayed_pic[i]= NULL;
3134 if(h->delayed_output_pic)
3135 h->delayed_output_pic->reference= 0;
3136 h->delayed_output_pic= NULL;
// NOTE(review): the full function presumably also calls idr()/MPEG flush
// helpers in lines elided from this sampled excerpt — confirm upstream.
3138 if(h->s.current_picture_ptr)
3139 h->s.current_picture_ptr->reference= 0;
3144 * @return the removed picture or NULL if an error occurs
/* Removes (and returns) the short-term reference whose frame_num matches,
 * compacting the short_ref[] array to stay contiguous. */
3146 static Picture * remove_short(H264Context *h, int frame_num){
3147 MpegEncContext * const s = &h->s;
3150 if(s->avctx->debug&FF_DEBUG_MMCO)
3151 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3153 for(i=0; i<h->short_ref_count; i++){
3154 Picture *pic= h->short_ref[i];
3155 if(s->avctx->debug&FF_DEBUG_MMCO)
3156 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3157 if(pic->frame_num == frame_num){
3158 h->short_ref[i]= NULL;
// close the gap left by the removed entry (count was just decremented)
3159 if (--h->short_ref_count)
3160 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3169 * @return the removed picture or NULL if an error occurs
3171 static Picture * remove_long(H264Context *h, int i){
3174 pic= h->long_ref[i];
3175 h->long_ref[i]= NULL;
3176 if(pic) h->long_ref_count--;
3182 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3184 static void print_short_term(H264Context *h) {
3186 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3187 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3188 for(i=0; i<h->short_ref_count; i++){
3189 Picture *pic= h->short_ref[i];
3190 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3196 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
 * is enabled; no effect otherwise.  NOTE(review): the NULL check for each
 * slot is elided in this sampled excerpt — confirm it guards line 3205. */
3198 static void print_long_term(H264Context *h) {
3200 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3201 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3202 for(i = 0; i < 16; i++){
3203 Picture *pic= h->long_ref[i];
3205 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3212 * Executes the reference picture marking (memory management control operations).
3214 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3215 MpegEncContext * const s = &h->s;
3217 int current_is_long=0;
3220 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3221 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3223 for(i=0; i<mmco_count; i++){
3224 if(s->avctx->debug&FF_DEBUG_MMCO)
3225 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3227 switch(mmco[i].opcode){
3228 case MMCO_SHORT2UNUSED:
3229 pic= remove_short(h, mmco[i].short_frame_num);
3231 unreference_pic(h, pic);
3232 else if(s->avctx->debug&FF_DEBUG_MMCO)
3233 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
3235 case MMCO_SHORT2LONG:
3236 pic= remove_long(h, mmco[i].long_index);
3237 if(pic) unreference_pic(h, pic);
3239 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3240 if (h->long_ref[ mmco[i].long_index ]){
3241 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3242 h->long_ref_count++;
3245 case MMCO_LONG2UNUSED:
3246 pic= remove_long(h, mmco[i].long_index);
3248 unreference_pic(h, pic);
3249 else if(s->avctx->debug&FF_DEBUG_MMCO)
3250 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
3253 pic= remove_long(h, mmco[i].long_index);
3254 if(pic) unreference_pic(h, pic);
3256 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3257 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3258 h->long_ref_count++;
3262 case MMCO_SET_MAX_LONG:
3263 assert(mmco[i].long_index <= 16);
3264 // just remove the long term which index is greater than new max
3265 for(j = mmco[i].long_index; j<16; j++){
3266 pic = remove_long(h, j);
3267 if (pic) unreference_pic(h, pic);
3271 while(h->short_ref_count){
3272 pic= remove_short(h, h->short_ref[0]->frame_num);
3273 if(pic) unreference_pic(h, pic);
3275 for(j = 0; j < 16; j++) {
3276 pic= remove_long(h, j);
3277 if(pic) unreference_pic(h, pic);
3284 if(!current_is_long){
3285 pic= remove_short(h, s->current_picture_ptr->frame_num);
3287 unreference_pic(h, pic);
3288 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3291 if(h->short_ref_count)
3292 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3294 h->short_ref[0]= s->current_picture_ptr;
3295 h->short_ref[0]->long_ref=0;
3296 h->short_ref_count++;
3299 print_short_term(h);
/* Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices it reads no_output_of_prior_pics/long_term_reference flags;
 * otherwise it reads the adaptive MMCO list, or synthesizes a sliding-window
 * MMCO_SHORT2UNUSED when the reference buffer is full.
 * NOTE(review): sampled excerpt — error returns and mmco_index bookkeeping
 * are not visible between the numbered lines. */
3304 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3305 MpegEncContext * const s = &h->s;
3308 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// get_bits1() - 1 maps flag {0,1} to {-1,0}
3309 s->broken_link= get_bits1(gb) -1;
3310 h->mmco[0].long_index= get_bits1(gb) - 1; // current_long_term_idx
3311 if(h->mmco[0].long_index == -1)
3314 h->mmco[0].opcode= MMCO_LONG;
3318 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3319 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3320 MMCOOpcode opcode= get_ue_golomb(gb);
3322 h->mmco[i].opcode= opcode;
3323 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame_num, wrapped to its range
3324 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
3325 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
3326 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3330 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3331 unsigned int long_index= get_ue_golomb(gb);
3332 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
3333 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3336 h->mmco[i].long_index= long_index;
3339 if(opcode > (unsigned)MMCO_LONG){
3340 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3343 if(opcode == MMCO_END)
// sliding window: when the DPB is full, drop the oldest short-term ref
3348 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3350 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
3351 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3352 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* Computes the picture order count (POC) for the current picture using
 * whichever of the three H.264 poc_type derivations the SPS selects,
 * then stores the field/frame POCs into the current Picture.
 * NOTE(review): sampled excerpt — field_poc declaration, several else
 * branches and poc_type==0 lsb/msb setup lines are not visible here. */
3362 static int init_poc(H264Context *h){
3363 MpegEncContext * const s = &h->s;
3364 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3367 if(h->nal_unit_type == NAL_IDR_SLICE){
3368 h->frame_num_offset= 0;
// frame_num wrapped since the previous picture -> advance the offset
3370 if(h->frame_num < h->prev_frame_num)
3371 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3373 h->frame_num_offset= h->prev_frame_num_offset;
3376 if(h->sps.poc_type==0){
3377 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3379 if(h->nal_unit_type == NAL_IDR_SLICE){
// standard poc_msb wrap detection (H.264 8.2.1.1)
3384 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3385 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3386 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3387 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3389 h->poc_msb = h->prev_poc_msb;
3390 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3392 field_poc[1] = h->poc_msb + h->poc_lsb;
3393 if(s->picture_structure == PICT_FRAME)
3394 field_poc[1] += h->delta_poc_bottom;
3395 }else if(h->sps.poc_type==1){
3396 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3399 if(h->sps.poc_cycle_length != 0)
3400 abs_frame_num = h->frame_num_offset + h->frame_num;
3404 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3407 expected_delta_per_poc_cycle = 0;
3408 for(i=0; i < h->sps.poc_cycle_length; i++)
3409 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3411 if(abs_frame_num > 0){
3412 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3413 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3415 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3416 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3417 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3421 if(h->nal_ref_idc == 0)
3422 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3424 field_poc[0] = expectedpoc + h->delta_poc[0];
3425 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3427 if(s->picture_structure == PICT_FRAME)
3428 field_poc[1] += h->delta_poc[1];
// poc_type == 2: POC derived directly from frame_num
3431 if(h->nal_unit_type == NAL_IDR_SLICE){
3434 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3435 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
3441 if(s->picture_structure != PICT_BOTTOM_FIELD)
3442 s->current_picture_ptr->field_poc[0]= field_poc[0];
3443 if(s->picture_structure != PICT_TOP_FIELD)
3444 s->current_picture_ptr->field_poc[1]= field_poc[1];
// frame POC is the smaller of the two field POCs
3445 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
3446 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
3453 * initialize scan tables
3455 static void init_scan_tables(H264Context *h){
3456 MpegEncContext * const s = &h->s;
3458 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3459 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3460 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3462 for(i=0; i<16; i++){
3463 #define T(x) (x>>2) | ((x<<2) & 0xF)
3464 h->zigzag_scan[i] = T(zigzag_scan[i]);
3465 h-> field_scan[i] = T( field_scan[i]);
3469 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3470 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3471 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3472 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3473 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3475 for(i=0; i<64; i++){
3476 #define T(x) (x>>3) | ((x&7)<<3)
3477 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3478 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3479 h->field_scan8x8[i] = T(field_scan8x8[i]);
3480 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3484 if(h->sps.transform_bypass){ //FIXME same ugly
3485 h->zigzag_scan_q0 = zigzag_scan;
3486 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3487 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3488 h->field_scan_q0 = field_scan;
3489 h->field_scan8x8_q0 = field_scan8x8;
3490 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3492 h->zigzag_scan_q0 = h->zigzag_scan;
3493 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3494 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3495 h->field_scan_q0 = h->field_scan;
3496 h->field_scan8x8_q0 = h->field_scan8x8;
3497 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3502 * Replicates H264 "master" context to thread contexts.
3504 static void clone_slice(H264Context *dst, H264Context *src)
3506 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3507 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3508 dst->s.current_picture = src->s.current_picture;
3509 dst->s.linesize = src->s.linesize;
3510 dst->s.uvlinesize = src->s.uvlinesize;
3512 dst->prev_poc_msb = src->prev_poc_msb;
3513 dst->prev_poc_lsb = src->prev_poc_lsb;
3514 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3515 dst->prev_frame_num = src->prev_frame_num;
3516 dst->short_ref_count = src->short_ref_count;
3518 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3519 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3520 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3521 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3523 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3524 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3528 * decodes a slice header.
3529 * this will also call MPV_common_init() and frame_start() as needed
3531 * @param h h264context
3532 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3534 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* NOTE(review): this is a sampled excerpt — numerous error returns,
 * else-branches and closing braces fall between the numbered lines and are
 * not visible here. */
3536 static int decode_slice_header(H264Context *h, H264Context *h0){
3537 MpegEncContext * const s = &h->s;
3538 unsigned int first_mb_in_slice;
3539 unsigned int pps_id;
3540 int num_ref_idx_active_override_flag;
3541 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3542 unsigned int slice_type, tmp, i;
3543 int default_ref_list_done = 0;
// a picture is a reference iff nal_ref_idc != 0; dropable is its inverse
3545 s->current_picture.reference= h->nal_ref_idc != 0;
3546 s->dropable= h->nal_ref_idc == 0;
3548 first_mb_in_slice= get_ue_golomb(&s->gb);
3550 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3551 h0->current_slice = 0;
3552 s->current_picture_ptr= NULL;
// slice_type values 5..9 mean "fixed for the whole picture" (value - 5)
3555 slice_type= get_ue_golomb(&s->gb);
3557 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3562 h->slice_type_fixed=1;
3564 h->slice_type_fixed=0;
3566 slice_type= slice_type_map[ slice_type ];
// the default ref list can be reused for I slices and repeated slice types
3567 if (slice_type == I_TYPE
3568 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3569 default_ref_list_done = 1;
3571 h->slice_type= slice_type;
3573 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// --- PPS/SPS activation ---
3575 pps_id= get_ue_golomb(&s->gb);
3576 if(pps_id>=MAX_PPS_COUNT){
3577 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3580 if(!h0->pps_buffers[pps_id]) {
3581 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3584 h->pps= *h0->pps_buffers[pps_id];
3586 if(!h0->sps_buffers[h->pps.sps_id]) {
3587 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3590 h->sps = *h0->sps_buffers[h->pps.sps_id];
// dequant tables depend on the PPS; rebuild only when it changed
3592 if(h == h0 && h->dequant_coeff_pps != pps_id){
3593 h->dequant_coeff_pps = pps_id;
3594 init_dequant_tables(h);
// --- geometry from the SPS (cropping offsets are in 2-pixel units) ---
3597 s->mb_width= h->sps.mb_width;
3598 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3600 h->b_stride= s->mb_width*4;
3601 h->b8_stride= s->mb_width*2;
3603 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3604 if(h->sps.frame_mbs_only_flag)
3605 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3607 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3609 if (s->context_initialized
3610 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3612 return -1; // width / height changed during parallelized decoding
// --- lazy (re)initialization of the MPEG context and thread contexts ---
3616 if (!s->context_initialized) {
3618 return -1; // we cant (re-)initialize context during parallel decoding
3619 if (MPV_common_init(s) < 0)
3622 init_scan_tables(h);
3625 for(i = 1; i < s->avctx->thread_count; i++) {
3627 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
// copy only the MpegEncContext part; the H264-specific tail is re-derived
3628 memcpy(c, h, sizeof(MpegEncContext));
3629 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3632 init_scan_tables(c);
3636 for(i = 0; i < s->avctx->thread_count; i++)
3637 if(context_init(h->thread_context[i]) < 0)
3640 s->avctx->width = s->width;
3641 s->avctx->height = s->height;
3642 s->avctx->sample_aspect_ratio= h->sps.sar;
3643 if(!s->avctx->sample_aspect_ratio.den)
3644 s->avctx->sample_aspect_ratio.den = 1;
3646 if(h->sps.timing_info_present_flag){
3647 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// workaround for old x264 builds that wrote half the timescale
3648 if(h->x264_build > 0 && h->x264_build < 44)
3649 s->avctx->time_base.den *= 2;
3650 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3651 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3655 if(h0->current_slice == 0){
3656 if(frame_start(h) < 0)
3662 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
3663 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// --- picture structure (PAFF fields are parsed but not implemented) ---
3666 h->mb_aff_frame = 0;
3667 if(h->sps.frame_mbs_only_flag){
3668 s->picture_structure= PICT_FRAME;
3670 if(get_bits1(&s->gb)) { //field_pic_flag
3671 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3672 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
3674 s->picture_structure= PICT_FRAME;
3675 h->mb_aff_frame = h->sps.mb_aff;
3678 assert(s->mb_num == s->mb_width * s->mb_height);
3679 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
3680 first_mb_in_slice >= s->mb_num){
3681 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3684 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3685 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
3686 assert(s->mb_y < s->mb_height);
// fields use 2*frame_num and one extra pic_num bit
3688 if(s->picture_structure==PICT_FRAME){
3689 h->curr_pic_num= h->frame_num;
3690 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3692 h->curr_pic_num= 2*h->frame_num;
3693 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3696 if(h->nal_unit_type == NAL_IDR_SLICE){
3697 get_ue_golomb(&s->gb); /* idr_pic_id */
// --- POC-related slice-header fields ---
3700 if(h->sps.poc_type==0){
3701 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3703 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3704 h->delta_poc_bottom= get_se_golomb(&s->gb);
3708 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3709 h->delta_poc[0]= get_se_golomb(&s->gb);
3711 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3712 h->delta_poc[1]= get_se_golomb(&s->gb);
3717 if(h->pps.redundant_pic_cnt_present){
3718 h->redundant_pic_count= get_ue_golomb(&s->gb);
3721 //set defaults, might be overriden a few line later
3722 h->ref_count[0]= h->pps.ref_count[0];
3723 h->ref_count[1]= h->pps.ref_count[1];
3725 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
3726 if(h->slice_type == B_TYPE){
3727 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3728 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
3729 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
3731 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3733 if(num_ref_idx_active_override_flag){
3734 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3735 if(h->slice_type==B_TYPE)
3736 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// unsigned trick: also catches ref_count == 0 (wraps to a huge value)
3738 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3739 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3740 h->ref_count[0]= h->ref_count[1]= 1;
3744 if(h->slice_type == B_TYPE)
// --- reference list construction, reordering and weighting ---
3751 if(!default_ref_list_done){
3752 fill_default_ref_list(h);
3755 if(decode_ref_pic_list_reordering(h) < 0)
3758 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
3759 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
3760 pred_weight_table(h);
3761 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
3762 implicit_weight_table(h);
// ref pic marking is parsed on the master context (h0)
3766 if(s->current_picture.reference)
3767 decode_ref_pic_marking(h0, &s->gb);
3770 fill_mbaff_ref_list(h);
3772 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
3773 tmp = get_ue_golomb(&s->gb);
3775 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3778 h->cabac_init_idc= tmp;
3781 h->last_qscale_diff = 0;
3782 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3784 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3788 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3789 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3790 //FIXME qscale / qp ... stuff
3791 if(h->slice_type == SP_TYPE){
3792 get_bits1(&s->gb); /* sp_for_switch_flag */
3794 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
3795 get_se_golomb(&s->gb); /* slice_qs_delta */
// --- deblocking filter parameters ---
3798 h->deblocking_filter = 1;
3799 h->slice_alpha_c0_offset = 0;
3800 h->slice_beta_offset = 0;
3801 if( h->pps.deblocking_filter_parameters_present ) {
3802 tmp= get_ue_golomb(&s->gb);
3804 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
// bitstream idc: 0 = on, 1 = off -> swap to the internal 1 = on convention
3807 h->deblocking_filter= tmp;
3808 if(h->deblocking_filter < 2)
3809 h->deblocking_filter^= 1; // 1<->0
3811 if( h->deblocking_filter ) {
3812 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3813 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// cross-slice deblocking (type 1) cannot run with parallel slice contexts
3817 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3818 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3819 /* Cheat slightly for speed:
3820 Don't bother to deblock across slices */
3821 h->deblocking_filter = 2;
3823 h0->max_contexts = 1;
3824 if(!h0->single_decode_warning) {
3825 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3826 h0->single_decode_warning = 1;
3829 return 1; // deblocking switched inside frame
3833 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3834 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
3835 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
3836 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3837 h->deblocking_filter= 0;
3840 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
// NOTE(review): the literal `?` is a placeholder; presumably this line sits
// inside a disabled #if in the full source (FMO unimplemented) — confirm.
3841 slice_group_change_cycle= get_bits(&s->gb, ?);
3844 h0->last_slice_type = slice_type;
3845 h->slice_num = ++h0->current_slice;
3847 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3848 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
3850 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3851 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
3853 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3855 av_get_pict_type_char(h->slice_type),
3856 pps_id, h->frame_num,
3857 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3858 h->ref_count[0], h->ref_count[1],
3860 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
3862 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// non-reference frames may use the cheaper 2-tap qpel interpolation
3866 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
3867 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3868 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3870 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3871 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/* Reads a CAVLC level_prefix: the number of leading zero bits before the
 * terminating 1, decoded via the bitstream-reader cache macros.
 * NOTE(review): sampled excerpt — the debug-print guard and the final
 * return statement are not visible between the numbered lines. */
3880 static inline int get_level_prefix(GetBitContext *gb){
3884 OPEN_READER(re, gb);
3885 UPDATE_CACHE(re, gb);
3886 buf=GET_CACHE(re, gb);
// position of the first set bit from the MSB side gives prefix length + 1
3888 log= 32 - av_log2(buf);
3890 print_bin(buf>>(32-log), log);
3891 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
3894 LAST_SKIP_BITS(re, gb, log);
3895 CLOSE_READER(re, gb);
/* Returns whether an 8x8 transform may be used for the current macroblock:
 * every sub-partition must be 8x8 (or direct with 8x8 inference enabled).
 * NOTE(review): sampled excerpt — the loop header and return statements
 * are not visible between the numbered lines. */
3900 static inline int get_dct8x8_allowed(H264Context *h){
3903 if(!IS_SUB_8X8(h->sub_mb_type[i])
3904 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
3911 * decodes a residual block.
3912 * @param n block index
3913 * @param scantable scantable
3914 * @param max_coeff number of coefficients in the block
3915 * @return <0 if an error occurred
/* CAVLC residual decoding: coeff_token (trailing ones + total coefficients),
 * then levels, total_zeros and run_before values; finally the coefficients
 * are scattered into `block` via `scantable`, optionally dequantized by
 * `qmul`.  NOTE(review): sampled excerpt — declarations, some braces and
 * a few branch lines are not visible between the numbered lines. */
3917 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
3918 MpegEncContext * const s = &h->s;
// maps the predicted nnz count to one of the 4 coeff_token VLC tables
3919 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
3921 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
3923 //FIXME put trailing_ones into the context
// chroma DC uses its own small coeff_token table; other blocks pick a table
// from the predicted non-zero count of neighbouring blocks
3925 if(n == CHROMA_DC_BLOCK_INDEX){
3926 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
3927 total_coeff= coeff_token>>2;
3929 if(n == LUMA_DC_BLOCK_INDEX){
3930 total_coeff= pred_non_zero_count(h, 0);
3931 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3932 total_coeff= coeff_token>>2;
3934 total_coeff= pred_non_zero_count(h, n);
3935 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3936 total_coeff= coeff_token>>2;
3937 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
3941 //FIXME set last_non_zero?
3945 if(total_coeff > (unsigned)max_coeff) {
3946 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// low 2 bits of coeff_token encode the trailing +/-1 count
3950 trailing_ones= coeff_token&3;
3951 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
3952 assert(total_coeff<=16);
// trailing ones need only a sign bit each: 0 -> +1, 1 -> -1
3954 for(i=0; i<trailing_ones; i++){
3955 level[i]= 1 - 2*get_bits1(gb);
3959 int level_code, mask;
3960 int suffix_length = total_coeff > 10 && trailing_ones < 3;
3961 int prefix= get_level_prefix(gb);
3963 //first coefficient has suffix_length equal to 0 or 1
3964 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
3966 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3968 level_code= (prefix<<suffix_length); //part
3969 }else if(prefix==14){
3971 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3973 level_code= prefix + get_bits(gb, 4); //part
3974 }else if(prefix==15){
3975 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
3976 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
3978 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// with <3 trailing ones, levels +/-1 cannot occur here, so bias by 2
3982 if(trailing_ones < 3) level_code += 2;
// map even/odd level_code to positive/negative level (branchless sign)
3987 mask= -(level_code&1);
3988 level[i]= (((2+level_code)>>1) ^ mask) - mask;
3991 //remaining coefficients have suffix_length > 0
3992 for(;i<total_coeff;i++) {
// thresholds at which suffix_length grows for subsequent levels
3993 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
3994 prefix = get_level_prefix(gb);
3996 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
3997 }else if(prefix==15){
3998 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4000 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4003 mask= -(level_code&1);
4004 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4005 if(level_code > suffix_limit[suffix_length])
// a full block has no zeros to distribute; otherwise decode total_zeros
4010 if(total_coeff == max_coeff)
4013 if(n == CHROMA_DC_BLOCK_INDEX)
4014 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4016 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// two scatter loops: plain store (qmul == NULL path) vs dequantizing store
4019 coeff_num = zeros_left + total_coeff - 1;
4020 j = scantable[coeff_num];
4022 block[j] = level[0];
4023 for(i=1;i<total_coeff;i++) {
4026 else if(zeros_left < 7){
4027 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4029 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4031 zeros_left -= run_before;
4032 coeff_num -= 1 + run_before;
4033 j= scantable[ coeff_num ];
4038 block[j] = (level[0] * qmul[j] + 32)>>6;
4039 for(i=1;i<total_coeff;i++) {
4042 else if(zeros_left < 7){
4043 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4045 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4047 zeros_left -= run_before;
4048 coeff_num -= 1 + run_before;
4049 j= scantable[ coeff_num ];
4051 block[j]= (level[i] * qmul[j] + 32)>>6;
4056 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predicts mb_field_decoding_flag for a skipped MBAFF pair from the left
 * neighbour if it belongs to this slice, else from the above neighbour,
 * defaulting to frame (progressive) coding otherwise. */
4063 static void predict_field_decoding_flag(H264Context *h){
4064 MpegEncContext * const s = &h->s;
4065 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// left neighbour first, then top; both must be in the current slice
4066 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4067 ? s->current_picture.mb_type[mb_xy-1]
4068 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4069 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4071 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4075 * decodes a P_SKIP or B_SKIP macroblock
4077 static void decode_mb_skip(H264Context *h){
4078 MpegEncContext * const s = &h->s;
4079 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4082 memset(h->non_zero_count[mb_xy], 0, 16);
4083 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4086 mb_type|= MB_TYPE_INTERLACED;
4088 if( h->slice_type == B_TYPE )
4090 // just for fill_caches. pred_direct_motion will set the real mb_type
4091 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4093 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4094 pred_direct_motion(h, &mb_type);
4095 mb_type|= MB_TYPE_SKIP;
4100 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4102 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4103 pred_pskip_motion(h, &mx, &my);
4104 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4105 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4108 write_back_motion(h, mb_type);
4109 s->current_picture.mb_type[mb_xy]= mb_type;
4110 s->current_picture.qscale_table[mb_xy]= s->qscale;
4111 h->slice_table[ mb_xy ]= h->slice_num;
4112 h->prev_mb_skipped= 1;
4116 * decodes a macroblock
4117 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * CAVLC macroblock layer: skip-run handling, mb_type, intra prediction modes
 * or inter refs+MVs, CBP, delta-QP and finally the residual blocks.
 * NOTE(review): this listing is elided — some original lines are not shown.
 */
4119 static int decode_mb_cavlc(H264Context *h){
4120 MpegEncContext * const s = &h->s;
4121 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4122 int partition_count;
4123 unsigned int mb_type, cbp;
4124 int dct8x8_allowed= h->pps.transform_8x8_mode;
4126 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4128 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4129 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* ---- skip-run handling (P/B slices only) ---- */
4131 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4132 if(s->mb_skip_run==-1)
4133 s->mb_skip_run= get_ue_golomb(&s->gb);
4135 if (s->mb_skip_run--) {
/* MBAFF: the field flag of a skipped top MB is read with the pair */
4136 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4137 if(s->mb_skip_run==0)
4138 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4140 predict_field_decoding_flag(h);
4147 if( (s->mb_y&1) == 0 )
4148 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4150 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4152 h->prev_mb_skipped= 0;
/* ---- mb_type: remapped through the per-slice-type info tables ---- */
4154 mb_type= get_ue_golomb(&s->gb);
4155 if(h->slice_type == B_TYPE){
4157 partition_count= b_mb_type_info[mb_type].partition_count;
4158 mb_type= b_mb_type_info[mb_type].type;
/* large mb_type values in P/B slices select intra modes */
4161 goto decode_intra_mb;
4163 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4165 partition_count= p_mb_type_info[mb_type].partition_count;
4166 mb_type= p_mb_type_info[mb_type].type;
4169 goto decode_intra_mb;
4172 assert(h->slice_type == I_TYPE);
4175 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4179 cbp= i_mb_type_info[mb_type].cbp;
4180 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4181 mb_type= i_mb_type_info[mb_type].type;
4185 mb_type |= MB_TYPE_INTERLACED;
4187 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- I_PCM: raw byte-aligned samples, no prediction/transform ---- */
4189 if(IS_INTRA_PCM(mb_type)){
4192 // We assume these blocks are very rare so we do not optimize it.
4193 align_get_bits(&s->gb);
4195 // The pixels are stored in the same order as levels in h->mb array.
4196 for(y=0; y<16; y++){
4197 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4198 for(x=0; x<16; x++){
4199 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4200 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4204 const int index= 256 + 4*(y&3) + 32*(y>>2);
4206 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4207 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4211 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4213 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4214 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4218 // In deblocking, the quantizer is 0
4219 s->current_picture.qscale_table[mb_xy]= 0;
4220 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4221 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4222 // All coeffs are present
4223 memset(h->non_zero_count[mb_xy], 16, 16);
4225 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MBs address references per field: double the counts */
4230 h->ref_count[0] <<= 1;
4231 h->ref_count[1] <<= 1;
4234 fill_caches(h, mb_type, 0);
/* ---- intra prediction modes ---- */
4237 if(IS_INTRA(mb_type)){
4239 // init_top_left_availability(h);
4240 if(IS_INTRA4x4(mb_type)){
4243 if(dct8x8_allowed && get_bits1(&s->gb)){
4244 mb_type |= MB_TYPE_8x8DCT;
4248 // fill_intra4x4_pred_table(h);
4249 for(i=0; i<16; i+=di){
4250 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag==0: rem_mode skips the predicted mode */
4252 if(!get_bits1(&s->gb)){
4253 const int rem_mode= get_bits(&s->gb, 3);
4254 mode = rem_mode + (rem_mode >= mode);
4258 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4260 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4262 write_back_intra_pred_mode(h);
4263 if( check_intra4x4_pred_mode(h) < 0)
4266 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4267 if(h->intra16x16_pred_mode < 0)
4271 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4274 h->chroma_pred_mode= pred_mode;
/* ---- inter, 8x8 partitions: sub_mb_type, refs and per-sub-block MVs ---- */
4275 }else if(partition_count==4){
4276 int i, j, sub_partition_count[4], list, ref[2][4];
4278 if(h->slice_type == B_TYPE){
4280 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4281 if(h->sub_mb_type[i] >=13){
4282 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4285 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4286 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4288 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4289 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4290 pred_direct_motion(h, &mb_type);
/* mark interior edges unavailable so MV prediction ignores them */
4291 h->ref_cache[0][scan8[4]] =
4292 h->ref_cache[1][scan8[4]] =
4293 h->ref_cache[0][scan8[12]] =
4294 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4297 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4299 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4300 if(h->sub_mb_type[i] >=4){
4301 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4304 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4305 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices, one per 8x8 (te0 coded; 1 forces ref 0 for REF0 MBs) */
4309 for(list=0; list<h->list_count; list++){
4310 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4312 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4313 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4314 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4316 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4328 dct8x8_allowed = get_dct8x8_allowed(h);
4330 for(list=0; list<h->list_count; list++){
4332 if(IS_DIRECT(h->sub_mb_type[i])) {
4333 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4336 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4337 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4339 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4340 const int sub_mb_type= h->sub_mb_type[i];
4341 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4342 for(j=0; j<sub_partition_count[i]; j++){
4344 const int index= 4*i + block_width*j;
4345 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
/* MV = median prediction + coded delta */
4346 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4347 mx += get_se_golomb(&s->gb);
4348 my += get_se_golomb(&s->gb);
4349 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV over the cache cells the sub-partition covers */
4351 if(IS_SUB_8X8(sub_mb_type)){
4353 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4355 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4356 }else if(IS_SUB_8X4(sub_mb_type)){
4357 mv_cache[ 1 ][0]= mx;
4358 mv_cache[ 1 ][1]= my;
4359 }else if(IS_SUB_4X8(sub_mb_type)){
4360 mv_cache[ 8 ][0]= mx;
4361 mv_cache[ 8 ][1]= my;
4363 mv_cache[ 0 ][0]= mx;
4364 mv_cache[ 0 ][1]= my;
4367 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4373 }else if(IS_DIRECT(mb_type)){
4374 pred_direct_motion(h, &mb_type);
4375 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- inter, 16x16 / 16x8 / 8x16 partitions ---- */
4377 int list, mx, my, i;
4378 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4379 if(IS_16X16(mb_type)){
4380 for(list=0; list<h->list_count; list++){
4382 if(IS_DIR(mb_type, 0, list)){
4383 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4384 if(val >= h->ref_count[list]){
4385 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4389 val= LIST_NOT_USED&0xFF;
4390 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4392 for(list=0; list<h->list_count; list++){
4394 if(IS_DIR(mb_type, 0, list)){
4395 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4396 mx += get_se_golomb(&s->gb);
4397 my += get_se_golomb(&s->gb);
4398 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4400 val= pack16to32(mx,my);
4403 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4406 else if(IS_16X8(mb_type)){
4407 for(list=0; list<h->list_count; list++){
4410 if(IS_DIR(mb_type, i, list)){
4411 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4412 if(val >= h->ref_count[list]){
4413 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4417 val= LIST_NOT_USED&0xFF;
4418 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4421 for(list=0; list<h->list_count; list++){
4424 if(IS_DIR(mb_type, i, list)){
4425 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4426 mx += get_se_golomb(&s->gb);
4427 my += get_se_golomb(&s->gb);
4428 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4430 val= pack16to32(mx,my);
4433 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4437 assert(IS_8X16(mb_type));
4438 for(list=0; list<h->list_count; list++){
4441 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4442 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4443 if(val >= h->ref_count[list]){
4444 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4448 val= LIST_NOT_USED&0xFF;
4449 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4452 for(list=0; list<h->list_count; list++){
4455 if(IS_DIR(mb_type, i, list)){
4456 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4457 mx += get_se_golomb(&s->gb);
4458 my += get_se_golomb(&s->gb);
4459 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4461 val= pack16to32(mx,my);
4464 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4470 if(IS_INTER(mb_type))
4471 write_back_motion(h, mb_type);
/* ---- coded block pattern (mapped from the Exp-Golomb index) ---- */
4473 if(!IS_INTRA16x16(mb_type)){
4474 cbp= get_ue_golomb(&s->gb);
4476 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4480 if(IS_INTRA4x4(mb_type))
4481 cbp= golomb_to_intra4x4_cbp[cbp];
4483 cbp= golomb_to_inter_cbp[cbp];
/* transform_size_8x8_flag (inter case; only when luma residual present) */
4487 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4488 if(get_bits1(&s->gb))
4489 mb_type |= MB_TYPE_8x8DCT;
4491 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residual: delta-QP then luma/chroma coefficient blocks ---- */
4493 if(cbp || IS_INTRA16x16(mb_type)){
4494 int i8x8, i4x4, chroma_idx;
4496 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4497 const uint8_t *scan, *scan8x8, *dc_scan;
4499 // fill_non_zero_count_cache(h);
4501 if(IS_INTERLACED(mb_type)){
4502 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4503 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4504 dc_scan= luma_dc_field_scan;
4506 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4507 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4508 dc_scan= luma_dc_zigzag_scan;
4511 dquant= get_se_golomb(&s->gb);
4513 if( dquant > 25 || dquant < -26 ){
4514 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 per the spec */
4518 s->qscale += dquant;
4519 if(((unsigned)s->qscale) > 51){
4520 if(s->qscale<0) s->qscale+= 52;
4521 else s->qscale-= 52;
4524 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4525 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4526 if(IS_INTRA16x16(mb_type)){
/* Intra16x16: separate luma DC plane, then 15-coeff AC blocks */
4527 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4528 return -1; //FIXME continue if partitioned and other return -1 too
4531 assert((cbp&15) == 0 || (cbp&15) == 15);
4534 for(i8x8=0; i8x8<4; i8x8++){
4535 for(i4x4=0; i4x4<4; i4x4++){
4536 const int index= i4x4 + 4*i8x8;
4537 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4543 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* non-Intra16x16: 4x4 or 8x8 luma blocks as selected by cbp bits */
4546 for(i8x8=0; i8x8<4; i8x8++){
4547 if(cbp & (1<<i8x8)){
4548 if(IS_8x8DCT(mb_type)){
4549 DCTELEM *buf = &h->mb[64*i8x8];
4551 for(i4x4=0; i4x4<4; i4x4++){
4552 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4553 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4556 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4557 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4559 for(i4x4=0; i4x4<4; i4x4++){
4560 const int index= i4x4 + 4*i8x8;
4562 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4568 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4569 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma: 2x2 DC blocks (unquantized here) then 15-coeff AC blocks */
4575 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4576 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4582 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4583 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4584 for(i4x4=0; i4x4<4; i4x4++){
4585 const int index= 16 + 4*chroma_idx + i4x4;
4586 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4592 uint8_t * const nnz= &h->non_zero_count_cache[0];
4593 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4594 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4597 uint8_t * const nnz= &h->non_zero_count_cache[0];
4598 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4599 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4600 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4602 s->current_picture.qscale_table[mb_xy]= s->qscale;
4603 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling done above */
4606 h->ref_count[0] >>= 1;
4607 h->ref_count[1] >>= 1;
/*
 * Decodes mb_field_decoding_flag (CABAC). The context (0..2) counts how many
 * of the left/top neighbouring MB pairs in the same slice are field-coded.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4613 static int decode_cabac_field_decoding_flag(H264Context *h) {
4614 MpegEncContext * const s = &h->s;
4615 const int mb_x = s->mb_x;
/* address the top MB of the current MB pair */
4616 const int mb_y = s->mb_y & ~1;
4617 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4618 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4620 unsigned int ctx = 0;
4622 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4625 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
/* state 70..72 = mb_field_decoding_flag contexts */
4629 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/*
 * Decodes an intra mb_type with CABAC: returns 0 for I_4x4, 25 for I_PCM,
 * or 1..24 encoding the I_16x16 variant (cbp_luma, cbp_chroma, pred mode).
 * ctx_base selects the state group; intra_slice adds the neighbour-derived
 * context increment used only in I slices.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4632 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4633 uint8_t *state= &h->cabac_state[ctx_base];
4637 MpegEncContext * const s = &h->s;
4638 const int mba_xy = h->left_mb_xy[0];
4639 const int mbb_xy = h->top_mb_xy;
/* context increment: count non-I4x4 neighbours in the same slice */
4641 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4643 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4645 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4646 return 0; /* I4x4 */
4649 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4650 return 0; /* I4x4 */
/* terminate symbol distinguishes I_PCM */
4653 if( get_cabac_terminate( &h->cabac ) )
4654 return 25; /* PCM */
4656 mb_type = 1; /* I16x16 */
4657 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4658 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4659 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
/* two bits of intra16x16 prediction mode */
4660 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4661 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/*
 * Decodes mb_type with CABAC for I, P and B slices; the returned index is
 * later remapped through i_/p_/b_mb_type_info by the caller.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4665 static int decode_cabac_mb_type( H264Context *h ) {
4666 MpegEncContext * const s = &h->s;
4668 if( h->slice_type == I_TYPE ) {
4669 return decode_cabac_intra_mb_type(h, 3, 1);
4670 } else if( h->slice_type == P_TYPE ) {
/* state 14: inter (0) vs intra (1) macroblock */
4671 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4673 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4674 /* P_L0_D16x16, P_8x8 */
4675 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4677 /* P_L0_D8x16, P_L0_D16x8 */
4678 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra types in a P slice start at index 5 */
4681 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4683 } else if( h->slice_type == B_TYPE ) {
4684 const int mba_xy = h->left_mb_xy[0];
4685 const int mbb_xy = h->top_mb_xy;
/* context: number of non-direct neighbours in the same slice */
4689 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4691 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4694 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4695 return 0; /* B_Direct_16x16 */
4697 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4698 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining B types */
4701 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4702 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4703 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4704 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4706 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4707 else if( bits == 13 ) {
/* intra types in a B slice start at index 23 */
4708 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4709 } else if( bits == 14 )
4710 return 11; /* B_L1_L0_8x16 */
4711 else if( bits == 15 )
4712 return 22; /* B_8x8 */
4714 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4715 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4717 /* TODO SI/SP frames? */
/*
 * Decodes mb_skip_flag (CABAC) for the MB at (mb_x, mb_y). The context
 * (0..2) counts left/top neighbours in the same slice that are NOT skipped;
 * the MBAFF path adjusts the neighbour addresses across field/frame pairs.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4722 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4723 MpegEncContext * const s = &h->s;
4727 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4728 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4731 && h->slice_table[mba_xy] == h->slice_num
4732 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
4733 mba_xy += s->mb_stride;
4735 mbb_xy = mb_xy - s->mb_stride;
4737 && h->slice_table[mbb_xy] == h->slice_num
4738 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4739 mbb_xy -= s->mb_stride;
4741 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
/* non-MBAFF: plain left / top neighbours */
4743 int mb_xy = mb_x + mb_y*s->mb_stride;
4745 mbb_xy = mb_xy - s->mb_stride;
4748 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4750 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
/* B slices use a separate context group (offset visible as 11+ctx below) */
4753 if( h->slice_type == B_TYPE )
4755 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/*
 * Decodes one intra4x4 prediction mode (CABAC): first bin says "use the
 * predicted mode"; otherwise 3 bins give rem_intra4x4_pred_mode, which is
 * shifted up by one when it is >= the predicted mode.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4758 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4761 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
/* 3-bit remaining mode, LSB first (state 69 for all three bins) */
4764 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4765 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4766 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4768 if( mode >= pred_mode )
/*
 * Decodes intra_chroma_pred_mode (CABAC, truncated unary of max length 3).
 * Context 0..2 counts neighbours with a non-zero chroma prediction mode.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4774 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4775 const int mba_xy = h->left_mb_xy[0];
4776 const int mbb_xy = h->top_mb_xy;
4780 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4781 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4784 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4787 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
/* remaining bins share context 64+3 */
4790 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4792 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* x coordinate (in 4x4-block units) of each of the 16 luma 4x4 blocks,
 * in decoding order (raster within each 8x8 quadrant). */
4798 static const uint8_t block_idx_x[16] = {
4799 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/* matching y coordinate for each 4x4 block index */
4801 static const uint8_t block_idx_y[16] = {
4802 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* inverse mapping [x][y] -> block index; contents not visible in this
 * elided listing — presumably the inverse of the two tables above. */
4804 static const uint8_t block_idx_xy[4][4] = {
/*
 * Decodes the 4 luma CBP bits (one per 8x8 quadrant) with CABAC. The
 * context for each bit depends on whether the left/top neighbouring 8x8
 * blocks (possibly in the left/top MB, via left_cbp/top_cbp) were coded.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4811 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4816 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
4818 tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
4821 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* coordinates of this quadrant's first 4x4 block */
4826 x = block_idx_x[4*i8x8];
4827 y = block_idx_y[4*i8x8];
4831 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
4832 cbp_a = h->left_cbp;
4833 tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
4839 /* No need to test for skip as we put 0 for skip block */
4840 /* No need to test for IPCM as we put 1 for IPCM block */
4842 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
4843 if( ((cbp_a >> i8x8a)&0x01) == 0 )
4848 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
4849 if( ((cbp_b >> i8x8b)&0x01) == 0 )
/* states 73..76 = coded_block_pattern luma contexts */
4853 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/*
 * Decodes the 2-bit chroma CBP (0 = none, 1 = DC only, 2 = DC+AC) with
 * CABAC; contexts derive from the neighbours' chroma CBP (bits 4-5 of
 * left_cbp/top_cbp).
 * NOTE(review): listing elided — some original lines are not shown.
 */
4859 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4863 cbp_a = (h->left_cbp>>4)&0x03;
4864 cbp_b = (h-> top_cbp>>4)&0x03;
/* first bin: any chroma coefficients at all? */
4867 if( cbp_a > 0 ) ctx++;
4868 if( cbp_b > 0 ) ctx += 2;
4869 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: AC coefficients too? context from neighbours' value 2 */
4873 if( cbp_a == 2 ) ctx++;
4874 if( cbp_b == 2 ) ctx += 2;
4875 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/*
 * Decodes mb_qp_delta (CABAC): a unary value mapped to the signed delta
 * (even -> positive, odd -> negative). Context depends on whether the
 * previous MB (in decoding order) had a non-zero delta.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4877 static int decode_cabac_mb_dqp( H264Context *h) {
4878 MpegEncContext * const s = &h->s;
4884 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
/* wrap to the last MB of the previous row at the left edge */
4886 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
4888 if( h->last_qscale_diff != 0 )
4891 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4897 if(val > 102) //prevent infinite loop
/* odd unary values map to negative deltas */
4904 return -(val + 1)/2;
/*
 * Decodes sub_mb_type for a P-slice 8x8 partition (CABAC, states 21-23);
 * the result indexes p_sub_mb_type_info.
 * NOTE(review): listing elided — returns between the bins are not shown.
 */
4906 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4907 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4909 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4911 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/*
 * Decodes sub_mb_type for a B-slice 8x8 partition (CABAC, states 36-39);
 * the result indexes b_sub_mb_type_info.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4915 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4917 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4918 return 0; /* B_Direct_8x8 */
4919 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4920 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4922 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4923 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4924 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* two more suffix bins share state 39 */
4927 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4928 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; context = number of neighbouring MBs
 * already using the 8x8 transform (h->neighbor_transform_size). */
4932 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4933 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/*
 * Decodes ref_idx for block n of the given list (CABAC, unary-coded,
 * states 54+). Context derives from the left/top cached ref indices; in
 * B slices direct-predicted neighbours do not count.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4936 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4937 int refa = h->ref_cache[list][scan8[n] - 1];
4938 int refb = h->ref_cache[list][scan8[n] - 8];
4942 if( h->slice_type == B_TYPE) {
4943 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
4945 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
4954 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity cap: a conformant stream never reaches 32 references */
4960 if(ref >= 32 /*h->ref_list[list]*/){
4961 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
4962 return 0; //FIXME we should return -1 and check the return everywhere
/*
 * Decodes one motion vector difference component (l = 0 horizontal,
 * 1 vertical) for block n: UEG3 binarization — context-coded prefix,
 * Exp-Golomb bypass suffix, bypass sign. The initial context comes from
 * the magnitude of the neighbouring MVDs.
 * NOTE(review): listing elided — some original lines are not shown.
 */
4968 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
4969 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
4970 abs( h->mvd_cache[list][scan8[n] - 8][l] );
4971 int ctxbase = (l == 0) ? 40 : 47;
4976 else if( amvd > 32 )
4981 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, at most 9 context-coded bins */
4986 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* Exp-Golomb(k=3) bypass suffix */
4994 while( get_cabac_bypass( &h->cabac ) ) {
4998 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5003 if( get_cabac_bypass( &h->cabac ) )
/* bypass-coded sign applied to the magnitude */
5007 return get_cabac_bypass_sign( &h->cabac, -mvd );
/*
 * Computes the coded_block_flag context for block category `cat` (see the
 * cat table in decode_cabac_residual) and block index `idx`: ctx =
 * (left nz) + 2*(top nz) + 4*cat, with the neighbour non-zero info coming
 * from cbp bits (DC cases) or the non_zero_count cache (AC/4x4 cases).
 * NOTE(review): listing elided — some original lines are not shown.
 */
5010 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* luma DC: bit 8 of the neighbour cbp */
5015 nza = h->left_cbp&0x100;
5016 nzb = h-> top_cbp&0x100;
5017 } else if( cat == 1 || cat == 2 ) {
5018 nza = h->non_zero_count_cache[scan8[idx] - 1];
5019 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5020 } else if( cat == 3 ) {
/* chroma DC: bits 6/7 of the neighbour cbp, per component */
5021 nza = (h->left_cbp>>(6+idx))&0x01;
5022 nzb = (h-> top_cbp>>(6+idx))&0x01;
5025 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5026 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5035 return ctx + 4 * cat;
/* Context offset for the last_significant_coeff flag in 8x8 CABAC residual
 * decoding, indexed by scan position (used by DECODE_SIGNIFICANCE below). */
5038 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5039 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5040 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5041 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5042 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/*
 * Decodes one residual block with CABAC: coded_block_flag, significance
 * map + last flag, then coefficient levels (context-coded up to 14, then
 * Exp-Golomb bypass) and bypass signs. Coefficients are written to
 * `block` at positions given by `scantable`, dequantized with `qmul`
 * (NULL = leave DC unscaled).
 * NOTE(review): listing elided — some original lines are not shown.
 */
5045 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5046 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context-state base offsets per [field, cat] per the H.264 spec tables */
5047 static const int significant_coeff_flag_offset[2][6] = {
5048 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5049 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5051 static const int last_coeff_flag_offset[2][6] = {
5052 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5053 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5055 static const int coeff_abs_level_m1_offset[6] = {
5056 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position significance context for 8x8 blocks, [frame/field] */
5058 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5059 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5060 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5061 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5062 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5063 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5064 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5065 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5066 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5072 int coeff_count = 0;
5075 int abslevelgt1 = 0;
5077 uint8_t *significant_coeff_ctx_base;
5078 uint8_t *last_coeff_ctx_base;
5079 uint8_t *abs_level_m1_ctx_base;
/* keep the CABAC state in a stack copy while decoding (speed) */
5082 #define CABAC_ON_STACK
5084 #ifdef CABAC_ON_STACK
5087 cc.range = h->cabac.range;
5088 cc.low = h->cabac.low;
5089 cc.bytestream= h->cabac.bytestream;
5091 #define CC &h->cabac
5095 /* cat: 0-> DC 16x16 n = 0
5096 * 1-> AC 16x16 n = luma4x4idx
5097 * 2-> Luma4x4 n = luma4x4idx
5098 * 3-> DC Chroma n = iCbCr
5099 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5100 * 5-> Luma8x8 n = 4 * luma8x8idx
5103 /* read coded block flag */
5105 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* block entirely zero: just clear the nnz cache and write state back */
5106 if( cat == 1 || cat == 2 )
5107 h->non_zero_count_cache[scan8[n]] = 0;
5109 h->non_zero_count_cache[scan8[16+n]] = 0;
5110 #ifdef CABAC_ON_STACK
5111 h->cabac.range = cc.range ;
5112 h->cabac.low = cc.low ;
5113 h->cabac.bytestream= cc.bytestream;
5119 significant_coeff_ctx_base = h->cabac_state
5120 + significant_coeff_flag_offset[MB_FIELD][cat];
5121 last_coeff_ctx_base = h->cabac_state
5122 + last_coeff_flag_offset[MB_FIELD][cat];
5123 abs_level_m1_ctx_base = h->cabac_state
5124 + coeff_abs_level_m1_offset[cat];
/* significance map: records scan positions of non-zero coefficients in
 * index[], stopping early when the last_coeff flag fires */
5127 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5128 for(last= 0; last < coefs; last++) { \
5129 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5130 if( get_cabac( CC, sig_ctx )) { \
5131 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5132 index[coeff_count++] = last; \
5133 if( get_cabac( CC, last_ctx ) ) { \
5139 if( last == max_coeff -1 ) {\
5140 index[coeff_count++] = last;\
5142 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* hand-written asm variants where available */
5143 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5144 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5146 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5148 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5150 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5153 assert(coeff_count > 0);
/* record non-zero counts / DC presence per category */
5156 h->cbp_table[mb_xy] |= 0x100;
5157 else if( cat == 1 || cat == 2 )
5158 h->non_zero_count_cache[scan8[n]] = coeff_count;
5160 h->cbp_table[mb_xy] |= 0x40 << n;
5162 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5165 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* levels, decoded from the highest scan position downwards */
5168 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5169 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5170 int j= scantable[index[coeff_count]];
5172 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: sign only; dequantize unless qmul is NULL (DC) */
5174 block[j] = get_cabac_bypass_sign( CC, -1);
5176 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
5182 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5183 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* |level| >= 15: Exp-Golomb bypass escape */
5187 if( coeff_abs >= 15 ) {
5189 while( get_cabac_bypass( CC ) ) {
5195 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5201 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5202 else block[j] = coeff_abs;
5204 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5205 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* flush the stack copy of the CABAC state back */
5211 #ifdef CABAC_ON_STACK
5212 h->cabac.range = cc.range ;
5213 h->cabac.low = cc.low ;
5214 h->cabac.bytestream= cc.bytestream;
/*
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current MB. The plain
 * case is top = mb_xy - stride, left = mb_xy - 1; the MBAFF branch corrects
 * both when the current pair and a neighbouring pair differ in frame/field
 * coding.
 * NOTE(review): listing elided — some original lines are not shown.
 */
5219 static inline void compute_mb_neighbors(H264Context *h)
5221 MpegEncContext * const s = &h->s;
5222 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5223 h->top_mb_xy = mb_xy - s->mb_stride;
5224 h->left_mb_xy[0] = mb_xy - 1;
5226 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5227 const int top_pair_xy = pair_xy - s->mb_stride;
5228 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5229 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5230 const int curr_mb_frame_flag = !MB_FIELD;
5231 const int bottom = (s->mb_y & 1);
5233 ? !curr_mb_frame_flag // bottom macroblock
5234 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5236 h->top_mb_xy -= s->mb_stride;
/* left neighbour: use the top MB of the left pair on a coding mismatch */
5238 if (left_mb_frame_flag != curr_mb_frame_flag) {
5239 h->left_mb_xy[0] = pair_xy - 1;
5246 * decodes a macroblock
5247 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5249 static int decode_mb_cabac(H264Context *h) {
5250 MpegEncContext * const s = &h->s;
5251 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5252 int mb_type, partition_count, cbp = 0;
5253 int dct8x8_allowed= h->pps.transform_8x8_mode;
5255 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5257 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5258 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5260 /* a skipped mb needs the aff flag from the following mb */
5261 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5262 predict_field_decoding_flag(h);
5263 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5264 skip = h->next_mb_skipped;
5266 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5267 /* read skip flags */
5269 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5270 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5271 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5272 if(h->next_mb_skipped)
5273 predict_field_decoding_flag(h);
5275 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5280 h->cbp_table[mb_xy] = 0;
5281 h->chroma_pred_mode_table[mb_xy] = 0;
5282 h->last_qscale_diff = 0;
5289 if( (s->mb_y&1) == 0 )
5291 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5293 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5295 h->prev_mb_skipped = 0;
5297 compute_mb_neighbors(h);
5298 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5299 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5303 if( h->slice_type == B_TYPE ) {
5305 partition_count= b_mb_type_info[mb_type].partition_count;
5306 mb_type= b_mb_type_info[mb_type].type;
5309 goto decode_intra_mb;
5311 } else if( h->slice_type == P_TYPE ) {
5313 partition_count= p_mb_type_info[mb_type].partition_count;
5314 mb_type= p_mb_type_info[mb_type].type;
5317 goto decode_intra_mb;
5320 assert(h->slice_type == I_TYPE);
5322 partition_count = 0;
5323 cbp= i_mb_type_info[mb_type].cbp;
5324 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5325 mb_type= i_mb_type_info[mb_type].type;
5328 mb_type |= MB_TYPE_INTERLACED;
5330 h->slice_table[ mb_xy ]= h->slice_num;
5332 if(IS_INTRA_PCM(mb_type)) {
5336 // We assume these blocks are very rare so we do not optimize it.
5337 // FIXME The two following lines get the bitstream position in the cabac
5338 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5339 ptr= h->cabac.bytestream;
5340 if(h->cabac.low&0x1) ptr--;
5342 if(h->cabac.low&0x1FF) ptr--;
5345 // The pixels are stored in the same order as levels in h->mb array.
5346 for(y=0; y<16; y++){
5347 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5348 for(x=0; x<16; x++){
5349 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5350 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5354 const int index= 256 + 4*(y&3) + 32*(y>>2);
5356 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5357 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5361 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5363 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5364 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5368 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5370 // All blocks are present
5371 h->cbp_table[mb_xy] = 0x1ef;
5372 h->chroma_pred_mode_table[mb_xy] = 0;
5373 // In deblocking, the quantizer is 0
5374 s->current_picture.qscale_table[mb_xy]= 0;
5375 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5376 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5377 // All coeffs are present
5378 memset(h->non_zero_count[mb_xy], 16, 16);
5379 s->current_picture.mb_type[mb_xy]= mb_type;
5384 h->ref_count[0] <<= 1;
5385 h->ref_count[1] <<= 1;
5388 fill_caches(h, mb_type, 0);
5390 if( IS_INTRA( mb_type ) ) {
5392 if( IS_INTRA4x4( mb_type ) ) {
5393 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5394 mb_type |= MB_TYPE_8x8DCT;
5395 for( i = 0; i < 16; i+=4 ) {
5396 int pred = pred_intra_mode( h, i );
5397 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5398 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5401 for( i = 0; i < 16; i++ ) {
5402 int pred = pred_intra_mode( h, i );
5403 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5405 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5408 write_back_intra_pred_mode(h);
5409 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5411 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5412 if( h->intra16x16_pred_mode < 0 ) return -1;
5414 h->chroma_pred_mode_table[mb_xy] =
5415 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5417 pred_mode= check_intra_pred_mode( h, pred_mode );
5418 if( pred_mode < 0 ) return -1;
5419 h->chroma_pred_mode= pred_mode;
5420 } else if( partition_count == 4 ) {
5421 int i, j, sub_partition_count[4], list, ref[2][4];
5423 if( h->slice_type == B_TYPE ) {
5424 for( i = 0; i < 4; i++ ) {
5425 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5426 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5427 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5429 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5430 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5431 pred_direct_motion(h, &mb_type);
5432 h->ref_cache[0][scan8[4]] =
5433 h->ref_cache[1][scan8[4]] =
5434 h->ref_cache[0][scan8[12]] =
5435 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5436 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5437 for( i = 0; i < 4; i++ )
5438 if( IS_DIRECT(h->sub_mb_type[i]) )
5439 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5443 for( i = 0; i < 4; i++ ) {
5444 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5445 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5446 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5450 for( list = 0; list < h->list_count; list++ ) {
5451 for( i = 0; i < 4; i++ ) {
5452 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5453 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5454 if( h->ref_count[list] > 1 )
5455 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5461 h->ref_cache[list][ scan8[4*i]+1 ]=
5462 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5467 dct8x8_allowed = get_dct8x8_allowed(h);
5469 for(list=0; list<h->list_count; list++){
5471 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5472 if(IS_DIRECT(h->sub_mb_type[i])){
5473 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5477 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5478 const int sub_mb_type= h->sub_mb_type[i];
5479 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5480 for(j=0; j<sub_partition_count[i]; j++){
5483 const int index= 4*i + block_width*j;
5484 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5485 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5486 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5488 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5489 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5490 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5492 if(IS_SUB_8X8(sub_mb_type)){
5494 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5496 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5499 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5501 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5502 }else if(IS_SUB_8X4(sub_mb_type)){
5503 mv_cache[ 1 ][0]= mx;
5504 mv_cache[ 1 ][1]= my;
5506 mvd_cache[ 1 ][0]= mx - mpx;
5507 mvd_cache[ 1 ][1]= my - mpy;
5508 }else if(IS_SUB_4X8(sub_mb_type)){
5509 mv_cache[ 8 ][0]= mx;
5510 mv_cache[ 8 ][1]= my;
5512 mvd_cache[ 8 ][0]= mx - mpx;
5513 mvd_cache[ 8 ][1]= my - mpy;
5515 mv_cache[ 0 ][0]= mx;
5516 mv_cache[ 0 ][1]= my;
5518 mvd_cache[ 0 ][0]= mx - mpx;
5519 mvd_cache[ 0 ][1]= my - mpy;
5522 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5523 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5524 p[0] = p[1] = p[8] = p[9] = 0;
5525 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5529 } else if( IS_DIRECT(mb_type) ) {
5530 pred_direct_motion(h, &mb_type);
5531 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5532 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5533 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5535 int list, mx, my, i, mpx, mpy;
5536 if(IS_16X16(mb_type)){
5537 for(list=0; list<h->list_count; list++){
5538 if(IS_DIR(mb_type, 0, list)){
5539 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5540 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5542 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5544 for(list=0; list<h->list_count; list++){
5545 if(IS_DIR(mb_type, 0, list)){
5546 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5548 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5549 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5550 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5552 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5553 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5555 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5558 else if(IS_16X8(mb_type)){
5559 for(list=0; list<h->list_count; list++){
5561 if(IS_DIR(mb_type, i, list)){
5562 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5563 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5565 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5568 for(list=0; list<h->list_count; list++){
5570 if(IS_DIR(mb_type, i, list)){
5571 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5572 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5573 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5574 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5576 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5577 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5579 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5580 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5585 assert(IS_8X16(mb_type));
5586 for(list=0; list<h->list_count; list++){
5588 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5589 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5590 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5592 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5595 for(list=0; list<h->list_count; list++){
5597 if(IS_DIR(mb_type, i, list)){
5598 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5599 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5600 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5602 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5603 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5604 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5606 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5607 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5614 if( IS_INTER( mb_type ) ) {
5615 h->chroma_pred_mode_table[mb_xy] = 0;
5616 write_back_motion( h, mb_type );
5619 if( !IS_INTRA16x16( mb_type ) ) {
5620 cbp = decode_cabac_mb_cbp_luma( h );
5621 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5624 h->cbp_table[mb_xy] = h->cbp = cbp;
5626 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5627 if( decode_cabac_mb_transform_size( h ) )
5628 mb_type |= MB_TYPE_8x8DCT;
5630 s->current_picture.mb_type[mb_xy]= mb_type;
5632 if( cbp || IS_INTRA16x16( mb_type ) ) {
5633 const uint8_t *scan, *scan8x8, *dc_scan;
5636 if(IS_INTERLACED(mb_type)){
5637 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5638 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5639 dc_scan= luma_dc_field_scan;
5641 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5642 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5643 dc_scan= luma_dc_zigzag_scan;
5646 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5647 if( dqp == INT_MIN ){
5648 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5652 if(((unsigned)s->qscale) > 51){
5653 if(s->qscale<0) s->qscale+= 52;
5654 else s->qscale-= 52;
5656 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5657 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5659 if( IS_INTRA16x16( mb_type ) ) {
5661 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5662 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5665 for( i = 0; i < 16; i++ ) {
5666 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5667 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15);
5670 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5674 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5675 if( cbp & (1<<i8x8) ) {
5676 if( IS_8x8DCT(mb_type) ) {
5677 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5678 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5680 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5681 const int index = 4*i8x8 + i4x4;
5682 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5684 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16);
5685 //STOP_TIMER("decode_residual")
5688 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5689 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5696 for( c = 0; c < 2; c++ ) {
5697 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5698 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5704 for( c = 0; c < 2; c++ ) {
5705 const uint32_t *qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5706 for( i = 0; i < 4; i++ ) {
5707 const int index = 16 + 4 * c + i;
5708 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5709 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5713 uint8_t * const nnz= &h->non_zero_count_cache[0];
5714 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5715 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5718 uint8_t * const nnz= &h->non_zero_count_cache[0];
5719 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5720 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5721 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5722 h->last_qscale_diff = 0;
5725 s->current_picture.qscale_table[mb_xy]= s->qscale;
5726 write_back_non_zero_count(h);
5729 h->ref_count[0] >>= 1;
5730 h->ref_count[1] >>= 1;
5737 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5739 const int index_a = qp + h->slice_alpha_c0_offset;
5740 const int alpha = (alpha_table+52)[index_a];
5741 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5746 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5747 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5749 /* 16px edge length, because bS=4 is triggered by being at
5750 * the edge of an intra MB, so all 4 bS are the same */
5751 for( d = 0; d < 16; d++ ) {
5752 const int p0 = pix[-1];
5753 const int p1 = pix[-2];
5754 const int p2 = pix[-3];
5756 const int q0 = pix[0];
5757 const int q1 = pix[1];
5758 const int q2 = pix[2];
5760 if( FFABS( p0 - q0 ) < alpha &&
5761 FFABS( p1 - p0 ) < beta &&
5762 FFABS( q1 - q0 ) < beta ) {
5764 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5765 if( FFABS( p2 - p0 ) < beta)
5767 const int p3 = pix[-4];
5769 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5770 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5771 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5774 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5776 if( FFABS( q2 - q0 ) < beta)
5778 const int q3 = pix[3];
5780 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5781 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5782 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5785 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5789 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5790 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5792 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge.  Same alpha/beta derivation as the luma
 * variant; both the normal (bS < 4) and intra (bS == 4) paths are delegated
 * to the DSP chroma loop filters.  NOTE(review): intermediate lines are
 * elided in this extract. */
5798 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5800 const int index_a = qp + h->slice_alpha_c0_offset;
5801 const int alpha = (alpha_table+52)[index_a];
5802 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 (per the spec's chroma clipping) and 0 for "skip" */
5807 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5808 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4: strong intra chroma filter, no tc clipping needed */
5810 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the first vertical luma edge of an MBAFF macroblock pair.
 * Unlike the plain edge filter this takes 8 boundary strengths (bS[8]) and
 * two QPs (qp[2]) because the left neighbour pair may be coded in the other
 * field/frame mode, so each row can face a different neighbour.
 * Works pixel row by pixel row (no DSP fast path).
 * NOTE(review): this extract elides some original lines, so the visible body
 * is not complete. */
5814 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5816 for( i = 0; i < 16; i++, pix += stride) {
/* map row i to one of the 8 bS entries; mapping depends on field mode */
5822 int bS_index = (i >> 1);
5825 bS_index |= (i & 1);
5828 if( bS[bS_index] == 0 ) {
/* pick which of the two QPs applies to this row */
5832 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5833 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5834 alpha = (alpha_table+52)[index_a];
5835 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (bS < 4) path: tc0-clipped delta filter */
5837 if( bS[bS_index] < 4 ) {
5838 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5839 const int p0 = pix[-1];
5840 const int p1 = pix[-2];
5841 const int p2 = pix[-3];
5842 const int q0 = pix[0];
5843 const int q1 = pix[1];
5844 const int q2 = pix[2];
5846 if( FFABS( p0 - q0 ) < alpha &&
5847 FFABS( p1 - p0 ) < beta &&
5848 FFABS( q1 - q0 ) < beta ) {
/* optionally adjust p1/q1 when the second pixel is smooth enough */
5852 if( FFABS( p2 - p0 ) < beta ) {
5853 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5856 if( FFABS( q2 - q0 ) < beta ) {
5857 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* clipped delta applied symmetrically to p0/q0 */
5861 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5862 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5863 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5864 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4 path: strong intra-edge filter, same maths as filter_mb_edgev */
5867 const int p0 = pix[-1];
5868 const int p1 = pix[-2];
5869 const int p2 = pix[-3];
5871 const int q0 = pix[0];
5872 const int q1 = pix[1];
5873 const int q2 = pix[2];
5875 if( FFABS( p0 - q0 ) < alpha &&
5876 FFABS( p1 - p0 ) < beta &&
5877 FFABS( q1 - q0 ) < beta ) {
5879 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5880 if( FFABS( p2 - p0 ) < beta)
5882 const int p3 = pix[-4];
5884 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5885 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5886 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5889 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5891 if( FFABS( q2 - q0 ) < beta)
5893 const int q3 = pix[3];
5895 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5896 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5897 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5900 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5904 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5905 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5907 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock the first vertical chroma edge of an MBAFF macroblock pair.
 * Chroma edges are 8 rows tall; like the luma MBAFF variant it takes 8
 * boundary strengths and 2 QPs, filtering one pixel row per iteration.
 * NOTE(review): intermediate lines are elided in this extract. */
5912 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5914 for( i = 0; i < 8; i++, pix += stride) {
5922 if( bS[bS_index] == 0 ) {
/* choose which neighbour QP applies to this row (field vs frame pairing) */
5926 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5927 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5928 alpha = (alpha_table+52)[index_a];
5929 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal path: chroma uses tc0+1 and only touches p0/q0 */
5931 if( bS[bS_index] < 4 ) {
5932 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
5933 const int p0 = pix[-1];
5934 const int p1 = pix[-2];
5935 const int q0 = pix[0];
5936 const int q1 = pix[1];
5938 if( FFABS( p0 - q0 ) < alpha &&
5939 FFABS( p1 - p0 ) < beta &&
5940 FFABS( q1 - q0 ) < beta ) {
5941 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5943 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5944 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5945 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4 path: strong intra chroma filter (3-tap, unclipped) */
5948 const int p0 = pix[-1];
5949 const int p1 = pix[-2];
5950 const int q0 = pix[0];
5951 const int q1 = pix[1];
5953 if( FFABS( p0 - q0 ) < alpha &&
5954 FFABS( p1 - p0 ) < beta &&
5955 FFABS( q1 - q0 ) < beta ) {
5957 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
5958 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
5959 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 pixels wide).  Mirror of
 * filter_mb_edgev with pixel offsets measured in strides instead of bytes:
 * p0..p3 are the rows above the edge, q0..q3 the rows below.
 * NOTE(review): intermediate lines are elided in this extract. */
5965 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5967 const int index_a = qp + h->slice_alpha_c0_offset;
5968 const int alpha = (alpha_table+52)[index_a];
5969 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5970 const int pix_next = stride;
/* per-segment clipping value; -1 marks "no filtering" for bS == 0 */
5975 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5976 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
5978 /* 16px edge length, see filter_mb_edgev */
5979 for( d = 0; d < 16; d++ ) {
5980 const int p0 = pix[-1*pix_next];
5981 const int p1 = pix[-2*pix_next];
5982 const int p2 = pix[-3*pix_next];
5983 const int q0 = pix[0];
5984 const int q1 = pix[1*pix_next];
5985 const int q2 = pix[2*pix_next];
/* filter only where the edge looks like a blocking artifact */
5987 if( FFABS( p0 - q0 ) < alpha &&
5988 FFABS( p1 - p0 ) < beta &&
5989 FFABS( q1 - q0 ) < beta ) {
5991 const int p3 = pix[-4*pix_next];
5992 const int q3 = pix[ 3*pix_next];
/* strong-filter eligibility test on the edge step size */
5994 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5995 if( FFABS( p2 - p0 ) < beta) {
5997 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5998 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5999 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6002 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6004 if( FFABS( q2 - q0 ) < beta) {
6006 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6007 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6008 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6011 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only the two pixels straddling the edge are changed */
6015 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6016 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6018 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge.  Mirror of filter_mb_edgecv using the
 * vertical-direction DSP chroma loop filters.  NOTE(review): intermediate
 * lines are elided in this extract. */
6025 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6027 const int index_a = qp + h->slice_alpha_c0_offset;
6028 const int alpha = (alpha_table+52)[index_a];
6029 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 for clipping and 0 to mark "skip" segments */
6034 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6035 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4: strong intra chroma filter */
6037 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock.  Handles only the common cases
 * (no MBAFF, no per-plane chroma QP difference, not at the picture border,
 * DSP boundary-strength helper available); everything else falls back to
 * the full filter_mb().  NOTE(review): intermediate lines are elided in
 * this extract, so the visible body is not complete. */
6041 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6042 MpegEncContext * const s = &h->s;
6044 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6046 mb_xy = mb_x + mb_y*s->mb_stride;
/* fall back to the generic filter whenever a fast-path precondition fails */
6048 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6049 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6050 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6051 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6054 assert(!FRAME_MBAFF);
/* average QPs with left (qp0) and top (qp1) neighbours for the MB edges */
6056 mb_type = s->current_picture.mb_type[mb_xy];
6057 qp = s->current_picture.qscale_table[mb_xy];
6058 qp0 = s->current_picture.qscale_table[mb_xy-1];
6059 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6060 qpc = get_chroma_qp( h, 0, qp );
6061 qpc0 = get_chroma_qp( h, 0, qp0 );
6062 qpc1 = get_chroma_qp( h, 0, qp1 );
6063 qp0 = (qp + qp0 + 1) >> 1;
6064 qp1 = (qp + qp1 + 1) >> 1;
6065 qpc0 = (qpc + qpc0 + 1) >> 1;
6066 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP threshold the filter would be a no-op, so skip entirely */
6067 qp_thresh = 15 - h->slice_alpha_c0_offset;
6068 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6069 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs have constant bS (4 on MB edges, 3 inside), so filter directly */
6072 if( IS_INTRA(mb_type) ) {
6073 int16_t bS4[4] = {4,4,4,4};
6074 int16_t bS3[4] = {3,3,3,3};
/* 8x8 transform: only every other internal edge exists */
6075 if( IS_8x8DCT(mb_type) ) {
6076 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6077 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6078 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6079 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6081 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6082 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6083 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6084 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6085 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6086 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6087 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6088 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: only MB edge and middle edge exist (8x8 chroma block) */
6090 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6091 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6092 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6093 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6094 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6095 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6096 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6097 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge; bSv aliases bS for 64-bit whole-row tests */
6100 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6101 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6103 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6105 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: how often mv-based bS must be rechecked between edges */
6107 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6108 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6109 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6110 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6112 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6113 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6114 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6115 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* MB edges touching an intra neighbour always get bS == 4 */
6117 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6118 bSv[0][0] = 0x0004000400040004ULL;
6119 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6120 bSv[1][0] = 0x0004000400040004ULL;
/* apply one edge: dir 0 = vertical, 1 = horizontal; edge 0 uses the
 * averaged neighbour QP (qp0/qp1), interior edges use this MB's QP */
6122 #define FILTER(hv,dir,edge)\
6123 if(bSv[dir][edge]) {\
6124 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6126 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6127 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6133 } else if( IS_8x8DCT(mb_type) ) {
6152 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6153 MpegEncContext * const s = &h->s;
6154 const int mb_xy= mb_x + mb_y*s->mb_stride;
6155 const int mb_type = s->current_picture.mb_type[mb_xy];
6156 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6157 int first_vertical_edge_done = 0;
6159 /* FIXME: A given frame may occupy more than one position in
6160 * the reference list. So ref2frm should be populated with
6161 * frame numbers, not indices. */
6162 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6163 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6165 //for sufficiently low qp, filtering wouldn't do anything
6166 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6168 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6169 int qp = s->current_picture.qscale_table[mb_xy];
6171 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6172 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6178 // left mb is in picture
6179 && h->slice_table[mb_xy-1] != 255
6180 // and current and left pair do not have the same interlaced type
6181 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6182 // and left mb is in the same slice if deblocking_filter == 2
6183 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6184 /* First vertical edge is different in MBAFF frames
6185 * There are 8 different bS to compute and 2 different Qp
6187 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6188 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6193 int mb_qp, mbn0_qp, mbn1_qp;
6195 first_vertical_edge_done = 1;
6197 if( IS_INTRA(mb_type) )
6198 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6200 for( i = 0; i < 8; i++ ) {
6201 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6203 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6205 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6206 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6207 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6214 mb_qp = s->current_picture.qscale_table[mb_xy];
6215 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6216 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6217 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6218 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6219 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6220 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6221 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6222 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6223 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6224 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6225 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6226 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6229 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6230 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6231 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6232 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6233 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6235 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6236 for( dir = 0; dir < 2; dir++ )
6239 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6240 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6241 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6243 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6244 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6245 // how often to recheck mv-based bS when iterating between edges
6246 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6247 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6248 // how often to recheck mv-based bS when iterating along each edge
6249 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6251 if (first_vertical_edge_done) {
6253 first_vertical_edge_done = 0;
6256 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6259 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6260 && !IS_INTERLACED(mb_type)
6261 && IS_INTERLACED(mbm_type)
6263 // This is a special case in the norm where the filtering must
6264 // be done twice (one each of the field) even if we are in a
6265 // frame macroblock.
6267 static const int nnz_idx[4] = {4,5,6,3};
6268 unsigned int tmp_linesize = 2 * linesize;
6269 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6270 int mbn_xy = mb_xy - 2 * s->mb_stride;
6275 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6276 if( IS_INTRA(mb_type) ||
6277 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6278 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6280 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6281 for( i = 0; i < 4; i++ ) {
6282 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6283 mbn_nnz[nnz_idx[i]] != 0 )
6289 // Do not use s->qscale as luma quantizer because it has not the same
6290 // value in IPCM macroblocks.
6291 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6292 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6293 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6294 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6295 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6296 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6297 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6298 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6305 for( edge = start; edge < edges; edge++ ) {
6306 /* mbn_xy: neighbor macroblock */
6307 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6308 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6312 if( (edge&1) && IS_8x8DCT(mb_type) )
6315 if( IS_INTRA(mb_type) ||
6316 IS_INTRA(mbn_type) ) {
6319 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6320 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6329 bS[0] = bS[1] = bS[2] = bS[3] = value;
6334 if( edge & mask_edge ) {
6335 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6338 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6339 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6342 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6343 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6344 int bn_idx= b_idx - (dir ? 8:1);
6346 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6347 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6348 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6349 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6351 bS[0] = bS[1] = bS[2] = bS[3] = v;
6357 for( i = 0; i < 4; i++ ) {
6358 int x = dir == 0 ? edge : i;
6359 int y = dir == 0 ? i : edge;
6360 int b_idx= 8 + 4 + x + 8*y;
6361 int bn_idx= b_idx - (dir ? 8:1);
6363 if( h->non_zero_count_cache[b_idx] != 0 ||
6364 h->non_zero_count_cache[bn_idx] != 0 ) {
6370 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6371 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6372 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6373 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6381 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6386 // Do not use s->qscale as luma quantizer because it has not the same
6387 // value in IPCM macroblocks.
6388 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6389 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6390 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6391 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6393 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6394 if( (edge&1) == 0 ) {
6395 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6396 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6397 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6398 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6401 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6402 if( (edge&1) == 0 ) {
6403 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6404 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6405 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6406 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6413 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6414 MpegEncContext * const s = &h->s;
6415 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6419 if( h->pps.cabac ) {
6423 align_get_bits( &s->gb );
6426 ff_init_cabac_states( &h->cabac);
6427 ff_init_cabac_decoder( &h->cabac,
6428 s->gb.buffer + get_bits_count(&s->gb)/8,
6429 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6430 /* calculate pre-state */
6431 for( i= 0; i < 460; i++ ) {
6433 if( h->slice_type == I_TYPE )
6434 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6436 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6439 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6441 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6446 int ret = decode_mb_cabac(h);
6448 //STOP_TIMER("decode_mb_cabac")
6450 if(ret>=0) hl_decode_mb(h);
6452 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6455 if(ret>=0) ret = decode_mb_cabac(h);
6457 if(ret>=0) hl_decode_mb(h);
6460 eos = get_cabac_terminate( &h->cabac );
6462 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6463 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6464 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6468 if( ++s->mb_x >= s->mb_width ) {
6470 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6477 if( eos || s->mb_y >= s->mb_height ) {
6478 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6479 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6486 int ret = decode_mb_cavlc(h);
6488 if(ret>=0) hl_decode_mb(h);
6490 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6492 ret = decode_mb_cavlc(h);
6494 if(ret>=0) hl_decode_mb(h);
6499 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6500 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6505 if(++s->mb_x >= s->mb_width){
6507 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6512 if(s->mb_y >= s->mb_height){
6513 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6515 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6516 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6520 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6527 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6528 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6529 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6530 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6534 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6543 for(;s->mb_y < s->mb_height; s->mb_y++){
6544 for(;s->mb_x < s->mb_width; s->mb_x++){
6545 int ret= decode_mb(h);
6550 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6551 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6556 if(++s->mb_x >= s->mb_width){
6558 if(++s->mb_y >= s->mb_height){
6559 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6560 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6564 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6571 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6572 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6573 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6577 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6584 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6587 return -1; //not reached
/**
 * Parse an SEI "unregistered user data" payload.
 * The first 16 bytes are a UUID; the rest is free-form text.  The text is
 * scanned for an x264 version banner so encoder-specific bug workarounds
 * can be enabled via h->x264_build.
 *
 * @param size payload size in bytes
 * NOTE(review): extract is incomplete — declarations of i/e/build and
 * the trailing skip loop/return are partly outside this view.
 */
static int decode_unregistered_user_data(H264Context *h, int size){
MpegEncContext * const s = &h->s;
uint8_t user_data[16+256];
/* copy at most sizeof(user_data)-1 bytes so the buffer stays NUL-terminable */
for(i=0; i<sizeof(user_data)-1 && i<size; i++){
user_data[i]= get_bits(&s->gb, 8);
/* skip the 16-byte UUID and look for the x264 core version number */
e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
if(e==1 && build>=0)
h->x264_build= build;
if(s->avctx->debug & FF_DEBUG_BUGS)
av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond what was buffered */
skip_bits(&s->gb, 8);
/**
 * Parse an SEI NAL unit.
 * SEI messages encode their type and size as sequences of 0xFF bytes plus a
 * final byte (ff_byte escape coding); unknown payload types are skipped.
 */
static int decode_sei(H264Context *h){
MpegEncContext * const s = &h->s;
while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: accumulate 255 per 0xFF byte, terminated by a non-0xFF byte */
type+= show_bits(&s->gb, 8);
}while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same escape coding */
size+= show_bits(&s->gb, 8);
}while(get_bits(&s->gb, 8) == 255);
if(decode_unregistered_user_data(h, size) < 0)
/* unhandled payload type: skip it wholesale */
skip_bits(&s->gb, 8*size);
//FIXME check bits here
align_get_bits(&s->gb);
/**
 * Parse hrd_parameters() (H.264 Annex E).  All fields are read to keep the
 * bitstream position correct but none are stored — the decoder does not
 * use HRD buffering information.
 */
static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
MpegEncContext * const s = &h->s;
cpb_count = get_ue_golomb(&s->gb) + 1;
get_bits(&s->gb, 4); /* bit_rate_scale */
get_bits(&s->gb, 4); /* cpb_size_scale */
for(i=0; i<cpb_count; i++){
get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
get_bits1(&s->gb); /* cbr_flag */
get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parse vui_parameters() (H.264 Annex E) into the SPS.
 * Stores sample aspect ratio, timing info and the bitstream restriction
 * data (num_reorder_frames); most other VUI fields are parsed and discarded.
 *
 * @return 0 on success (return statements are outside this extract —
 *         NOTE(review): confirm error paths in the full source)
 */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
MpegEncContext * const s = &h->s;
int aspect_ratio_info_present_flag;
unsigned int aspect_ratio_idc;
int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
aspect_ratio_info_present_flag= get_bits1(&s->gb);
if( aspect_ratio_info_present_flag ) {
aspect_ratio_idc= get_bits(&s->gb, 8);
if( aspect_ratio_idc == EXTENDED_SAR ) {
/* explicit 16-bit numerator/denominator */
sps->sar.num= get_bits(&s->gb, 16);
sps->sar.den= get_bits(&s->gb, 16);
}else if(aspect_ratio_idc < 14){
/* table lookup for the predefined SAR indices */
sps->sar= pixel_aspect[aspect_ratio_idc];
av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
get_bits1(&s->gb); /* overscan_appropriate_flag */
if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
get_bits(&s->gb, 3); /* video_format */
get_bits1(&s->gb); /* video_full_range_flag */
if(get_bits1(&s->gb)){ /* colour_description_present_flag */
get_bits(&s->gb, 8); /* colour_primaries */
get_bits(&s->gb, 8); /* transfer_characteristics */
get_bits(&s->gb, 8); /* matrix_coefficients */
if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
sps->timing_info_present_flag = get_bits1(&s->gb);
if(sps->timing_info_present_flag){
sps->num_units_in_tick = get_bits_long(&s->gb, 32);
sps->time_scale = get_bits_long(&s->gb, 32);
sps->fixed_frame_rate_flag = get_bits1(&s->gb);
nal_hrd_parameters_present_flag = get_bits1(&s->gb);
if(nal_hrd_parameters_present_flag)
decode_hrd_parameters(h, sps);
vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
if(vcl_hrd_parameters_present_flag)
decode_hrd_parameters(h, sps);
if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
get_bits1(&s->gb); /* low_delay_hrd_flag */
get_bits1(&s->gb); /* pic_struct_present_flag */
sps->bitstream_restriction_flag = get_bits1(&s->gb);
if(sps->bitstream_restriction_flag){
unsigned int num_reorder_frames;
get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
num_reorder_frames= get_ue_golomb(&s->gb);
get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity bound: DPB can hold at most 16 frames */
if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
sps->num_reorder_frames= num_reorder_frames;
/**
 * Parse one scaling list (scaling_list() syntax).
 *
 * @param factors       output array of `size` (16 or 64) weights
 * @param size          16 for 4x4 lists, 64 for 8x8 lists
 * @param jvt_list      spec default list, used when the stream signals
 *                      "use default" (first delta makes next == 0)
 * @param fallback_list list used when the matrix is absent from the stream
 */
static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
const uint8_t *jvt_list, const uint8_t *fallback_list){
MpegEncContext * const s = &h->s;
int i, last = 8, next = 8;
/* deltas are stored in zigzag order; pick the matching scan table */
const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
memcpy(factors, fallback_list, size*sizeof(uint8_t));
for(i=0;i<size;i++){
next = (last + get_se_golomb(&s->gb)) & 0xff;
if(!i && !next){ /* matrix not written, we use the preset one */
memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat previous value" for the rest of the list */
last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback rules per the spec: a PPS list falls back to the corresponding
 * SPS list when present, otherwise to the flat/default lists; each list in
 * the sequence may also fall back to the previously decoded one.
 *
 * @param is_sps non-zero when called from SPS parsing (enables 8x8 lists
 *               unconditionally; PPS gates them on transform_8x8_mode)
 */
static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
MpegEncContext * const s = &h->s;
int fallback_sps = !is_sps && sps->scaling_matrix_present;
const uint8_t *fallback[4] = {
fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
if(get_bits1(&s->gb)){
/* only mark the SPS as carrying matrices when parsing an SPS */
sps->scaling_matrix_present |= is_sps;
decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
if(is_sps || pps->transform_8x8_mode){
decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
} else if(fallback_sps) {
/* no matrices in the stream: inherit the SPS ones wholesale */
memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
 *
 * Validates `id` against `max`, allocates a zeroed structure of `size`
 * bytes into vec[id] if needed, and logs with `name` on failure.
 * NOTE(review): the return type/statements of this helper fall outside
 * this extract.
alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
const size_t size, const char *name)
av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
vec[id] = av_mallocz(size);
av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parse a sequence parameter set (SPS) NAL unit.
 * Allocates/looks up the SPS slot by sps_id, fills in profile/level, POC
 * parameters, picture dimensions, MBAFF/cropping flags, scaling matrices
 * (high profile) and optional VUI parameters.
 *
 * @return 0 on success, negative on error (return statements are outside
 *         this extract)
 */
static inline int decode_seq_parameter_set(H264Context *h){
MpegEncContext * const s = &h->s;
int profile_idc, level_idc;
unsigned int sps_id, tmp, mb_width, mb_height;
profile_idc= get_bits(&s->gb, 8);
get_bits1(&s->gb);   //constraint_set0_flag
get_bits1(&s->gb);   //constraint_set1_flag
get_bits1(&s->gb);   //constraint_set2_flag
get_bits1(&s->gb);   //constraint_set3_flag
get_bits(&s->gb, 4); // reserved
level_idc= get_bits(&s->gb, 8);
sps_id= get_ue_golomb(&s->gb);
sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
sps->profile_idc= profile_idc;
sps->level_idc= level_idc;
if(sps->profile_idc >= 100){ //high profile
/* chroma format / bit depth fields are parsed but only 4:2:0 8-bit
 * is actually supported by this decoder */
if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
get_bits1(&s->gb);  //residual_color_transform_flag
get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
sps->transform_bypass = get_bits1(&s->gb);
decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
sps->scaling_matrix_present = 0;
sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
sps->poc_type= get_ue_golomb(&s->gb);
if(sps->poc_type == 0){ //FIXME #define
sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
} else if(sps->poc_type == 1){//FIXME #define
sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
tmp= get_ue_golomb(&s->gb);
/* bound poc_cycle_length by the fixed-size offset table */
if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
sps->poc_cycle_length= tmp;
for(i=0; i<sps->poc_cycle_length; i++)
sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
}else if(sps->poc_type != 2){
av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
tmp= get_ue_golomb(&s->gb);
if(tmp > MAX_PICTURE_COUNT-2){
av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
sps->ref_frame_count= tmp;
sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
mb_width= get_ue_golomb(&s->gb) + 1;
mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb arithmetic */
if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
sps->mb_width = mb_width;
sps->mb_height= mb_height;
sps->frame_mbs_only_flag= get_bits1(&s->gb);
if(!sps->frame_mbs_only_flag)
sps->mb_aff= get_bits1(&s->gb);
sps->direct_8x8_inference_flag= get_bits1(&s->gb);
#ifndef ALLOW_INTERLACE
av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
if(!sps->direct_8x8_inference_flag && sps->mb_aff)
av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
sps->crop= get_bits1(&s->gb);
sps->crop_left  = get_ue_golomb(&s->gb);
sps->crop_right = get_ue_golomb(&s->gb);
sps->crop_top   = get_ue_golomb(&s->gb);
sps->crop_bottom= get_ue_golomb(&s->gb);
if(sps->crop_left || sps->crop_top){
av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
sps->crop_bottom= 0;
sps->vui_parameters_present_flag= get_bits1(&s->gb);
if( sps->vui_parameters_present_flag )
decode_vui_parameters(h, sps);
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
sps_id, sps->profile_idc, sps->level_idc,
sps->ref_frame_count,
sps->mb_width, sps->mb_height,
sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
sps->direct_8x8_inference_flag ? "8B8" : "",
sps->crop_left, sps->crop_right,
sps->crop_top, sps->crop_bottom,
sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Precompute the luma-QP -> chroma-QP mapping for one chroma_qp_index_offset.
 * @param t     which of the two per-PPS tables to fill (0 or 1)
 * @param index chroma_qp_index_offset added to the luma QP before clipping
 *              to the valid [0,51] range
 * NOTE(review): the loop bound is 255, so entry [t][255] is never written
 * here — confirm against the full source whether that entry is initialized
 * elsewhere; this looks like an off-by-one.
 */
build_qp_table(PPS *pps, int t, int index)
for(i = 0; i < 255; i++)
pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set (PPS) NAL unit.
 * Allocates/looks up the PPS slot by pps_id, validates the referenced
 * sps_id, reads entropy-coding mode, reference counts, QP offsets,
 * deblocking/weighted-pred flags and (when present) the 8x8 transform
 * flag and PPS-level scaling matrices; finally builds the chroma-QP
 * lookup tables.
 *
 * @param bit_length exact RBSP length in bits, used to detect the
 *                   optional trailing high-profile fields
 * @return 0 on success, negative on error (returns outside this extract)
 */
static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
MpegEncContext * const s = &h->s;
unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
tmp= get_ue_golomb(&s->gb);
if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
pps->cabac= get_bits1(&s->gb);
pps->pic_order_present= get_bits1(&s->gb);
pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
if(pps->slice_group_count > 1 ){
pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
switch(pps->mb_slice_group_map_type){
/* The table fragments below are the FMO slice-group syntax excerpted
 * from the H.264 spec (part of a block comment in the original file);
 * FMO parsing is not implemented. */
|       for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
|           run_length[ i ]                             |1  |ue(v)   |
|       for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
|           top_left_mb[ i ]                            |1  |ue(v)   |
|           bottom_right_mb[ i ]                        |1  |ue(v)   |
|       slice_group_change_direction_flag               |1  |u(1)    |
|       slice_group_change_rate_minus1                  |1  |ue(v)   |
|       slice_group_id_cnt_minus1                       |1  |ue(v)   |
|       for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
|           slice_group_id[ i ]                         |1  |u(v)    |
pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
pps->ref_count[0]= pps->ref_count[1]= 1;
pps->weighted_pred= get_bits1(&s->gb);
pps->weighted_bipred_idc= get_bits(&s->gb, 2);
pps->init_qp= get_se_golomb(&s->gb) + 26;
pps->init_qs= get_se_golomb(&s->gb) + 26;
pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
pps->constrained_intra_pred= get_bits1(&s->gb);
pps->redundant_pic_cnt_present = get_bits1(&s->gb);
pps->transform_8x8_mode= 0;
h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing fields exist only if bits remain in the RBSP */
if(get_bits_count(&s->gb) < bit_length){
pps->transform_8x8_mode= get_bits1(&s->gb);
decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
h->pps.chroma_qp_diff= 1;
memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
pps_id, pps->sps_id,
pps->cabac ? "CABAC" : "CAVLC",
pps->slice_group_count,
pps->ref_count[0], pps->ref_count[1],
pps->weighted_pred ? "weighted" : "",
pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
pps->deblocking_filter_parameters_present ? "LPAR" : "",
pps->constrained_intra_pred ? "CONSTR" : "",
pps->redundant_pic_cnt_present ? "REDU" : "",
pps->transform_8x8_mode ? "8x8DCT" : ""
 * Call decode_slice() for each context.
 *
 * With one context decode_slice() is called directly; with more, the
 * per-thread contexts are primed and run through avctx->execute, then
 * the MB position and error counts are pulled back into the master.
 *
 * @param h h264 master context
 * @param context_count number of contexts to execute
static void execute_decode_slices(H264Context *h, int context_count){
MpegEncContext * const s = &h->s;
AVCodecContext * const avctx= s->avctx;
if(context_count == 1) {
decode_slice(avctx, h);
for(i = 1; i < context_count; i++) {
hx = h->thread_context[i];
hx->s.error_resilience = avctx->error_resilience;
hx->s.error_count = 0;
avctx->execute(avctx, (void *)decode_slice,
(void **)h->thread_context, NULL, context_count);
/* pull back stuff from slices to master context */
hx = h->thread_context[context_count - 1];
s->mb_x = hx->s.mb_x;
s->mb_y = hx->s.mb_y;
for(i = 1; i < context_count; i++)
h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both AVC (length-prefixed, h->is_avc) and Annex-B (start-code)
 * framing, unescapes each NAL via decode_nal(), then dispatches on
 * nal_unit_type (slices, partitions A/B/C, SEI, SPS, PPS, ...).  Slice
 * NALs are queued into thread contexts and flushed through
 * execute_decode_slices() when h->max_contexts are pending.
 *
 * @return number of bytes consumed (return statements outside this extract)
 */
static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
MpegEncContext * const s = &h->s;
AVCodecContext * const avctx= s->avctx;
H264Context *hx; ///< thread context
int context_count = 0;
h->max_contexts = avctx->thread_count;
/* debug dump of the first input bytes */
for(i=0; i<50; i++){
av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
h->current_slice = 0;
s->current_picture_ptr= NULL;
if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL length prefix */
for(i = 0; i < h->nal_length_size; i++)
nalsize = (nalsize << 8) | buf[buf_index++];
if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
// start code prefix search
for(; buf_index + 3 < buf_size; buf_index++){
// This should always succeed in the first iteration.
if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
if(buf_index+3 >= buf_size) break;
hx = h->thread_context[context_count];
ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
if (ptr==NULL || dst_length < 0){
/* strip trailing zero bytes.
 * NOTE(review): ptr[dst_length - 1] is read before the
 * dst_length > 0 check — reads one byte before the buffer when
 * dst_length is 0; confirm/fix in the full source. */
while(ptr[dst_length - 1] == 0 && dst_length > 0)
bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
if(s->avctx->debug&FF_DEBUG_STARTCODE){
av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
if (h->is_avc && (nalsize != consumed))
av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
buf_index += consumed;
/* discard non-reference NALs when hurrying / skipping */
if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
switch(hx->nal_unit_type){
if (h->nal_unit_type != NAL_IDR_SLICE) {
av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
idr(h); //FIXME ensure we don't loose some frames if there is reordering
init_get_bits(&hx->s.gb, ptr, bit_length);
hx->inter_gb_ptr= &hx->s.gb;
hx->s.data_partitioning = 0;
if((err = decode_slice_header(hx, h)))
s->current_picture_ptr->key_frame= (hx->nal_unit_type == NAL_IDR_SLICE);
if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
/* data partition A: slice header + its own bit reader */
init_get_bits(&hx->s.gb, ptr, bit_length);
hx->inter_gb_ptr= NULL;
hx->s.data_partitioning = 1;
err = decode_slice_header(hx, h);
/* data partition B: intra residuals */
init_get_bits(&hx->intra_gb, ptr, bit_length);
hx->intra_gb_ptr= &hx->intra_gb;
/* data partition C: inter residuals */
init_get_bits(&hx->inter_gb, ptr, bit_length);
hx->inter_gb_ptr= &hx->inter_gb;
if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
&& s->context_initialized
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
init_get_bits(&s->gb, ptr, bit_length);
init_get_bits(&s->gb, ptr, bit_length);
decode_seq_parameter_set(h);
if(s->flags& CODEC_FLAG_LOW_DELAY)
if(avctx->has_b_frames < 2)
avctx->has_b_frames= !s->low_delay;
init_get_bits(&s->gb, ptr, bit_length);
decode_picture_parameter_set(h, bit_length);
case NAL_END_SEQUENCE:
case NAL_END_STREAM:
case NAL_FILLER_DATA:
case NAL_AUXILIARY_SLICE:
av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
if(context_count == h->max_contexts) {
execute_decode_slices(h, context_count);
av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* Slice could not be decoded in parallel mode, copy down
 * NAL unit stuff to context 0 and restart. Note that
 * rbsp_buffer is not transfered, but since we no longer
 * run in parallel mode this should not be an issue. */
h->nal_unit_type = hx->nal_unit_type;
h->nal_ref_idc   = hx->nal_ref_idc;
execute_decode_slices(h, context_count);
 * returns the number of bytes consumed for building the current frame
 *
 * In truncated mode the parser's unconsumed tail is subtracted; the
 * result is clamped to [1, buf_size] to keep callers making progress.
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
if(s->flags&CODEC_FLAG_TRUNCATED){
pos -= s->parse_context.last_index;
if(pos<0) pos=0; // FIXME remove (unneeded?)
if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
if(pos+10>buf_size) pos=buf_size; // oops ;)
7291 static int decode_frame(AVCodecContext *avctx,
7292 void *data, int *data_size,
7293 uint8_t *buf, int buf_size)
7295 H264Context *h = avctx->priv_data;
7296 MpegEncContext *s = &h->s;
7297 AVFrame *pict = data;
7300 s->flags= avctx->flags;
7301 s->flags2= avctx->flags2;
7303 /* no supplementary picture */
7304 if (buf_size == 0) {
7308 //FIXME factorize this with the output code below
7309 out = h->delayed_pic[0];
7311 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7312 if(h->delayed_pic[i]->poc < out->poc){
7313 out = h->delayed_pic[i];
7317 for(i=out_idx; h->delayed_pic[i]; i++)
7318 h->delayed_pic[i] = h->delayed_pic[i+1];
7321 *data_size = sizeof(AVFrame);
7322 *pict= *(AVFrame*)out;
7328 if(s->flags&CODEC_FLAG_TRUNCATED){
7329 int next= ff_h264_find_frame_end(h, buf, buf_size);
7331 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7333 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7336 if(h->is_avc && !h->got_avcC) {
7337 int i, cnt, nalsize;
7338 unsigned char *p = avctx->extradata;
7339 if(avctx->extradata_size < 7) {
7340 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7344 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7347 /* sps and pps in the avcC always have length coded with 2 bytes,
7348 so put a fake nal_length_size = 2 while parsing them */
7349 h->nal_length_size = 2;
7350 // Decode sps from avcC
7351 cnt = *(p+5) & 0x1f; // Number of sps
7353 for (i = 0; i < cnt; i++) {
7354 nalsize = AV_RB16(p) + 2;
7355 if(decode_nal_units(h, p, nalsize) < 0) {
7356 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7361 // Decode pps from avcC
7362 cnt = *(p++); // Number of pps
7363 for (i = 0; i < cnt; i++) {
7364 nalsize = AV_RB16(p) + 2;
7365 if(decode_nal_units(h, p, nalsize) != nalsize) {
7366 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7371 // Now store right nal length size, that will be use to parse all other nals
7372 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7373 // Do not reparse avcC
7377 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7378 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7382 buf_index=decode_nal_units(h, buf, buf_size);
7386 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7387 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7388 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7392 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7393 Picture *out = s->current_picture_ptr;
7394 Picture *cur = s->current_picture_ptr;
7395 Picture *prev = h->delayed_output_pic;
7396 int i, pics, cross_idr, out_of_order, out_idx;
7400 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7401 s->current_picture_ptr->pict_type= s->pict_type;
7403 h->prev_frame_num_offset= h->frame_num_offset;
7404 h->prev_frame_num= h->frame_num;
7405 if(s->current_picture_ptr->reference){
7406 h->prev_poc_msb= h->poc_msb;
7407 h->prev_poc_lsb= h->poc_lsb;
7409 if(s->current_picture_ptr->reference)
7410 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7416 //FIXME do something with unavailable reference frames
7418 #if 0 //decode order
7419 *data_size = sizeof(AVFrame);
7421 /* Sort B-frames into display order */
7423 if(h->sps.bitstream_restriction_flag
7424 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7425 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7430 while(h->delayed_pic[pics]) pics++;
7432 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7434 h->delayed_pic[pics++] = cur;
7435 if(cur->reference == 0)
7439 for(i=0; h->delayed_pic[i]; i++)
7440 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
7443 out = h->delayed_pic[0];
7445 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7446 if(h->delayed_pic[i]->poc < out->poc){
7447 out = h->delayed_pic[i];
7451 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7452 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7454 else if(prev && pics <= s->avctx->has_b_frames)
7456 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7458 ((!cross_idr && prev && out->poc > prev->poc + 2)
7459 || cur->pict_type == B_TYPE)))
7462 s->avctx->has_b_frames++;
7465 else if(out_of_order)
7468 if(out_of_order || pics > s->avctx->has_b_frames){
7469 for(i=out_idx; h->delayed_pic[i]; i++)
7470 h->delayed_pic[i] = h->delayed_pic[i+1];
7476 *data_size = sizeof(AVFrame);
7477 if(prev && prev != out && prev->reference == 1)
7478 prev->reference = 0;
7479 h->delayed_output_pic = out;
7483 *pict= *(AVFrame*)out;
7485 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7488 assert(pict->data[0] || !*data_size);
7489 ff_print_debug_info(s, pict);
7490 //printf("out %d\n", (int)pict->data[0]);
7493 /* Return the Picture timestamp as the frame number */
7494 /* we subtract 1 because it is added in utils.c */
7495 avctx->frame_number = s->picture_number - 1;
7497 return get_consumed_bytes(s, buf_index, buf_size);
// Compute the availability of the six neighbouring macroblocks of the
// current MB (top-left, top, top-right, left, plus two fixed slots).
// A neighbour counts as available only when it lies inside the picture
// AND belongs to the same slice as the current macroblock (same entry
// in slice_table).
7500 static inline void fill_mb_avail(H264Context *h){
7501 MpegEncContext * const s = &h->s;
// Linear index of the current macroblock within slice_table.
7502 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Top row of neighbours: [0]=top-left, [1]=top, [2]=top-right.
// NOTE(review): the visible code indexes mb_xy - s->mb_stride
// unconditionally; presumably a guard for the first macroblock row
// (s->mb_y == 0) exists in lines not shown in this excerpt — confirm
// against the full file before relying on this.
7505 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7506 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7507 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// Left neighbour [3]: available unless we are in picture column 0.
7513 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
// Slots [4] and [5] are constants for every macroblock.
7514 h->mb_avail[4]= 1; //FIXME move out
7515 h->mb_avail[5]= 0; //FIXME move out
7522 #define SIZE (COUNT*40)
7528 // int int_temp[10000];
7530 AVCodecContext avctx;
7532 dsputil_init(&dsp, &avctx);
7534 init_put_bits(&pb, temp, SIZE);
7535 printf("testing unsigned exp golomb\n");
7536 for(i=0; i<COUNT; i++){
7538 set_ue_golomb(&pb, i);
7539 STOP_TIMER("set_ue_golomb");
7541 flush_put_bits(&pb);
7543 init_get_bits(&gb, temp, 8*SIZE);
7544 for(i=0; i<COUNT; i++){
7547 s= show_bits(&gb, 24);
7550 j= get_ue_golomb(&gb);
7552 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7555 STOP_TIMER("get_ue_golomb");
7559 init_put_bits(&pb, temp, SIZE);
7560 printf("testing signed exp golomb\n");
7561 for(i=0; i<COUNT; i++){
7563 set_se_golomb(&pb, i - COUNT/2);
7564 STOP_TIMER("set_se_golomb");
7566 flush_put_bits(&pb);
7568 init_get_bits(&gb, temp, 8*SIZE);
7569 for(i=0; i<COUNT; i++){
7572 s= show_bits(&gb, 24);
7575 j= get_se_golomb(&gb);
7576 if(j != i - COUNT/2){
7577 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7580 STOP_TIMER("get_se_golomb");
7583 printf("testing 4x4 (I)DCT\n");
7586 uint8_t src[16], ref[16];
7587 uint64_t error= 0, max_error=0;
7589 for(i=0; i<COUNT; i++){
7591 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7592 for(j=0; j<16; j++){
7593 ref[j]= random()%255;
7594 src[j]= random()%255;
7597 h264_diff_dct_c(block, src, ref, 4);
7600 for(j=0; j<16; j++){
7601 // printf("%d ", block[j]);
7602 block[j]= block[j]*4;
7603 if(j&1) block[j]= (block[j]*4 + 2)/5;
7604 if(j&4) block[j]= (block[j]*4 + 2)/5;
7608 s->dsp.h264_idct_add(ref, block, 4);
7609 /* for(j=0; j<16; j++){
7610 printf("%d ", ref[j]);
7614 for(j=0; j<16; j++){
7615 int diff= FFABS(src[j] - ref[j]);
7618 max_error= FFMAX(max_error, diff);
7621 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7623 printf("testing quantizer\n");
7624 for(qp=0; qp<52; qp++){
7626 src1_block[i]= src2_block[i]= random()%255;
7630 printf("Testing NAL layer\n");
7632 uint8_t bitstream[COUNT];
7633 uint8_t nal[COUNT*2];
7635 memset(&h, 0, sizeof(H264Context));
7637 for(i=0; i<COUNT; i++){
7645 for(j=0; j<COUNT; j++){
7646 bitstream[j]= (random() % 255) + 1;
7649 for(j=0; j<zeros; j++){
7650 int pos= random() % COUNT;
7651 while(bitstream[pos] == 0){
7660 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7662 printf("encoding failed\n");
7666 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7670 if(out_length != COUNT){
7671 printf("incorrect length %d %d\n", out_length, COUNT);
7675 if(consumed != nal_length){
7676 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7680 if(memcmp(bitstream, out, COUNT)){
7681 printf("mismatch\n");
7686 printf("Testing RBSP\n");
// AVCodec close callback: release per-context H.264 decoder resources.
// Frees the two RBSP (raw byte sequence payload) scratch buffers used
// for un-escaping NAL units, then the per-macroblock tables.
// NOTE(review): the tail of this function (presumably MPV_common_end(s)
// and the return statement) is not visible in this excerpt — confirm
// against the full file.
7694 static int decode_end(AVCodecContext *avctx)
7696 H264Context *h = avctx->priv_data;
7697 MpegEncContext *s = &h->s;
// av_freep() frees and NULLs the pointers, so a later double-close is safe.
7699 av_freep(&h->rbsp_buffer[0]);
7700 av_freep(&h->rbsp_buffer[1]);
7701 free_tables(h); //FIXME cleanup init stuff perhaps
7704 // memset(h, 0, sizeof(H264Context));
7710 AVCodec h264_decoder = {
7714 sizeof(H264Context),
7719 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,