2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
47 static VLC coeff_token_vlc[4];
48 static VLC chroma_dc_coeff_token_vlc;
50 static VLC total_zeros_vlc[15];
51 static VLC chroma_dc_total_zeros_vlc[3];
53 static VLC run_vlc[6];
56 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
57 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
58 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
59 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
61 static av_always_inline uint32_t pack16to32(int a, int b){
62 #ifdef WORDS_BIGENDIAN
63 return (b&0xFFFF) + (a<<16);
65 return (a&0xFFFF) + (b<<16);
/** Lookup table with ff_rem6[i] == i % 6 for i in 0..51. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/** Lookup table with ff_div6[i] == i / 6 for i in 0..51. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
80 * @param h height of the rectangle, should be a constant
81 * @param w width of the rectangle, should be a constant
82 * @param size the size of val (1 or 4), should be a constant
84 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
85 uint8_t *p= (uint8_t*)vp;
86 assert(size==1 || size==4);
92 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
93 assert((stride&(w-1))==0);
95 const uint16_t v= size==4 ? val : val*0x0101;
96 *(uint16_t*)(p + 0*stride)= v;
98 *(uint16_t*)(p + 1*stride)= v;
100 *(uint16_t*)(p + 2*stride)= v;
101 *(uint16_t*)(p + 3*stride)= v;
103 const uint32_t v= size==4 ? val : val*0x01010101;
104 *(uint32_t*)(p + 0*stride)= v;
106 *(uint32_t*)(p + 1*stride)= v;
108 *(uint32_t*)(p + 2*stride)= v;
109 *(uint32_t*)(p + 3*stride)= v;
111 //gcc can't optimize 64bit math on x86_32
112 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
113 const uint64_t v= val*0x0100000001ULL;
114 *(uint64_t*)(p + 0*stride)= v;
116 *(uint64_t*)(p + 1*stride)= v;
118 *(uint64_t*)(p + 2*stride)= v;
119 *(uint64_t*)(p + 3*stride)= v;
121 const uint64_t v= val*0x0100000001ULL;
122 *(uint64_t*)(p + 0+0*stride)= v;
123 *(uint64_t*)(p + 8+0*stride)= v;
124 *(uint64_t*)(p + 0+1*stride)= v;
125 *(uint64_t*)(p + 8+1*stride)= v;
127 *(uint64_t*)(p + 0+2*stride)= v;
128 *(uint64_t*)(p + 8+2*stride)= v;
129 *(uint64_t*)(p + 0+3*stride)= v;
130 *(uint64_t*)(p + 8+3*stride)= v;
132 *(uint32_t*)(p + 0+0*stride)= val;
133 *(uint32_t*)(p + 4+0*stride)= val;
135 *(uint32_t*)(p + 0+1*stride)= val;
136 *(uint32_t*)(p + 4+1*stride)= val;
138 *(uint32_t*)(p + 0+2*stride)= val;
139 *(uint32_t*)(p + 4+2*stride)= val;
140 *(uint32_t*)(p + 0+3*stride)= val;
141 *(uint32_t*)(p + 4+3*stride)= val;
143 *(uint32_t*)(p + 0+0*stride)= val;
144 *(uint32_t*)(p + 4+0*stride)= val;
145 *(uint32_t*)(p + 8+0*stride)= val;
146 *(uint32_t*)(p +12+0*stride)= val;
147 *(uint32_t*)(p + 0+1*stride)= val;
148 *(uint32_t*)(p + 4+1*stride)= val;
149 *(uint32_t*)(p + 8+1*stride)= val;
150 *(uint32_t*)(p +12+1*stride)= val;
152 *(uint32_t*)(p + 0+2*stride)= val;
153 *(uint32_t*)(p + 4+2*stride)= val;
154 *(uint32_t*)(p + 8+2*stride)= val;
155 *(uint32_t*)(p +12+2*stride)= val;
156 *(uint32_t*)(p + 0+3*stride)= val;
157 *(uint32_t*)(p + 4+3*stride)= val;
158 *(uint32_t*)(p + 8+3*stride)= val;
159 *(uint32_t*)(p +12+3*stride)= val;
/**
 * Loads the neighbour information of the current macroblock (s->mb_x, s->mb_y)
 * into the per-macroblock caches of H264Context: non_zero_count_cache,
 * intra4x4_pred_mode_cache, mv_cache, ref_cache, mvd_cache and direct_cache.
 * It also records the chosen neighbour addresses (top_mb_xy, left_mb_xy),
 * the *_samples_available bitmasks for intra macroblocks, top_cbp/left_cbp
 * and neighbor_transform_size.
 *
 * @param h           decoder context; caches are written in place
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock non-zero when the caches are filled for the deblocking
 *                    pass
 *
 * NOTE(review): this listing is incomplete. The numeric prefix on every line
 * is the original line number and the gaps in it show that lines are missing
 * (closing braces, else branches, several if() heads and the left_block[]
 * initialisation among them). The code below is therefore left exactly as
 * found; restore the missing lines from the original file before relying on
 * it.
 */
166 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
167 MpegEncContext * const s = &h->s;
168 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
169 int topleft_xy, top_xy, topright_xy, left_xy[2];
170 int topleft_type, top_type, topright_type, left_type[2];
174 //FIXME deblocking could skip the intra and nnz parts.
175 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
178 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default neighbour addresses: the row above (two rows above when
// FIELD_PICTURE is set) and the macroblock to the left.
180 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
181 topleft_xy = top_xy - 1;
182 topright_xy= top_xy + 1;
183 left_xy[1] = left_xy[0] = mb_xy-1;
// The following lines adjust those addresses from the frame/field flags of
// the current and neighbouring macroblock pairs.
193 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
194 const int top_pair_xy = pair_xy - s->mb_stride;
195 const int topleft_pair_xy = top_pair_xy - 1;
196 const int topright_pair_xy = top_pair_xy + 1;
197 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
198 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
199 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
200 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
201 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
202 const int bottom = (s->mb_y & 1);
203 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
205 ? !curr_mb_frame_flag // bottom macroblock
206 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
208 top_xy -= s->mb_stride;
211 ? !curr_mb_frame_flag // bottom macroblock
212 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
214 topleft_xy -= s->mb_stride;
217 ? !curr_mb_frame_flag // bottom macroblock
218 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
220 topright_xy -= s->mb_stride;
222 if (left_mb_frame_flag != curr_mb_frame_flag) {
223 left_xy[1] = left_xy[0] = pair_xy - 1;
224 if (curr_mb_frame_flag) {
245 left_xy[1] += s->mb_stride;
// Publish the neighbour addresses chosen above.
258 h->top_mb_xy = top_xy;
259 h->left_mb_xy[0] = left_xy[0];
260 h->left_mb_xy[1] = left_xy[1];
// NOTE(review): the neighbour types are derived in two alternative ways, with
// "slice_table[...] < 255" here and with "slice_table[...] == h->slice_num"
// further down; the condition selecting between them is not visible in this
// listing (presumably for_deblock, confirm against the original file).
264 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
265 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
266 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
268 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// Unpack the 16 per-block "has coefficients" bits stored at non_zero_count[mb_xy][14].
270 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
272 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
273 for(list=0; list<h->list_count; list++){
274 if(USES_LIST(mb_type,list)){
275 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
276 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
277 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
278 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
284 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
285 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
287 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
288 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
290 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
291 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
296 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
297 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
298 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
299 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
300 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra macroblocks: start with every neighbouring sample marked available,
// then clear bits for neighbours that are missing (type 0) or non-intra while
// constrained_intra_pred is set.
303 if(IS_INTRA(mb_type)){
304 h->topleft_samples_available=
305 h->top_samples_available=
306 h->left_samples_available= 0xFFFF;
307 h->topright_samples_available= 0xEEEA;
309 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
310 h->topleft_samples_available= 0xB3FF;
311 h->top_samples_available= 0x33FF;
312 h->topright_samples_available= 0x26EA;
315 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
316 h->topleft_samples_available&= 0xDF5F;
317 h->left_samples_available&= 0x5F5F;
321 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
322 h->topleft_samples_available&= 0x7FFF;
324 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
325 h->topright_samples_available&= 0xFBFF;
// Intra 4x4: copy the neighbours' prediction modes, or a substitute value
// "pred" when the neighbour is not intra 4x4.
327 if(IS_INTRA4x4(mb_type)){
328 if(IS_INTRA4x4(top_type)){
329 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
330 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
331 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
332 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
335 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
340 h->intra4x4_pred_mode_cache[4+8*0]=
341 h->intra4x4_pred_mode_cache[5+8*0]=
342 h->intra4x4_pred_mode_cache[6+8*0]=
343 h->intra4x4_pred_mode_cache[7+8*0]= pred;
346 if(IS_INTRA4x4(left_type[i])){
347 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
348 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
351 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
356 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
357 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Non-zero coefficient counts of the top and left neighbours; the fallback
// value written when no neighbour data is copied is 0 for CABAC non-intra
// macroblocks and 64 otherwise.
372 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
374 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
375 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
376 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
377 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
379 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
380 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
382 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
383 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
386 h->non_zero_count_cache[4+8*0]=
387 h->non_zero_count_cache[5+8*0]=
388 h->non_zero_count_cache[6+8*0]=
389 h->non_zero_count_cache[7+8*0]=
391 h->non_zero_count_cache[1+8*0]=
392 h->non_zero_count_cache[2+8*0]=
394 h->non_zero_count_cache[1+8*3]=
395 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
399 for (i=0; i<2; i++) {
401 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
402 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
403 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
404 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
406 h->non_zero_count_cache[3+8*1 + 2*8*i]=
407 h->non_zero_count_cache[3+8*2 + 2*8*i]=
408 h->non_zero_count_cache[0+8*1 + 8*i]=
409 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Coded block patterns of the top and left neighbours.
416 h->top_cbp = h->cbp_table[top_xy];
417 } else if(IS_INTRA(mb_type)) {
424 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
425 } else if(IS_INTRA(mb_type)) {
431 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
434 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter / direct macroblocks: load the neighbours' motion vectors and
// reference indices for every reference list.
439 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
441 for(list=0; list<h->list_count; list++){
442 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
443 /*if(!h->mv_cache_clean[list]){
444 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
445 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
446 h->mv_cache_clean[list]= 1;
450 h->mv_cache_clean[list]= 0;
452 if(USES_LIST(top_type, list)){
453 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
454 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
455 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
456 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
457 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
458 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
459 h->ref_cache[list][scan8[0] + 0 - 1*8]=
460 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
461 h->ref_cache[list][scan8[0] + 2 - 1*8]=
462 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
464 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
465 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
466 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
467 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
468 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
472 int cache_idx = scan8[0] - 1 + i*2*8;
473 if(USES_LIST(left_type[i], list)){
474 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
475 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
476 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
477 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
478 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
479 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
481 *(uint32_t*)h->mv_cache [list][cache_idx ]=
482 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
483 h->ref_cache[list][cache_idx ]=
484 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
488 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
491 if(USES_LIST(topleft_type, list)){
492 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
493 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
494 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
495 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
497 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
498 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
501 if(USES_LIST(topright_type, list)){
502 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
503 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
504 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
505 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
507 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
508 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
511 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
514 h->ref_cache[list][scan8[5 ]+1] =
515 h->ref_cache[list][scan8[7 ]+1] =
516 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
517 h->ref_cache[list][scan8[4 ]] =
518 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
519 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
520 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
521 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
522 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
523 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
526 /* XXX beurk, Load mvd */
527 if(USES_LIST(top_type, list)){
528 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
529 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
530 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
531 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
532 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
534 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
535 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
536 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
537 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
539 if(USES_LIST(left_type[0], list)){
540 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
541 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
542 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
544 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
545 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
547 if(USES_LIST(left_type[1], list)){
548 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
549 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
550 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
552 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
553 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
555 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
556 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
557 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
558 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
559 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: clear the direct flags of the current macroblock and load those
// of the top and left neighbours.
561 if(h->slice_type == B_TYPE){
562 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
564 if(IS_DIRECT(top_type)){
565 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
566 }else if(IS_8X8(top_type)){
567 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
568 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
569 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
571 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
574 if(IS_DIRECT(left_type[0]))
575 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
576 else if(IS_8X8(left_type[0]))
577 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
579 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
581 if(IS_DIRECT(left_type[1]))
582 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
583 else if(IS_8X8(left_type[1]))
584 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
586 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
592 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
593 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
594 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
595 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
596 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
597 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
598 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
599 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
600 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
601 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
603 #define MAP_F2F(idx, mb_type)\
604 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
605 h->ref_cache[list][idx] <<= 1;\
606 h->mv_cache[list][idx][1] /= 2;\
607 h->mvd_cache[list][idx][1] /= 2;\
612 #define MAP_F2F(idx, mb_type)\
613 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
614 h->ref_cache[list][idx] >>= 1;\
615 h->mv_cache[list][idx][1] <<= 1;\
616 h->mvd_cache[list][idx][1] <<= 1;\
626 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
629 static inline void write_back_intra_pred_mode(H264Context *h){
630 MpegEncContext * const s = &h->s;
631 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
633 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
634 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
635 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
636 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
637 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
638 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
639 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
643 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
645 static inline int check_intra4x4_pred_mode(H264Context *h){
646 MpegEncContext * const s = &h->s;
647 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
648 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
651 if(!(h->top_samples_available&0x8000)){
653 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
655 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
658 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
663 if(!(h->left_samples_available&0x8000)){
665 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
667 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
670 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
676 } //FIXME cleanup like next
679 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
681 static inline int check_intra_pred_mode(H264Context *h, int mode){
682 MpegEncContext * const s = &h->s;
683 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
684 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
687 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
691 if(!(h->top_samples_available&0x8000)){
694 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
699 if(!(h->left_samples_available&0x8000)){
702 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
711 * gets the predicted intra4x4 prediction mode.
713 static inline int pred_intra_mode(H264Context *h, int n){
714 const int index8= scan8[n];
715 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
716 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
717 const int min= FFMIN(left, top);
719 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
721 if(min<0) return DC_PRED;
725 static inline void write_back_non_zero_count(H264Context *h){
726 MpegEncContext * const s = &h->s;
727 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
729 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
730 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
731 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
732 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
733 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
734 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
735 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
737 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
738 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
739 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
741 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
742 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
743 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
746 // store all luma nnzs, for deblocking
749 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
750 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
755 * gets the predicted number of non zero coefficients.
756 * @param n block index
758 static inline int pred_non_zero_count(H264Context *h, int n){
759 const int index8= scan8[n];
760 const int left= h->non_zero_count_cache[index8 - 1];
761 const int top = h->non_zero_count_cache[index8 - 8];
764 if(i<64) i= (i+1)>>1;
766 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
771 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
772 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
773 MpegEncContext *s = &h->s;
775 /* there is no consistent mapping of mvs to neighboring locations that will
776 * make mbaff happy, so we can't move all this logic to fill_caches */
778 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
780 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
781 *C = h->mv_cache[list][scan8[0]-2];
784 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
785 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
786 if(IS_INTERLACED(mb_types[topright_xy])){
787 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
788 const int x4 = X4, y4 = Y4;\
789 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
790 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
791 return LIST_NOT_USED;\
792 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
793 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
794 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
795 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
797 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
800 if(topright_ref == PART_NOT_AVAILABLE
801 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
802 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
804 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
805 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
808 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
810 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
811 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
817 if(topright_ref != PART_NOT_AVAILABLE){
818 *C= h->mv_cache[list][ i - 8 + part_width ];
821 tprintf(s->avctx, "topright MV not available\n");
823 *C= h->mv_cache[list][ i - 8 - 1 ];
824 return h->ref_cache[list][ i - 8 - 1 ];
829 * gets the predicted MV.
830 * @param n the block index
831 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
832 * @param mx the x component of the predicted motion vector
833 * @param my the y component of the predicted motion vector
835 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
836 const int index8= scan8[n];
837 const int top_ref= h->ref_cache[list][ index8 - 8 ];
838 const int left_ref= h->ref_cache[list][ index8 - 1 ];
839 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
840 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
842 int diagonal_ref, match_count;
844 assert(part_width==1 || part_width==2 || part_width==4);
854 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
855 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
856 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
857 if(match_count > 1){ //most common
858 *mx= mid_pred(A[0], B[0], C[0]);
859 *my= mid_pred(A[1], B[1], C[1]);
860 }else if(match_count==1){
864 }else if(top_ref==ref){
872 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
876 *mx= mid_pred(A[0], B[0], C[0]);
877 *my= mid_pred(A[1], B[1], C[1]);
881 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
885 * gets the directionally predicted 16x8 MV.
886 * @param n the block index
887 * @param mx the x component of the predicted motion vector
888 * @param my the y component of the predicted motion vector
890 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
892 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
893 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
895 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
903 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
904 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
906 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
916 pred_motion(h, n, 4, list, ref, mx, my);
920 * gets the directionally predicted 8x16 MV.
921 * @param n the block index
922 * @param mx the x component of the predicted motion vector
923 * @param my the y component of the predicted motion vector
925 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
927 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
928 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
930 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
941 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
943 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
945 if(diagonal_ref == ref){
953 pred_motion(h, n, 2, list, ref, mx, my);
956 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
957 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
958 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
960 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
962 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
963 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
964 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
970 pred_motion(h, 0, 4, 0, 0, mx, my);
975 static inline void direct_dist_scale_factor(H264Context * const h){
976 const int poc = h->s.current_picture_ptr->poc;
977 const int poc1 = h->ref_list[1][0].poc;
979 for(i=0; i<h->ref_count[0]; i++){
980 int poc0 = h->ref_list[0][i].poc;
981 int td = av_clip(poc1 - poc0, -128, 127);
982 if(td == 0 /* FIXME || pic0 is a long-term ref */){
983 h->dist_scale_factor[i] = 256;
985 int tb = av_clip(poc - poc0, -128, 127);
986 int tx = (16384 + (FFABS(td) >> 1)) / td;
987 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
991 for(i=0; i<h->ref_count[0]; i++){
992 h->dist_scale_factor_field[2*i] =
993 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/*
 * Stores the reference counts and reference POCs of the current slice in the
 * current picture, then builds h->map_col_to_list0[][]: for every reference
 * recorded in the first list 1 picture (ref1), the index of the entry with
 * the same POC in the current reference list. A second table,
 * h->map_col_to_list0_field[][], holds the same mapping with both source and
 * destination indices doubled (2*i -> 2*j, 2*i+1 -> 2*j+1).
 */
997 static inline void direct_ref_list_init(H264Context * const h){
998 MpegEncContext * const s = &h->s;
999 Picture * const ref1 = &h->ref_list[1][0];
1000 Picture * const cur = s->current_picture_ptr;
1002 if(cur->pict_type == I_TYPE)
1003 cur->ref_count[0] = 0;
1004 if(cur->pict_type != B_TYPE)
1005 cur->ref_count[1] = 0;
/* remember this picture's reference lists (count and POC of every entry) */
1006 for(list=0; list<2; list++){
1007 cur->ref_count[list] = h->ref_count[list];
1008 for(j=0; j<h->ref_count[list]; j++)
1009 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* NOTE(review): presumably the mapping below is skipped unless this is a B picture using temporal direct - confirm */
1011 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
/* match every reference of ref1 against the current list by POC */
1013 for(list=0; list<2; list++){
1014 for(i=0; i<ref1->ref_count[list]; i++){
1015 const int poc = ref1->ref_poc[list][i];
1016 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1017 for(j=0; j<h->ref_count[list]; j++)
1018 if(h->ref_list[list][j].poc == poc){
1019 h->map_col_to_list0[list][i] = j;
/* field variant of the mapping */
1025 for(list=0; list<2; list++){
1026 for(i=0; i<ref1->ref_count[list]; i++){
1027 j = h->map_col_to_list0[list][i];
1028 h->map_col_to_list0_field[list][2*i] = 2*j;
1029 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/*
 * Derives reference indices and motion vectors for a direct-predicted
 * macroblock (or for its direct 8x8 sub-blocks when *mb_type is already 8x8)
 * and writes them into h->ref_cache / h->mv_cache. *mb_type and
 * h->sub_mb_type[] are updated with the partitioning that was chosen.
 *
 * The co-located data (macroblock type, motion vectors, reference indices)
 * is read from the first list 1 reference picture, h->ref_list[1][0].
 * Two modes exist, selected by h->direct_spatial_mv_pred:
 *  - spatial: references and vectors come from the neighbouring blocks; a
 *    vector is forced to zero where the co-located block has reference 0 and
 *    a vector whose components are both within [-1, 1];
 *  - temporal: the co-located vector is scaled with dist_scale_factor[] to
 *    give the list 0 vector, and the list 1 vector is the list 0 vector minus
 *    the co-located one.
 */
1035 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1036 MpegEncContext * const s = &h->s;
/* position of the current macroblock in macroblock, 8x8 block and 4x4 block units */
1037 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1038 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1039 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* co-located macroblock type, motion vectors and reference indices */
1040 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1041 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1042 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1043 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1044 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1045 const int is_b8x8 = IS_8X8(*mb_type);
1046 unsigned int sub_mb_type;
/* choose the partitioning from the co-located macroblock type */
1049 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1050 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1051 /* FIXME save sub mb types from previous frames (or derive from MVs)
1052 * so we know exactly what block size to use */
1053 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1055 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1056 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1057 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1059 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1060 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1063 *mb_type |= MB_TYPE_DIRECT2;
1065 *mb_type |= MB_TYPE_INTERLACED;
1067 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1069 if(h->direct_spatial_mv_pred){
1074 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1076 /* ref = min(neighbors) */
1077 for(list=0; list<2; list++){
/* cached reference indices of the left (a), top (b) and top-right (c) neighbours */
1078 int refa = h->ref_cache[list][scan8[0] - 1];
1079 int refb = h->ref_cache[list][scan8[0] - 8];
1080 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* alternative source for c: the top-left neighbour */
1082 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* negative indices are ignored when taking the minimum */
1084 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1086 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* neither list has a usable neighbour reference: reference 0 and zero vectors on both lists */
1092 if(ref[0] < 0 && ref[1] < 0){
1093 ref[0] = ref[1] = 0;
1094 mv[0][0] = mv[0][1] =
1095 mv[1][0] = mv[1][1] = 0;
1097 for(list=0; list<2; list++){
1099 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1101 mv[list][0] = mv[list][1] = 0;
/* drop the prediction flag of a list that ended up without a reference */
1106 *mb_type &= ~MB_TYPE_P0L1;
1107 sub_mb_type &= ~MB_TYPE_P0L1;
1108 }else if(ref[0] < 0){
1109 *mb_type &= ~MB_TYPE_P0L0;
1110 sub_mb_type &= ~MB_TYPE_P0L0;
/* 16x16: one reference and one vector per list for the whole macroblock */
1113 if(IS_16X16(*mb_type)){
1116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1117 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* test on the co-located block: inter, reference 0, both vector components within [-1, 1] */
1118 if(!IS_INTRA(mb_type_col)
1119 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1120 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1121 && (h->x264_build>33 || !h->x264_build)))){
1123 a= pack16to32(mv[0][0],mv[0][1]);
1125 b= pack16to32(mv[1][0],mv[1][1]);
1127 a= pack16to32(mv[0][0],mv[0][1]);
1128 b= pack16to32(mv[1][0],mv[1][1]);
1130 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1131 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* 8x8: each direct sub-block is handled on its own */
1133 for(i8=0; i8<4; i8++){
1134 const int x8 = i8&1;
1135 const int y8 = i8>>1;
/* in an explicitly coded 8x8 macroblock only the sub-blocks marked direct are processed */
1137 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1139 h->sub_mb_type[i8] = sub_mb_type;
1141 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1142 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1143 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1144 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* same co-located test as above, per 8x8 block */
1147 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1148 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1149 && (h->x264_build>33 || !h->x264_build)))){
1150 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1151 if(IS_SUB_8X8(sub_mb_type)){
/* a single co-located vector is read for the 8x8 block: the corner 4x4 block of the macroblock (index 0 or 3 in each direction) */
1152 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1153 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1155 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* otherwise each 4x4 block is tested against its own co-located vector */
1160 for(i4=0; i4<4; i4++){
1161 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1162 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1164 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1166 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction ---- */
1172 }else{ /* direct temporal mv pred */
1173 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1174 const int *dist_scale_factor = h->dist_scale_factor;
/* interlaced macroblocks use the field variants of both tables */
1177 if(IS_INTERLACED(*mb_type)){
1178 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1179 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1180 dist_scale_factor = h->dist_scale_factor_field;
/* current and co-located macroblock differ in frame/field coding */
1182 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1183 /* FIXME assumes direct_8x8_inference == 1 */
/* first macroblock of the vertical macroblock pair (mb_y rounded down to even) */
1184 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1185 int mb_types_col[2];
1188 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1189 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1190 | (*mb_type & MB_TYPE_INTERLACED);
1191 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1193 if(IS_INTERLACED(*mb_type)){
1194 /* frame to field scaling */
1195 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* step the co-located pointers back by one macroblock row (2 rows of 8x8 blocks, 4 rows of 4x4 blocks) */
1198 l1ref0 -= 2*h->b8_stride;
1199 l1ref1 -= 2*h->b8_stride;
1200 l1mv0 -= 4*h->b_stride;
1201 l1mv1 -= 4*h->b_stride;
1205 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1206 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1208 *mb_type |= MB_TYPE_16x8;
1210 *mb_type |= MB_TYPE_8x8;
1212 /* field to frame scaling */
1213 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1214 * but in MBAFF, top and bottom POC are equal */
1215 int dy = (s->mb_y&1) ? 1 : 2;
1217 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1218 l1ref0 += dy*h->b8_stride;
1219 l1ref1 += dy*h->b8_stride;
1220 l1mv0 += 2*dy*h->b_stride;
1221 l1mv1 += 2*dy*h->b_stride;
1224 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1226 *mb_type |= MB_TYPE_16x16;
1228 *mb_type |= MB_TYPE_8x8;
1231 for(i8=0; i8<4; i8++){
1232 const int x8 = i8&1;
1233 const int y8 = i8>>1;
1235 const int16_t (*l1mv)[2]= l1mv0;
1237 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1239 h->sub_mb_type[i8] = sub_mb_type;
/* the list 1 reference index is 0 in temporal direct mode */
1241 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* intra co-located macroblock: reference 0 and zero vectors on both lists */
1242 if(IS_INTRA(mb_types_col[y8])){
1243 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1244 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1245 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map the co-located reference index to the current list 0; y_shift adjusts row and index for the frame/field mismatch */
1249 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1251 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1253 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1256 scale = dist_scale_factor[ref0];
1257 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1260 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
/* vertical component rescaled for the frame/field mismatch before the temporal scaling */
1261 int my_col = (mv_col[1]<<y_shift)/2;
1262 int mx = (scale * mv_col[0] + 128) >> 8;
1263 int my = (scale * my_col + 128) >> 8;
1264 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1265 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1272 /* one-to-one mv scaling */
1274 if(IS_16X16(*mb_type)){
1277 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col)){
/* use the co-located list 0 reference when it exists, the list 1 reference otherwise */
1281 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1282 : map_col_to_list0[1][l1ref1[0]];
1283 const int scale = dist_scale_factor[ref0];
1284 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* list 0 vector = (scale * co-located vector + 128) >> 8; list 1 vector = list 0 vector - co-located vector */
1286 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1287 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1289 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1290 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1292 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1293 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1294 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1296 for(i8=0; i8<4; i8++){
1297 const int x8 = i8&1;
1298 const int y8 = i8>>1;
1300 const int16_t (*l1mv)[2]= l1mv0;
1302 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1304 h->sub_mb_type[i8] = sub_mb_type;
1305 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1306 if(IS_INTRA(mb_type_col)){
1307 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1308 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1309 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1313 ref0 = l1ref0[x8 + y8*h->b8_stride];
1315 ref0 = map_col_to_list0[0][ref0];
1317 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1320 scale = dist_scale_factor[ref0];
1322 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1323 if(IS_SUB_8X8(sub_mb_type)){
/* one co-located vector for the whole 8x8 block */
1324 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1325 int mx = (scale * mv_col[0] + 128) >> 8;
1326 int my = (scale * mv_col[1] + 128) >> 8;
1327 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1328 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
/* one co-located vector per 4x4 block */
1330 for(i4=0; i4<4; i4++){
1331 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1332 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1333 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1334 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1335 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1336 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/*
 * Copies the motion data of the current macroblock from the per-macroblock
 * caches (mv_cache, mvd_cache, ref_cache) into the picture-wide tables
 * (current_picture.motion_val, h->mvd_table, current_picture.ref_index), and
 * records which 8x8 sub-blocks are direct in h->direct_table.
 */
1343 static inline void write_back_motion(H264Context *h, int mb_type){
1344 MpegEncContext * const s = &h->s;
/* position of this macroblock in 4x4 block and 8x8 block units */
1345 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1346 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* list 0 unused: mark all four 8x8 reference indices of list 0 as LIST_NOT_USED */
1349 if(!USES_LIST(mb_type, 0))
1350 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1352 for(list=0; list<h->list_count; list++){
1354 if(!USES_LIST(mb_type, list))
/* one row of motion vectors, moved with two 64 bit copies (cache columns 0..1 and 2..3) */
1358 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1359 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* the mvd table is only written when CABAC is in use */
1361 if( h->pps.cabac ) {
/* skipped macroblocks get an all-zero 4x4 mvd area */
1362 if(IS_SKIP(mb_type))
1363 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1366 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1367 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block, taken from cache positions scan8[0], [4], [8] and [12] */
1372 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1373 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1374 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1375 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1376 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B slices with CABAC: store the direct flag of sub-blocks 1..3 of an 8x8 macroblock */
1380 if(h->slice_type == B_TYPE && h->pps.cabac){
1381 if(IS_8X8(mb_type)){
1382 uint8_t *direct_table = &h->direct_table[b8_xy];
1383 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1384 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1385 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1391 * Decodes a network abstraction layer unit.
1392 * @param consumed is the number of bytes used as input
1393 * @param length is the length of the array
1394 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1395 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Parses the one byte NAL header into h->nal_ref_idc / h->nal_unit_type and
 * scans the payload for 00 00 0x byte sequences (x <= 3). Without any such
 * sequence *dst_length is set to length and *consumed to length + 1. Otherwise
 * the payload is copied into h->rbsp_buffer[] with the escape bytes removed.
 */
1397 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1402 // src[0]&0x80; //forbidden bit
/* NAL header: the top 3 bits go to nal_ref_idc, the low 5 bits to nal_unit_type */
1403 h->nal_ref_idc= src[0]>>5;
1404 h->nal_unit_type= src[0]&0x1F;
/* hex dump of the input */
1408 for(i=0; i<length; i++)
1409 printf("%2X ", src[i]);
/* look for a zero byte, inspecting every second position; a hit steps back one byte when the previous byte is zero as well */
1411 for(i=0; i+1<length; i+=2){
1412 if(src[i]) continue;
1413 if(i>0 && src[i-1]==0) i--;
1414 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1416 /* startcode, so we must be past the end */
1423 if(i>=length-1){ //no escaped 0
1424 *dst_length= length;
1425 *consumed= length+1; //+1 for the header
1429 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/* (re)allocate the unescape buffer with room for length bytes */
1430 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1431 dst= h->rbsp_buffer[bufidx];
1437 //printf("decoding esc\n");
1440 //remove escapes (very rare 1:2^22)
1441 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1442 if(src[si+2]==3){ //escape
1447 }else //next start code
/* ordinary byte: copy it through */
1451 dst[di++]= src[si++];
1455 *consumed= si + 1;//+1 for the header
1456 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1461 * identifies the exact end of the bitstream
1462 * @return the length of the trailing, or 0 if damaged
/*
 * Examines the trailing bits at src; the inspected value v is printed as a
 * trace message below.
 * NOTE(review): the computation of v and of the return value is not shown on
 * the lines below - confirm against the full body.
 */
1464 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1468 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1478 * idct transforms the 16 dc values and dequantizes them.
1479 * @param qp quantization parameter
/*
 * In-place inverse transform and dequantization of the 16 luma DC
 * coefficients stored in block. Two butterfly passes are performed, the first
 * into temp[], the second back into block; every result is multiplied by
 * qmul and rounded with (x + 128) >> 8.
 * The DC values are addressed through the x_offset / y_offset tables, which
 * are expressed in units of stride.
 */
1481 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1484 int temp[16]; //FIXME check if this is a good idea
1485 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1486 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1488 //memset(block, 64, 2*256);
/* first pass: sums and differences of the four values selected by y_offset[i] */
1491 const int offset= y_offset[i];
1492 const int z0= block[offset+stride*0] + block[offset+stride*4];
1493 const int z1= block[offset+stride*0] - block[offset+stride*4];
1494 const int z2= block[offset+stride*1] - block[offset+stride*5];
1495 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: same butterfly on temp[], then scaling by qmul with rounding */
1504 const int offset= x_offset[i];
1505 const int z0= temp[4*0+i] + temp[4*2+i];
1506 const int z1= temp[4*0+i] - temp[4*2+i];
1507 const int z2= temp[4*1+i] - temp[4*3+i];
1508 const int z3= temp[4*1+i] + temp[4*3+i];
1510 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1511 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1512 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1513 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1519 * dct transforms the 16 dc values.
1520 * @param qp quantization parameter ??? FIXME
/*
 * In-place forward transform of the 16 luma DC coefficients stored in block.
 * It uses the same two butterfly passes and the same x_offset / y_offset
 * addressing as the inverse transform, but no quantizer scaling: each result
 * is simply halved (>> 1).
 */
1522 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1523 // const int qmul= dequant_coeff[qp][0];
1525 int temp[16]; //FIXME check if this is a good idea
1526 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1527 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: sums and differences of the four values selected by y_offset[i] */
1530 const int offset= y_offset[i];
1531 const int z0= block[offset+stride*0] + block[offset+stride*4];
1532 const int z1= block[offset+stride*0] - block[offset+stride*4];
1533 const int z2= block[offset+stride*1] - block[offset+stride*5];
1534 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: same butterfly on temp[], results halved */
1543 const int offset= x_offset[i];
1544 const int z0= temp[4*0+i] + temp[4*2+i];
1545 const int z1= temp[4*0+i] - temp[4*2+i];
1546 const int z2= temp[4*1+i] - temp[4*3+i];
1547 const int z3= temp[4*1+i] + temp[4*3+i];
1549 block[stride*0 +offset]= (z0 + z3)>>1;
1550 block[stride*2 +offset]= (z1 + z2)>>1;
1551 block[stride*8 +offset]= (z1 - z2)>>1;
1552 block[stride*10+offset]= (z0 - z3)>>1;
/*
 * In-place 2x2 inverse transform and dequantization of the four chroma DC
 * coefficients in block. The values sit at offsets 0, 16, 32 and 48
 * (xStride 16, stride 32); every result is multiplied by qmul and shifted
 * right by 7.
 */
1560 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1561 const int stride= 16*2;
1562 const int xStride= 16;
/* load the 2x2 DC values */
1565 a= block[stride*0 + xStride*0];
1566 b= block[stride*0 + xStride*1];
1567 c= block[stride*1 + xStride*0];
1568 d= block[stride*1 + xStride*1];
/* NOTE(review): a, b, c and e are presumably recombined between the load above and the store below - confirm */
1575 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1576 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1577 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1578 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/*
 * In-place 2x2 forward transform of the four chroma DC coefficients in block
 * (offsets 0, 16, 32 and 48); the results are stored without any scaling.
 */
1582 static void chroma_dc_dct_c(DCTELEM *block){
1583 const int stride= 16*2;
1584 const int xStride= 16;
/* load the 2x2 DC values */
1587 a= block[stride*0 + xStride*0];
1588 b= block[stride*0 + xStride*1];
1589 c= block[stride*1 + xStride*0];
1590 d= block[stride*1 + xStride*1];
/* NOTE(review): a, b, c and e are presumably recombined between the load above and the store below - confirm */
1597 block[stride*0 + xStride*0]= (a+c);
1598 block[stride*0 + xStride*1]= (e+b);
1599 block[stride*1 + xStride*0]= (a-c);
1600 block[stride*1 + xStride*1]= (e-b);
1605 * gets the chroma qp.
/*
 * Looks up the chroma quantizer for qscale in table t of
 * h->pps.chroma_qp_table. qscale is masked to its low 8 bits before it is
 * used as an index.
 */
1607 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1608 return h->pps.chroma_qp_table[t][qscale & 0xff];
1611 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1612 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/*
 * Quantizes the coefficients of block with the quant_coeff[] row selected by
 * qscale, visiting them in scantable order, and returns last_non_zero.
 * intra selects the rounding bias (one third of the quantization step for
 * intra, one sixth otherwise). The DC coefficient has two dedicated code
 * paths with their own bias and shift; NOTE(review): presumably chosen via
 * separate_dc - confirm.
 */
1613 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1615 const int * const quant_table= quant_coeff[qscale];
1616 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* a level in [-threshold1, threshold1] fails the single unsigned comparison used below */
1617 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1618 const unsigned int threshold2= (threshold1<<1);
/* DC path using the table of qscale+18 and a shift of QUANT_SHIFT-2 */
1624 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1625 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1626 const unsigned int dc_threshold2= (dc_threshold1<<1);
1628 int level= block[0]*quant_coeff[qscale+18][0];
1629 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1631 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1634 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1637 // last_non_zero = i;
/* DC path using the regular table and a shift of QUANT_SHIFT+1 */
1642 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1643 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1644 const unsigned int dc_threshold2= (dc_threshold1<<1);
1646 int level= block[0]*quant_table[0];
1647 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1649 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1652 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1655 // last_non_zero = i;
/* remaining coefficients, in scan order */
1668 const int j= scantable[i];
1669 int level= block[j]*quant_table[j];
1671 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1672 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1673 if(((unsigned)(level+threshold1))>threshold2){
1675 level= (bias + level)>>QUANT_SHIFT;
1678 level= (bias - level)>>QUANT_SHIFT;
1687 return last_non_zero;
/*
 * Motion compensation of one partition from a single reference picture.
 * The vector of block n in list `list` is taken from h->mv_cache, the luma
 * prediction is produced with the qpix_op[] function selected by the
 * fractional part of the vector, and both chroma planes are produced with
 * chroma_op. Whenever the referenced area is not fully inside the picture
 * (plus the emulated edge), the source is first copied into
 * s->edge_emu_buffer by ff_emulated_edge_mc().
 */
1690 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1691 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1692 int src_x_offset, int src_y_offset,
1693 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1694 MpegEncContext * const s = &h->s;
/* source position: cached vector plus the block offset multiplied by 8 */
1695 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1696 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* the low two bits of each component select one of the 16 qpix_op[] functions */
1697 const int luma_xy= (mx&3) + ((my&3)<<2);
1698 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1699 uint8_t * src_cb, * src_cr;
1700 int extra_width= h->emu_edge_width;
1701 int extra_height= h->emu_edge_height;
1703 const int full_mx= mx>>2;
1704 const int full_my= my>>2;
1705 const int pic_width = 16*s->mb_width;
1706 const int pic_height = 16*s->mb_height >> MB_FIELD;
1708 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* the usable edge margin shrinks by 3 when the low three bits of a vector component are not all zero */
1711 if(mx&7) extra_width -= 3;
1712 if(my&7) extra_height -= 3;
/* luma source area (partly) outside picture + margin: build it in the edge emulation buffer */
1714 if( full_mx < 0-extra_width
1715 || full_my < 0-extra_height
1716 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1717 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1719 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1723 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* second luma call, shifted by delta in both destination and source */
1725 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* chroma is not processed in gray-only decoding */
1728 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1731 // chroma offset when predicting from a field of opposite parity
1732 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1733 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
/* chroma source: integer position is the vector >> 3, the fraction (& 7) is passed to chroma_op */
1735 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1736 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1739 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1740 src_cb= s->edge_emu_buffer;
1742 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1745 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1746 src_cr= s->edge_emu_buffer;
1748 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/*
 * Unweighted motion compensation of one partition. The destination pointers
 * are advanced to the partition, then mc_dir_part() is called for list 0
 * and/or list 1. The first call uses the put functions; chroma_op is switched
 * to chroma_avg between the two calls (NOTE(review): presumably qpix_op is
 * switched to qpix_avg likewise, so a second prediction is averaged in -
 * confirm).
 */
1751 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1752 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753 int x_offset, int y_offset,
1754 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756 int list0, int list1){
1757 MpegEncContext * const s = &h->s;
1758 qpel_mc_func *qpix_op= qpix_put;
1759 h264_chroma_mc_func chroma_op= chroma_put;
/* the offsets are applied doubled to luma and as given to chroma */
1761 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1762 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1763 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* make the offsets relative to the picture instead of the macroblock */
1764 x_offset += 8*s->mb_x;
1765 y_offset += 8*(s->mb_y >> MB_FIELD);
/* list 0 prediction */
1768 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1769 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1770 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1771 qpix_op, chroma_op);
1774 chroma_op= chroma_avg;
/* list 1 prediction */
1778 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1779 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1780 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1781 qpix_op, chroma_op);
/*
 * Weighted motion compensation of one partition.
 * Bi-predicted case: the list 0 prediction is written to the destination,
 * the list 1 prediction to temporary areas inside s->obmc_scratchpad, and the
 * two are combined with the biweight functions, using either the implicit
 * weights (h->use_weight == 2) or the explicit weights and offsets.
 * Single-list case: the prediction is written to the destination and then
 * weighted in place with luma_weight_op / chroma_weight_op.
 */
1785 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1786 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 int x_offset, int y_offset,
1788 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1789 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1790 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1791 int list0, int list1){
1792 MpegEncContext * const s = &h->s;
/* advance the destinations to the partition, then make the offsets picture-relative */
1794 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1795 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1796 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1797 x_offset += 8*s->mb_x;
1798 y_offset += 8*(s->mb_y >> MB_FIELD);
1801 /* don't optimize for luma-only case, since B-frames usually
1802 * use implicit weights => chroma too. */
/* scratchpad layout: cb at offset 0, cr at offset 8, luma after 8 chroma lines */
1803 uint8_t *tmp_cb = s->obmc_scratchpad;
1804 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1805 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1806 int refn0 = h->ref_cache[0][ scan8[n] ];
1807 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list 0 goes to the destination, list 1 to the scratch areas */
1809 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1810 dest_y, dest_cb, dest_cr,
1811 x_offset, y_offset, qpix_put, chroma_put);
1812 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1813 tmp_y, tmp_cb, tmp_cr,
1814 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: the two weights add up to 64, log2 denominator 5, no offset */
1816 if(h->use_weight == 2){
1817 int weight0 = h->implicit_weight[refn0][refn1];
1818 int weight1 = 64 - weight0;
1819 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1820 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1821 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-reference weights, and the sum of both offsets */
1823 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1824 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1825 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1826 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1828 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1829 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1831 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* single-list prediction: predict, then weight the destination in place */
1834 int list = list1 ? 1 : 0;
1835 int refn = h->ref_cache[list][ scan8[n] ];
1836 Picture *ref= &h->ref_list[list][refn];
1837 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1838 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put, chroma_put);
1841 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1842 h->luma_weight[list][refn], h->luma_offset[list][refn]);
/* chroma is only weighted when chroma weights are in use */
1843 if(h->use_weight_chroma){
1844 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1845 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1846 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1847 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/*
 * Dispatches motion compensation of one partition to the weighted or to the
 * standard implementation. The weighted path is taken for explicit weighting
 * (h->use_weight == 1), or for implicit weighting (h->use_weight == 2) when
 * both lists are used and the implicit weight of the reference pair is not
 * 32. Entries [0] and [3] of weight_op / weight_avg are passed on as the luma
 * and chroma weight functions.
 */
1852 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1853 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1854 int x_offset, int y_offset,
1855 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1856 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1857 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1858 int list0, int list1){
1859 if((h->use_weight==2 && list0 && list1
1860 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1861 || h->use_weight==1)
1862 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1863 x_offset, y_offset, qpix_put, chroma_put,
1864 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1866 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1867 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/*
 * Issues prefetches into the reference picture of block 0 for the given
 * list: one s->dsp.prefetch() call on the luma plane and one that starts in
 * the first chroma plane, using the distance between the two chroma planes
 * as its stride.
 */
1870 static inline void prefetch_motion(H264Context *h, int list){
1871 /* fetch pixels for estimated mv 4 macroblocks ahead
1872 * optimized for 64byte cache lines */
1873 MpegEncContext * const s = &h->s;
1874 const int refn = h->ref_cache[list][scan8[0]];
/* integer part of the block 0 vector plus the macroblock position (x gets an extra 8) */
1876 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1877 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1878 uint8_t **src= h->ref_list[list][refn].data;
/* the row offset depends on mb_x & 3, so consecutive macroblocks prefetch different rows; + 64 looks ahead horizontally */
1879 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1880 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1881 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1882 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Inter prediction of the current macroblock. Depending on the partitioning
 * (16x16, 16x8, 8x16, or 8x8 with sub-partitions 8x8 / 8x4 / 4x8 / 4x4),
 * mc_part() is called once per partition with the function tables matching
 * the partition size. prefetch_motion() is called for list 0 before and for
 * list 1 after the prediction.
 *
 * The mc_part() arguments following h are: block index n, square flag,
 * chroma height, delta, the three destination pointers, x / y offset, the
 * put / avg function tables, the weight function tables, and finally the
 * list 0 / list 1 usage flags of the partition.
 */
1886 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1887 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1888 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1889 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1890 MpegEncContext * const s = &h->s;
1891 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1892 const int mb_type= s->current_picture.mb_type[mb_xy];
1894 assert(IS_INTER(mb_type));
1896 prefetch_motion(h, 0);
/* one 16x16 partition */
1898 if(IS_16X16(mb_type)){
1899 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1900 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1901 &weight_op[0], &weight_avg[0],
1902 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* two 16x8 partitions: blocks 0 and 8, the second at y offset 4 */
1903 }else if(IS_16X8(mb_type)){
1904 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1905 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1906 &weight_op[1], &weight_avg[1],
1907 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1908 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1909 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1910 &weight_op[1], &weight_avg[1],
1911 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* two 8x16 partitions: blocks 0 and 4, the second at x offset 4 */
1912 }else if(IS_8X16(mb_type)){
1913 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1914 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1915 &weight_op[2], &weight_avg[2],
1916 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1917 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1918 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1919 &weight_op[2], &weight_avg[2],
1920 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* four 8x8 blocks, each with its own sub-partitioning */
1924 assert(IS_8X8(mb_type));
1927 const int sub_mb_type= h->sub_mb_type[i];
/* offset of 8x8 block i inside the macroblock: 0 or 4 in each direction */
1929 int x_offset= (i&1)<<2;
1930 int y_offset= (i&2)<<1;
1932 if(IS_SUB_8X8(sub_mb_type)){
1933 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1934 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1935 &weight_op[3], &weight_avg[3],
1936 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* two 8x4 sub-partitions: blocks n and n+2, the second at y offset + 2 */
1937 }else if(IS_SUB_8X4(sub_mb_type)){
1938 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1939 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1940 &weight_op[4], &weight_avg[4],
1941 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1942 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1943 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1944 &weight_op[4], &weight_avg[4],
1945 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* two 4x8 sub-partitions: blocks n and n+1, the second at x offset + 2 */
1946 }else if(IS_SUB_4X8(sub_mb_type)){
1947 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1948 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1949 &weight_op[5], &weight_avg[5],
1950 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1951 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1952 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1953 &weight_op[5], &weight_avg[5],
1954 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* four 4x4 sub-partitions: block n+j at offset (2*(j&1), j&2) inside the 8x8 block */
1957 assert(IS_SUB_4X4(sub_mb_type));
1959 int sub_x_offset= x_offset + 2*(j&1);
1960 int sub_y_offset= y_offset + (j&2);
1961 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1962 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1963 &weight_op[6], &weight_avg[6],
1964 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1970 prefetch_motion(h, 1);
/*
 * Builds the file-scope VLC tables (coefficient tokens, total zeros and run
 * lengths, plus their chroma DC variants) from the corresponding length and
 * bits tables by calling init_vlc() for each of them.
 * NOTE(review): the static `done` flag presumably makes repeated calls a
 * no-op - confirm.
 */
1973 static void decode_init_vlc(void){
1974 static int done = 0;
/* chroma DC coefficient token table: 4*5 codes */
1980 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1981 &chroma_dc_coeff_token_len [0], 1, 1,
1982 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* coefficient token tables: 4*17 codes each */
1985 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1986 &coeff_token_len [i][0], 1, 1,
1987 &coeff_token_bits[i][0], 1, 1, 1);
/* chroma DC total_zeros tables: 4 codes each */
1991 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1992 &chroma_dc_total_zeros_len [i][0], 1, 1,
1993 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
/* total_zeros tables: 15 tables of 16 codes */
1995 for(i=0; i<15; i++){
1996 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1997 &total_zeros_len [i][0], 1, 1,
1998 &total_zeros_bits[i][0], 1, 1, 1);
/* run tables: 7 codes each */
2002 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2003 &run_len [i][0], 1, 1,
2004 &run_bits[i][0], 1, 1, 1);
/* separate 16 code run table built from row 6 of run_len / run_bits */
2006 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2007 &run_len [6][0], 1, 1,
2008 &run_bits[6][0], 1, 1, 1);
/*
 * Release everything alloc_tables() and context_init() allocate: the
 * per-macroblock tables, the mb->block index maps, the stored SPS/PPS
 * buffers and, for every thread context, its border/scratch buffers.
 * All releases go through av_freep() on the address of the pointer.
 */
2012 static void free_tables(H264Context *h){
2015 av_freep(&h->intra4x4_pred_mode);
2016 av_freep(&h->chroma_pred_mode_table);
2017 av_freep(&h->cbp_table);
2018 av_freep(&h->mvd_table[0]);
2019 av_freep(&h->mvd_table[1]);
2020 av_freep(&h->direct_table);
2021 av_freep(&h->non_zero_count);
2022 av_freep(&h->slice_table_base);
// slice_table points into slice_table_base (set up in alloc_tables), so it is only cleared, never freed
2023 h->slice_table= NULL;
2025 av_freep(&h->mb2b_xy);
2026 av_freep(&h->mb2b8_xy);
2028 for(i = 0; i < MAX_SPS_COUNT; i++)
2029 av_freep(h->sps_buffers + i);
2031 for(i = 0; i < MAX_PPS_COUNT; i++)
2032 av_freep(h->pps_buffers + i);
// per-thread buffers; NOTE(review): any NULL check on hx is not visible in this excerpt
2034 for(i = 0; i < h->s.avctx->thread_count; i++) {
2035 hx = h->thread_context[i];
2037 av_freep(&hx->top_borders[1]);
2038 av_freep(&hx->top_borders[0]);
2039 av_freep(&hx->s.obmc_scratchpad);
2040 av_freep(&hx->s.allocated_edge_emu_buffer);
/*
 * Fill the two 8x8 dequantisation tables (one per pps.scaling_matrix8 entry)
 * for every qp in 0..51. Each entry is
 *   dequant8_coeff_init[qp%6-like idx][position class] * scaling_matrix8[i][x] << ff_div6[qp]
 * where idx comes from ff_rem6[q] and the shift from ff_div6[q].
 * When the active 8x8 IDCT is not the C reference function the coefficients
 * are stored transposed (row and column of x swapped).
 */
2044 static void init_dequant8_coeff_table(H264Context *h){
2046 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2047 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2048 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2050 for(i=0; i<2; i++ ){
// identical scaling matrices: let table 1 alias buffer 0 instead of computing it again
2051 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2052 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2056 for(q=0; q<52; q++){
2057 int shift = ff_div6[q];
2058 int idx = ff_rem6[q];
// (x>>3)|((x&7)<<3) swaps the row and column of an index into an 8x8 block
2060 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2061 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2062 h->pps.scaling_matrix8[i][x]) << shift;
/*
 * Fill the six 4x4 dequantisation tables (one per pps.scaling_matrix4 entry)
 * for every qp in 0..51. A table whose scaling matrix equals that of an
 * earlier index j reuses dequant4_buffer[j]. Entries are
 *   dequant4_coeff_init[ff_rem6[q]][position class] * scaling_matrix4[i][x] << (ff_div6[q] + 2)
 * and are stored transposed when the active 4x4 IDCT is not the C reference
 * function.
 */
2067 static void init_dequant4_coeff_table(H264Context *h){
2069 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2070 for(i=0; i<6; i++ ){
2071 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// NOTE(review): the loop over j that encloses this comparison is not visible in this excerpt
2073 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2074 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2081 for(q=0; q<52; q++){
2082 int shift = ff_div6[q] + 2;
2083 int idx = ff_rem6[q];
// (x>>2)|((x<<2)&0xF) swaps the row and column of an index into a 4x4 block
2085 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2086 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2087 h->pps.scaling_matrix4[i][x]) << shift;
/*
 * (Re)build all dequantisation tables for the current PPS: always the 4x4
 * tables, and the 8x8 tables only when pps.transform_8x8_mode is set.
 * With sps.transform_bypass the qp==0 row of each table is overwritten with
 * the constant 1<<6.
 */
2092 static void init_dequant_tables(H264Context *h){
2094 init_dequant4_coeff_table(h);
2095 if(h->pps.transform_8x8_mode)
2096 init_dequant8_coeff_table(h);
2097 if(h->sps.transform_bypass){
2100 h->dequant4_coeff[i][0][x] = 1<<6;
2101 if(h->pps.transform_8x8_mode)
2104 h->dequant8_coeff[i][0][x] = 1<<6;
2111 * needs width/height
/*
 * Allocate the per-macroblock tables for the picture size held in h->s.
 * Tables are sized from big_mb_num = mb_stride * (mb_height + 1).
 * The three CABAC-only tables are allocated only when pps.cabac is set.
 * Also builds the macroblock -> 4x4-block / 8x8-block index maps and makes
 * sure the dequantisation tables exist.
 * NOTE(review): CHECKED_ALLOCZ presumably jumps to a failure label on
 * allocation error; that label and the return statements are not visible in
 * this excerpt.
 */
2113 static int alloc_tables(H264Context *h){
2114 MpegEncContext * const s = &h->s;
2115 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2118 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2120 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2121 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2124 if( h->pps.cabac ) {
2125 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2126 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2127 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// every slice_table byte starts as 0xFF; slice_table itself is offset two
// rows plus one entry into the base so indices slightly before 0 stay in bounds
2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// map each macroblock index to the index of its first 4x4 block (b_stride
// units) and first 8x8 block (b8_stride units)
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
2138 const int mb_xy= x + y*s->mb_stride;
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated later in frame_start(), once linesize is known
2147 s->obmc_scratchpad = NULL;
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
2159 * Mimic alloc_tables(), but for every context thread.
/*
 * Make dst use the same tables as src by copying the table pointers (no
 * data is duplicated, so both contexts refer to the same memory).
 * dst gets no obmc scratchpad of its own here (pointer reset to NULL) and
 * has its intra prediction function table initialised for src's codec id.
 */
2161 static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
2173 dst->s.obmc_scratchpad = NULL;
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2179 * Allocate buffers which are not shared amongst multiple threads.
/*
 * Allocate the buffers owned by a single (thread) context: the two saved
 * top-border rows (mb_width entries of 16+8+8 bytes each) and the edge
 * emulation buffer.
 * The visible failure path returns -1 and relies on free_tables() to release
 * whatever was already allocated.
 */
2181 static int context_init(H264Context *h){
2182 MpegEncContext * const s = &h->s;
2184 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2185 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2187 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2188 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2189 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
// the working pointer starts at the middle of the allocation
2190 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
2193 return -1; // free_tables will clean up for us
/*
 * Initialisation shared by the entry points of this file: copy width, height
 * and codec id from the AVCodecContext, set up the intra prediction
 * functions, mark the dequant tables as belonging to no PPS
 * (dequant_coeff_pps = -1) and fill both scaling-matrix arrays with the
 * flat value 16.
 */
2196 static void common_init(H264Context *h){
2197 MpegEncContext * const s = &h->s;
2199 s->width = s->avctx->width;
2200 s->height = s->avctx->height;
2201 s->codec_id= s->avctx->codec->id;
2203 ff_h264_pred_init(&h->hpc, s->codec_id);
2205 h->dequant_coeff_pps= -1;
2206 s->unrestricted_mv=1;
2207 s->decode=1; //FIXME
// byte-wise fill: every entry of the 6 4x4 and 2 8x8 matrices becomes 16
2209 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2210 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/*
 * Decoder init callback: reset the MpegEncContext to decoder defaults, select
 * the H.264 output format, quarter-sample motion and YUV 4:2:0 output, and
 * register this context as thread context 0.
 * NOTE(review): several statements, including the body of the extradata
 * check and the return value, are not visible in this excerpt.
 */
2213 static int decode_init(AVCodecContext *avctx){
2214 H264Context *h= avctx->priv_data;
2215 MpegEncContext * const s = &h->s;
2217 MPV_decode_defaults(s);
2222 s->out_format = FMT_H264;
2223 s->workaround_bugs= avctx->workaround_bugs;
2226 // s->decode_mb= ff_h263_decode_mb;
2227 s->quarter_sample = 1;
2229 avctx->pix_fmt= PIX_FMT_YUV420P;
// extradata present and its first byte equal to 1; what is done in that case is outside the visible lines
2233 if(avctx->extradata_size > 0 && avctx->extradata &&
2234 *(char *)avctx->extradata == 1){
2241 h->thread_context[0] = h;
/*
 * Per-frame setup: start the frame in the MpegEncContext and the error
 * resilience code, clear key_frame, precompute the byte offset of every 4x4
 * block inside a macroblock for the current line sizes, lazily allocate the
 * per-thread obmc scratchpads and, for MBAFF or multi-threaded decoding,
 * reset the slice table.
 * block_offset[0..23] are the frame-mode offsets; block_offset[24..47] use a
 * doubled row step (8*linesize instead of 4*linesize) and are the ones
 * hl_decode_mb_internal selects for field macroblocks.
 */
2245 static int frame_start(H264Context *h){
2246 MpegEncContext * const s = &h->s;
2249 if(MPV_frame_start(s, s->avctx) < 0)
2251 ff_er_frame_start(s);
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
2255 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2256 * See decode_nal_units().
2258 s->current_picture_ptr->key_frame= 0;
2260 assert(s->linesize && s->uvlinesize);
// luma: column = 4 * (scan8 delta & 7), row = scan8 delta >> 3
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma: Cb (16+i) and Cr (20+i) share the same offsets
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
// NOTE(review): the av_malloc() result is stored without a NULL check in the visible code
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279 /* some macroblocks will be accessed before they're available */
// slice_table starts 2*mb_stride+1 bytes into slice_table_base, so this
// length ends exactly at the end of the (mb_height+2)*mb_stride byte allocation
2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2283 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/*
 * Save the border samples of the macroblock at s->mb_x: one column per plane
 * goes to h->left_border and one row per plane to h->top_borders[0][mb_x]
 * (luma in bytes 0..15, Cb in 16..23, Cr in 24..31).
 * The previous top-border corner sample is copied into left_border first, so
 * it is still available after the top border has been overwritten.
 * Chroma is skipped only when gray decoding is enabled and requested and
 * `simple` is 0.
 * NOTE(review): the chroma pointers are moved up one line before use; the
 * matching luma adjustment is presumably on a line not visible here.
 */
2287 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2288 MpegEncContext * const s = &h->s;
2292 src_cb -= uvlinesize;
2293 src_cr -= uvlinesize;
2295 // There are two lines saved, the line above the top macroblock of a pair,
2296 // and the line above the bottom macroblock
2297 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2298 for(i=1; i<17; i++){
2299 h->left_border[i]= src_y[15+i* linesize];
// two 8-byte copies store the 16 luma samples of row 16
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2305 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2306 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2307 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2309 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2310 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2312 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2313 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/*
 * Exchange (or, depending on the per-call flag given to XCHG, copy) the
 * samples just left of and above the current macroblock in the picture with
 * the values saved in h->left_border / h->top_borders[0].
 * hl_decode_mb_internal calls this with xchg=1 before intra prediction and
 * xchg=0 afterwards.
 * Whether the left / top neighbours take part is decided per call: with
 * deblocking_filter == 2 only neighbours with the same slice_table entry
 * count, otherwise any neighbour inside the picture.
 * NOTE(review): the XCHG macro body and the conditions guarding the top-row
 * exchanges are not visible in this excerpt.
 */
2317 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2318 MpegEncContext * const s = &h->s;
2325 if(h->deblocking_filter == 2) {
2326 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2327 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2328 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2330 deblock_left = (s->mb_x > 0);
2331 deblock_top = (s->mb_y > 0);
// step to the sample one row up and one column left of the macroblock
2334 src_y -= linesize + 1;
2335 src_cb -= uvlinesize + 1;
2336 src_cr -= uvlinesize + 1;
2338 #define XCHG(a,b,t,xchg)\
// left column; the corner entry (i == 0) is skipped when the top neighbour is not used
2345 for(i = !deblock_top; i<17; i++){
2346 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2351 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2352 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// first 8 samples above the macroblock to the right, when there is one
2353 if(s->mb_x+1 < s->mb_width){
2354 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2358 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2360 for(i = !deblock_top; i<9; i++){
2361 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2362 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2366 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2367 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/*
 * Macroblock-pair variant of backup_mb_border(): saves the border samples of
 * a vertical pair of macroblocks. Two rows per plane are kept, in
 * h->top_borders[0] and h->top_borders[1], and the left_border columns are
 * twice as long (34 luma entries, 18 per chroma plane).
 * The source pointers are first moved up two lines, so indices 32/33 (luma)
 * and 16/17 (chroma) address the last two rows of the pair.
 */
2372 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2373 MpegEncContext * const s = &h->s;
2376 src_y -= 2 * linesize;
2377 src_cb -= 2 * uvlinesize;
2378 src_cr -= 2 * uvlinesize;
2380 // There are two lines saved, the line above the top macroblock of a pair,
2381 // and the line above the bottom macroblock
2382 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2383 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2384 for(i=2; i<34; i++){
2385 h->left_border[i]= src_y[15+i* linesize];
2388 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2389 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2390 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2391 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2393 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2394 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2395 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2396 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2397 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2398 for(i=2; i<18; i++){
2399 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2400 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2402 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2403 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2404 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2405 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/*
 * Macroblock-pair variant of xchg_mb_border(): exchanges the two rows above
 * and the column left of a macroblock pair with the values saved in
 * h->top_borders[0..1] and h->left_border.
 * The top neighbour is used only when mb_y > 1, i.e. when a pair exists above
 * this one; the left neighbour when mb_x > 0.
 * NOTE(review): the XCHG macro body and the conditions guarding the top-row
 * exchanges are not visible in this excerpt.
 */
2409 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2410 MpegEncContext * const s = &h->s;
2413 int deblock_left = (s->mb_x > 0);
2414 int deblock_top = (s->mb_y > 1);
2416 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// step to the sample two rows up and one column left of the pair
2418 src_y -= 2 * linesize + 1;
2419 src_cb -= 2 * uvlinesize + 1;
2420 src_cr -= 2 * uvlinesize + 1;
2422 #define XCHG(a,b,t,xchg)\
// the two corner entries are skipped when the top neighbour is not used
2429 for(i = (!deblock_top)<<1; i<34; i++){
2430 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2435 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2437 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2439 if(s->mb_x+1 < s->mb_width){
2440 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2441 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2445 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2447 for(i = (!deblock_top) << 1; i<18; i++){
2448 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2449 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2453 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2454 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2455 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2456 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/*
 * Reconstruct the macroblock at (s->mb_x, s->mb_y) into the current picture.
 * Steps, in the order they appear below:
 *   1. compute destination pointers and line sizes (doubled for field MBs),
 *   2. choose the residual-add functions (plain pixel add for transform
 *      bypass, 8x8 or 4x4 IDCT otherwise),
 *   3. write the samples: raw copy for I_PCM, intra prediction for intra
 *      macroblocks, hl_motion() otherwise, then add the residual blocks,
 *   4. if the deblocking filter is on, save/exchange borders and filter.
 * `simple` is a compile-time constant in the two wrappers below; every test
 * written as `!simple && ...` (MBAFF/field, I_PCM, non-H.264 codec) or
 * `simple || ...` (gray) is decided by it without looking at run-time state.
 * NOTE(review): many lines (else branches, closing braces, some local
 * declarations) are missing from this excerpt; comments describe only what
 * the visible statements do.
 */
2461 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2462 MpegEncContext * const s = &h->s;
2463 const int mb_x= s->mb_x;
2464 const int mb_y= s->mb_y;
2465 const int mb_xy= mb_x + mb_y*s->mb_stride;
2466 const int mb_type= s->current_picture.mb_type[mb_xy];
2467 uint8_t *dest_y, *dest_cb, *dest_cr;
2468 int linesize, uvlinesize /*dct_offset*/;
2470 int *block_offset = &h->block_offset[0];
2471 const unsigned int bottom = mb_y & 1;
2472 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2473 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2474 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma)
2476 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2477 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2478 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2480 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2481 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: rows are two picture lines apart and the second half of block_offset is used
2483 if (!simple && MB_FIELD) {
2484 linesize = h->mb_linesize = s->linesize * 2;
2485 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2486 block_offset = &h->block_offset[24];
// bottom MB of the pair: move back 15 (luma) / 7 (chroma) lines so it starts on the line right after the top MB's first line
2487 if(mb_y&1){ //FIXME move out of this func?
2488 dest_y -= s->linesize*15;
2489 dest_cb-= s->uvlinesize*7;
2490 dest_cr-= s->uvlinesize*7;
// rewrite the cached reference indices of every used list as (16 + ref) ^ (mb_y & 1)
2494 for(list=0; list<h->list_count; list++){
2495 if(!USES_LIST(mb_type, list))
2497 if(IS_16X16(mb_type)){
2498 int8_t *ref = &h->ref_cache[list][scan8[0]];
2499 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2501 for(i=0; i<16; i+=4){
2502 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2503 int ref = h->ref_cache[list][scan8[i]];
2505 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
2511 linesize = h->mb_linesize = s->linesize;
2512 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2513 // dct_offset = s->linesize * 16;
// pick the functions that add a residual block to the destination
2516 if(transform_bypass){
2518 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2519 }else if(IS_8x8DCT(mb_type)){
2520 idct_dc_add = s->dsp.h264_idct8_dc_add;
2521 idct_add = s->dsp.h264_idct8_add;
2523 idct_dc_add = s->dsp.h264_idct_dc_add;
2524 idct_add = s->dsp.h264_idct_add;
// MBAFF intra MB with deblocking on: swap in the saved pair borders, unless this is the bottom MB of a pair whose top MB is also intra
2527 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2528 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2529 int mbt_y = mb_y&~1;
2530 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2531 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2532 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2533 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2536 if (!simple && IS_INTRA_PCM(mb_type)) {
2539 // The pixels are stored in h->mb array in the same order as levels,
2540 // copy them in output in the correct order.
2541 for(i=0; i<16; i++) {
2542 for (y=0; y<4; y++) {
2543 for (x=0; x<4; x++) {
2544 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2548 for(i=16; i<16+4; i++) {
2549 for (y=0; y<4; y++) {
2550 for (x=0; x<4; x++) {
2551 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2555 for(i=20; i<20+4; i++) {
2556 for (y=0; y<4; y++) {
2557 for (x=0; x<4; x++) {
2558 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2563 if(IS_INTRA(mb_type)){
// swap in the saved border samples for prediction; the matching call with xchg=0 follows the intra prediction code
2564 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2565 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2567 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2568 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2569 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2572 if(IS_INTRA4x4(mb_type)){
2573 if(simple || !s->encoding){
2574 if(IS_8x8DCT(mb_type)){
// intra 8x8: predict then add residual, one 8x8 block (every 4th index) at a time
2575 for(i=0; i<16; i+=4){
2576 uint8_t * const ptr= dest_y + block_offset[i];
2577 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2578 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2579 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2580 (h->topright_samples_available<<i)&0x4000, linesize);
2582 if(nnz == 1 && h->mb[i*16])
2583 idct_dc_add(ptr, h->mb + i*16, linesize);
2585 idct_add(ptr, h->mb + i*16, linesize);
// intra 4x4: predict then add residual for each of the 16 blocks
2589 for(i=0; i<16; i++){
2590 uint8_t * const ptr= dest_y + block_offset[i];
2592 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2595 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2596 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2597 assert(mb_y || linesize <= block_offset[i]);
// no top-right samples: replicate the last sample of the row above four times
2598 if(!topright_avail){
2599 tr= ptr[3 - linesize]*0x01010101;
2600 topright= (uint8_t*) &tr;
2602 topright= ptr + 4 - linesize;
2606 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2607 nnz = h->non_zero_count_cache[ scan8[i] ];
2610 if(nnz == 1 && h->mb[i*16])
2611 idct_dc_add(ptr, h->mb + i*16, linesize);
2613 idct_add(ptr, h->mb + i*16, linesize);
2615 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2620 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2622 if(!transform_bypass)
2623 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2625 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2627 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2628 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// motion-compensated prediction
2630 hl_motion(h, dest_y, dest_cb, dest_cr,
2631 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2632 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2633 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// luma residual for everything except intra 4x4 (whose residual was added block by block above)
2637 if(!IS_INTRA4x4(mb_type)){
2639 if(IS_INTRA16x16(mb_type)){
2640 for(i=0; i<16; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ])
2642 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2643 else if(h->mb[i*16])
2644 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2647 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2648 for(i=0; i<16; i+=di){
2649 int nnz = h->non_zero_count_cache[ scan8[i] ];
2651 if(nnz==1 && h->mb[i*16])
2652 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2654 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2659 for(i=0; i<16; i++){
2660 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2661 uint8_t * const ptr= dest_y + block_offset[i];
2662 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual: blocks 16..19 go to Cb, 20..23 to Cr (selected by (i&4)>>2)
2668 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2669 uint8_t *dest[2] = {dest_cb, dest_cr};
2670 if(transform_bypass){
2671 idct_add = idct_dc_add = s->dsp.add_pixels4;
2673 idct_add = s->dsp.h264_idct_add;
2674 idct_dc_add = s->dsp.h264_idct_dc_add;
2675 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2676 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2679 for(i=16; i<16+8; i++){
2680 if(h->non_zero_count_cache[ scan8[i] ])
2681 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2682 else if(h->mb[i*16])
2683 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2686 for(i=16; i<16+8; i++){
2687 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2688 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2689 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2695 if(h->deblocking_filter) {
2696 if (!simple && FRAME_MBAFF) {
2697 //FIXME try deblocking one mb at a time?
2698 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
// these locals shadow the outer mb_y/mb_xy and refer to the row above the current MB, i.e. the top MB of the pair once `bottom` is set
2699 const int mb_y = s->mb_y - 1;
2700 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2701 const int mb_xy= mb_x + mb_y*s->mb_stride;
2702 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2703 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// the pair is filtered only after its bottom macroblock has been reconstructed
2704 if (!bottom) return;
2705 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2706 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2707 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2709 if(IS_INTRA(mb_type_top | mb_type_bottom))
2710 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2712 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2716 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
// filter the top MB of the pair with its own caches and chroma qp ...
2717 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2718 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2719 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2720 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
// ... then the bottom MB
2723 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2724 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2725 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2726 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2727 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// non-MBAFF path: save the unfiltered border first, then filter this macroblock
2729 tprintf(h->s.avctx, "call filter_mb\n");
2730 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2731 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2732 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2738 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Instantiate hl_decode_mb_internal() with simple=1. */
2740 static void hl_decode_mb_simple(H264Context *h){
2741 hl_decode_mb_internal(h, 1);
2745 * Process a macroblock; this handles edge cases, such as interlacing.
/* Instantiate hl_decode_mb_internal() with simple=0; marked av_noinline. */
2747 static void av_noinline hl_decode_mb_complex(H264Context *h){
2748 hl_decode_mb_internal(h, 0);
/*
 * Reconstruct the current macroblock, dispatching to the complex or the
 * simple variant.
 * is_complex is set when any of these holds: MBAFF frame, field macroblock,
 * I_PCM macroblock, a codec other than H.264, gray-only decoding requested
 * (and compiled in), or s->encoding.
 * NOTE(review): the statement that tests is_complex is not visible in this
 * excerpt; the two calls below are presumably its two branches.
 */
2751 static void hl_decode_mb(H264Context *h){
2752 MpegEncContext * const s = &h->s;
2753 const int mb_x= s->mb_x;
2754 const int mb_y= s->mb_y;
2755 const int mb_xy= mb_x + mb_y*s->mb_stride;
2756 const int mb_type= s->current_picture.mb_type[mb_xy];
2757 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2763 hl_decode_mb_complex(h);
2764 else hl_decode_mb_simple(h);
/*
 * Turn a frame Picture into a view of one of its fields, in place.
 * For each of the 4 planes the line size is doubled, so consecutive rows skip
 * the other field; for the bottom field the data pointer is first advanced
 * by one original line. pic->reference is set to parity.
 */
2767 static void pic_as_field(Picture *pic, const int parity){
2769 for (i = 0; i < 4; ++i) {
2770 if (parity == PICT_BOTTOM_FIELD)
2771 pic->data[i] += pic->linesize[i];
// same value on every iteration; assigning it inside the loop is harmless
2772 pic->reference = parity;
2773 pic->linesize[i] *= 2;
/*
 * Produce in *dest the field of *src selected by parity.
 * `match` is 1 when src->reference has the parity bit(s) set. In the visible
 * code dest is converted with pic_as_field() and its pic_id is increased by
 * id_add.
 * NOTE(review): the copy of *src into *dest, the condition on `match` and
 * the return statement are on lines not visible in this excerpt.
 */
2777 static int split_field_copy(Picture *dest, Picture *src,
2778 int parity, int id_add){
2779 int match = !!(src->reference & parity);
2783 pic_as_field(dest, parity);
2785 dest->pic_id += id_add;
2792 * Split one reference list into field parts, interleaving by parity
2793 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2794 * set to look at the actual start of data for that field.
2796 * @param dest output list
2797 * @param dest_len maximum number of fields to put in dest
2798 * @param src the source reference list containing fields and/or field pairs
2799 * (aka short_ref/long_ref, or
2800 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2801 * @param src_len number of Picture's in source (pairs and unmatched fields)
2802 * @param parity the parity of the picture being decoded/needing
2803 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2804 * @return number of fields placed in dest
/*
 * Fill dest with fields taken from src, alternating between fields of the
 * requested parity and fields of the opposite parity (PICT_FRAME - parity).
 * Two cursors walk src independently (same_i and opp_i); `same_parity` says
 * which kind is wanted next and flips each time a field was actually output.
 * out_i advances by field_output, the result of split_field_copy(), so an
 * entry that yields no field does not consume an output slot.
 * NOTE(review): cursor increments, the final else branch and the return are
 * not visible in this excerpt.
 */
2806 static int split_field_half_ref_list(Picture *dest, int dest_len,
2807 Picture *src, int src_len, int parity){
2808 int same_parity = 1;
2814 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2815 if (same_parity && same_i < src_len) {
2816 field_output = split_field_copy(dest + out_i, src + same_i,
2818 same_parity = !field_output;
2821 } else if (opp_i < src_len) {
2822 field_output = split_field_copy(dest + out_i, src + opp_i,
2823 PICT_FRAME - parity, 0);
2824 same_parity = field_output;
2836 * Split the reference frame list into a reference field list.
2837 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2838 * The input list contains both reference field pairs and
2839 * unmatched reference fields; it is ordered as spec describes
2840 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2841 * unmatched field pairs are also present. Conceptually this is equivalent
2842 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2844 * @param dest output reference list where ordered fields are to be placed
2845 * @param dest_len max number of fields to place at dest
2846 * @param src source reference list, as described above
2847 * @param src_len number of pictures (pairs and unmatched fields) in src
2848 * @param parity parity of field being currently decoded
2849 * (one of PICT_{TOP,BOTTOM}_FIELD)
2850 * @param long_i index into src array that holds first long reference picture,
2851 * or src_len if no long refs present.
/*
 * Split a combined frame list into a field list in two passes: first the
 * entries before index long_i, then the entries from long_i up to src_len.
 * The field counts of both passes are accumulated in i.
 * NOTE(review): the lines between the two calls are not visible here;
 * presumably dest/dest_len are advanced by the first count so the second
 * pass appends rather than overwrites -- confirm against the full file.
 */
2853 static int split_field_ref_list(Picture *dest, int dest_len,
2854 Picture *src, int src_len,
2855 int parity, int long_i){
2857 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2861 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2862 src_len - long_i, parity);
2867 * fills the default_ref_list.
/*
 * Build h->default_ref_list[] for the current slice.
 * For field pictures the lists are first assembled as frame entries in the
 * local field_entry_list[] and then converted with split_field_ref_list();
 * otherwise they are written directly into h->default_ref_list[].
 * B slices: short-term references are sorted by ascending POC, then list 0
 * and list 1 walk that sorted array in opposite directions starting around
 * the first entry whose POC is >= the current picture's; long-term
 * references are appended afterwards.
 * Other slices: short-term references in h->short_ref[] order, followed by
 * the long-term references, go to list 0.
 * Unused tail entries (up to ref_count) are zeroed.
 * An entry is used only when (reference | structure_sel) == PICT_FRAME.
 * NOTE(review): several declarations, else branches and the return are on
 * lines not visible in this excerpt.
 */
2869 static int fill_default_ref_list(H264Context *h){
2870 MpegEncContext * const s = &h->s;
2872 int smallest_poc_greater_than_current = -1;
2874 Picture sorted_short_ref[32];
2875 Picture field_entry_list[2][32];
2876 Picture *frame_list[2];
2878 if (FIELD_PICTURE) {
2879 structure_sel = PICT_FRAME;
2880 frame_list[0] = field_entry_list[0];
2881 frame_list[1] = field_entry_list[1];
2884 frame_list[0] = h->default_ref_list[0];
2885 frame_list[1] = h->default_ref_list[1];
2888 if(h->slice_type==B_TYPE){
2895 /* sort frame according to poc in B slice */
// selection sort: each pass picks the smallest POC that is still above `limit`
2896 for(out_i=0; out_i<h->short_ref_count; out_i++){
2898 int best_poc=INT_MAX;
2900 for(i=0; i<h->short_ref_count; i++){
2901 const int poc= h->short_ref[i]->poc;
2902 if(poc > limit && poc < best_poc){
2908 assert(best_i != INT_MIN);
2911 sorted_short_ref[out_i]= *h->short_ref[best_i];
2912 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember the first sorted position whose POC is not below the current picture's
2913 if (-1 == smallest_poc_greater_than_current) {
2914 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2915 smallest_poc_greater_than_current = out_i;
2920 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2922 // find the largest poc
2923 for(list=0; list<2; list++){
// initial direction differs per list; running off either end of the sorted array restarts at the split point
2926 int step= list ? -1 : 1;
2928 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2930 while(j<0 || j>= h->short_ref_count){
2931 if(j != -99 && step == (list ? -1 : 1))
2934 j= smallest_poc_greater_than_current + (step>>1);
2936 sel = sorted_short_ref[j].reference | structure_sel;
2937 if(sel != PICT_FRAME) continue;
2938 frame_list[list][index ]= sorted_short_ref[j];
2939 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2941 short_len[list] = index;
// append long-term references; their pic_id is the long-term index
2943 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2945 if(h->long_ref[i] == NULL) continue;
2946 sel = h->long_ref[i]->reference | structure_sel;
2947 if(sel != PICT_FRAME) continue;
2949 frame_list[ list ][index ]= *h->long_ref[i];
2950 frame_list[ list ][index++].pic_id= i;
2954 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2955 // swap the two first elements of L1 when
2956 // L0 and L1 are identical
2957 Picture temp= frame_list[1][0];
2958 frame_list[1][0] = frame_list[1][1];
2959 frame_list[1][1] = temp;
2964 for(list=0; list<2; list++){
2966 len[list] = split_field_ref_list(h->default_ref_list[list],
2970 s->picture_structure,
2973 if(len[list] < h->ref_count[ list ])
2974 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// non-B slices: short-term references in stored order, then long-term ones
2981 for(i=0; i<h->short_ref_count; i++){
2983 sel = h->short_ref[i]->reference | structure_sel;
2984 if(sel != PICT_FRAME) continue;
2985 frame_list[0][index ]= *h->short_ref[i];
2986 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2989 for(i = 0; i < 16; i++){
2991 if(h->long_ref[i] == NULL) continue;
2992 sel = h->long_ref[i]->reference | structure_sel;
2993 if(sel != PICT_FRAME) continue;
2994 frame_list[0][index ]= *h->long_ref[i];
2995 frame_list[0][index++].pic_id= i;
2999 index = split_field_ref_list(h->default_ref_list[0],
3000 h->ref_count[0], frame_list[0],
3001 index, s->picture_structure,
3004 if(index < h->ref_count[0])
3005 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3008 for (i=0; i<h->ref_count[0]; i++) {
3009 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3011 if(h->slice_type==B_TYPE){
3012 for (i=0; i<h->ref_count[1]; i++) {
// trace the data pointer of the list-1 entry itself (previously printed list 0's pointer)
3013 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3020 static void print_short_term(H264Context *h);
3021 static void print_long_term(H264Context *h);
3024 * Extract structure information about the picture described by pic_num in
3025 * the current decoding context (frame or field). Note that pic_num is
3026 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3027 * @param pic_num picture number for which to extract structure information
3028 * @param structure one of PICT_XXX describing structure of picture
3030 * @return frame number (short term) or long term index of picture
3031 * described by pic_num
3033 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3034 MpegEncContext * const s = &h->s;
3036 *structure = s->picture_structure;
3039 /* opposite field */
3040 *structure ^= PICT_FRAME;
3047 static int decode_ref_pic_list_reordering(H264Context *h){
3048 MpegEncContext * const s = &h->s;
3049 int list, index, pic_structure;
3051 print_short_term(h);
3053 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3055 for(list=0; list<h->list_count; list++){
3056 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3058 if(get_bits1(&s->gb)){
3059 int pred= h->curr_pic_num;
3061 for(index=0; ; index++){
3062 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3063 unsigned int pic_id;
3065 Picture *ref = NULL;
3067 if(reordering_of_pic_nums_idc==3)
3070 if(index >= h->ref_count[list]){
3071 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3075 if(reordering_of_pic_nums_idc<3){
3076 if(reordering_of_pic_nums_idc<2){
3077 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3080 if(abs_diff_pic_num > h->max_pic_num){
3081 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3085 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3086 else pred+= abs_diff_pic_num;
3087 pred &= h->max_pic_num - 1;
3089 frame_num = pic_num_extract(h, pred, &pic_structure);
3091 for(i= h->short_ref_count-1; i>=0; i--){
3092 ref = h->short_ref[i];
3093 assert(ref->reference);
3094 assert(!ref->long_ref);
3095 if(ref->data[0] != NULL &&
3096 ref->frame_num == frame_num &&
3097 (ref->reference & pic_structure) &&
3098 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3105 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3107 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3110 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3113 ref = h->long_ref[long_idx];
3114 assert(!(ref && !ref->reference));
3115 if(ref && (ref->reference & pic_structure)){
3116 ref->pic_id= pic_id;
3117 assert(ref->long_ref);
3125 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3126 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
3128 for(i=index; i+1<h->ref_count[list]; i++){
3129 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3132 for(; i > index; i--){
3133 h->ref_list[list][i]= h->ref_list[list][i-1];
3135 h->ref_list[list][index]= *ref;
3137 pic_as_field(&h->ref_list[list][index], pic_structure);
3141 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3147 for(list=0; list<h->list_count; list++){
3148 for(index= 0; index < h->ref_count[list]; index++){
3149 if(!h->ref_list[list][index].data[0])
3150 h->ref_list[list][index]= s->current_picture;
3154 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3155 direct_dist_scale_factor(h);
3156 direct_ref_list_init(h);
3160 static void fill_mbaff_ref_list(H264Context *h){
3162 for(list=0; list<2; list++){ //FIXME try list_count
3163 for(i=0; i<h->ref_count[list]; i++){
3164 Picture *frame = &h->ref_list[list][i];
3165 Picture *field = &h->ref_list[list][16+2*i];
3168 field[0].linesize[j] <<= 1;
3169 field[0].reference = PICT_TOP_FIELD;
3170 field[1] = field[0];
3172 field[1].data[j] += frame->linesize[j];
3173 field[1].reference = PICT_BOTTOM_FIELD;
3175 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3176 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3178 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3179 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3183 for(j=0; j<h->ref_count[1]; j++){
3184 for(i=0; i<h->ref_count[0]; i++)
3185 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3186 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3187 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
3191 static int pred_weight_table(H264Context *h){
3192 MpegEncContext * const s = &h->s;
3194 int luma_def, chroma_def;
3197 h->use_weight_chroma= 0;
3198 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3199 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3200 luma_def = 1<<h->luma_log2_weight_denom;
3201 chroma_def = 1<<h->chroma_log2_weight_denom;
3203 for(list=0; list<2; list++){
3204 for(i=0; i<h->ref_count[list]; i++){
3205 int luma_weight_flag, chroma_weight_flag;
3207 luma_weight_flag= get_bits1(&s->gb);
3208 if(luma_weight_flag){
3209 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3210 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3211 if( h->luma_weight[list][i] != luma_def
3212 || h->luma_offset[list][i] != 0)
3215 h->luma_weight[list][i]= luma_def;
3216 h->luma_offset[list][i]= 0;
3219 chroma_weight_flag= get_bits1(&s->gb);
3220 if(chroma_weight_flag){
3223 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3224 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3225 if( h->chroma_weight[list][i][j] != chroma_def
3226 || h->chroma_offset[list][i][j] != 0)
3227 h->use_weight_chroma= 1;
3232 h->chroma_weight[list][i][j]= chroma_def;
3233 h->chroma_offset[list][i][j]= 0;
3237 if(h->slice_type != B_TYPE) break;
3239 h->use_weight= h->use_weight || h->use_weight_chroma;
3243 static void implicit_weight_table(H264Context *h){
3244 MpegEncContext * const s = &h->s;
3246 int cur_poc = s->current_picture_ptr->poc;
3248 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3249 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3251 h->use_weight_chroma= 0;
3256 h->use_weight_chroma= 2;
3257 h->luma_log2_weight_denom= 5;
3258 h->chroma_log2_weight_denom= 5;
3260 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3261 int poc0 = h->ref_list[0][ref0].poc;
3262 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3263 int poc1 = h->ref_list[1][ref1].poc;
3264 int td = av_clip(poc1 - poc0, -128, 127);
3266 int tb = av_clip(cur_poc - poc0, -128, 127);
3267 int tx = (16384 + (FFABS(td) >> 1)) / td;
3268 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3269 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3270 h->implicit_weight[ref0][ref1] = 32;
3272 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3274 h->implicit_weight[ref0][ref1] = 32;
3280 * Mark a picture as no longer needed for reference. The refmask
3281 * argument allows unreferencing of individual fields or the whole frame.
3282 * If the picture becomes entirely unreferenced, but is being held for
3283 * display purposes, it is marked as such.
3284 * @param refmask mask of fields to unreference; the mask is bitwise
3285 * anded with the reference marking of pic
3286 * @return non-zero if pic becomes entirely unreferenced (except possibly
3287 * for display purposes) zero if one of the fields remains in
3290 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3292 if (pic->reference &= refmask) {
3295 if(pic == h->delayed_output_pic)
3296 pic->reference=DELAYED_PIC_REF;
3298 for(i = 0; h->delayed_pic[i]; i++)
3299 if(pic == h->delayed_pic[i]){
3300 pic->reference=DELAYED_PIC_REF;
3309 * instantaneous decoder refresh.
3311 static void idr(H264Context *h){
3314 for(i=0; i<16; i++){
3315 if (h->long_ref[i] != NULL) {
3316 unreference_pic(h, h->long_ref[i], 0);
3317 h->long_ref[i]= NULL;
3320 h->long_ref_count=0;
3322 for(i=0; i<h->short_ref_count; i++){
3323 unreference_pic(h, h->short_ref[i], 0);
3324 h->short_ref[i]= NULL;
3326 h->short_ref_count=0;
3329 /* forget old pics after a seek */
3330 static void flush_dpb(AVCodecContext *avctx){
3331 H264Context *h= avctx->priv_data;
3333 for(i=0; i<16; i++) {
3334 if(h->delayed_pic[i])
3335 h->delayed_pic[i]->reference= 0;
3336 h->delayed_pic[i]= NULL;
3338 if(h->delayed_output_pic)
3339 h->delayed_output_pic->reference= 0;
3340 h->delayed_output_pic= NULL;
3342 if(h->s.current_picture_ptr)
3343 h->s.current_picture_ptr->reference= 0;
3344 h->s.first_field= 0;
3348 * Find a Picture in the short term reference list by frame number.
3349 * @param frame_num frame number to search for
3350 * @param idx the index into h->short_ref where returned picture is found
3351 * undefined if no picture found.
3352 * @return pointer to the found picture, or NULL if no pic with the provided
3353 * frame number is found
3355 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3356 MpegEncContext * const s = &h->s;
3359 for(i=0; i<h->short_ref_count; i++){
3360 Picture *pic= h->short_ref[i];
3361 if(s->avctx->debug&FF_DEBUG_MMCO)
3362 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3363 if(pic->frame_num == frame_num) {
3372 * Remove a picture from the short term reference list by its index in
3373 * that list. This does no checking on the provided index; it is assumed
3374 * to be valid. Other list entries are shifted down.
3375 * @param i index into h->short_ref of picture to remove.
3377 static void remove_short_at_index(H264Context *h, int i){
3378 assert(i > 0 && i < h->short_ref_count);
3379 h->short_ref[i]= NULL;
3380 if (--h->short_ref_count)
3381 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3386 * @return the removed picture or NULL if an error occurs
3388 static Picture * remove_short(H264Context *h, int frame_num){
3389 MpegEncContext * const s = &h->s;
3393 if(s->avctx->debug&FF_DEBUG_MMCO)
3394 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3396 pic = find_short(h, frame_num, &i);
3398 remove_short_at_index(h, i);
3404 * Remove a picture from the long term reference list by its index in
3405 * that list. This does no checking on the provided index; it is assumed
3406 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3407 * @param i index into h->long_ref of picture to remove.
3409 static void remove_long_at_index(H264Context *h, int i){
3410 h->long_ref[i]= NULL;
3411 h->long_ref_count--;
3416 * @return the removed picture or NULL if an error occurs
3418 static Picture * remove_long(H264Context *h, int i){
3421 pic= h->long_ref[i];
3423 remove_long_at_index(h, i);
3429 * print short term list
3431 static void print_short_term(H264Context *h) {
3433 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3434 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3435 for(i=0; i<h->short_ref_count; i++){
3436 Picture *pic= h->short_ref[i];
3437 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3443 * print long term list
3445 static void print_long_term(H264Context *h) {
3447 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3448 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3449 for(i = 0; i < 16; i++){
3450 Picture *pic= h->long_ref[i];
3452 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3459 * Executes the reference picture marking (memory management control operations).
3461 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3462 MpegEncContext * const s = &h->s;
3464 int current_ref_assigned=0;
3467 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3468 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3470 for(i=0; i<mmco_count; i++){
3471 int structure, frame_num, unref_pic;
3472 if(s->avctx->debug&FF_DEBUG_MMCO)
3473 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3475 switch(mmco[i].opcode){
3476 case MMCO_SHORT2UNUSED:
3477 if(s->avctx->debug&FF_DEBUG_MMCO)
3478 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3479 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3480 pic = find_short(h, frame_num, &j);
3482 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3483 remove_short_at_index(h, j);
3484 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3485 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
3487 case MMCO_SHORT2LONG:
3488 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3489 h->long_ref[mmco[i].long_arg]->frame_num ==
3490 mmco[i].short_pic_num / 2) {
3491 /* do nothing, we've already moved this field pair. */
3493 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
3495 pic= remove_long(h, mmco[i].long_arg);
3496 if(pic) unreference_pic(h, pic, 0);
3498 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3499 if (h->long_ref[ mmco[i].long_arg ]){
3500 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3501 h->long_ref_count++;
3505 case MMCO_LONG2UNUSED:
3506 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3507 pic = h->long_ref[j];
3509 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3510 remove_long_at_index(h, j);
3511 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3512 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3516 if (FIELD_PICTURE && !s->first_field) {
3517 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3518 /* Just mark second field as referenced */
3520 } else if (s->current_picture_ptr->reference) {
3521 /* First field in pair is in short term list or
3522 * at a different long term index.
3523 * This is not allowed; see 7.4.3, notes 2 and 3.
3524 * Report the problem and keep the pair where it is,
3525 * and mark this field valid.
3527 av_log(h->s.avctx, AV_LOG_ERROR,
3528 "illegal long term reference assignment for second "
3529 "field in complementary field pair (first field is "
3530 "short term or has non-matching long index)\n");
3536 pic= remove_long(h, mmco[i].long_arg);
3537 if(pic) unreference_pic(h, pic, 0);
3539 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3540 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3541 h->long_ref_count++;
3544 s->current_picture_ptr->reference |= s->picture_structure;
3545 current_ref_assigned=1;
3547 case MMCO_SET_MAX_LONG:
3548 assert(mmco[i].long_arg <= 16);
3549 // just remove the long term which index is greater than new max
3550 for(j = mmco[i].long_arg; j<16; j++){
3551 pic = remove_long(h, j);
3552 if (pic) unreference_pic(h, pic, 0);
3556 while(h->short_ref_count){
3557 pic= remove_short(h, h->short_ref[0]->frame_num);
3558 if(pic) unreference_pic(h, pic, 0);
3560 for(j = 0; j < 16; j++) {
3561 pic= remove_long(h, j);
3562 if(pic) unreference_pic(h, pic, 0);
3569 if (!current_ref_assigned && FIELD_PICTURE &&
3570 !s->first_field && s->current_picture_ptr->reference) {
3572 /* Second field of complementary field pair; the first field of
3573 * which is already referenced. If short referenced, it
3574 * should be first entry in short_ref. If not, it must exist
3575 * in long_ref; trying to put it on the short list here is an
3576 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3578 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3579 /* Just mark the second field valid */
3580 s->current_picture_ptr->reference = PICT_FRAME;
3581 } else if (s->current_picture_ptr->long_ref) {
3582 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3583 "assignment for second field "
3584 "in complementary field pair "
3585 "(first field is long term)\n");
3588 * First field in reference, but not in any sensible place on our
3589 * reference lists. This shouldn't happen unless reference
3590 * handling somewhere else is wrong.
3594 current_ref_assigned = 1;
3597 if(!current_ref_assigned){
3598 pic= remove_short(h, s->current_picture_ptr->frame_num);
3600 unreference_pic(h, pic, 0);
3601 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3604 if(h->short_ref_count)
3605 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3607 h->short_ref[0]= s->current_picture_ptr;
3608 h->short_ref[0]->long_ref=0;
3609 h->short_ref_count++;
3610 s->current_picture_ptr->reference |= s->picture_structure;
3613 print_short_term(h);
3618 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3619 MpegEncContext * const s = &h->s;
3622 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3623 s->broken_link= get_bits1(gb) -1;
3624 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3625 if(h->mmco[0].long_arg == -1)
3628 h->mmco[0].opcode= MMCO_LONG;
3632 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3633 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3634 MMCOOpcode opcode= get_ue_golomb(gb);
3636 h->mmco[i].opcode= opcode;
3637 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3638 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3639 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3640 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3644 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3645 unsigned int long_arg= get_ue_golomb(gb);
3646 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3647 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3650 h->mmco[i].long_arg= long_arg;
3653 if(opcode > (unsigned)MMCO_LONG){
3654 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3657 if(opcode == MMCO_END)
3662 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3664 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3665 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3666 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3667 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3669 if (FIELD_PICTURE) {
3670 h->mmco[0].short_pic_num *= 2;
3671 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3672 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3683 static int init_poc(H264Context *h){
3684 MpegEncContext * const s = &h->s;
3685 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3688 if(h->nal_unit_type == NAL_IDR_SLICE){
3689 h->frame_num_offset= 0;
3691 if(h->frame_num < h->prev_frame_num)
3692 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3694 h->frame_num_offset= h->prev_frame_num_offset;
3697 if(h->sps.poc_type==0){
3698 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3700 if(h->nal_unit_type == NAL_IDR_SLICE){
3705 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3706 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3707 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3708 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3710 h->poc_msb = h->prev_poc_msb;
3711 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3713 field_poc[1] = h->poc_msb + h->poc_lsb;
3714 if(s->picture_structure == PICT_FRAME)
3715 field_poc[1] += h->delta_poc_bottom;
3716 }else if(h->sps.poc_type==1){
3717 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3720 if(h->sps.poc_cycle_length != 0)
3721 abs_frame_num = h->frame_num_offset + h->frame_num;
3725 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3728 expected_delta_per_poc_cycle = 0;
3729 for(i=0; i < h->sps.poc_cycle_length; i++)
3730 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3732 if(abs_frame_num > 0){
3733 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3734 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3736 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3737 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3738 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3742 if(h->nal_ref_idc == 0)
3743 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3745 field_poc[0] = expectedpoc + h->delta_poc[0];
3746 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3748 if(s->picture_structure == PICT_FRAME)
3749 field_poc[1] += h->delta_poc[1];
3752 if(h->nal_unit_type == NAL_IDR_SLICE){
3755 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3756 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
3762 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3763 s->current_picture_ptr->field_poc[0]= field_poc[0];
3764 s->current_picture_ptr->poc = field_poc[0];
3766 if(s->picture_structure != PICT_TOP_FIELD) {
3767 s->current_picture_ptr->field_poc[1]= field_poc[1];
3768 s->current_picture_ptr->poc = field_poc[1];
3770 if(!FIELD_PICTURE || !s->first_field)
3771 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
3778 * initialize scan tables
3780 static void init_scan_tables(H264Context *h){
3781 MpegEncContext * const s = &h->s;
3783 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3784 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3785 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3787 for(i=0; i<16; i++){
3788 #define T(x) (x>>2) | ((x<<2) & 0xF)
3789 h->zigzag_scan[i] = T(zigzag_scan[i]);
3790 h-> field_scan[i] = T( field_scan[i]);
3794 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3795 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3796 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3797 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3798 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3800 for(i=0; i<64; i++){
3801 #define T(x) (x>>3) | ((x&7)<<3)
3802 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3803 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3804 h->field_scan8x8[i] = T(field_scan8x8[i]);
3805 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3809 if(h->sps.transform_bypass){ //FIXME same ugly
3810 h->zigzag_scan_q0 = zigzag_scan;
3811 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3812 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3813 h->field_scan_q0 = field_scan;
3814 h->field_scan8x8_q0 = field_scan8x8;
3815 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3817 h->zigzag_scan_q0 = h->zigzag_scan;
3818 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3819 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3820 h->field_scan_q0 = h->field_scan;
3821 h->field_scan8x8_q0 = h->field_scan8x8;
3822 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3827 * Replicates H264 "master" context to thread contexts.
3829 static void clone_slice(H264Context *dst, H264Context *src)
3831 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3832 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3833 dst->s.current_picture = src->s.current_picture;
3834 dst->s.linesize = src->s.linesize;
3835 dst->s.uvlinesize = src->s.uvlinesize;
3836 dst->s.first_field = src->s.first_field;
3838 dst->prev_poc_msb = src->prev_poc_msb;
3839 dst->prev_poc_lsb = src->prev_poc_lsb;
3840 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3841 dst->prev_frame_num = src->prev_frame_num;
3842 dst->short_ref_count = src->short_ref_count;
3844 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3845 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3846 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3847 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3849 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3850 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
/**
 * decodes a slice header.
 * this will also call MPV_common_init() and frame_start() as needed
 *
 * @param h h264context
 * @param h0 h264 master context (differs from 'h' when doing slice based parallel decoding)
 *
 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
 */
3862 static int decode_slice_header(H264Context *h, H264Context *h0){
3863 MpegEncContext * const s = &h->s;
3864 MpegEncContext * const s0 = &h0->s;
3865 unsigned int first_mb_in_slice;
3866 unsigned int pps_id;
3867 int num_ref_idx_active_override_flag;
3868 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3869 unsigned int slice_type, tmp, i;
3870 int default_ref_list_done = 0;
3871 int last_pic_structure;
3873 s->dropable= h->nal_ref_idc == 0;
3875 first_mb_in_slice= get_ue_golomb(&s->gb);
3877 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3878 h0->current_slice = 0;
3879 if (!s0->first_field)
3880 s->current_picture_ptr= NULL;
3883 slice_type= get_ue_golomb(&s->gb);
3885 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3890 h->slice_type_fixed=1;
3892 h->slice_type_fixed=0;
3894 slice_type= slice_type_map[ slice_type ];
3895 if (slice_type == I_TYPE
3896 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3897 default_ref_list_done = 1;
3899 h->slice_type= slice_type;
3901 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3903 pps_id= get_ue_golomb(&s->gb);
3904 if(pps_id>=MAX_PPS_COUNT){
3905 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3908 if(!h0->pps_buffers[pps_id]) {
3909 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3912 h->pps= *h0->pps_buffers[pps_id];
3914 if(!h0->sps_buffers[h->pps.sps_id]) {
3915 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3918 h->sps = *h0->sps_buffers[h->pps.sps_id];
3920 if(h == h0 && h->dequant_coeff_pps != pps_id){
3921 h->dequant_coeff_pps = pps_id;
3922 init_dequant_tables(h);
3925 s->mb_width= h->sps.mb_width;
3926 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3928 h->b_stride= s->mb_width*4;
3929 h->b8_stride= s->mb_width*2;
3931 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3932 if(h->sps.frame_mbs_only_flag)
3933 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3935 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3937 if (s->context_initialized
3938 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3940 return -1; // width / height changed during parallelized decoding
3944 if (!s->context_initialized) {
3946 return -1; // we cant (re-)initialize context during parallel decoding
3947 if (MPV_common_init(s) < 0)
3951 init_scan_tables(h);
3954 for(i = 1; i < s->avctx->thread_count; i++) {
3956 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3957 memcpy(c, h, sizeof(MpegEncContext));
3958 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3961 init_scan_tables(c);
3965 for(i = 0; i < s->avctx->thread_count; i++)
3966 if(context_init(h->thread_context[i]) < 0)
3969 s->avctx->width = s->width;
3970 s->avctx->height = s->height;
3971 s->avctx->sample_aspect_ratio= h->sps.sar;
3972 if(!s->avctx->sample_aspect_ratio.den)
3973 s->avctx->sample_aspect_ratio.den = 1;
3975 if(h->sps.timing_info_present_flag){
3976 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3977 if(h->x264_build > 0 && h->x264_build < 44)
3978 s->avctx->time_base.den *= 2;
3979 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3980 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3984 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3987 h->mb_aff_frame = 0;
3988 last_pic_structure = s0->picture_structure;
3989 if(h->sps.frame_mbs_only_flag){
3990 s->picture_structure= PICT_FRAME;
3992 if(get_bits1(&s->gb)) { //field_pic_flag
3993 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3995 s->picture_structure= PICT_FRAME;
3996 h->mb_aff_frame = h->sps.mb_aff;
4000 if(h0->current_slice == 0){
4001 /* See if we have a decoded first field looking for a pair... */
4002 if (s0->first_field) {
4003 assert(s0->current_picture_ptr);
4004 assert(s0->current_picture_ptr->data[0]);
4005 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4007 /* figure out if we have a complementary field pair */
4008 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4010 * Previous field is unmatched. Don't display it, but let it
4011 * remain for reference if marked as such.
4013 s0->current_picture_ptr = NULL;
4014 s0->first_field = FIELD_PICTURE;
4017 if (h->nal_ref_idc &&
4018 s0->current_picture_ptr->reference &&
4019 s0->current_picture_ptr->frame_num != h->frame_num) {
4021 * This and previous field were reference, but had
4022 * different frame_nums. Consider this field first in
4023 * pair. Throw away previous field except for reference
4026 s0->first_field = 1;
4027 s0->current_picture_ptr = NULL;
4030 /* Second field in complementary pair */
4031 s0->first_field = 0;
4036 /* Frame or first field in a potentially complementary pair */
4037 assert(!s0->current_picture_ptr);
4038 s0->first_field = FIELD_PICTURE;
4041 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4042 s0->first_field = 0;
4049 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4051 assert(s->mb_num == s->mb_width * s->mb_height);
4052 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4053 first_mb_in_slice >= s->mb_num){
4054 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4057 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4058 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4059 if (s->picture_structure == PICT_BOTTOM_FIELD)
4060 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4061 assert(s->mb_y < s->mb_height);
4063 if(s->picture_structure==PICT_FRAME){
4064 h->curr_pic_num= h->frame_num;
4065 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4067 h->curr_pic_num= 2*h->frame_num + 1;
4068 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4071 if(h->nal_unit_type == NAL_IDR_SLICE){
4072 get_ue_golomb(&s->gb); /* idr_pic_id */
4075 if(h->sps.poc_type==0){
4076 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4078 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4079 h->delta_poc_bottom= get_se_golomb(&s->gb);
4083 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4084 h->delta_poc[0]= get_se_golomb(&s->gb);
4086 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4087 h->delta_poc[1]= get_se_golomb(&s->gb);
4092 if(h->pps.redundant_pic_cnt_present){
4093 h->redundant_pic_count= get_ue_golomb(&s->gb);
4096 //set defaults, might be overriden a few line later
4097 h->ref_count[0]= h->pps.ref_count[0];
4098 h->ref_count[1]= h->pps.ref_count[1];
4100 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4101 if(h->slice_type == B_TYPE){
4102 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4103 if(FIELD_OR_MBAFF_PICTURE && h->direct_spatial_mv_pred)
4104 av_log(h->s.avctx, AV_LOG_ERROR, "Interlaced pictures + spatial direct mode is not implemented\n");
4106 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4108 if(num_ref_idx_active_override_flag){
4109 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4110 if(h->slice_type==B_TYPE)
4111 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4113 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4114 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4115 h->ref_count[0]= h->ref_count[1]= 1;
4119 if(h->slice_type == B_TYPE)
4126 if(!default_ref_list_done){
4127 fill_default_ref_list(h);
4130 if(decode_ref_pic_list_reordering(h) < 0)
4133 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4134 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4135 pred_weight_table(h);
4136 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4137 implicit_weight_table(h);
4142 decode_ref_pic_marking(h0, &s->gb);
4145 fill_mbaff_ref_list(h);
4147 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4148 tmp = get_ue_golomb(&s->gb);
4150 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4153 h->cabac_init_idc= tmp;
4156 h->last_qscale_diff = 0;
4157 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4159 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4163 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4164 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4165 //FIXME qscale / qp ... stuff
4166 if(h->slice_type == SP_TYPE){
4167 get_bits1(&s->gb); /* sp_for_switch_flag */
4169 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4170 get_se_golomb(&s->gb); /* slice_qs_delta */
4173 h->deblocking_filter = 1;
4174 h->slice_alpha_c0_offset = 0;
4175 h->slice_beta_offset = 0;
4176 if( h->pps.deblocking_filter_parameters_present ) {
4177 tmp= get_ue_golomb(&s->gb);
4179 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4182 h->deblocking_filter= tmp;
4183 if(h->deblocking_filter < 2)
4184 h->deblocking_filter^= 1; // 1<->0
4186 if( h->deblocking_filter ) {
4187 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4188 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4192 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4193 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4194 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4195 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4196 h->deblocking_filter= 0;
4198 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4199 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4200 /* Cheat slightly for speed:
4201 Don't bother to deblock across slices */
4202 h->deblocking_filter = 2;
4204 h0->max_contexts = 1;
4205 if(!h0->single_decode_warning) {
4206 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4207 h0->single_decode_warning = 1;
4210 return 1; // deblocking switched inside frame
4215 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4216 slice_group_change_cycle= get_bits(&s->gb, ?);
4219 h0->last_slice_type = slice_type;
4220 h->slice_num = ++h0->current_slice;
4222 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4223 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4225 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4226 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4228 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4230 av_get_pict_type_char(h->slice_type),
4231 pps_id, h->frame_num,
4232 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4233 h->ref_count[0], h->ref_count[1],
4235 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4237 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4241 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
4242 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4243 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4245 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4246 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
// Reads the level prefix of a residual coefficient from the bitstream.
// The cached bits are inspected, log is derived from the position of the
// highest set bit, and log bits are then consumed from the reader.
// NOTE(review): the debug message prints log-1 as the prefix value, so the
// function presumably returns log-1 -- confirm against the complete source.
4255 static inline int get_level_prefix(GetBitContext *gb){
4259 OPEN_READER(re, gb);
4260 UPDATE_CACHE(re, gb);
4261 buf=GET_CACHE(re, gb);
// 32 minus the index of the top set bit of the cached word
// (assumes av_log2 returns the MSB index and the cache is 32 bits -- TODO confirm).
4263 log= 32 - av_log2(buf);
// Diagnostic output of the bits that form the prefix.
4265 print_bin(buf>>(32-log), log);
4266 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the log bits that were just examined and commit the reader state.
4269 LAST_SKIP_BITS(re, gb, log);
4270 CLOSE_READER(re, gb);
// Examines the entries of h->sub_mb_type to decide whether the 8x8 transform
// may be used for the current macroblock.
// The visible condition matches an entry that is not a plain 8x8 sub partition,
// or that is direct while sps.direct_8x8_inference_flag is not set.
// NOTE(review): presumably a match makes the function return 0 -- confirm.
4275 static inline int get_dct8x8_allowed(H264Context *h){
4278 if(!IS_SUB_8X8(h->sub_mb_type[i])
4279 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4286 * decodes a residual block.
4287 * @param n block index
4288 * @param scantable scantable
4289 * @param max_coeff number of coefficients in the block
4290 * @return <0 if an error occurred
// Parses one CAVLC coded residual block from gb and stores the coefficients
// into block at the positions given by scantable.
// Steps visible here: read coeff_token (total coefficient count plus number of
// trailing ones), read the trailing one signs, read the remaining levels,
// read the total number of zeros, then place the levels walking backwards
// through the scan while reading run_before values.
// Error conditions log through av_log (too many coefficients, prefix too
// large, negative number of zero coefficients).
4292 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4293 MpegEncContext * const s = &h->s;
// Maps a predicted non zero count (0..16) to one of the four coeff_token VLC tables.
4294 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4296 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4298 //FIXME put trailing_onex into the context
// coeff_token packs total_coeff in the upper bits and trailing_ones in the low two bits.
// Chroma DC uses its own table; the other blocks pick a table from the predicted count.
4300 if(n == CHROMA_DC_BLOCK_INDEX){
4301 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4302 total_coeff= coeff_token>>2;
4304 if(n == LUMA_DC_BLOCK_INDEX){
// total_coeff temporarily holds the prediction used to select the VLC table.
4305 total_coeff= pred_non_zero_count(h, 0);
4306 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4307 total_coeff= coeff_token>>2;
4309 total_coeff= pred_non_zero_count(h, n);
4310 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4311 total_coeff= coeff_token>>2;
// Only this branch records the count in the non zero count cache.
4312 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4316 //FIXME set last_non_zero?
// Reject streams that signal more coefficients than the block can hold.
4320 if(total_coeff > (unsigned)max_coeff) {
4321 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4325 trailing_ones= coeff_token&3;
4326 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4327 assert(total_coeff<=16);
// Trailing ones carry only a sign bit: 0 gives +1, 1 gives -1.
4329 for(i=0; i<trailing_ones; i++){
4330 level[i]= 1 - 2*get_bits1(gb);
4334 int level_code, mask;
4335 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4336 int prefix= get_level_prefix(gb);
4338 //first coefficient has suffix_length equal to 0 or 1
4339 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4341 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4343 level_code= (prefix<<suffix_length); //part
4344 }else if(prefix==14){
4346 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4348 level_code= prefix + get_bits(gb, 4); //part
4349 }else if(prefix==15){
4350 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4351 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4353 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4357 if(trailing_ones < 3) level_code += 2;
// Branch free sign mapping: mask is 0 for even level_code (positive level)
// and -1 for odd level_code (negated level).
4362 mask= -(level_code&1);
4363 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4366 //remaining coefficients have suffix_length > 0
4367 for(;i<total_coeff;i++) {
// Thresholds indexed by suffix_length and compared against level_code below.
4368 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4369 prefix = get_level_prefix(gb);
4371 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4372 }else if(prefix==15){
4373 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4375 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4378 mask= -(level_code&1);
4379 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4380 if(level_code > suffix_limit[suffix_length])
4385 if(total_coeff == max_coeff)
4388 if(n == CHROMA_DC_BLOCK_INDEX)
4389 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4391 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Scan position of the first stored level; later levels move towards position 0.
4394 coeff_num = zeros_left + total_coeff - 1;
4395 j = scantable[coeff_num];
// First variant stores the raw levels without scaling.
// NOTE(review): presumably taken for blocks decoded without a qmul table -- confirm.
4397 block[j] = level[0];
4398 for(i=1;i<total_coeff;i++) {
4401 else if(zeros_left < 7){
4402 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4404 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4406 zeros_left -= run_before;
4407 coeff_num -= 1 + run_before;
4408 j= scantable[ coeff_num ];
// Second variant scales each level by qmul[j] with rounding (+32, then >>6).
4413 block[j] = (level[0] * qmul[j] + 32)>>6;
4414 for(i=1;i<total_coeff;i++) {
4417 else if(zeros_left < 7){
4418 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4420 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4422 zeros_left -= run_before;
4423 coeff_num -= 1 + run_before;
4424 j= scantable[ coeff_num ];
4426 block[j]= (level[i] * qmul[j] + 32)>>6;
4431 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Infers the field decoding flag of the current macroblock from a neighbour.
// The left macroblock type is used when it belongs to the current slice,
// otherwise the macroblock one row up is tried under the same condition.
// Both h->mb_mbaff and h->mb_field_decoding_flag are set to 1 when the chosen
// type is interlaced and to 0 otherwise.
4438 static void predict_field_decoding_flag(H264Context *h){
4439 MpegEncContext * const s = &h->s;
4440 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4441 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4442 ? s->current_picture.mb_type[mb_xy-1]
4443 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4444 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4446 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4450 * decodes a P_SKIP or B_SKIP macroblock
// Fills in the state of a skipped macroblock (no residual, motion is predicted).
// B slices derive motion through pred_direct_motion, the other path uses
// pred_pskip_motion with reference index 0. Afterwards motion, mb_type, qscale
// and slice membership are written to the picture tables and
// h->prev_mb_skipped is set.
4452 static void decode_mb_skip(H264Context *h){
4453 MpegEncContext * const s = &h->s;
4454 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// A skipped macroblock has no coefficients: clear the stored and cached counts.
4457 memset(h->non_zero_count[mb_xy], 0, 16);
4458 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4461 mb_type|= MB_TYPE_INTERLACED;
4463 if( h->slice_type == B_TYPE )
4465 // just for fill_caches. pred_direct_motion will set the real mb_type
4466 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4468 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4469 pred_direct_motion(h, &mb_type);
4470 mb_type|= MB_TYPE_SKIP;
4475 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4477 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4478 pred_pskip_motion(h, &mx, &my);
// Whole macroblock: reference index 0 and one predicted motion vector.
4479 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4480 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Publish the result to the per picture tables.
4483 write_back_motion(h, mb_type);
4484 s->current_picture.mb_type[mb_xy]= mb_type;
4485 s->current_picture.qscale_table[mb_xy]= s->qscale;
4486 h->slice_table[ mb_xy ]= h->slice_num;
4487 h->prev_mb_skipped= 1;
4491 * decodes a macroblock
4492 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Decodes one macroblock of a CAVLC coded slice.
// Visible stages: skip run handling, mb_type parsing per slice type, intra PCM
// samples, intra prediction modes, sub macroblock types, reference indices and
// motion vectors for each partition shape, coded block pattern, qp delta and
// finally the luma and chroma residual blocks via decode_residual.
// Out of range syntax elements are reported through av_log; a failing luma DC
// decode_residual call returns -1.
4494 static int decode_mb_cavlc(H264Context *h){
4495 MpegEncContext * const s = &h->s;
4496 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4497 int partition_count;
4498 unsigned int mb_type, cbp;
4499 int dct8x8_allowed= h->pps.transform_8x8_mode;
4501 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4503 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4504 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Skip runs only exist outside I and SI slices. A value of -1 means that no
// run is pending and a new one has to be read.
4506 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4507 if(s->mb_skip_run==-1)
4508 s->mb_skip_run= get_ue_golomb(&s->gb);
4510 if (s->mb_skip_run--) {
// For the top macroblock of an MBAFF pair the field flag is read from the
// stream once the run is exhausted, otherwise it is predicted from neighbours.
4511 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4512 if(s->mb_skip_run==0)
4513 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4515 predict_field_decoding_flag(h);
4522 if( (s->mb_y&1) == 0 )
4523 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4525 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4527 h->prev_mb_skipped= 0;
// The raw mb_type index is translated through the table of the slice type.
// Indices beyond the inter types branch to the intra handling.
4529 mb_type= get_ue_golomb(&s->gb);
4530 if(h->slice_type == B_TYPE){
4532 partition_count= b_mb_type_info[mb_type].partition_count;
4533 mb_type= b_mb_type_info[mb_type].type;
4536 goto decode_intra_mb;
4538 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4540 partition_count= p_mb_type_info[mb_type].partition_count;
4541 mb_type= p_mb_type_info[mb_type].type;
4544 goto decode_intra_mb;
4547 assert(h->slice_type == I_TYPE);
4550 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra types carry their coded block pattern and 16x16 prediction mode in the table.
4554 cbp= i_mb_type_info[mb_type].cbp;
4555 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4556 mb_type= i_mb_type_info[mb_type].type;
4560 mb_type |= MB_TYPE_INTERLACED;
4562 h->slice_table[ mb_xy ]= h->slice_num;
// Intra PCM: raw 8 bit samples follow byte aligned in the stream.
4564 if(IS_INTRA_PCM(mb_type)){
4567 // We assume these blocks are very rare so we do not optimize it.
4568 align_get_bits(&s->gb);
4570 // The pixels are stored in the same order as levels in h->mb array.
4571 for(y=0; y<16; y++){
4572 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4573 for(x=0; x<16; x++){
4574 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4575 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
// Chroma samples are stored after the 256 luma entries, U first and V 64 entries later.
4579 const int index= 256 + 4*(y&3) + 32*(y>>2);
4581 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4582 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4586 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4588 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4589 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4593 // In deblocking, the quantizer is 0
4594 s->current_picture.qscale_table[mb_xy]= 0;
4595 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4596 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4597 // All coeffs are present
4598 memset(h->non_zero_count[mb_xy], 16, 16);
4600 s->current_picture.mb_type[mb_xy]= mb_type;
// The reference counts are doubled here and halved again at the end of the function.
// NOTE(review): presumably guarded by an MBAFF field macroblock test -- confirm.
4605 h->ref_count[0] <<= 1;
4606 h->ref_count[1] <<= 1;
4609 fill_caches(h, mb_type, 0);
4612 if(IS_INTRA(mb_type)){
4614 // init_top_left_availability(h);
4615 if(IS_INTRA4x4(mb_type)){
// An extra flag selects the 8x8 transform when it is allowed.
4618 if(dct8x8_allowed && get_bits1(&s->gb)){
4619 mb_type |= MB_TYPE_8x8DCT;
4623 // fill_intra4x4_pred_table(h);
4624 for(i=0; i<16; i+=di){
4625 int mode= pred_intra_mode(h, i);
// A zero flag means the predicted mode is not used: a 3 bit remainder is read
// and bumped by one when it is not below the predicted mode.
4627 if(!get_bits1(&s->gb)){
4628 const int rem_mode= get_bits(&s->gb, 3);
4629 mode = rem_mode + (rem_mode >= mode);
4633 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4635 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4637 write_back_intra_pred_mode(h);
4638 if( check_intra4x4_pred_mode(h) < 0)
4641 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4642 if(h->intra16x16_pred_mode < 0)
4646 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4649 h->chroma_pred_mode= pred_mode;
// Four partitions: each 8x8 quadrant has its own sub macroblock type.
4650 }else if(partition_count==4){
4651 int i, j, sub_partition_count[4], list, ref[2][4];
4653 if(h->slice_type == B_TYPE){
4655 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4656 if(h->sub_mb_type[i] >=13){
4657 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4660 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4661 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct quadrant triggers direct motion prediction for the macroblock.
4663 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4664 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4665 pred_direct_motion(h, &mb_type);
4666 h->ref_cache[0][scan8[4]] =
4667 h->ref_cache[1][scan8[4]] =
4668 h->ref_cache[0][scan8[12]] =
4669 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4672 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4674 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4675 if(h->sub_mb_type[i] >=4){
4676 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4679 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4680 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices of the quadrants, one pass per list.
4684 for(list=0; list<h->list_count; list++){
4685 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4687 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4688 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4689 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4691 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4703 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors of the quadrants, one pass per list.
4705 for(list=0; list<h->list_count; list++){
4707 if(IS_DIRECT(h->sub_mb_type[i])) {
4708 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4711 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4712 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4714 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4715 const int sub_mb_type= h->sub_mb_type[i];
4716 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4717 for(j=0; j<sub_partition_count[i]; j++){
4719 const int index= 4*i + block_width*j;
4720 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// Predicted vector plus the signed delta read from the stream.
4721 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4722 mx += get_se_golomb(&s->gb);
4723 my += get_se_golomb(&s->gb);
4724 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the vector over the cache entries covered by the sub partition shape.
4726 if(IS_SUB_8X8(sub_mb_type)){
4728 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4730 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4731 }else if(IS_SUB_8X4(sub_mb_type)){
4732 mv_cache[ 1 ][0]= mx;
4733 mv_cache[ 1 ][1]= my;
4734 }else if(IS_SUB_4X8(sub_mb_type)){
4735 mv_cache[ 8 ][0]= mx;
4736 mv_cache[ 8 ][1]= my;
4738 mv_cache[ 0 ][0]= mx;
4739 mv_cache[ 0 ][1]= my;
4742 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4748 }else if(IS_DIRECT(mb_type)){
4749 pred_direct_motion(h, &mb_type);
4750 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4752 int list, mx, my, i;
4753 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16: one reference index and one motion vector per used list.
4754 if(IS_16X16(mb_type)){
4755 for(list=0; list<h->list_count; list++){
4757 if(IS_DIR(mb_type, 0, list)){
4758 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4759 if(val >= h->ref_count[list]){
4760 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4764 val= LIST_NOT_USED&0xFF;
4765 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4767 for(list=0; list<h->list_count; list++){
4769 if(IS_DIR(mb_type, 0, list)){
4770 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4771 mx += get_se_golomb(&s->gb);
4772 my += get_se_golomb(&s->gb);
4773 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4775 val= pack16to32(mx,my);
4778 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8: two partitions stacked vertically, the cache offset is 16*i.
4781 else if(IS_16X8(mb_type)){
4782 for(list=0; list<h->list_count; list++){
4785 if(IS_DIR(mb_type, i, list)){
4786 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4787 if(val >= h->ref_count[list]){
4788 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4792 val= LIST_NOT_USED&0xFF;
4793 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4796 for(list=0; list<h->list_count; list++){
4799 if(IS_DIR(mb_type, i, list)){
4800 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4801 mx += get_se_golomb(&s->gb);
4802 my += get_se_golomb(&s->gb);
4803 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4805 val= pack16to32(mx,my);
4808 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: two partitions side by side, the cache offset is 2*i.
4812 assert(IS_8X16(mb_type));
4813 for(list=0; list<h->list_count; list++){
4816 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4817 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4818 if(val >= h->ref_count[list]){
4819 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4823 val= LIST_NOT_USED&0xFF;
4824 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4827 for(list=0; list<h->list_count; list++){
4830 if(IS_DIR(mb_type, i, list)){
4831 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4832 mx += get_se_golomb(&s->gb);
4833 my += get_se_golomb(&s->gb);
4834 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4836 val= pack16to32(mx,my);
4839 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4845 if(IS_INTER(mb_type))
4846 write_back_motion(h, mb_type);
// Intra 16x16 already got its cbp from the type table. All other types read a
// code number and map it through the intra or inter table.
4848 if(!IS_INTRA16x16(mb_type)){
4849 cbp= get_ue_golomb(&s->gb);
4851 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4855 if(IS_INTRA4x4(mb_type))
4856 cbp= golomb_to_intra4x4_cbp[cbp];
4858 cbp= golomb_to_inter_cbp[cbp];
// Inter macroblocks with coded luma may signal the 8x8 transform here.
4862 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4863 if(get_bits1(&s->gb))
4864 mb_type |= MB_TYPE_8x8DCT;
4866 s->current_picture.mb_type[mb_xy]= mb_type;
// Residual data exists when any cbp bit is set or the type is intra 16x16.
4868 if(cbp || IS_INTRA16x16(mb_type)){
4869 int i8x8, i4x4, chroma_idx;
4871 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4872 const uint8_t *scan, *scan8x8, *dc_scan;
4874 // fill_non_zero_count_cache(h);
// Scan tables depend on interlacing and on whether qscale is zero.
4876 if(IS_INTERLACED(mb_type)){
4877 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4878 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4879 dc_scan= luma_dc_field_scan;
4881 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4882 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4883 dc_scan= luma_dc_zigzag_scan;
4886 dquant= get_se_golomb(&s->gb);
4888 if( dquant > 25 || dquant < -26 ){
4889 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// Apply the delta and wrap the result back into the range 0..51.
4893 s->qscale += dquant;
4894 if(((unsigned)s->qscale) > 51){
4895 if(s->qscale<0) s->qscale+= 52;
4896 else s->qscale-= 52;
4899 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4900 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra 16x16: one luma DC block followed by up to sixteen 15 coefficient AC blocks.
4901 if(IS_INTRA16x16(mb_type)){
4902 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4903 return -1; //FIXME continue if partitioned and other return -1 too
4906 assert((cbp&15) == 0 || (cbp&15) == 15);
4909 for(i8x8=0; i8x8<4; i8x8++){
4910 for(i4x4=0; i4x4<4; i4x4++){
4911 const int index= i4x4 + 4*i8x8;
4912 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4918 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other types: each cbp bit enables one 8x8 luma quadrant.
4921 for(i8x8=0; i8x8<4; i8x8++){
4922 if(cbp & (1<<i8x8)){
4923 if(IS_8x8DCT(mb_type)){
4924 DCTELEM *buf = &h->mb[64*i8x8];
// With the 8x8 transform four interleaved 16 coefficient passes fill one 64 entry block.
4926 for(i4x4=0; i4x4<4; i4x4++){
4927 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4928 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// The four partial counts are summed into the first cache entry of the quadrant.
4931 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4932 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4934 for(i4x4=0; i4x4<4; i4x4++){
4935 const int index= i4x4 + 4*i8x8;
4937 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// Quadrant not coded: clear its four count entries.
4943 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4944 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC blocks are decoded without a dequantisation table (qmul is NULL).
4950 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4951 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// Chroma AC blocks use a table chosen by plane, intra or inter, and chroma qp.
4957 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4958 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4959 for(i4x4=0; i4x4<4; i4x4++){
4960 const int index= 16 + 4*chroma_idx + i4x4;
4961 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4967 uint8_t * const nnz= &h->non_zero_count_cache[0];
4968 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4969 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: clear every luma and chroma count in the cache.
4972 uint8_t * const nnz= &h->non_zero_count_cache[0];
4973 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4974 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4975 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4977 s->current_picture.qscale_table[mb_xy]= s->qscale;
4978 write_back_non_zero_count(h);
// Undo the doubling of the reference counts applied earlier in this function.
4981 h->ref_count[0] >>= 1;
4982 h->ref_count[1] >>= 1;
// Decodes the field decoding flag of a macroblock pair with CABAC.
// The neighbours examined are the pair to the left and the pair above, both
// addressed from the even row of the current pair. Each is tested for being in
// the current slice and interlaced.
// NOTE(review): presumably each match increments ctx -- confirm.
// The bin is decoded with context state 70 + ctx.
4988 static int decode_cabac_field_decoding_flag(H264Context *h) {
4989 MpegEncContext * const s = &h->s;
4990 const int mb_x = s->mb_x;
// Force the row index to the top macroblock of the pair.
4991 const int mb_y = s->mb_y & ~1;
4992 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4993 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4995 unsigned int ctx = 0;
4997 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5000 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5004 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes an intra macroblock type with CABAC, using the context states that
// start at ctx_base. intra_slice shifts the state indices of the later bins.
// Returns 0 for I4x4, 25 for PCM, otherwise an I16x16 type number built from
// the decoded bins (base 1, +12 for coded luma, chroma and prediction mode bits).
5007 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5008 uint8_t *state= &h->cabac_state[ctx_base];
5012 MpegEncContext * const s = &h->s;
5013 const int mba_xy = h->left_mb_xy[0];
5014 const int mbb_xy = h->top_mb_xy;
// Neighbours in the same slice that are not I4x4 influence the first context.
// NOTE(review): presumably each match increments ctx -- confirm.
5016 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5018 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5020 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5021 return 0; /* I4x4 */
5024 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5025 return 0; /* I4x4 */
// The terminate bin distinguishes PCM from I16x16.
5028 if( get_cabac_terminate( &h->cabac ) )
5029 return 25; /* PCM */
5031 mb_type = 1; /* I16x16 */
5032 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5033 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5034 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
// The last two bins add 2 and 1 to the type number.
5035 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5036 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// Decodes the macroblock type with CABAC according to the slice type.
// I slices delegate to decode_cabac_intra_mb_type. P slices decode the inter
// types from states 14..17 and offset intra types by 5. B slices use states
// starting at 27 and offset intra types by 23.
5040 static int decode_cabac_mb_type( H264Context *h ) {
5041 MpegEncContext * const s = &h->s;
5043 if( h->slice_type == I_TYPE ) {
5044 return decode_cabac_intra_mb_type(h, 3, 1);
5045 } else if( h->slice_type == P_TYPE ) {
5046 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5048 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5049 /* P_L0_D16x16, P_8x8 */
5050 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5052 /* P_L0_D8x16, P_L0_D16x8 */
5053 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5056 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5058 } else if( h->slice_type == B_TYPE ) {
5059 const int mba_xy = h->left_mb_xy[0];
5060 const int mbb_xy = h->top_mb_xy;
// Neighbours in the same slice that are not direct influence the first context.
// NOTE(review): presumably each match increments ctx -- confirm.
5064 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5066 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5069 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5070 return 0; /* B_Direct_16x16 */
5072 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5073 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four bins form a 4 bit value, most significant bit first.
5076 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5077 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5078 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5079 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5081 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5082 else if( bits == 13 ) {
5083 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5084 } else if( bits == 14 )
5085 return 11; /* B_L1_L0_8x16 */
5086 else if( bits == 15 )
5087 return 22; /* B_8x8 */
// Remaining values take one more bin to form a 5 bit code.
5089 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5090 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5092 /* TODO SI/SP frames? */
// Decodes the skip flag of the macroblock at (mb_x, mb_y) with CABAC.
// The left (mba_xy) and top (mbb_xy) neighbour addresses are derived
// differently for MBAFF frames and for the other picture structures. Each
// neighbour is then tested for being in the current slice and not skipped.
// NOTE(review): presumably each match increments ctx and B slices add a
// further offset -- confirm.
// The bin is decoded with context state 11 + ctx.
5097 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5098 MpegEncContext * const s = &h->s;
5102 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
// Address of the top macroblock of the current pair.
5103 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5106 && h->slice_table[mba_xy] == h->slice_num
5107 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5108 mba_xy += s->mb_stride;
5110 mbb_xy = mb_xy - s->mb_stride;
5112 && h->slice_table[mbb_xy] == h->slice_num
5113 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5114 mbb_xy -= s->mb_stride;
5116 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5118 int mb_xy = mb_x + mb_y*s->mb_stride;
// In field pictures the row above in the same field is two strides away.
5120 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5123 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5125 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5128 if( h->slice_type == B_TYPE )
5130 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// Decodes an intra 4x4 prediction mode with CABAC given the predicted mode.
// The first bin (state 68) is tested first; three further bins (state 69)
// then build a 3 bit value, least significant bit first, which is compared
// against pred_mode.
// NOTE(review): presumably a set first bin returns pred_mode and the 3 bit
// value is incremented when it is not below pred_mode -- confirm.
5133 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5136 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5139 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5140 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5141 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5143 if( mode >= pred_mode )
// Decodes the intra chroma prediction mode with CABAC.
// The first bin uses state 64 + ctx, where ctx depends on whether the left and
// top neighbours are in the current slice and have a non zero chroma
// prediction mode. Up to two more bins use state 64 + 3.
// NOTE(review): presumably the result is the number of leading one bins,
// capped at 3 -- confirm.
5149 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5150 const int mba_xy = h->left_mb_xy[0];
5151 const int mbb_xy = h->top_mb_xy;
5155 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5156 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5159 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5162 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5165 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5167 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Decodes the four luma coded block pattern bits with CABAC.
// Each bit uses state 73 + ctx. ctx combines the bit of the 8x8 block to the
// left (worth 1 when that bit is clear) and of the block above (worth 2 when
// clear). Neighbours come from the left or top macroblock cbp, or from bits
// already decoded for the current macroblock.
5173 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5174 int cbp_b, cbp_a, ctx, cbp = 0;
// A neighbour outside the current slice yields -1, so all of its bits read as set.
5176 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5177 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
// Block 0: left is block 1 of the left macroblock, top is block 2 of the top macroblock.
5179 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5180 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
// Block 1: left is block 0 of this macroblock, top is block 3 of the top macroblock.
5181 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5182 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
// Block 2: left is block 3 of the left macroblock, top is block 0 of this macroblock.
5183 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5184 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
// Block 3: both neighbours are inside this macroblock.
5185 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5186 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// Decodes the chroma coded block pattern with CABAC.
// The chroma cbp of the left and top macroblocks (bits 4 and 5 of their cbp)
// selects the context. The first bin uses a context built from neighbours with
// a non zero chroma cbp, the second from neighbours whose chroma cbp equals 2.
// The final return value is 1 plus the second bin.
// NOTE(review): presumably a zero first bin returns 0 and ctx is reset to 4
// before the second bin -- confirm.
5189 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5193 cbp_a = (h->left_cbp>>4)&0x03;
5194 cbp_b = (h-> top_cbp>>4)&0x03;
5197 if( cbp_a > 0 ) ctx++;
5198 if( cbp_b > 0 ) ctx += 2;
5199 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5203 if( cbp_a == 2 ) ctx++;
5204 if( cbp_b == 2 ) ctx += 2;
5205 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// Decodes the macroblock qp delta with CABAC.
// Bins are read in a loop from state 60 + ctx; the initial context depends on
// whether the previous qscale difference was non zero. The loop count val is
// bounded at 102 to prevent an infinite loop on damaged data.
// The visible return maps val to the negative value -(val + 1)/2.
// NOTE(review): presumably there is a second return path for positive deltas -- confirm.
5207 static int decode_cabac_mb_dqp( H264Context *h) {
5211 if( h->last_qscale_diff != 0 )
5214 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5220 if(val > 102) //prevent infinite loop
5227 return -(val + 1)/2;
// Decodes the sub macroblock type of a P macroblock with CABAC.
// Up to three bins are read from states 21, 22 and 23.
// NOTE(review): the values returned on each branch are not visible here --
// confirm against the complete source.
5229 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5230 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5232 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5234 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// Decodes the sub macroblock type of a B macroblock with CABAC.
// Returns 0 for B_Direct_8x8, 1 or 2 for B_L0_8x8 and B_L1_8x8, 11 or 12 for
// B_L1_4x4 and B_Bi_4x4. The remaining types are formed from two more bins
// (state 39) added to a base value type.
5238 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5240 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5241 return 0; /* B_Direct_8x8 */
5242 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5243 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5245 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5246 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5247 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Two bins, most significant first, are added to the base value.
5250 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5251 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// Decodes the transform size flag with CABAC.
// The context state is 399 plus h->neighbor_transform_size.
5255 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5256 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC.
 * @param list reference list whose ref_cache is consulted
 * @param n    block index, translated through scan8[] into the cache
 * @returns the decoded reference index. On overflow (ref >= 32) an error is
 *          logged and 0 is returned, so callers cannot detect the failure
 *          (see the FIXME below).
 */
5259 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
/* cached reference indices at scan8[n]-1 and scan8[n]-8
 * (presumably the left and top neighbours in the 8-wide cache - confirm) */
5260 int refa = h->ref_cache[list][scan8[n] - 1];
5261 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices a neighbour only counts when its direct_cache entry is 0 */
5265 if( h->slice_type == B_TYPE) {
5266 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5268 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
/* bins are read with contexts starting at cabac_state[54] until one decodes as 0 */
5277 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5283 if(ref >= 32 /*h->ref_list[list]*/){
5284 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5285 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one component of a motion vector difference with CABAC.
 * @param list reference list whose mvd_cache is consulted
 * @param n    block index, translated through scan8[] into the cache
 * @param l    component selector: 0 uses context base 40, anything else 47
 * @returns the signed difference; the sign is applied by
 *          get_cabac_bypass_sign() on the final path. An overflow in the
 *          bypass-coded part is logged.
 *          NOTE(review): the values returned by the early-exit and overflow
 *          paths are not visible here.
 */
5291 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* sum of the absolute cached differences at scan8[n]-1 and scan8[n]-8 */
5292 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5293 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5294 int ctxbase = (l == 0) ? 40 : 47;
/* the first-bin context is chosen from amvd by thresholds */
5299 else if( amvd > 32 )
/* first bin */
5304 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* context-coded bins continue while they decode as 1 and mvd is below 9 */
5309 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* remainder is read with bypass bins */
5317 while( get_cabac_bypass( &h->cabac ) ) {
5321 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5326 if( get_cabac_bypass( &h->cabac ) )
/* the sign is decoded as a bypass bin */
5330 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the context index used for the coded block flag of a block.
 * Two neighbour indicators (nza, nzb) are gathered according to the block
 * category:
 *  - first branch (condition not visible here; decode_cabac_residual()
 *    sets this bit for the luma DC block): bit 0x100 of the neighbours' cbp
 *  - cat 1 and 2: non_zero_count_cache entries next to scan8[idx]
 *  - cat 3: bit (6+idx) of the neighbours' cbp
 *  - last branch: non_zero_count_cache entries next to scan8[16+idx]
 * @returns ctx + 4 * cat; the caller adds 85 to index h->cabac_state[].
 */
5333 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5338 nza = h->left_cbp&0x100;
5339 nzb = h-> top_cbp&0x100;
5340 } else if( cat == 1 || cat == 2 ) {
5341 nza = h->non_zero_count_cache[scan8[idx] - 1];
5342 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5343 } else if( cat == 3 ) {
5344 nza = (h->left_cbp>>(6+idx))&0x01;
5345 nzb = (h-> top_cbp>>(6+idx))&0x01;
5348 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5349 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
/* each category owns a group of four contexts */
5358 return ctx + 4 * cat;
/**
 * Context offset of the "last coefficient" bin for 8x8 blocks, indexed by
 * coefficient position (0..62). decode_cabac_residual() adds the entry to
 * last_coeff_ctx_base when scanning an 8x8 block.
 */
5361 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5362 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5363 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5364 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5365 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes the residual coefficients of a single block with CABAC.
 *
 * @param h          decoder context; its CABAC state, non_zero_count_cache
 *                   and the cbp_table[] entry of the current macroblock are
 *                   updated
 * @param block      output coefficients, written at scantable[] positions
 * @param cat        block category, see the list inside the function
 * @param n          block index, meaning depends on cat (same list)
 * @param scantable  maps a coded coefficient index to its position in block
 * @param qmul       per-position dequantisation factors; the callers in this
 *                   file pass NULL for cat 0 and cat 3
 * @param max_coeff  number of coefficients in the block
 */
5368 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5369 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* Start offsets into h->cabac_state[], indexed [MB_FIELD][cat]. */
5370 static const int significant_coeff_flag_offset[2][6] = {
5371 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5372 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5374 static const int last_coeff_flag_offset[2][6] = {
5375 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5376 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
/* Start offsets of the absolute-level contexts, indexed by cat. */
5378 static const int coeff_abs_level_m1_offset[6] = {
5379 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* Significance context offset per coefficient position of an 8x8 block,
 * indexed [MB_FIELD][position]. */
5381 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5382 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5383 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5384 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5385 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5386 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5387 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5388 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5389 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5395 int coeff_count = 0;
5398 int abslevelgt1 = 0;
5400 uint8_t *significant_coeff_ctx_base;
5401 uint8_t *last_coeff_ctx_base;
5402 uint8_t *abs_level_m1_ctx_base;
/* With CABAC_ON_STACK the range/low/bytestream fields of h->cabac are copied
 * into a local context (cc) and written back before leaving the function. */
5405 #define CABAC_ON_STACK
5407 #ifdef CABAC_ON_STACK
5410 cc.range = h->cabac.range;
5411 cc.low = h->cabac.low;
5412 cc.bytestream= h->cabac.bytestream;
5414 #define CC &h->cabac
5418 /* cat: 0-> DC 16x16 n = 0
5419 * 1-> AC 16x16 n = luma4x4idx
5420 * 2-> Luma4x4 n = luma4x4idx
5421 * 3-> DC Chroma n = iCbCr
5422 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5423 * 5-> Luma8x8 n = 4 * luma8x8idx
5426 /* read coded block flag */
/* If the flag is 0, the block's non_zero_count_cache entry is cleared and
 * the local CABAC state is written back.
 * NOTE(review): the early return and the condition guarding the flag read
 * are not visible here. */
5428 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5429 if( cat == 1 || cat == 2 )
5430 h->non_zero_count_cache[scan8[n]] = 0;
5432 h->non_zero_count_cache[scan8[16+n]] = 0;
5433 #ifdef CABAC_ON_STACK
5434 h->cabac.range = cc.range ;
5435 h->cabac.low = cc.low ;
5436 h->cabac.bytestream= cc.bytestream;
/* context base pointers for this category and field/frame mode */
5442 significant_coeff_ctx_base = h->cabac_state
5443 + significant_coeff_flag_offset[MB_FIELD][cat];
5444 last_coeff_ctx_base = h->cabac_state
5445 + last_coeff_flag_offset[MB_FIELD][cat];
5446 abs_level_m1_ctx_base = h->cabac_state
5447 + coeff_abs_level_m1_offset[cat];
/* DECODE_SIGNIFICANCE walks the first `coefs` positions: each position whose
 * significance bin is set is recorded in index[], after which the "last" bin
 * is read for that position. */
5450 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5451 for(last= 0; last < coefs; last++) { \
5452 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5453 if( get_cabac( CC, sig_ctx )) { \
5454 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5455 index[coeff_count++] = last; \
5456 if( get_cabac( CC, last_ctx ) ) { \
5462 if( last == max_coeff -1 ) {\
5463 index[coeff_count++] = last;\
5465 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 builds with enough registers use assembly helpers for the significance
 * pass; otherwise the macro above is expanded, once with the 8x8 offset
 * tables and once with identity offsets. */
5466 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5467 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5469 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5471 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5473 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5476 assert(coeff_count > 0);
/* record that the block has coefficients: a cbp_table bit for the DC
 * categories, the coefficient count in non_zero_count_cache for the others */
5479 h->cbp_table[mb_xy] |= 0x100;
5480 else if( cat == 1 || cat == 2 )
5481 h->non_zero_count_cache[scan8[n]] = coeff_count;
5483 h->cbp_table[mb_xy] |= 0x40 << n;
5485 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5488 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* levels are decoded from the last recorded position back to the first */
5491 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5492 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5493 int j= scantable[index[coeff_count]];
/* first bin 0: the magnitude is 1, only the sign remains to be read.
 * Two stores follow, one raw and one scaled by qmul[j] with rounding.
 * NOTE(review): the condition selecting between them is not visible here. */
5495 if( get_cabac( CC, ctx ) == 0 ) {
5497 block[j] = get_cabac_bypass_sign( CC, -1);
5499 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
/* larger magnitudes: context-coded bins up to 15, then a bypass-coded part */
5505 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5506 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5510 if( coeff_abs >= 15 ) {
5512 while( get_cabac_bypass( CC ) ) {
5518 coeff_abs += coeff_abs + get_cabac_bypass( CC );
/* the sign is a bypass bin; stored either raw or dequantised with qmul[j] */
5524 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5525 else block[j] = coeff_abs;
5527 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5528 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* write the local CABAC state back into the decoder context */
5534 #ifdef CABAC_ON_STACK
5535 h->cabac.range = cc.range ;
5536 h->cabac.low = cc.low ;
5537 h->cabac.bytestream= cc.bytestream;
/**
 * Computes the addresses of the top and left neighbours of the current
 * macroblock and stores them in h->top_mb_xy and h->left_mb_xy[0].
 * The defaults (one row up, one column left) are adjusted when the
 * neighbouring macroblock pairs differ from the current one in frame/field
 * coding, and for field pictures.
 */
5542 static inline void compute_mb_neighbors(H264Context *h)
5544 MpegEncContext * const s = &h->s;
5545 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
/* defaults: directly above and directly to the left */
5546 h->top_mb_xy = mb_xy - s->mb_stride;
5547 h->left_mb_xy[0] = mb_xy - 1;
/* pair_xy addresses the top macroblock of the current pair (even row) */
5549 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5550 const int top_pair_xy = pair_xy - s->mb_stride;
5551 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5552 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5553 const int curr_mb_frame_flag = !MB_FIELD;
5554 const int bottom = (s->mb_y & 1);
/* when this condition holds, the top neighbour moves up by one more row */
5556 ? !curr_mb_frame_flag // bottom macroblock
5557 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5559 h->top_mb_xy -= s->mb_stride;
/* left pair coded differently from the current one: use the top macroblock of the left pair */
5561 if (left_mb_frame_flag != curr_mb_frame_flag) {
5562 h->left_mb_xy[0] = pair_xy - 1;
/* field pictures: the top neighbour is one more row up */
5564 } else if (FIELD_PICTURE) {
5565 h->top_mb_xy -= s->mb_stride;
5571 * Decodes one macroblock of a CABAC-coded slice.
5572 * @returns 0 if ok, -1 if an error is noticed
5574 static int decode_mb_cabac(H264Context *h) {
/* Overview of the steps visible below: skip handling for non-intra slices,
 * macroblock type decoding, the raw-sample (IPCM) path, intra prediction
 * modes or reference indices and motion vectors, coded block pattern,
 * QP delta and finally the residual blocks. The per-macroblock tables
 * (cbp_table, qscale_table, mb_type, non_zero_count) are updated on the way. */
5575 MpegEncContext * const s = &h->s;
5576 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5577 int mb_type, partition_count, cbp = 0;
5578 int dct8x8_allowed= h->pps.transform_8x8_mode;
5580 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5582 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* the skip flag only exists outside I and SI slices */
5583 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5585 /* a skipped mb needs the aff flag from the following mb */
5586 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5587 predict_field_decoding_flag(h);
/* bottom macroblock of a pair whose top was skipped: reuse the flag read ahead earlier */
5588 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5589 skip = h->next_mb_skipped;
5591 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5592 /* read skip flags */
/* top macroblock of an MBAFF pair: read the skip flag of the bottom one ahead */
5594 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5595 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5596 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5597 if(h->next_mb_skipped)
5598 predict_field_decoding_flag(h);
5600 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* per-macroblock state reset on this path */
5605 h->cbp_table[mb_xy] = 0;
5606 h->chroma_pred_mode_table[mb_xy] = 0;
5607 h->last_qscale_diff = 0;
/* field decoding flag: read on even rows, otherwise derived from the picture structure */
5614 if( (s->mb_y&1) == 0 )
5616 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5618 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5620 h->prev_mb_skipped = 0;
5622 compute_mb_neighbors(h);
5623 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5624 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the decoded index through the table of the current slice type */
5628 if( h->slice_type == B_TYPE ) {
5630 partition_count= b_mb_type_info[mb_type].partition_count;
5631 mb_type= b_mb_type_info[mb_type].type;
5634 goto decode_intra_mb;
5636 } else if( h->slice_type == P_TYPE ) {
5638 partition_count= p_mb_type_info[mb_type].partition_count;
5639 mb_type= p_mb_type_info[mb_type].type;
5642 goto decode_intra_mb;
5645 assert(h->slice_type == I_TYPE);
/* intra types: the table also supplies the cbp and the 16x16 prediction mode */
5647 partition_count = 0;
5648 cbp= i_mb_type_info[mb_type].cbp;
5649 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5650 mb_type= i_mb_type_info[mb_type].type;
5653 mb_type |= MB_TYPE_INTERLACED;
5655 h->slice_table[ mb_xy ]= h->slice_num;
/* raw-sample macroblock: samples are copied straight from the bitstream */
5657 if(IS_INTRA_PCM(mb_type)) {
5661 // We assume these blocks are very rare so we do not optimize it.
5662 // FIXME The two following lines get the bitstream position in the cabac
5663 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5664 ptr= h->cabac.bytestream;
5665 if(h->cabac.low&0x1) ptr--;
5667 if(h->cabac.low&0x1FF) ptr--;
5670 // The pixels are stored in the same order as levels in h->mb array.
5671 for(y=0; y<16; y++){
5672 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5673 for(x=0; x<16; x++){
5674 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5675 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
/* the two chroma planes follow, stored at offsets 256 and 256 + 64 in h->mb */
5679 const int index= 256 + 4*(y&3) + 32*(y>>2);
5681 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5682 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5686 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5688 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5689 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC decoder right after the raw samples */
5693 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5695 // All blocks are present
5696 h->cbp_table[mb_xy] = 0x1ef;
5697 h->chroma_pred_mode_table[mb_xy] = 0;
5698 // In deblocking, the quantizer is 0
5699 s->current_picture.qscale_table[mb_xy]= 0;
5700 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5701 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5702 // All coeffs are present
5703 memset(h->non_zero_count[mb_xy], 16, 16);
5704 s->current_picture.mb_type[mb_xy]= mb_type;
/* reference counts are doubled here and halved again at the end of the function.
 * NOTE(review): the guarding condition is not visible here. */
5709 h->ref_count[0] <<= 1;
5710 h->ref_count[1] <<= 1;
5713 fill_caches(h, mb_type, 0);
/* intra macroblock: decode the prediction modes */
5715 if( IS_INTRA( mb_type ) ) {
5717 if( IS_INTRA4x4( mb_type ) ) {
/* with the 8x8 transform one mode is decoded per 8x8 block and replicated over its 2x2 cache entries */
5718 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5719 mb_type |= MB_TYPE_8x8DCT;
5720 for( i = 0; i < 16; i+=4 ) {
5721 int pred = pred_intra_mode( h, i );
5722 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5723 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5726 for( i = 0; i < 16; i++ ) {
5727 int pred = pred_intra_mode( h, i );
5728 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5730 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5733 write_back_intra_pred_mode(h);
5734 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5736 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5737 if( h->intra16x16_pred_mode < 0 ) return -1;
/* chroma prediction mode: stored per macroblock, then validated */
5739 h->chroma_pred_mode_table[mb_xy] =
5740 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5742 pred_mode= check_intra_pred_mode( h, pred_mode );
5743 if( pred_mode < 0 ) return -1;
5744 h->chroma_pred_mode= pred_mode;
/* four 8x8 partitions, each with its own sub-macroblock type */
5745 } else if( partition_count == 4 ) {
5746 int i, j, sub_partition_count[4], list, ref[2][4];
5748 if( h->slice_type == B_TYPE ) {
5749 for( i = 0; i < 4; i++ ) {
5750 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5751 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5752 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* if any sub-block is direct, run direct prediction for the macroblock */
5754 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5755 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5756 pred_direct_motion(h, &mb_type);
5757 h->ref_cache[0][scan8[4]] =
5758 h->ref_cache[1][scan8[4]] =
5759 h->ref_cache[0][scan8[12]] =
5760 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5761 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5762 for( i = 0; i < 4; i++ )
5763 if( IS_DIRECT(h->sub_mb_type[i]) )
5764 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5768 for( i = 0; i < 4; i++ ) {
5769 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5770 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5771 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for all non-direct sub-blocks, list by list */
5775 for( list = 0; list < h->list_count; list++ ) {
5776 for( i = 0; i < 4; i++ ) {
5777 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5778 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5779 if( h->ref_count[list] > 1 )
5780 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5786 h->ref_cache[list][ scan8[4*i]+1 ]=
5787 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5792 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors: prediction plus decoded difference for every sub-partition */
5794 for(list=0; list<h->list_count; list++){
5796 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5797 if(IS_DIRECT(h->sub_mb_type[i])){
5798 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5802 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5803 const int sub_mb_type= h->sub_mb_type[i];
5804 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5805 for(j=0; j<sub_partition_count[i]; j++){
5808 const int index= 4*i + block_width*j;
5809 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5810 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5811 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5813 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5814 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5815 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the vector and its difference over the cache entries covered by the sub-partition shape */
5817 if(IS_SUB_8X8(sub_mb_type)){
5819 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5821 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5824 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5826 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5827 }else if(IS_SUB_8X4(sub_mb_type)){
5828 mv_cache[ 1 ][0]= mx;
5829 mv_cache[ 1 ][1]= my;
5831 mvd_cache[ 1 ][0]= mx - mpx;
5832 mvd_cache[ 1 ][1]= my - mpy;
5833 }else if(IS_SUB_4X8(sub_mb_type)){
5834 mv_cache[ 8 ][0]= mx;
5835 mv_cache[ 8 ][1]= my;
5837 mvd_cache[ 8 ][0]= mx - mpx;
5838 mvd_cache[ 8 ][1]= my - mpy;
5840 mv_cache[ 0 ][0]= mx;
5841 mv_cache[ 0 ][1]= my;
5843 mvd_cache[ 0 ][0]= mx - mpx;
5844 mvd_cache[ 0 ][1]= my - mpy;
/* sub-block not predicted from this list: zero its 2x2 vector and difference entries */
5847 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5848 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5849 p[0] = p[1] = p[8] = p[9] = 0;
5850 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* direct macroblock: vectors come from direct prediction, differences are zero */
5854 } else if( IS_DIRECT(mb_type) ) {
5855 pred_direct_motion(h, &mb_type);
5856 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5857 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5858 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16, 16x8 and 8x16 partitions: all reference indices first, then the vectors */
5860 int list, mx, my, i, mpx, mpy;
5861 if(IS_16X16(mb_type)){
5862 for(list=0; list<h->list_count; list++){
5863 if(IS_DIR(mb_type, 0, list)){
5864 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5865 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5867 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5869 for(list=0; list<h->list_count; list++){
5870 if(IS_DIR(mb_type, 0, list)){
5871 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5873 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5874 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5875 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5877 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5878 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5880 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
/* 16x8: partition i covers the cache rows starting at scan8[0] + 16*i */
5883 else if(IS_16X8(mb_type)){
5884 for(list=0; list<h->list_count; list++){
5886 if(IS_DIR(mb_type, i, list)){
5887 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5888 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5890 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5893 for(list=0; list<h->list_count; list++){
5895 if(IS_DIR(mb_type, i, list)){
5896 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5897 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5898 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5899 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5901 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5902 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5904 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5905 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
/* 8x16: partition i covers the cache columns starting at scan8[0] + 2*i */
5910 assert(IS_8X16(mb_type));
5911 for(list=0; list<h->list_count; list++){
5913 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5914 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5915 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5917 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5920 for(list=0; list<h->list_count; list++){
5922 if(IS_DIR(mb_type, i, list)){
5923 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5924 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5925 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5927 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5928 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5929 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5931 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5932 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* inter macroblocks: store the motion data; no chroma prediction mode applies */
5939 if( IS_INTER( mb_type ) ) {
5940 h->chroma_pred_mode_table[mb_xy] = 0;
5941 write_back_motion( h, mb_type );
/* intra 16x16 already took its cbp from i_mb_type_info; all other types decode it here */
5944 if( !IS_INTRA16x16( mb_type ) ) {
5945 cbp = decode_cabac_mb_cbp_luma( h );
5946 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5949 h->cbp_table[mb_xy] = h->cbp = cbp;
/* non-intra macroblocks with coded luma may still select the 8x8 transform */
5951 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5952 if( decode_cabac_mb_transform_size( h ) )
5953 mb_type |= MB_TYPE_8x8DCT;
5955 s->current_picture.mb_type[mb_xy]= mb_type;
/* residual data is present: pick the scan tables, update the QP, decode the blocks */
5957 if( cbp || IS_INTRA16x16( mb_type ) ) {
5958 const uint8_t *scan, *scan8x8, *dc_scan;
5959 const uint32_t *qmul;
/* field scans for interlaced macroblocks, zigzag otherwise; the _q0 variants are used when qscale is 0 */
5962 if(IS_INTERLACED(mb_type)){
5963 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5964 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5965 dc_scan= luma_dc_field_scan;
5967 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5968 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5969 dc_scan= luma_dc_zigzag_scan;
/* QP delta; INT_MIN signals a decoding failure */
5972 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5973 if( dqp == INT_MIN ){
5974 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap the updated qscale back into the range 0..51 */
5978 if(((unsigned)s->qscale) > 51){
5979 if(s->qscale<0) s->qscale+= 52;
5980 else s->qscale-= 52;
5982 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5983 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* intra 16x16: one DC block (cat 0), then the AC blocks (cat 1) */
5985 if( IS_INTRA16x16( mb_type ) ) {
5987 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5988 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5991 qmul = h->dequant4_coeff[0][s->qscale];
5992 for( i = 0; i < 16; i++ ) {
5993 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5994 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5997 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other types: per 8x8 block, either one 8x8 residual (cat 5) or four 4x4 residuals (cat 2) */
6001 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6002 if( cbp & (1<<i8x8) ) {
6003 if( IS_8x8DCT(mb_type) ) {
6004 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6005 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6007 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6008 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6009 const int index = 4*i8x8 + i4x4;
6010 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6012 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6013 //STOP_TIMER("decode_residual")
/* 8x8 block without coded luma: clear its four non-zero counts */
6017 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6018 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (cat 3), one block per chroma plane */
6025 for( c = 0; c < 2; c++ ) {
6026 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6027 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* chroma AC (cat 4), four blocks per chroma plane */
6033 for( c = 0; c < 2; c++ ) {
6034 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6035 for( i = 0; i < 4; i++ ) {
6036 const int index = 16 + 4 * c + i;
6037 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6038 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
/* no chroma AC: clear the chroma non-zero counts */
6042 uint8_t * const nnz= &h->non_zero_count_cache[0];
6043 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6044 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual at all: clear every non-zero count and the remembered QP delta */
6047 uint8_t * const nnz= &h->non_zero_count_cache[0];
6048 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6049 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6050 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6051 h->last_qscale_diff = 0;
6054 s->current_picture.qscale_table[mb_xy]= s->qscale;
6055 write_back_non_zero_count(h);
/* undo the doubling of the reference counts applied earlier */
6058 h->ref_count[0] >>= 1;
6059 h->ref_count[1] >>= 1;
/**
 * Deblocks one vertical luma edge.
 * @param pix    first sample to the right of the edge (q0 is pix[0], p0 is pix[-1])
 * @param stride line stride, passed on to the DSP routine
 * @param bS     boundary strength values for the edge
 * @param qp     QP from which alpha, beta and the tc0 entries are looked up
 * The tc[] values built from bS are handed to the DSP routine; the loop
 * further down is the inline bS=4 filter over the 16 rows of the edge.
 * NOTE(review): the condition selecting between the two paths is not visible here.
 */
6066 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* filter thresholds derived from qp and the slice offsets; the tables are addressed relative to entry 52 */
6068 const int index_a = qp + h->slice_alpha_c0_offset;
6069 const int alpha = (alpha_table+52)[index_a];
6070 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* a bS of 0 is passed on as tc = -1 */
6075 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6076 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6078 /* 16px edge length, because bS=4 is triggered by being at
6079 * the edge of an intra MB, so all 4 bS are the same */
6080 for( d = 0; d < 16; d++ ) {
6081 const int p0 = pix[-1];
6082 const int p1 = pix[-2];
6083 const int p2 = pix[-3];
6085 const int q0 = pix[0];
6086 const int q1 = pix[1];
6087 const int q2 = pix[2];
/* only filter where the step across the edge and the gradients on both sides are small */
6089 if( FFABS( p0 - q0 ) < alpha &&
6090 FFABS( p1 - p0 ) < beta &&
6091 FFABS( q1 - q0 ) < beta ) {
/* very small step: each side may get the three-sample filter */
6093 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6094 if( FFABS( p2 - p0 ) < beta)
6096 const int p3 = pix[-4];
6098 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6099 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6100 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6103 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6105 if( FFABS( q2 - q0 ) < beta)
6107 const int q3 = pix[3];
6109 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6110 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6111 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6114 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* otherwise only p0 and q0 are modified */
6118 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6119 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6121 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge.
 * alpha and beta are looked up from qp and the slice offsets; the work is
 * done by the DSP chroma routines, either the normal one with tc[] built
 * from bS or the intra one.
 * NOTE(review): the condition selecting between the two calls is not visible here.
 */
6127 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6129 const int index_a = qp + h->slice_alpha_c0_offset;
6130 const int alpha = (alpha_table+52)[index_a];
6131 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0 + 1; a bS of 0 is passed on as tc = 0 */
6136 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6137 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6139 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the left vertical luma edge of a macroblock in MBAFF frames.
 * The 16 rows are processed one at a time because the boundary strength
 * (bS[8]) and the QP (qp[2]) can differ from row to row.
 * @param pix    first sample to the right of the edge in the first row
 * @param stride distance between consecutive rows
 * @param bS     eight boundary strength values
 * @param qp     two QP values, selected per row via MB_FIELD
 */
6143 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6145 for( i = 0; i < 16; i++, pix += stride) {
/* bS entry for this row; NOTE(review): the conditions around these two lines are not visible here */
6151 int bS_index = (i >> 1);
6154 bS_index |= (i & 1);
/* rows with bS 0 get special handling (body not visible here) */
6157 if( bS[bS_index] == 0 ) {
/* field macroblocks switch QP after 8 rows, frame macroblocks alternate every row */
6161 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6162 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6163 alpha = (alpha_table+52)[index_a];
6164 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS 1..3: clipped filter */
6166 if( bS[bS_index] < 4 ) {
6167 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6168 const int p0 = pix[-1];
6169 const int p1 = pix[-2];
6170 const int p2 = pix[-3];
6171 const int q0 = pix[0];
6172 const int q1 = pix[1];
6173 const int q2 = pix[2];
6175 if( FFABS( p0 - q0 ) < alpha &&
6176 FFABS( p1 - p0 ) < beta &&
6177 FFABS( q1 - q0 ) < beta ) {
/* p1 and q1 are corrected only where the respective side is smooth */
6181 if( FFABS( p2 - p0 ) < beta ) {
6182 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6185 if( FFABS( q2 - q0 ) < beta ) {
6186 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* p0 and q0 move towards each other by a delta clipped to +-tc */
6190 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6191 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6192 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6193 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS 4: unclipped filter, same formulas as in filter_mb_edgev */
6196 const int p0 = pix[-1];
6197 const int p1 = pix[-2];
6198 const int p2 = pix[-3];
6200 const int q0 = pix[0];
6201 const int q1 = pix[1];
6202 const int q2 = pix[2];
6204 if( FFABS( p0 - q0 ) < alpha &&
6205 FFABS( p1 - p0 ) < beta &&
6206 FFABS( q1 - q0 ) < beta ) {
6208 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6209 if( FFABS( p2 - p0 ) < beta)
6211 const int p3 = pix[-4];
6213 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6214 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6215 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6218 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6220 if( FFABS( q2 - q0 ) < beta)
6222 const int q3 = pix[3];
6224 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6225 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6226 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6229 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6233 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6234 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6236 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the left vertical chroma edge of a macroblock in MBAFF frames.
 * The 8 rows are processed one at a time; the boundary strength comes from
 * bS[8] and the QP from qp[2], selected per row. Only p0 and q0 are modified.
 */
6241 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6243 for( i = 0; i < 8; i++, pix += stride) {
/* rows with bS 0 get special handling (body not visible here) */
6251 if( bS[bS_index] == 0 ) {
/* field macroblocks switch QP after 4 rows, frame macroblocks alternate every row */
6255 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6256 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6257 alpha = (alpha_table+52)[index_a];
6258 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS 1..3: delta clipped to +-tc, with tc = tc0 + 1 */
6260 if( bS[bS_index] < 4 ) {
6261 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6262 const int p0 = pix[-1];
6263 const int p1 = pix[-2];
6264 const int q0 = pix[0];
6265 const int q1 = pix[1];
6267 if( FFABS( p0 - q0 ) < alpha &&
6268 FFABS( p1 - p0 ) < beta &&
6269 FFABS( q1 - q0 ) < beta ) {
6270 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6272 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6273 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6274 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS 4: unclipped filter */
6277 const int p0 = pix[-1];
6278 const int p1 = pix[-2];
6279 const int q0 = pix[0];
6280 const int q1 = pix[1];
6282 if( FFABS( p0 - q0 ) < alpha &&
6283 FFABS( p1 - p0 ) < beta &&
6284 FFABS( q1 - q0 ) < beta ) {
6286 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6287 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6288 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge.
 * @param pix    first sample below the edge (q0 is pix[0], p0 is pix[-stride])
 * @param stride distance between consecutive rows
 * @param bS     boundary strength values for the edge
 * @param qp     QP from which alpha, beta and the tc0 entries are looked up
 * The tc[] values built from bS are handed to the DSP routine; the loop
 * further down is the inline strong filter over the 16 columns of the edge.
 * NOTE(review): the condition selecting between the two paths is not visible here.
 */
6294 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6296 const int index_a = qp + h->slice_alpha_c0_offset;
6297 const int alpha = (alpha_table+52)[index_a];
6298 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* samples across a horizontal edge are one row apart */
6299 const int pix_next = stride;
/* a bS of 0 is passed on as tc = -1 */
6304 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6305 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6307 /* 16px edge length, see filter_mb_edgev */
6308 for( d = 0; d < 16; d++ ) {
6309 const int p0 = pix[-1*pix_next];
6310 const int p1 = pix[-2*pix_next];
6311 const int p2 = pix[-3*pix_next];
6312 const int q0 = pix[0];
6313 const int q1 = pix[1*pix_next];
6314 const int q2 = pix[2*pix_next];
/* only filter where the step across the edge and the gradients on both sides are small */
6316 if( FFABS( p0 - q0 ) < alpha &&
6317 FFABS( p1 - p0 ) < beta &&
6318 FFABS( q1 - q0 ) < beta ) {
6320 const int p3 = pix[-4*pix_next];
6321 const int q3 = pix[ 3*pix_next];
/* very small step: each side may get the three-sample filter */
6323 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6324 if( FFABS( p2 - p0 ) < beta) {
6326 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6327 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6328 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6331 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6333 if( FFABS( q2 - q0 ) < beta) {
6335 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6336 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6337 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6340 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* otherwise only p0 and q0 are modified */
6344 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6345 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6347 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge.
 * alpha and beta are looked up from qp and the slice offsets; the work is
 * done by the DSP chroma routines, either the normal one with tc[] built
 * from bS or the intra one.
 * NOTE(review): the condition selecting between the two calls is not visible here.
 */
6354 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6356 const int index_a = qp + h->slice_alpha_c0_offset;
6357 const int alpha = (alpha_table+52)[index_a];
6358 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0 + 1; a bS of 0 is passed on as tc = 0 */
6363 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6364 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6366 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Fast-path deblocking of one macroblock.
 *
 * Falls back to filter_mb() when the macroblock is in the first column or
 * row, when the dsp context has no h264_loop_filter_strength, when
 * h->pps.chroma_qp_diff is set, or when deblocking_filter == 2 and the left
 * or top neighbour belongs to a different slice. Otherwise boundary strengths
 * are either constant (intra MBs) or produced by the dsp strength function,
 * and the per-edge filters are called directly.
 *
 * img_y/img_cb/img_cr point at the top-left sample of the macroblock in each
 * plane; linesize/uvlinesize are the corresponding row strides.
 */
6370 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6371 MpegEncContext * const s = &h->s;
6373 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6375 mb_xy = mb_x + mb_y*s->mb_stride;
/* Cases the fast path does not handle are delegated to the general filter. */
6377 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6378 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6379 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6380 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6383 assert(!FRAME_MBAFF);
/* qp: this MB, qp0: left neighbour, qp1: top neighbour (luma);
 * qpc*: the same three mapped through chroma table 0. */
6385 mb_type = s->current_picture.mb_type[mb_xy];
6386 qp = s->current_picture.qscale_table[mb_xy];
6387 qp0 = s->current_picture.qscale_table[mb_xy-1];
6388 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6389 qpc = get_chroma_qp( h, 0, qp );
6390 qpc0 = get_chroma_qp( h, 0, qp0 );
6391 qpc1 = get_chroma_qp( h, 0, qp1 );
/* Edges shared with a neighbour use the rounded average of both quantisers. */
6392 qp0 = (qp + qp0 + 1) >> 1;
6393 qp1 = (qp + qp1 + 1) >> 1;
6394 qpc0 = (qpc + qpc0 + 1) >> 1;
6395 qpc1 = (qpc + qpc1 + 1) >> 1;
/* When every quantiser involved is at or below this threshold the function
 * takes an early-out (the statement itself is not visible in this excerpt). */
6396 qp_thresh = 15 - h->slice_alpha_c0_offset;
6397 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6398 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra macroblocks: strengths are constants. MB-boundary edges use 4
 * (3 for the top edge in field pictures), inner edges use 3. */
6401 if( IS_INTRA(mb_type) ) {
6402 int16_t bS4[4] = {4,4,4,4};
6403 int16_t bS3[4] = {3,3,3,3};
6404 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* With the 8x8 transform only edges 0 and 2 are filtered for luma. */
6405 if( IS_8x8DCT(mb_type) ) {
6406 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6407 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6408 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6409 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
/* Otherwise all four vertical and all four horizontal luma edges. */
6411 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6412 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6413 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6414 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6415 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6416 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6417 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6418 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: edges 0 and 2 only, for both Cb and Cr. */
6420 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6421 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6422 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6423 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6424 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6425 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6426 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6427 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Non-intra macroblocks: bS[dir][edge][segment]. bSv views the same storage
 * as one 64-bit word per edge so four 16-bit strengths can be set or tested
 * at once. */
6430 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6431 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with all of the low three cbp bits set: edges 0 and 2 in
 * both directions get strength 2 in every segment. */
6433 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6435 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* Otherwise the dsp routine derives the strengths; the masks, step and edge
 * count passed to it are derived from the partition type and cbp. */
6437 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6438 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6439 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6440 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6442 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6443 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6444 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6445 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* An intra neighbour overrides the shared MB-boundary edge: 4 on the left,
 * and 4 on top (3 in field pictures). */
6447 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6448 bSv[0][0] = 0x0004000400040004ULL;
6449 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6450 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): filter one edge only if any of its four strengths is
 * non-zero. Edge 0 uses the neighbour-averaged quantiser (qp0/qp1, qpc0/qpc1),
 * inner edges use the MB's own. */
6452 #define FILTER(hv,dir,edge)\
6453 if(bSv[dir][edge]) {\
6454 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6456 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6457 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6463 } else if( IS_8x8DCT(mb_type) ) {
/*
 * General deblocking of one macroblock.
 *
 * Handles the MBAFF special case for the first vertical edge, then walks the
 * vertical (dir 0) and horizontal (dir 1) edges. For each edge it derives four
 * boundary strengths (bS) from intra status, coded coefficients, reference
 * pictures and motion vectors, and calls the luma/chroma edge filters with a
 * quantiser averaged between the two macroblocks sharing the edge.
 *
 * img_y/img_cb/img_cr point at the top-left sample of the macroblock in each
 * plane; linesize/uvlinesize are the corresponding row strides.
 */
6482 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6483 MpegEncContext * const s = &h->s;
6484 const int mb_xy= mb_x + mb_y*s->mb_stride;
6485 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Threshold for the vertical MV difference in the bS tests below: 2 for
 * interlaced macroblocks, 4 otherwise. */
6486 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6487 int first_vertical_edge_done = 0;
/* ref2frm is always indexed with (ref index + 2), so ref values -2 and -1
 * both map to -1 and ref n maps to n. */
6489 /* FIXME: A given frame may occupy more than one position in
6490 * the reference list. So ref2frm should be populated with
6491 * frame numbers, not indices. */
6492 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6493 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6495 //for sufficiently low qp, filtering wouldn't do anything
6496 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6498 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6499 int qp = s->current_picture.qscale_table[mb_xy];
6501 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6502 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6508 // left mb is in picture
6509 && h->slice_table[mb_xy-1] != 255
6510 // and current and left pair do not have the same interlaced type
6511 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6512 // and left mb is in the same slice if deblocking_filter == 2
6513 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6514 /* First vertical edge is different in MBAFF frames
6515 * There are 8 different bS to compute and 2 different Qp
/* pair_xy: index of the top macroblock of the current pair (mb_y with bit 0
 * cleared); left_mb_xy: the two macroblocks of the pair to the left. */
6517 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6518 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6523 int mb_qp, mbn0_qp, mbn1_qp;
/* Tells the dir loop below that the first vertical edge is already handled. */
6525 first_vertical_edge_done = 1;
6527 if( IS_INTRA(mb_type) )
6528 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
/* Otherwise each of the 8 segments is classified separately; which left
 * macroblock a segment touches depends on MB_FIELD. */
6530 for( i = 0; i < 8; i++ ) {
6531 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6533 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6535 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6536 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6537 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Two sets of quantisers, one per left macroblock: rounded average of the
 * current MB and that neighbour for luma (qp), Cb (bqp) and Cr (rqp). */
6544 mb_qp = s->current_picture.qscale_table[mb_xy];
6545 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6546 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6547 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6548 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6549 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6550 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6551 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6552 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6553 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6554 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6555 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6556 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6559 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6560 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6561 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6562 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6563 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6565 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6566 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: macroblock on the other side of edge 0, i.e. the left neighbour
 * for dir 0 and the top neighbour for dir 1. Edge 0 is skipped (start = 1)
 * when that neighbour's slice_table entry is 255. */
6569 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6570 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6571 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* A skipped 16x16 macroblock has only its outer edge; everything else has 4. */
6573 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6574 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6575 // how often to recheck mv-based bS when iterating between edges
6576 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6577 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6578 // how often to recheck mv-based bS when iterating along each edge
6579 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* The flag set by the MBAFF block above is consumed here and cleared.
 * NOTE(review): the statement between these two lines is not visible in
 * this excerpt; presumably it skips edge 0 - confirm. */
6581 if (first_vertical_edge_done) {
6583 first_vertical_edge_done = 0;
/* Slice-boundary test for deblocking_filter == 2; the action taken is not
 * visible in this excerpt. */
6586 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6589 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6590 && !IS_INTERLACED(mb_type)
6591 && IS_INTERLACED(mbm_type)
6593 // This is a special case in the norm where the filtering must
6594 // be done twice (one each of the field) even if we are in a
6595 // frame macroblock.
/* Doubled strides address every second row, i.e. one field at a time.
 * mbn_xy starts two macroblock rows up and advances by one row per pass. */
6597 static const int nnz_idx[4] = {4,5,6,3};
6598 unsigned int tmp_linesize = 2 * linesize;
6599 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6600 int mbn_xy = mb_xy - 2 * s->mb_stride;
6605 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6606 if( IS_INTRA(mb_type) ||
6607 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6608 bS[0] = bS[1] = bS[2] = bS[3] = 3;
/* Non-intra: strength per segment depends on coded coefficients on either
 * side of the edge (values assigned are not visible in this excerpt). */
6610 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6611 for( i = 0; i < 4; i++ ) {
6612 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6613 mbn_nnz[nnz_idx[i]] != 0 )
6619 // Do not use s->qscale as luma quantizer because it has not the same
6620 // value in IPCM macroblocks.
6621 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6622 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6623 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Pass j filters the rows starting at row j of the macroblock. */
6624 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6625 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6626 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6627 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6628 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Regular case: iterate over the edges of this direction. */
6635 for( edge = start; edge < edges; edge++ ) {
6636 /* mbn_xy: neighbor macroblock */
6637 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6638 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* Odd edges are tested specially for 8x8-transform macroblocks (the action
 * is not visible in this excerpt). */
6642 if( (edge&1) && IS_8x8DCT(mb_type) )
/* Intra on either side: a single constant strength for all four segments;
 * how 'value' is chosen is only partly visible here. */
6645 if( IS_INTRA(mb_type) ||
6646 IS_INTRA(mbn_type) ) {
6649 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6650 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6659 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter on both sides. Edges selected by mask_edge start from strength 0. */
6664 if( edge & mask_edge ) {
6665 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* MBAFF with differing interlaced type across the edge: strength 1. */
6668 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6669 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* One partition along the whole edge: a single ref/MV comparison decides
 * the strength of all four segments. */
6672 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6673 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6674 int bn_idx= b_idx - (dir ? 8:1);
/* One list for non-B slices, two for B slices; stop at the first difference. */
6676 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6677 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6678 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6679 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6681 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Per-segment classification: (x,y) is the 4x4 block on this side of the
 * edge, b_idx/bn_idx its cache index and that of the block across the edge. */
6687 for( i = 0; i < 4; i++ ) {
6688 int x = dir == 0 ? edge : i;
6689 int y = dir == 0 ? i : edge;
6690 int b_idx= 8 + 4 + x + 8*y;
6691 int bn_idx= b_idx - (dir ? 8:1);
6693 if( h->non_zero_count_cache[b_idx] != 0 ||
6694 h->non_zero_count_cache[bn_idx] != 0 ) {
6700 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6701 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6702 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6703 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* All four strengths zero: nothing to filter on this edge. */
6711 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6716 // Do not use s->qscale as luma quantizer because it has not the same
6717 // value in IPCM macroblocks.
6718 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6719 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6720 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6721 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Vertical edges: luma on every edge, chroma only on even edges. */
6723 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6724 if( (edge&1) == 0 ) {
6725 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6726 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6727 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6728 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Horizontal edges: same pattern with row offsets. */
6731 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6732 if( (edge&1) == 0 ) {
6733 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6734 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6735 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6736 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/*
 * Decodes the macroblocks of one slice.
 *
 * Uses the CABAC path when h->pps.cabac is set and the CAVLC path otherwise.
 * Each macroblock is entropy-decoded and, on success, reconstructed with
 * hl_decode_mb(). The decoded range and its status (END or ERROR flags,
 * masked by part_mask) are reported through ff_er_add_slice().
 *
 * avctx codec context (see NOTE at the end about the visible use)
 * h     per-slice decoder context
 */
6743 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6744 MpegEncContext * const s = &h->s;
/* Status bits that may be reported: only the AC ones for partitioned
 * frames, otherwise all of the low 7 bits. */
6745 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6749 if( h->pps.cabac ) {
/* The CABAC decoder is started on the byte-aligned remainder of the bit
 * reader's buffer. */
6753 align_get_bits( &s->gb );
6756 ff_init_cabac_states( &h->cabac);
6757 ff_init_cabac_decoder( &h->cabac,
6758 s->gb.buffer + get_bits_count(&s->gb)/8,
6759 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6760 /* calculate pre-state */
/* For each of the 460 contexts: pre = clip(((m * qscale) >> 4) + n, 1, 126)
 * with (m, n) from the I table or from the P/B table selected by
 * cabac_init_idc. The state is then stored as 2*(63-pre) or 2*(pre-64)+1;
 * the test choosing between the two is not visible in this excerpt. */
6761 for( i= 0; i < 460; i++ ) {
6763 if( h->slice_type == I_TYPE )
6764 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6766 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6769 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6771 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop. */
6776 int ret = decode_mb_cabac(h);
6778 //STOP_TIMER("decode_mb_cabac")
6780 if(ret>=0) hl_decode_mb(h);
/* In MBAFF frames a second macroblock is decoded in the same iteration. */
6782 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6785 if(ret>=0) ret = decode_mb_cabac(h);
6787 if(ret>=0) hl_decode_mb(h);
6790 eos = get_cabac_terminate( &h->cabac );
/* Failure: a decode error, or the CABAC read pointer ran more than two
 * bytes past the end of its input. */
6792 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6793 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6794 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* End of a macroblock row: hand the finished band to the output callback. */
6798 if( ++s->mb_x >= s->mb_width ) {
6800 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6802 if(FIELD_OR_MBAFF_PICTURE) {
/* Slice complete: terminate flag seen or the last row has been passed. */
6807 if( eos || s->mb_y >= s->mb_height ) {
6808 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6809 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop; same structure as the CABAC one. */
6816 int ret = decode_mb_cavlc(h);
6818 if(ret>=0) hl_decode_mb(h);
6820 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6822 ret = decode_mb_cavlc(h);
6824 if(ret>=0) hl_decode_mb(h);
6829 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6830 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6835 if(++s->mb_x >= s->mb_width){
6837 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6839 if(FIELD_OR_MBAFF_PICTURE) {
/* Past the last row: whether every bit was consumed exactly is tested and
 * an END status is reported in both visible branches. */
6842 if(s->mb_y >= s->mb_height){
6843 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6845 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6846 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6850 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Input exhausted with no pending skip run: END if consumed exactly,
 * ERROR if the reader went past the end. */
6857 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6858 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6859 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6860 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6864 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the generic loop below passes s->gb without '&' to
 * get_bits_count() and contains "s->?gb", which is not valid C. Presumably
 * this section is compiled out - confirm before touching it. */
6873 for(;s->mb_y < s->mb_height; s->mb_y++){
6874 for(;s->mb_x < s->mb_width; s->mb_x++){
6875 int ret= decode_mb(h);
6880 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6881 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6886 if(++s->mb_x >= s->mb_width){
6888 if(++s->mb_y >= s->mb_height){
6889 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6890 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6894 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6901 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6902 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6903 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6907 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6914 ff_draw_horiz_band(s, 16*s->mb_y, 16);
/* NOTE(review): avctx is not referenced in any line visible here; the
 * function reaches the codec context through s->avctx / h->s.avctx. */
6917 return -1; //not reached
/*
 * Reads an unregistered-user-data SEI payload of 'size' bytes and looks for
 * an x264 version banner in it.
 *
 * At most sizeof(user_data)-1 bytes are copied into a local buffer. The text
 * from byte 16 on (presumably past a 16-byte identifier - TODO confirm) is
 * scanned for "x264 - core <n>"; on a match h->x264_build is set to <n>.
 */
6920 static int decode_unregistered_user_data(H264Context *h, int size){
6921 MpegEncContext * const s = &h->s;
6922 uint8_t user_data[16+256];
/* Copy loop is bounded by both the buffer (leaving one spare byte) and size. */
6928 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6929 user_data[i]= get_bits(&s->gb, 8);
/* NOTE(review): user_data is uint8_t but is handed to sscanf() and to a
 * "%s" format as a string; this relies on the buffer being terminated by a
 * statement not visible in this excerpt. */
6933 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6934 if(e==1 && build>=0)
6935 h->x264_build= build;
6937 if(s->avctx->debug & FF_DEBUG_BUGS)
6938 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consumes one payload byte per call; presumably run for the bytes that did
 * not fit in the buffer (the enclosing loop is not visible here). */
6941 skip_bits(&s->gb, 8);
/*
 * Walks the SEI messages in the current bit reader.
 *
 * For every message the type and the size are read as sums of bytes, where a
 * byte equal to 255 means another byte follows. One type is passed to
 * decode_unregistered_user_data(); other payloads are skipped. The reader is
 * re-aligned to a byte boundary after each message.
 */
6946 static int decode_sei(H264Context *h){
6947 MpegEncContext * const s = &h->s;
/* Continue while more than 16 bits remain. */
6949 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* show_bits() peeks at the byte to add it; get_bits() then consumes that
 * same byte and decides whether to continue. */
6954 type+= show_bits(&s->gb, 8);
6955 }while(get_bits(&s->gb, 8) == 255);
6959 size+= show_bits(&s->gb, 8);
6960 }while(get_bits(&s->gb, 8) == 255);
6964 if(decode_unregistered_user_data(h, size) < 0)
/* NOTE(review): size comes from the bitstream and no check against the
 * remaining bits is visible before skipping - see the FIXME below. */
6968 skip_bits(&s->gb, 8*size);
6971 //FIXME check bits here
6972 align_get_bits(&s->gb);
6978 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6979 MpegEncContext * const s = &h->s;
6981 cpb_count = get_ue_golomb(&s->gb) + 1;
6982 get_bits(&s->gb, 4); /* bit_rate_scale */
6983 get_bits(&s->gb, 4); /* cpb_size_scale */
6984 for(i=0; i<cpb_count; i++){
6985 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6986 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6987 get_bits1(&s->gb); /* cbr_flag */
6989 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6990 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6991 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6992 get_bits(&s->gb, 5); /* time_offset_length */
/*
 * Parses the VUI parameters of a sequence parameter set.
 *
 * Stored in *sps: sample aspect ratio, timing info (num_units_in_tick,
 * time_scale, fixed_frame_rate_flag), bitstream_restriction_flag and
 * num_reorder_frames. All other fields are read and discarded so the bit
 * reader stays in sync.
 */
6995 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6996 MpegEncContext * const s = &h->s;
6997 int aspect_ratio_info_present_flag;
6998 unsigned int aspect_ratio_idc;
6999 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7001 aspect_ratio_info_present_flag= get_bits1(&s->gb);
/* Aspect ratio: either explicit 16-bit num/den (EXTENDED_SAR) or an index
 * below 14 into the pixel_aspect table; anything else is reported as illegal. */
7003 if( aspect_ratio_info_present_flag ) {
7004 aspect_ratio_idc= get_bits(&s->gb, 8);
7005 if( aspect_ratio_idc == EXTENDED_SAR ) {
7006 sps->sar.num= get_bits(&s->gb, 16);
7007 sps->sar.den= get_bits(&s->gb, 16);
7008 }else if(aspect_ratio_idc < 14){
7009 sps->sar= pixel_aspect[aspect_ratio_idc];
7011 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7018 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7020 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7021 get_bits1(&s->gb); /* overscan_appropriate_flag */
7024 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7025 get_bits(&s->gb, 3); /* video_format */
7026 get_bits1(&s->gb); /* video_full_range_flag */
7027 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7028 get_bits(&s->gb, 8); /* colour_primaries */
7029 get_bits(&s->gb, 8); /* transfer_characteristics */
7030 get_bits(&s->gb, 8); /* matrix_coefficients */
7034 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7035 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7036 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* Timing information is kept in the SPS. */
7039 sps->timing_info_present_flag = get_bits1(&s->gb);
7040 if(sps->timing_info_present_flag){
7041 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7042 sps->time_scale = get_bits_long(&s->gb, 32);
7043 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* NAL and VCL HRD parameters are parsed by the same helper and discarded;
 * low_delay_hrd_flag is only present when at least one of them was. */
7046 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7047 if(nal_hrd_parameters_present_flag)
7048 decode_hrd_parameters(h, sps);
7049 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7050 if(vcl_hrd_parameters_present_flag)
7051 decode_hrd_parameters(h, sps);
7052 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7053 get_bits1(&s->gb); /* low_delay_hrd_flag */
7054 get_bits1(&s->gb); /* pic_struct_present_flag */
/* Bitstream restrictions: only num_reorder_frames is kept. */
7056 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7057 if(sps->bitstream_restriction_flag){
7058 unsigned int num_reorder_frames;
7059 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7060 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7061 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7062 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7063 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7064 num_reorder_frames= get_ue_golomb(&s->gb);
7065 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Values above 16 are reported as illegal.
 * NOTE(review): num_reorder_frames is unsigned but is printed with %d. */
7067 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7068 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7072 sps->num_reorder_frames= num_reorder_frames;
/*
 * Reads one scaling list of 'size' entries (16 or 64) into 'factors'.
 *
 * If the presence bit is 0, fallback_list is copied. Otherwise the entries
 * are delta-coded in zigzag order; when the very first decoded value is 0,
 * jvt_list (the preset list) is copied instead.
 *
 * factors       destination, 'size' bytes
 * jvt_list      preset list used when the first value is 0
 * fallback_list list used when no list is written
 */
7078 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7079 const uint8_t *jvt_list, const uint8_t *fallback_list){
7080 MpegEncContext * const s = &h->s;
/* Both the running value and the next value start at 8. */
7081 int i, last = 8, next = 8;
/* Scan order depends on the list size: 4x4 zigzag for 16, 8x8 otherwise. */
7082 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7083 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7084 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7086 for(i=0;i<size;i++){
/* Signed delta added to the previous value, wrapped to 8 bits. */
7088 next = (last + get_se_golomb(&s->gb)) & 0xff;
7089 if(!i && !next){ /* matrix not written, we use the preset one */
7090 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* A zero 'next' repeats the previous value. */
7093 last = factors[scan[i]] = next ? next : last;
/*
 * Reads the set of scaling matrices of an SPS (is_sps != 0) or a PPS.
 *
 * When the presence bit is set, six 4x4 lists and, for an SPS or a PPS with
 * transform_8x8_mode, two 8x8 lists are decoded. The first list of each
 * group falls back to the SPS matrices (when parsing a PPS whose SPS has
 * matrices) or to the defaults; the following lists fall back to the list
 * decoded just before them. When the bit is clear and the SPS has matrices,
 * a PPS simply copies them.
 */
7097 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7098 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7099 MpegEncContext * const s = &h->s;
/* Only a PPS can fall back to its SPS. */
7100 int fallback_sps = !is_sps && sps->scaling_matrix_present;
/* Fallbacks for: 4x4 intra Y, 4x4 inter Y, 8x8 intra Y, 8x8 inter Y. */
7101 const uint8_t *fallback[4] = {
7102 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7103 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7104 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7105 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7107 if(get_bits1(&s->gb)){
/* The SPS flag is only ever raised while parsing an SPS. */
7108 sps->scaling_matrix_present |= is_sps;
7109 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7110 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7111 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7112 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7113 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7114 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7115 if(is_sps || pps->transform_8x8_mode){
7116 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7117 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* No matrices written in the PPS: inherit all of them from the SPS. */
7119 } else if(fallback_sps) {
7120 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7121 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7126 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7129 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7130 const size_t size, const char *name)
7133 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7138 vec[id] = av_mallocz(size);
7140 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/*
 * Parses a sequence parameter set from the current bit reader into the
 * h->sps_buffers slot selected by the sps_id found in the bitstream
 * (allocating the slot on first use).
 */
7145 static inline int decode_seq_parameter_set(H264Context *h){
7146 MpegEncContext * const s = &h->s;
7147 int profile_idc, level_idc;
7148 unsigned int sps_id, tmp, mb_width, mb_height;
/* Fixed-length header: profile, four constraint flags, 4 reserved bits,
 * level, then the SPS id. */
7152 profile_idc= get_bits(&s->gb, 8);
7153 get_bits1(&s->gb); //constraint_set0_flag
7154 get_bits1(&s->gb); //constraint_set1_flag
7155 get_bits1(&s->gb); //constraint_set2_flag
7156 get_bits1(&s->gb); //constraint_set3_flag
7157 get_bits(&s->gb, 4); // reserved
7158 level_idc= get_bits(&s->gb, 8);
7159 sps_id= get_ue_golomb(&s->gb);
7161 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7165 sps->profile_idc= profile_idc;
7166 sps->level_idc= level_idc;
/* Profiles >= 100 carry chroma format, bit depths, transform bypass and
 * optional scaling matrices; the first three are read and discarded. */
7168 if(sps->profile_idc >= 100){ //high profile
7169 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7170 get_bits1(&s->gb); //residual_color_transform_flag
7171 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7172 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7173 sps->transform_bypass = get_bits1(&s->gb);
7174 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7176 sps->scaling_matrix_present = 0;
7178 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7179 sps->poc_type= get_ue_golomb(&s->gb);
/* Picture order count parameters depend on poc_type (0, 1 or 2). */
7181 if(sps->poc_type == 0){ //FIXME #define
7182 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7183 } else if(sps->poc_type == 1){//FIXME #define
7184 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7185 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7186 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7187 tmp= get_ue_golomb(&s->gb);
/* The cycle length must fit the offset_for_ref_frame array. */
7189 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7190 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7193 sps->poc_cycle_length= tmp;
7195 for(i=0; i<sps->poc_cycle_length; i++)
7196 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7197 }else if(sps->poc_type != 2){
7198 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
/* NOTE(review): for an excessive reference frame count only a log message
 * is visible here and the value is still stored just below - confirm
 * whether the SPS should be rejected instead. */
7202 tmp= get_ue_golomb(&s->gb);
7203 if(tmp > MAX_PICTURE_COUNT-2){
7204 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7206 sps->ref_frame_count= tmp;
7207 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* Picture size in macroblocks; checked so that 16*size cannot overflow and
 * the dimensions pass avcodec_check_dimensions(). */
7208 mb_width= get_ue_golomb(&s->gb) + 1;
7209 mb_height= get_ue_golomb(&s->gb) + 1;
7210 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7211 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7212 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7215 sps->mb_width = mb_width;
7216 sps->mb_height= mb_height;
/* mb_aff is only coded when field macroblocks are allowed. */
7218 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7219 if(!sps->frame_mbs_only_flag)
7220 sps->mb_aff= get_bits1(&s->gb);
7224 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7226 #ifndef ALLOW_INTERLACE
7228 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7230 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7231 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
/* Optional cropping rectangle; left/top cropping is only warned about. */
7233 sps->crop= get_bits1(&s->gb);
7235 sps->crop_left = get_ue_golomb(&s->gb);
7236 sps->crop_right = get_ue_golomb(&s->gb);
7237 sps->crop_top = get_ue_golomb(&s->gb);
7238 sps->crop_bottom= get_ue_golomb(&s->gb);
7239 if(sps->crop_left || sps->crop_top){
7240 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7246 sps->crop_bottom= 0;
/* NOTE(review): the return value of decode_vui_parameters() is ignored. */
7249 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7250 if( sps->vui_parameters_present_flag )
7251 decode_vui_parameters(h, sps);
/* Optional one-line summary of the parsed SPS. */
7253 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7254 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7255 sps_id, sps->profile_idc, sps->level_idc,
7257 sps->ref_frame_count,
7258 sps->mb_width, sps->mb_height,
7259 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7260 sps->direct_8x8_inference_flag ? "8B8" : "",
7261 sps->crop_left, sps->crop_right,
7262 sps->crop_top, sps->crop_bottom,
7263 sps->vui_parameters_present_flag ? "VUI" : ""
7270 build_qp_table(PPS *pps, int t, int index)
7273 for(i = 0; i < 255; i++)
7274 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/*
 * Parses a picture parameter set from the current bit reader into the
 * h->pps_buffers slot selected by the pps_id found in the bitstream
 * (allocating the slot on first use).
 *
 * bit_length is the number of bits available; fields that follow
 * redundant_pic_cnt_present are only parsed when the reader has not yet
 * reached it.
 */
7277 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7278 MpegEncContext * const s = &h->s;
7279 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7282 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* The referenced SPS must be in range and already received. */
7286 tmp= get_ue_golomb(&s->gb);
7287 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7288 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7293 pps->cabac= get_bits1(&s->gb);
7294 pps->pic_order_present= get_bits1(&s->gb);
/* More than one slice group means FMO, which is reported as unsupported;
 * the map syntax listed below is not parsed by any line visible here. */
7295 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7296 if(pps->slice_group_count > 1 ){
7297 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7298 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7299 switch(pps->mb_slice_group_map_type){
7302 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7303 | run_length[ i ] |1 |ue(v) |
7308 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7310 | top_left_mb[ i ] |1 |ue(v) |
7311 | bottom_right_mb[ i ] |1 |ue(v) |
7319 | slice_group_change_direction_flag |1 |u(1) |
7320 | slice_group_change_rate_minus1 |1 |ue(v) |
7325 | slice_group_id_cnt_minus1 |1 |ue(v) |
7326 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7328 | slice_group_id[ i ] |1 |u(v) |
/* Default reference counts: each must be 1..32 (the -1 makes the unsigned
 * compare catch 0 as well); on overflow both are reset to 1. */
7333 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7334 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7335 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7336 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7337 pps->ref_count[0]= pps->ref_count[1]= 1;
7341 pps->weighted_pred= get_bits1(&s->gb);
7342 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
/* init_qp / init_qs are coded relative to 26. */
7343 pps->init_qp= get_se_golomb(&s->gb) + 26;
7344 pps->init_qs= get_se_golomb(&s->gb) + 26;
7345 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7346 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7347 pps->constrained_intra_pred= get_bits1(&s->gb);
7348 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* Defaults for the optional trailing fields: no 8x8 transform and flat
 * scaling matrices (every entry 16). */
7350 pps->transform_8x8_mode= 0;
7351 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7352 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7353 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Optional extension, present only if bits remain. */
7355 if(get_bits_count(&s->gb) < bit_length){
7356 pps->transform_8x8_mode= get_bits1(&s->gb);
7357 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7358 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
/* Without the extension the second offset equals the first. */
7360 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
/* Chroma qp tables: table 1 is built separately only when the two offsets
 * differ, otherwise it is a copy of table 0.
 * NOTE(review): chroma_qp_diff is written to h->pps (the context's PPS),
 * not to the 'pps' being parsed here - confirm this is intended. */
7363 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7364 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7365 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7366 h->pps.chroma_qp_diff= 1;
7368 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
/* Optional one-line summary of the parsed PPS. */
7370 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7371 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7372 pps_id, pps->sps_id,
7373 pps->cabac ? "CABAC" : "CAVLC",
7374 pps->slice_group_count,
7375 pps->ref_count[0], pps->ref_count[1],
7376 pps->weighted_pred ? "weighted" : "",
7377 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7378 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7379 pps->constrained_intra_pred ? "CONSTR" : "",
7380 pps->redundant_pic_cnt_present ? "REDU" : "",
7381 pps->transform_8x8_mode ? "8x8DCT" : ""
7389 * Call decode_slice() for each context.
7391 * @param h h264 master context
7392 * @param context_count number of contexts to execute
/**
 * Run decode_slice() for context_count contexts and copy the resulting
 * position and error state back into the master context.
 *
 * @param h             master context; h->thread_context[] holds the per-slice contexts
 * @param context_count number of contexts to run
 */
7394 static void execute_decode_slices(H264Context *h, int context_count){
7395 MpegEncContext * const s = &h->s;
7396 AVCodecContext * const avctx= s->avctx;
/* With exactly one context the slice is decoded directly with the master context. */
7400 if(context_count == 1) {
7401 decode_slice(avctx, h);
/* NOTE(review): the statements below appear to be the multi-context path; confirm against the full file. */
/* Contexts 1..context_count-1 copy error_resilience from avctx and start with a zero error counter. */
7403 for(i = 1; i < context_count; i++) {
7404 hx = h->thread_context[i];
7405 hx->s.error_resilience = avctx->error_resilience;
7406 hx->s.error_count = 0;
/* All contexts are handed to the execute callback of avctx with decode_slice as the worker. */
7409 avctx->execute(avctx, (void *)decode_slice,
7410 (void **)h->thread_context, NULL, context_count);
7412 /* pull back stuff from slices to master context */
/* Position and picture state are taken from the last context that was run. */
7413 hx = h->thread_context[context_count - 1];
7414 s->mb_x = hx->s.mb_x;
7415 s->mb_y = hx->s.mb_y;
7416 s->dropable = hx->s.dropable;
7417 s->picture_structure = hx->s.picture_structure;
/* Only the counters of contexts 1 and up are added to the master error count here. */
7418 for(i = 1; i < context_count; i++)
7419 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split buf into NAL units and dispatch each one according to its type.
 *
 * For AVC input (h->is_avc) every unit is preceded by a big-endian length of
 * h->nal_length_size bytes; otherwise units are located by searching for the
 * 00 00 01 start code prefix. Each unit is unescaped with decode_nal(), its
 * trailing zero bytes are dropped, and the payload is handed to the slice
 * header, SPS or PPS parsers. Slice contexts are collected and run through
 * execute_decode_slices() whenever h->max_contexts of them are pending, and
 * once more for the remainder at the end.
 *
 * @param h        master decoder context
 * @param buf      input bytes
 * @param buf_size number of bytes in buf
 * @return NOTE(review): callers in this file test the result against < 0 and
 *         against the input size, so presumably the consumed byte count or a
 *         negative value on error; confirm against the full file.
 */
7424 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7425 MpegEncContext * const s = &h->s;
7426 AVCodecContext * const avctx= s->avctx;
7428 H264Context *hx; ///< thread context
7429 int context_count = 0;
7431 h->max_contexts = avctx->thread_count;
7434 for(i=0; i<50; i++){
7435 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7438 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7439 h->current_slice = 0;
7440 if (!s->first_field)
7441 s->current_picture_ptr= NULL;
7453 if(buf_index >= buf_size) break;
/* AVC framing: assemble the big-endian NAL length from nal_length_size bytes. */
7455 for(i = 0; i < h->nal_length_size; i++)
7456 nalsize = (nalsize << 8) | buf[buf_index++];
/* Compare against the remaining bytes instead of adding to buf_index, so a
 * huge length prefix cannot overflow the signed sum. */
7457 if(nalsize <= 1 || nalsize > buf_size - buf_index){
7462 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7467 // start code prefix search
7468 for(; buf_index + 3 < buf_size; buf_index++){
7469 // This should always succeed in the first iteration.
7470 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7474 if(buf_index+3 >= buf_size) break;
7479 hx = h->thread_context[context_count];
7481 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7482 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes. The length is tested first so that an empty
 * payload never reads ptr[-1]. */
7485 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7487 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7489 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7490 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7493 if (h->is_avc && (nalsize != consumed))
7494 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7496 buf_index += consumed;
7498 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7499 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7504 switch(hx->nal_unit_type){
7506 if (h->nal_unit_type != NAL_IDR_SLICE) {
7507 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
7510 idr(h); //FIXME ensure we don't lose some frames if there is reordering
/* Unpartitioned slice: one bit reader serves both intra and inter data. */
7512 init_get_bits(&hx->s.gb, ptr, bit_length);
7514 hx->inter_gb_ptr= &hx->s.gb;
7515 hx->s.data_partitioning = 0;
7517 if((err = decode_slice_header(hx, h)))
7520 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7521 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7522 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7523 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7524 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7525 && avctx->skip_frame < AVDISCARD_ALL)
/* Data-partitioned slice: the header comes first, the intra and inter
 * partitions get their own bit readers below. */
7529 init_get_bits(&hx->s.gb, ptr, bit_length);
7531 hx->inter_gb_ptr= NULL;
7532 hx->s.data_partitioning = 1;
7534 err = decode_slice_header(hx, h);
7537 init_get_bits(&hx->intra_gb, ptr, bit_length);
7538 hx->intra_gb_ptr= &hx->intra_gb;
7541 init_get_bits(&hx->inter_gb, ptr, bit_length);
7542 hx->inter_gb_ptr= &hx->inter_gb;
7544 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7545 && s->context_initialized
7547 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7548 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7549 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7550 && avctx->skip_frame < AVDISCARD_ALL)
7554 init_get_bits(&s->gb, ptr, bit_length);
7558 init_get_bits(&s->gb, ptr, bit_length);
7559 decode_seq_parameter_set(h);
7561 if(s->flags& CODEC_FLAG_LOW_DELAY)
7564 if(avctx->has_b_frames < 2)
7565 avctx->has_b_frames= !s->low_delay;
7568 init_get_bits(&s->gb, ptr, bit_length);
7570 decode_picture_parameter_set(h, bit_length);
7574 case NAL_END_SEQUENCE:
7575 case NAL_END_STREAM:
7576 case NAL_FILLER_DATA:
7578 case NAL_AUXILIARY_SLICE:
/* Report the type the switch dispatched on, which lives in hx. */
7581 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
/* Flush the pending slice contexts as soon as every available context is in use. */
7584 if(context_count == h->max_contexts) {
7585 execute_decode_slices(h, context_count);
7590 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7592 /* Slice could not be decoded in parallel mode, copy down
7593 * NAL unit stuff to context 0 and restart. Note that
7594 * rbsp_buffer is not transferred, but since we no longer
7595 * run in parallel mode this should not be an issue. */
7596 h->nal_unit_type = hx->nal_unit_type;
7597 h->nal_ref_idc = hx->nal_ref_idc;
7603 execute_decode_slices(h, context_count);
7608 * returns the number of bytes consumed for building the current frame
/*
 * Adjust the parser position pos into the byte count reported for the current frame.
 *
 * @param s        codec context; flags and parse_context.last_index are read
 * @param pos      position reached in the input buffer
 * @param buf_size size of the input buffer in bytes
 */
7610 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7611 if(s->flags&CODEC_FLAG_TRUNCATED){
/* Truncated mode: make pos relative to parse_context.last_index and clamp it at zero. */
7612 pos -= s->parse_context.last_index;
7613 if(pos<0) pos=0; // FIXME remove (unneeded?)
/* NOTE(review): the two adjustments below presumably belong to the non-truncated branch; confirm. */
/* Zero becomes 1, and a position within 10 bytes of the end is rounded up to the whole buffer. */
7617 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7618 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decode one input buffer and, when a picture is ready, return it in display order.
 *
 * An empty buffer (buf_size == 0) outputs a picture from h->delayed_pic[]
 * instead of decoding. Otherwise the buffer is optionally reassembled into a
 * whole frame (CODEC_FLAG_TRUNCATED), the avcC extradata is parsed once for
 * AVC streams, the NAL units are decoded, and the finished picture is
 * inserted into the delayed-picture list from which the output picture is
 * chosen by POC.
 *
 * @param avctx     codec context; priv_data is the H264Context
 * @param data      receives the output picture as an AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is written to data
 * @param buf       input bytes
 * @param buf_size  number of bytes in buf
 * @return on the normal path the result of get_consumed_bytes(); -1 when the
 *         avcC record runs past the end of extradata
 */
7624 static int decode_frame(AVCodecContext *avctx,
7625 void *data, int *data_size,
7626 uint8_t *buf, int buf_size)
7628 H264Context *h = avctx->priv_data;
7629 MpegEncContext *s = &h->s;
7630 AVFrame *pict = data;
7633 s->flags= avctx->flags;
7634 s->flags2= avctx->flags2;
7636 /* no supplementary picture */
7637 if (buf_size == 0) {
7641 //FIXME factorize this with the output code below
/* Pick the delayed picture with the lowest POC, looking no further than the next key frame. */
7642 out = h->delayed_pic[0];
7644 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7645 if(h->delayed_pic[i]->poc < out->poc){
7646 out = h->delayed_pic[i];
/* Close the gap left by the chosen picture. */
7650 for(i=out_idx; h->delayed_pic[i]; i++)
7651 h->delayed_pic[i] = h->delayed_pic[i+1];
7654 *data_size = sizeof(AVFrame);
7655 *pict= *(AVFrame*)out;
7661 if(s->flags&CODEC_FLAG_TRUNCATED){
7662 int next= ff_h264_find_frame_end(h, buf, buf_size);
7664 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7666 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7669 if(h->is_avc && !h->got_avcC) {
7670 int i, cnt, nalsize;
7671 unsigned char *p = avctx->extradata;
7672 if(avctx->extradata_size < 7) {
7673 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7677 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7680 /* sps and pps in the avcC always have length coded with 2 bytes,
7681 so put a fake nal_length_size = 2 while parsing them */
7682 h->nal_length_size = 2;
7683 // Decode sps from avcC
7684 cnt = *(p+5) & 0x1f; // Number of sps
7686 for (i = 0; i < cnt; i++) {
/* The 2-byte length field itself must lie inside extradata before it is read. */
if(avctx->extradata_size - (p - avctx->extradata) < 2){
av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
return -1;
}
7687 nalsize = AV_RB16(p) + 2;
/* The length comes from the stream; never hand out more bytes than extradata holds. */
if(nalsize > avctx->extradata_size - (p - avctx->extradata)){
av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
return -1;
}
7688 if(decode_nal_units(h, p, nalsize) < 0) {
7689 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7694 // Decode pps from avcC
/* The PPS count byte must still be inside extradata. */
if(p - avctx->extradata >= avctx->extradata_size){
av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
return -1;
}
7695 cnt = *(p++); // Number of pps
7696 for (i = 0; i < cnt; i++) {
/* Same two bounds checks as for the SPS entries. */
if(avctx->extradata_size - (p - avctx->extradata) < 2){
av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
return -1;
}
7697 nalsize = AV_RB16(p) + 2;
if(nalsize > avctx->extradata_size - (p - avctx->extradata)){
av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
return -1;
}
7698 if(decode_nal_units(h, p, nalsize) != nalsize) {
7699 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7704 // Now store right nal length size, that will be used to parse all other nals
7705 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7706 // Do not reparse avcC
/* Non-AVC streams: on the first frame the extradata is parsed as ordinary NAL units. */
7710 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7711 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7715 buf_index=decode_nal_units(h, buf, buf_size);
7719 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7720 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7721 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Finish the picture: always outside chunk mode, and in chunk mode only once the last macroblock row was reached. */
7725 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7726 Picture *out = s->current_picture_ptr;
7727 Picture *cur = s->current_picture_ptr;
7728 Picture *prev = h->delayed_output_pic;
7729 int i, pics, cross_idr, out_of_order, out_idx;
7733 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7734 s->current_picture_ptr->pict_type= s->pict_type;
/* Remember frame_num and POC state for the next picture, then apply the reference marking operations. */
7736 h->prev_frame_num_offset= h->frame_num_offset;
7737 h->prev_frame_num= h->frame_num;
7739 h->prev_poc_msb= h->poc_msb;
7740 h->prev_poc_lsb= h->poc_lsb;
7741 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7745 * FIXME: Error handling code does not seem to support interlaced
7746 * when slices span multiple rows
7747 * The ff_er_add_slice calls don't work right for bottom
7748 * fields; they cause massive erroneous error concealing
7749 * Error marking covers both fields (top and bottom).
7750 * This causes a mismatched s->error_count
7751 * and a bad error table. Further, the error count goes to
7752 * INT_MAX when called for bottom field, because mb_y is
7753 * past end by one (callers fault) and resync_mb_y != 0
7754 * causes problems for the first MB line, too.
7761 if (s->first_field) {
7762 /* Wait for second field. */
7766 //FIXME do something with unavailable reference frames
7768 #if 0 //decode order
7769 *data_size = sizeof(AVFrame);
7771 /* Sort B-frames into display order */
7773 if(h->sps.bitstream_restriction_flag
7774 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7775 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the current picture to the end of the delayed list. */
7780 while(h->delayed_pic[pics]) pics++;
7782 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7784 h->delayed_pic[pics++] = cur;
/* Non-reference pictures are tagged so they stay allocated until they have been output. */
7785 if(cur->reference == 0)
7786 cur->reference = DELAYED_PIC_REF;
7789 for(i=0; h->delayed_pic[i]; i++)
7790 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Same lowest-POC search as in the flush path above. */
7793 out = h->delayed_pic[0];
7795 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7796 if(h->delayed_pic[i]->poc < out->poc){
7797 out = h->delayed_pic[i];
7801 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7802 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7804 else if(prev && pics <= s->avctx->has_b_frames)
7806 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7808 ((!cross_idr && prev && out->poc > prev->poc + 2)
7809 || cur->pict_type == B_TYPE)))
7812 s->avctx->has_b_frames++;
7815 else if(out_of_order)
7818 if(out_of_order || pics > s->avctx->has_b_frames){
7819 for(i=out_idx; h->delayed_pic[i]; i++)
7820 h->delayed_pic[i] = h->delayed_pic[i+1];
7826 *data_size = sizeof(AVFrame);
/* The previously output picture no longer needs to be held for output. */
7827 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7828 prev->reference = 0;
7829 h->delayed_output_pic = out;
7833 *pict= *(AVFrame*)out;
7835 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7839 assert(pict->data[0] || !*data_size);
7840 ff_print_debug_info(s, pict);
7841 //printf("out %d\n", (int)pict->data[0]);
7844 /* Return the Picture timestamp as the frame number */
7845 /* we subtract 1 because it is added on utils.c */
7846 avctx->frame_number = s->picture_number - 1;
7848 pict->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7849 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 * A neighbour counts as available when its h->slice_table entry equals
 * h->slice_num.
 */
7852 static inline void fill_mb_avail(H264Context *h){
7853 MpegEncContext * const s = &h->s;
7854 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Entries 0..2: neighbours one mb_stride earlier (left, same column, right).
 * The left and right ones additionally need a column inside the picture. */
7857 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7858 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7859 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* Entry 3: the neighbour immediately before this macroblock in the same row. */
7865 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
/* Entries 4 and 5 are fixed values. */
7866 h->mb_avail[4]= 1; //FIXME move out
7867 h->mb_avail[5]= 0; //FIXME move out
// Self-test code: Exp-Golomb round trips, 4x4 DCT/IDCT error, quantizer setup and NAL encode/decode.
// SIZE is the byte size of the scratch bit buffer (40 bytes per tested value).
7874 #define SIZE (COUNT*40)
7880 // int int_temp[10000];
7882 AVCodecContext avctx;
7884 dsputil_init(&dsp, &avctx);
// Round trip 1: write 0..COUNT-1 as unsigned Exp-Golomb codes.
7886 init_put_bits(&pb, temp, SIZE);
7887 printf("testing unsigned exp golomb\n");
7888 for(i=0; i<COUNT; i++){
7890 set_ue_golomb(&pb, i);
7891 STOP_TIMER("set_ue_golomb");
7893 flush_put_bits(&pb);
// Read the codes back; s keeps the next 24 bits for the mismatch report.
7895 init_get_bits(&gb, temp, 8*SIZE);
7896 for(i=0; i<COUNT; i++){
7899 s= show_bits(&gb, 24);
7902 j= get_ue_golomb(&gb);
7904 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7907 STOP_TIMER("get_ue_golomb");
// Round trip 2: signed Exp-Golomb codes for the values i - COUNT/2.
7911 init_put_bits(&pb, temp, SIZE);
7912 printf("testing signed exp golomb\n");
7913 for(i=0; i<COUNT; i++){
7915 set_se_golomb(&pb, i - COUNT/2);
7916 STOP_TIMER("set_se_golomb");
7918 flush_put_bits(&pb);
7920 init_get_bits(&gb, temp, 8*SIZE);
7921 for(i=0; i<COUNT; i++){
7924 s= show_bits(&gb, 24);
7927 j= get_se_golomb(&gb);
7928 if(j != i - COUNT/2){
7929 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7932 STOP_TIMER("get_se_golomb");
// Transform test: random src and ref blocks go through h264_diff_dct_c, the
// coefficients are rescaled, and h264_idct_add writes the result onto ref.
7935 printf("testing 4x4 (I)DCT\n");
7938 uint8_t src[16], ref[16];
7939 uint64_t error= 0, max_error=0;
7941 for(i=0; i<COUNT; i++){
7943 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7944 for(j=0; j<16; j++){
7945 ref[j]= random()%255;
7946 src[j]= random()%255;
7949 h264_diff_dct_c(block, src, ref, 4);
// NOTE(review): the factors below presumably compensate the transform scaling; confirm.
7952 for(j=0; j<16; j++){
7953 // printf("%d ", block[j]);
7954 block[j]= block[j]*4;
7955 if(j&1) block[j]= (block[j]*4 + 2)/5;
7956 if(j&4) block[j]= (block[j]*4 + 2)/5;
7960 s->dsp.h264_idct_add(ref, block, 4);
7961 /* for(j=0; j<16; j++){
7962 printf("%d ", ref[j]);
// Compare the reconstruction in ref against src and track the largest difference.
7966 for(j=0; j<16; j++){
7967 int diff= FFABS(src[j] - ref[j]);
7970 max_error= FFMAX(max_error, diff);
7973 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// Quantizer test: for each qp both source blocks receive the same random values.
7975 printf("testing quantizer\n");
7976 for(qp=0; qp<52; qp++){
7978 src1_block[i]= src2_block[i]= random()%255;
// NAL test: a random payload with some zero bytes planted is encoded with
// encode_nal and decoded with decode_nal; length, consumed size and content must match.
7982 printf("Testing NAL layer\n");
7984 uint8_t bitstream[COUNT];
7985 uint8_t nal[COUNT*2];
7987 memset(&h, 0, sizeof(H264Context));
7989 for(i=0; i<COUNT; i++){
// The payload starts with non-zero bytes only (1..255).
7997 for(j=0; j<COUNT; j++){
7998 bitstream[j]= (random() % 255) + 1;
8001 for(j=0; j<zeros; j++){
8002 int pos= random() % COUNT;
8003 while(bitstream[pos] == 0){
8012 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8014 printf("encoding failed\n");
8018 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8022 if(out_length != COUNT){
8023 printf("incorrect length %d %d\n", out_length, COUNT);
8027 if(consumed != nal_length){
8028 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8032 if(memcmp(bitstream, out, COUNT)){
8033 printf("mismatch\n");
8038 printf("Testing RBSP\n");
// Release the decoder state held in avctx->priv_data: both rbsp buffers, then
// whatever free_tables() releases.
// NOTE(review): presumably registered as the close callback of the codec; confirm.
8046 static int decode_end(AVCodecContext *avctx)
8048 H264Context *h = avctx->priv_data;
8049 MpegEncContext *s = &h->s;
// av_freep is given the address of each buffer pointer.
8051 av_freep(&h->rbsp_buffer[0]);
8052 av_freep(&h->rbsp_buffer[1]);
8053 free_tables(h); //FIXME cleanup init stuff perhaps
8056 // memset(h, 0, sizeof(H264Context));
8062 AVCodec h264_decoder = {
8066 sizeof(H264Context),
8071 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,