2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
42 * Value of Picture.reference when Picture is not a reference picture, but
43 * is held for delayed output.
45 #define DELAYED_PIC_REF 4
47 static VLC coeff_token_vlc[4];
48 static VLC chroma_dc_coeff_token_vlc;
50 static VLC total_zeros_vlc[15];
51 static VLC chroma_dc_total_zeros_vlc[3];
53 static VLC run_vlc[6];
56 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
57 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
58 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
59 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
61 static av_always_inline uint32_t pack16to32(int a, int b){
62 #ifdef WORDS_BIGENDIAN
63 return (b&0xFFFF) + (a<<16);
65 return (a&0xFFFF) + (b<<16);
69 const uint8_t ff_rem6[52]={
70 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
73 const uint8_t ff_div6[52]={
74 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
80 * @param h height of the rectangle, should be a constant
81 * @param w width of the rectangle, should be a constant
82 * @param size the size of val (1 or 4), should be a constant
84 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
85 uint8_t *p= (uint8_t*)vp;
86 assert(size==1 || size==4);
92 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
93 assert((stride&(w-1))==0);
95 const uint16_t v= size==4 ? val : val*0x0101;
96 *(uint16_t*)(p + 0*stride)= v;
98 *(uint16_t*)(p + 1*stride)= v;
100 *(uint16_t*)(p + 2*stride)= v;
101 *(uint16_t*)(p + 3*stride)= v;
103 const uint32_t v= size==4 ? val : val*0x01010101;
104 *(uint32_t*)(p + 0*stride)= v;
106 *(uint32_t*)(p + 1*stride)= v;
108 *(uint32_t*)(p + 2*stride)= v;
109 *(uint32_t*)(p + 3*stride)= v;
111 //gcc can't optimize 64bit math on x86_32
112 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
113 const uint64_t v= val*0x0100000001ULL;
114 *(uint64_t*)(p + 0*stride)= v;
116 *(uint64_t*)(p + 1*stride)= v;
118 *(uint64_t*)(p + 2*stride)= v;
119 *(uint64_t*)(p + 3*stride)= v;
121 const uint64_t v= val*0x0100000001ULL;
122 *(uint64_t*)(p + 0+0*stride)= v;
123 *(uint64_t*)(p + 8+0*stride)= v;
124 *(uint64_t*)(p + 0+1*stride)= v;
125 *(uint64_t*)(p + 8+1*stride)= v;
127 *(uint64_t*)(p + 0+2*stride)= v;
128 *(uint64_t*)(p + 8+2*stride)= v;
129 *(uint64_t*)(p + 0+3*stride)= v;
130 *(uint64_t*)(p + 8+3*stride)= v;
132 *(uint32_t*)(p + 0+0*stride)= val;
133 *(uint32_t*)(p + 4+0*stride)= val;
135 *(uint32_t*)(p + 0+1*stride)= val;
136 *(uint32_t*)(p + 4+1*stride)= val;
138 *(uint32_t*)(p + 0+2*stride)= val;
139 *(uint32_t*)(p + 4+2*stride)= val;
140 *(uint32_t*)(p + 0+3*stride)= val;
141 *(uint32_t*)(p + 4+3*stride)= val;
143 *(uint32_t*)(p + 0+0*stride)= val;
144 *(uint32_t*)(p + 4+0*stride)= val;
145 *(uint32_t*)(p + 8+0*stride)= val;
146 *(uint32_t*)(p +12+0*stride)= val;
147 *(uint32_t*)(p + 0+1*stride)= val;
148 *(uint32_t*)(p + 4+1*stride)= val;
149 *(uint32_t*)(p + 8+1*stride)= val;
150 *(uint32_t*)(p +12+1*stride)= val;
152 *(uint32_t*)(p + 0+2*stride)= val;
153 *(uint32_t*)(p + 4+2*stride)= val;
154 *(uint32_t*)(p + 8+2*stride)= val;
155 *(uint32_t*)(p +12+2*stride)= val;
156 *(uint32_t*)(p + 0+3*stride)= val;
157 *(uint32_t*)(p + 4+3*stride)= val;
158 *(uint32_t*)(p + 8+3*stride)= val;
159 *(uint32_t*)(p +12+3*stride)= val;
166 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
167 MpegEncContext * const s = &h->s;
168 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
169 int topleft_xy, top_xy, topright_xy, left_xy[2];
170 int topleft_type, top_type, topright_type, left_type[2];
174 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
176 //FIXME deblocking could skip the intra and nnz parts.
177 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
180 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
182 topleft_xy = top_xy - 1;
183 topright_xy= top_xy + 1;
184 left_xy[1] = left_xy[0] = mb_xy-1;
194 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
195 const int top_pair_xy = pair_xy - s->mb_stride;
196 const int topleft_pair_xy = top_pair_xy - 1;
197 const int topright_pair_xy = top_pair_xy + 1;
198 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
199 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
200 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
201 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
202 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
203 const int bottom = (s->mb_y & 1);
204 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
206 ? !curr_mb_frame_flag // bottom macroblock
207 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
209 top_xy -= s->mb_stride;
212 ? !curr_mb_frame_flag // bottom macroblock
213 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
215 topleft_xy -= s->mb_stride;
218 ? !curr_mb_frame_flag // bottom macroblock
219 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
221 topright_xy -= s->mb_stride;
223 if (left_mb_frame_flag != curr_mb_frame_flag) {
224 left_xy[1] = left_xy[0] = pair_xy - 1;
225 if (curr_mb_frame_flag) {
246 left_xy[1] += s->mb_stride;
259 h->top_mb_xy = top_xy;
260 h->left_mb_xy[0] = left_xy[0];
261 h->left_mb_xy[1] = left_xy[1];
265 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
266 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
267 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
269 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
271 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
273 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
274 for(list=0; list<h->list_count; list++){
275 if(USES_LIST(mb_type,list)){
276 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
277 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
278 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
279 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
285 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
286 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
288 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
289 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
291 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
292 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
297 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
298 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
299 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
300 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
301 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
304 if(IS_INTRA(mb_type)){
305 h->topleft_samples_available=
306 h->top_samples_available=
307 h->left_samples_available= 0xFFFF;
308 h->topright_samples_available= 0xEEEA;
310 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
311 h->topleft_samples_available= 0xB3FF;
312 h->top_samples_available= 0x33FF;
313 h->topright_samples_available= 0x26EA;
316 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
317 h->topleft_samples_available&= 0xDF5F;
318 h->left_samples_available&= 0x5F5F;
322 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
323 h->topleft_samples_available&= 0x7FFF;
325 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
326 h->topright_samples_available&= 0xFBFF;
328 if(IS_INTRA4x4(mb_type)){
329 if(IS_INTRA4x4(top_type)){
330 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
331 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
332 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
333 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
336 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
341 h->intra4x4_pred_mode_cache[4+8*0]=
342 h->intra4x4_pred_mode_cache[5+8*0]=
343 h->intra4x4_pred_mode_cache[6+8*0]=
344 h->intra4x4_pred_mode_cache[7+8*0]= pred;
347 if(IS_INTRA4x4(left_type[i])){
348 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
349 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
352 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
357 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
358 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
373 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
375 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
376 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
377 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
378 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
380 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
381 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
383 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
384 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
387 h->non_zero_count_cache[4+8*0]=
388 h->non_zero_count_cache[5+8*0]=
389 h->non_zero_count_cache[6+8*0]=
390 h->non_zero_count_cache[7+8*0]=
392 h->non_zero_count_cache[1+8*0]=
393 h->non_zero_count_cache[2+8*0]=
395 h->non_zero_count_cache[1+8*3]=
396 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
400 for (i=0; i<2; i++) {
402 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
403 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
404 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
405 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
407 h->non_zero_count_cache[3+8*1 + 2*8*i]=
408 h->non_zero_count_cache[3+8*2 + 2*8*i]=
409 h->non_zero_count_cache[0+8*1 + 8*i]=
410 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
417 h->top_cbp = h->cbp_table[top_xy];
418 } else if(IS_INTRA(mb_type)) {
425 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
426 } else if(IS_INTRA(mb_type)) {
432 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
435 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
440 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
442 for(list=0; list<h->list_count; list++){
443 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
444 /*if(!h->mv_cache_clean[list]){
445 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
446 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
447 h->mv_cache_clean[list]= 1;
451 h->mv_cache_clean[list]= 0;
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
456 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
457 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
458 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
459 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
460 h->ref_cache[list][scan8[0] + 0 - 1*8]=
461 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
462 h->ref_cache[list][scan8[0] + 2 - 1*8]=
463 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
465 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
466 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
467 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
468 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
469 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
473 int cache_idx = scan8[0] - 1 + i*2*8;
474 if(USES_LIST(left_type[i], list)){
475 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
476 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
477 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
478 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
479 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
480 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
482 *(uint32_t*)h->mv_cache [list][cache_idx ]=
483 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
484 h->ref_cache[list][cache_idx ]=
485 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
489 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
492 if(USES_LIST(topleft_type, list)){
493 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
494 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
495 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
496 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
498 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
499 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
502 if(USES_LIST(topright_type, list)){
503 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
504 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
505 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
506 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
508 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
509 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
512 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
515 h->ref_cache[list][scan8[5 ]+1] =
516 h->ref_cache[list][scan8[7 ]+1] =
517 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
518 h->ref_cache[list][scan8[4 ]] =
519 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
520 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
521 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
522 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
523 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
524 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
527 /* XXX beurk, Load mvd */
528 if(USES_LIST(top_type, list)){
529 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
530 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
531 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
532 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
533 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
535 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
536 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
537 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
538 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
540 if(USES_LIST(left_type[0], list)){
541 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
542 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
543 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
545 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
546 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
548 if(USES_LIST(left_type[1], list)){
549 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
550 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
551 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
553 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
554 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
556 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
557 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
558 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
559 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
560 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
562 if(h->slice_type == B_TYPE){
563 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
565 if(IS_DIRECT(top_type)){
566 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
567 }else if(IS_8X8(top_type)){
568 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
569 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
570 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
572 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
575 if(IS_DIRECT(left_type[0]))
576 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
577 else if(IS_8X8(left_type[0]))
578 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
580 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
582 if(IS_DIRECT(left_type[1]))
583 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
584 else if(IS_8X8(left_type[1]))
585 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
587 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
593 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
594 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
595 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
596 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
597 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
598 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
599 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
600 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
601 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
602 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
604 #define MAP_F2F(idx, mb_type)\
605 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
606 h->ref_cache[list][idx] <<= 1;\
607 h->mv_cache[list][idx][1] /= 2;\
608 h->mvd_cache[list][idx][1] /= 2;\
613 #define MAP_F2F(idx, mb_type)\
614 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
615 h->ref_cache[list][idx] >>= 1;\
616 h->mv_cache[list][idx][1] <<= 1;\
617 h->mvd_cache[list][idx][1] <<= 1;\
627 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
630 static inline void write_back_intra_pred_mode(H264Context *h){
631 MpegEncContext * const s = &h->s;
632 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
634 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
635 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
636 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
637 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
638 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
639 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
640 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
644 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
646 static inline int check_intra4x4_pred_mode(H264Context *h){
647 MpegEncContext * const s = &h->s;
648 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
649 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
652 if(!(h->top_samples_available&0x8000)){
654 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
656 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
659 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
664 if(!(h->left_samples_available&0x8000)){
666 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
668 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
671 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
677 } //FIXME cleanup like next
680 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
682 static inline int check_intra_pred_mode(H264Context *h, int mode){
683 MpegEncContext * const s = &h->s;
684 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
685 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
688 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
692 if(!(h->top_samples_available&0x8000)){
695 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
700 if(!(h->left_samples_available&0x8000)){
703 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
712 * gets the predicted intra4x4 prediction mode.
714 static inline int pred_intra_mode(H264Context *h, int n){
715 const int index8= scan8[n];
716 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
717 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
718 const int min= FFMIN(left, top);
720 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
722 if(min<0) return DC_PRED;
726 static inline void write_back_non_zero_count(H264Context *h){
727 MpegEncContext * const s = &h->s;
728 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
730 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
731 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
732 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
733 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
734 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
735 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
736 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
738 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
739 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
740 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
742 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
743 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
744 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
747 // store all luma nnzs, for deblocking
750 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
751 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
756 * gets the predicted number of non zero coefficients.
757 * @param n block index
759 static inline int pred_non_zero_count(H264Context *h, int n){
760 const int index8= scan8[n];
761 const int left= h->non_zero_count_cache[index8 - 1];
762 const int top = h->non_zero_count_cache[index8 - 8];
765 if(i<64) i= (i+1)>>1;
767 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
772 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
773 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
774 MpegEncContext *s = &h->s;
776 /* there is no consistent mapping of mvs to neighboring locations that will
777 * make mbaff happy, so we can't move all this logic to fill_caches */
779 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
781 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
782 *C = h->mv_cache[list][scan8[0]-2];
785 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
786 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
787 if(IS_INTERLACED(mb_types[topright_xy])){
788 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
789 const int x4 = X4, y4 = Y4;\
790 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
791 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
792 return LIST_NOT_USED;\
793 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
794 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
795 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
796 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
798 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
801 if(topright_ref == PART_NOT_AVAILABLE
802 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
803 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
805 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
806 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
809 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
811 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
812 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
818 if(topright_ref != PART_NOT_AVAILABLE){
819 *C= h->mv_cache[list][ i - 8 + part_width ];
822 tprintf(s->avctx, "topright MV not available\n");
824 *C= h->mv_cache[list][ i - 8 - 1 ];
825 return h->ref_cache[list][ i - 8 - 1 ];
830 * gets the predicted MV.
831 * @param n the block index
832 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
833 * @param mx the x component of the predicted motion vector
834 * @param my the y component of the predicted motion vector
836 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
837 const int index8= scan8[n];
838 const int top_ref= h->ref_cache[list][ index8 - 8 ];
839 const int left_ref= h->ref_cache[list][ index8 - 1 ];
840 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
841 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
843 int diagonal_ref, match_count;
845 assert(part_width==1 || part_width==2 || part_width==4);
855 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
856 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
857 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
858 if(match_count > 1){ //most common
859 *mx= mid_pred(A[0], B[0], C[0]);
860 *my= mid_pred(A[1], B[1], C[1]);
861 }else if(match_count==1){
865 }else if(top_ref==ref){
873 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
877 *mx= mid_pred(A[0], B[0], C[0]);
878 *my= mid_pred(A[1], B[1], C[1]);
882 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
886 * gets the directionally predicted 16x8 MV.
887 * @param n the block index
888 * @param mx the x component of the predicted motion vector
889 * @param my the y component of the predicted motion vector
891 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
893 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
894 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
896 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
904 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
905 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
907 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
917 pred_motion(h, n, 4, list, ref, mx, my);
921 * gets the directionally predicted 8x16 MV.
922 * @param n the block index
923 * @param mx the x component of the predicted motion vector
924 * @param my the y component of the predicted motion vector
926 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
928 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
929 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
931 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
942 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
944 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
946 if(diagonal_ref == ref){
954 pred_motion(h, n, 2, list, ref, mx, my);
957 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
958 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
959 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
961 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
963 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
964 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
965 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
971 pred_motion(h, 0, 4, 0, 0, mx, my);
976 static inline void direct_dist_scale_factor(H264Context * const h){
977 const int poc = h->s.current_picture_ptr->poc;
978 const int poc1 = h->ref_list[1][0].poc;
980 for(i=0; i<h->ref_count[0]; i++){
981 int poc0 = h->ref_list[0][i].poc;
982 int td = av_clip(poc1 - poc0, -128, 127);
983 if(td == 0 /* FIXME || pic0 is a long-term ref */){
984 h->dist_scale_factor[i] = 256;
986 int tb = av_clip(poc - poc0, -128, 127);
987 int tx = (16384 + (FFABS(td) >> 1)) / td;
988 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
992 for(i=0; i<h->ref_count[0]; i++){
993 h->dist_scale_factor_field[2*i] =
994 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Initialize the reference-list bookkeeping needed by B-frame direct mode:
 * records the current picture's ref counts/POCs, then builds
 * map_col_to_list0[], which maps each reference of the co-located (list-1)
 * picture to the current list-0 reference with the same POC (0 when no match
 * is found — bogus, but fills in for missing frames).  The map is also
 * expanded into a per-field variant.  NOTE(review): loop-variable
 * declarations, an early return, and some closing braces are elided in this
 * excerpt.
 */
998 static inline void direct_ref_list_init(H264Context * const h){
999 MpegEncContext * const s = &h->s;
1000 Picture * const ref1 = &h->ref_list[1][0];
1001 Picture * const cur = s->current_picture_ptr;
1003 if(cur->pict_type == I_TYPE)
1004 cur->ref_count[0] = 0;
1005 if(cur->pict_type != B_TYPE)
1006 cur->ref_count[1] = 0;
1007 for(list=0; list<2; list++){
1008 cur->ref_count[list] = h->ref_count[list];
1009 for(j=0; j<h->ref_count[list]; j++)
1010 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the col->list0 map is only needed for temporal direct prediction */
1012 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1014 for(list=0; list<2; list++){
1015 for(i=0; i<ref1->ref_count[list]; i++){
1016 const int poc = ref1->ref_poc[list][i];
1017 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1018 for(j=0; j<h->ref_count[list]; j++)
1019 if(h->ref_list[list][j].poc == poc){
1020 h->map_col_to_list0[list][i] = j;
/* per-field variant: each frame index i expands to field indices 2i, 2i+1 */
1026 for(list=0; list<2; list++){
1027 for(i=0; i<ref1->ref_count[list]; i++){
1028 j = h->map_col_to_list0[list][i];
1029 h->map_col_to_list0_field[list][2*i] = 2*j;
1030 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derive motion vectors and reference indices for B-frame direct-mode
 * (sub-)macroblocks, filling mv_cache/ref_cache and possibly rewriting
 * *mb_type / sub_mb_type.  Two derivations exist:
 *   - spatial direct: refs are min(neighbours), MVs come from median
 *     prediction, zeroed where the co-located block is static;
 *   - temporal direct: MVs of the co-located list-1 picture are scaled by
 *     dist_scale_factor, with special paths for frame<->field scaling.
 * NOTE(review): this excerpt elides many interior lines (declarations,
 * else-branches and closing braces); comments below describe only what the
 * visible code shows.
 */
1036 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1037 MpegEncContext * const s = &h->s;
1038 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1039 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1040 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1041 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* co-located (list-1) picture's motion vectors and reference indices */
1042 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1043 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1044 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1045 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1046 const int is_b8x8 = IS_8X8(*mb_type);
1047 unsigned int sub_mb_type;
1050 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose the direct block size from the co-located MB's partitioning */
1051 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1052 /* FIXME save sub mb types from previous frames (or derive from MVs)
1053 * so we know exactly what block size to use */
1054 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1055 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1056 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1057 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1058 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1060 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1061 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1064 *mb_type |= MB_TYPE_DIRECT2;
1066 *mb_type |= MB_TYPE_INTERLACED;
1068 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1070 if(h->direct_spatial_mv_pred){
1075 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1077 /* ref = min(neighbors) */
1078 for(list=0; list<2; list++){
1079 int refa = h->ref_cache[list][scan8[0] - 1];
1080 int refb = h->ref_cache[list][scan8[0] - 8];
1081 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1083 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1085 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1087 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour ref in either list -> refs and MVs are zero */
1093 if(ref[0] < 0 && ref[1] < 0){
1094 ref[0] = ref[1] = 0;
1095 mv[0][0] = mv[0][1] =
1096 mv[1][0] = mv[1][1] = 0;
1098 for(list=0; list<2; list++){
1100 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1102 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction direction from the (sub-)mb type */
1107 *mb_type &= ~MB_TYPE_P0L1;
1108 sub_mb_type &= ~MB_TYPE_P0L1;
1109 }else if(ref[0] < 0){
1110 *mb_type &= ~MB_TYPE_P0L0;
1111 sub_mb_type &= ~MB_TYPE_P0L0;
1114 if(IS_16X16(*mb_type)){
1117 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1118 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* zero the MV when the co-located block is (nearly) static; the x264_build
 * check works around a pre-build-34 x264 bug */
1119 if(!IS_INTRA(mb_type_col)
1120 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1121 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1122 && (h->x264_build>33 || !h->x264_build)))){
1124 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1128 a= pack16to32(mv[0][0],mv[0][1]);
1129 b= pack16to32(mv[1][0],mv[1][1]);
1131 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1132 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* 8x8 (or smaller) spatial direct: handle each 8x8 block separately */
1134 for(i8=0; i8<4; i8++){
1135 const int x8 = i8&1;
1136 const int y8 = i8>>1;
1138 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1140 h->sub_mb_type[i8] = sub_mb_type;
1142 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1143 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1144 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1145 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* col_zero_flag: zero MVs where the co-located 8x8 block is static */
1148 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1149 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1150 && (h->x264_build>33 || !h->x264_build)))){
1151 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1152 if(IS_SUB_8X8(sub_mb_type)){
1153 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1154 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1156 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1158 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1161 for(i4=0; i4<4; i4++){
1162 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1163 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1165 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1167 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1173 }else{ /* direct temporal mv pred */
1174 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1175 const int *dist_scale_factor = h->dist_scale_factor;
/* field macroblocks use the per-field maps/factors */
1178 if(IS_INTERLACED(*mb_type)){
1179 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1180 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1181 dist_scale_factor = h->dist_scale_factor_field;
/* current MB and co-located MB differ in frame/field coding: the
 * co-located data must be rescaled between frame and field geometry */
1183 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1184 /* FIXME assumes direct_8x8_inference == 1 */
1185 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1186 int mb_types_col[2];
1189 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1190 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1191 | (*mb_type & MB_TYPE_INTERLACED);
1192 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1194 if(IS_INTERLACED(*mb_type)){
1195 /* frame to field scaling */
1196 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1197 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* rebase co-located pointers to the top of the MB pair */
1199 l1ref0 -= 2*h->b8_stride;
1200 l1ref1 -= 2*h->b8_stride;
1201 l1mv0 -= 4*h->b_stride;
1202 l1mv1 -= 4*h->b_stride;
1206 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1207 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1209 *mb_type |= MB_TYPE_16x8;
1211 *mb_type |= MB_TYPE_8x8;
1213 /* field to frame scaling */
1214 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1215 * but in MBAFF, top and bottom POC are equal */
1216 int dy = (s->mb_y&1) ? 1 : 2;
1218 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1219 l1ref0 += dy*h->b8_stride;
1220 l1ref1 += dy*h->b8_stride;
1221 l1mv0 += 2*dy*h->b_stride;
1222 l1mv1 += 2*dy*h->b_stride;
1225 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1227 *mb_type |= MB_TYPE_16x16;
1229 *mb_type |= MB_TYPE_8x8;
/* per-8x8 temporal scaling across the frame/field geometry mismatch */
1232 for(i8=0; i8<4; i8++){
1233 const int x8 = i8&1;
1234 const int y8 = i8>>1;
1236 const int16_t (*l1mv)[2]= l1mv0;
1238 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1240 h->sub_mb_type[i8] = sub_mb_type;
1242 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1243 if(IS_INTRA(mb_types_col[y8])){
1244 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1245 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1246 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1250 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1252 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1254 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1257 scale = dist_scale_factor[ref0];
1258 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1261 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
/* vertical MV component is rescaled between field and frame units */
1262 int my_col = (mv_col[1]<<y_shift)/2;
1263 int mx = (scale * mv_col[0] + 128) >> 8;
1264 int my = (scale * my_col + 128) >> 8;
1265 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1266 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1273 /* one-to-one mv scaling */
1275 if(IS_16X16(*mb_type)){
1278 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1279 if(IS_INTRA(mb_type_col)){
1282 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1283 : map_col_to_list0[1][l1ref1[0]];
1284 const int scale = dist_scale_factor[ref0];
1285 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* list-0 MV is the scaled co-located MV; list-1 MV is the difference */
1287 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1288 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1290 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1291 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1293 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1294 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1295 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1297 for(i8=0; i8<4; i8++){
1298 const int x8 = i8&1;
1299 const int y8 = i8>>1;
1301 const int16_t (*l1mv)[2]= l1mv0;
1303 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1305 h->sub_mb_type[i8] = sub_mb_type;
1306 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1307 if(IS_INTRA(mb_type_col)){
1308 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1309 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1310 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1314 ref0 = l1ref0[x8 + y8*h->b8_stride];
1316 ref0 = map_col_to_list0[0][ref0];
1318 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1321 scale = dist_scale_factor[ref0];
1323 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1324 if(IS_SUB_8X8(sub_mb_type)){
1325 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1326 int mx = (scale * mv_col[0] + 128) >> 8;
1327 int my = (scale * mv_col[1] + 128) >> 8;
1328 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1329 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1331 for(i4=0; i4<4; i4++){
1332 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1333 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1334 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1335 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1336 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1337 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copy the decoded motion data for the current macroblock from the per-MB
 * caches (mv_cache / ref_cache / mvd_cache) back into the frame-wide tables
 * (current_picture.motion_val / ref_index, h->mvd_table, h->direct_table).
 * NOTE(review): loop headers and some closing braces are elided in this
 * excerpt.
 */
1344 static inline void write_back_motion(H264Context *h, int mb_type){
1345 MpegEncContext * const s = &h->s;
1346 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1347 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1350 if(!USES_LIST(mb_type, 0))
1351 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1353 for(list=0; list<h->list_count; list++){
1355 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row of the 4x4 MV grid */
1359 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1360 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1362 if( h->pps.cabac ) {
1363 if(IS_SKIP(mb_type))
1364 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1367 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1368 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1373 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1374 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1375 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1376 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1377 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B-frames also record per-8x8 direct flags for context modelling */
1381 if(h->slice_type == B_TYPE && h->pps.cabac){
1382 if(IS_8X8(mb_type)){
1383 uint8_t *direct_table = &h->direct_table[b8_xy];
1384 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1385 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1386 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1392 * Decodes a network abstraction layer unit.
1393 * @param consumed is the number of bytes used as input
1394 * @param length is the length of the array
1395  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1396 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header (nal_ref_idc, nal_unit_type), then removes
 * emulation-prevention 0x03 bytes (00 00 03 xx -> 00 00 xx) from the RBSP.
 * Returns a pointer into src when no escapes are present, otherwise a
 * pointer into h->rbsp_buffer.  NOTE(review): parts of the escape-scan and
 * unescape loops are elided in this excerpt. */
1398 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1403 // src[0]&0x80; //forbidden bit
1404 h->nal_ref_idc= src[0]>>5;
1405 h->nal_unit_type= src[0]&0x1F;
1409 for(i=0; i<length; i++)
1410 printf("%2X ", src[i]);
/* scan for 00 00 {00,01,02,03} sequences; only even positions need testing
 * because an escape always contains a zero at an even or odd index pair */
1412 for(i=0; i+1<length; i+=2){
1413 if(src[i]) continue;
1414 if(i>0 && src[i-1]==0) i--;
1415 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1417 /* startcode, so we must be past the end */
1424 if(i>=length-1){ //no escaped 0
1425 *dst_length= length;
1426 *consumed= length+1; //+1 for the header
1430 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1431 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1432 dst= h->rbsp_buffer[bufidx];
1438 //printf("decoding esc\n");
1441 //remove escapes (very rare 1:2^22)
1442 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1443 if(src[si+2]==3){ //escape
1448 }else //next start code
1452 dst[di++]= src[si++];
1456 *consumed= si + 1;//+1 for the header
1457 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1462 * identifies the exact end of the bitstream
1463 * @return the length of the trailing, or 0 if damaged
/* Locate the rbsp_stop_one_bit in the last byte to find the exact end of
 * the bitstream.  NOTE(review): the body after the tprintf is elided in
 * this excerpt. */
1465 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1469 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1479  * idct transforms the 16 dc values and dequantize them.
1480 * @param qp quantization parameter
/* Inverse 4x4 Hadamard transform of the 16 luma DC coefficients followed by
 * dequantization ((x*qmul + 128) >> 8).  Operates in place on the DC
 * positions of 'block', which are spread out with stride 16 (x_offset /
 * y_offset tables).  NOTE(review): the vertical-pass stores and some loop
 * headers are elided in this excerpt. */
1482 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1485 int temp[16]; //FIXME check if this is a good idea
1486 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1487 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1489 //memset(block, 64, 2*256);
/* horizontal (row) butterfly pass into temp[] */
1492 const int offset= y_offset[i];
1493 const int z0= block[offset+stride*0] + block[offset+stride*4];
1494 const int z1= block[offset+stride*0] - block[offset+stride*4];
1495 const int z2= block[offset+stride*1] - block[offset+stride*5];
1496 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical (column) butterfly pass, then dequantize and store */
1505 const int offset= x_offset[i];
1506 const int z0= temp[4*0+i] + temp[4*2+i];
1507 const int z1= temp[4*0+i] - temp[4*2+i];
1508 const int z2= temp[4*1+i] - temp[4*3+i];
1509 const int z3= temp[4*1+i] + temp[4*3+i];
1511 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1512 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1513 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1514 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1520  * dct transforms the 16 dc values.
1521 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side of
 * h264_luma_dc_dequant_idct_c); results are halved (>>1) on store.
 * NOTE(review): some loop headers and stores are elided in this excerpt. */
1523 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1524 // const int qmul= dequant_coeff[qp][0];
1526 int temp[16]; //FIXME check if this is a good idea
1527 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1528 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first butterfly pass into temp[] */
1531 const int offset= y_offset[i];
1532 const int z0= block[offset+stride*0] + block[offset+stride*4];
1533 const int z1= block[offset+stride*0] - block[offset+stride*4];
1534 const int z2= block[offset+stride*1] - block[offset+stride*5];
1535 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass; >>1 keeps the transform within range */
1544 const int offset= x_offset[i];
1545 const int z0= temp[4*0+i] + temp[4*2+i];
1546 const int z1= temp[4*0+i] - temp[4*2+i];
1547 const int z2= temp[4*1+i] - temp[4*3+i];
1548 const int z3= temp[4*1+i] + temp[4*3+i];
1550 block[stride*0 +offset]= (z0 + z3)>>1;
1551 block[stride*2 +offset]= (z1 + z2)>>1;
1552 block[stride*8 +offset]= (z1 - z2)>>1;
1553 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, in place.  NOTE(review): the declarations of a..e and the
 * intermediate sums are elided in this excerpt. */
1561 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
/* 2x2 butterfly result, scaled by qmul and >>7 */
1576 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1577 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1578 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1579 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC values (encoder side of
 * chroma_dc_dequant_idct_c), in place.  NOTE(review): the declarations of
 * a..e and the intermediate sums are elided in this excerpt. */
1583 static void chroma_dc_dct_c(DCTELEM *block){
1584 const int stride= 16*2;
1585 const int xStride= 16;
1588 a= block[stride*0 + xStride*0];
1589 b= block[stride*0 + xStride*1];
1590 c= block[stride*1 + xStride*0];
1591 d= block[stride*1 + xStride*1];
1598 block[stride*0 + xStride*0]= (a+c);
1599 block[stride*0 + xStride*1]= (e+b);
1600 block[stride*1 + xStride*0]= (a-c);
1601 block[stride*1 + xStride*1]= (e-b);
1606 * gets the chroma qp.
/* Map a luma quantizer to the chroma quantizer via the PPS lookup table;
 * t selects which of the two chroma QP index offsets to apply. */
1608 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1609 return h->pps.chroma_qp_table[t][qscale & 0xff];
1612 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1613 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Encoder-side quantization of a 4x4 block.  The DC coefficient may use a
 * different shift depending on 'separate_dc' (QUANT_SHIFT-2 for the luma DC
 * path via quant_coeff[qscale+18], QUANT_SHIFT+1 otherwise); AC coefficients
 * are processed in scantable order with a threshold test that skips
 * coefficients quantizing to zero.  Returns the index of the last non-zero
 * coefficient.  NOTE(review): several branch/loop lines are elided in this
 * excerpt. */
1614 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1616 const int * const quant_table= quant_coeff[qscale];
1617 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1618 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1619 const unsigned int threshold2= (threshold1<<1);
/* DC with reduced shift (luma DC handled by the separate Hadamard path) */
1625 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1626 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1627 const unsigned int dc_threshold2= (dc_threshold1<<1);
1629 int level= block[0]*quant_coeff[qscale+18][0];
1630 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1632 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1635 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1638 // last_non_zero = i;
/* DC with increased shift */
1643 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1644 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1645 const unsigned int dc_threshold2= (dc_threshold1<<1);
1647 int level= block[0]*quant_table[0];
1648 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1650 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1653 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1656 // last_non_zero = i;
/* AC coefficients, visited in scan order */
1669 const int j= scantable[i];
1670 int level= block[j]*quant_table[j];
1672 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1673 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1674 if(((unsigned)(level+threshold1))>threshold2){
1676 level= (bias + level)>>QUANT_SHIFT;
1679 level= (bias - level)>>QUANT_SHIFT;
1688 return last_non_zero;
/* Single-direction motion compensation for one partition: computes the
 * quarter-pel luma position and eighth-pel chroma position from mv_cache,
 * falls back to ff_emulated_edge_mc when the reference area extends past
 * the picture edge, then runs the qpel and chroma MC functions.
 * NOTE(review): some branch lines (emu flag handling, field-parity check)
 * are elided in this excerpt. */
1691 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1692 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1693 int src_x_offset, int src_y_offset,
1694 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1695 MpegEncContext * const s = &h->s;
1696 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1697 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1698 const int luma_xy= (mx&3) + ((my&3)<<2); // quarter-pel fractional position selects the qpel filter
1699 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1700 uint8_t * src_cb, * src_cr;
1701 int extra_width= h->emu_edge_width;
1702 int extra_height= h->emu_edge_height;
1704 const int full_mx= mx>>2;
1705 const int full_my= my>>2;
1706 const int pic_width = 16*s->mb_width;
1707 const int pic_height = 16*s->mb_height >> MB_FIELD;
1709 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* sub-pel interpolation needs 3 extra pixels of margin on each side */
1712 if(mx&7) extra_width -= 3;
1713 if(my&7) extra_height -= 3;
1715 if( full_mx < 0-extra_width
1716 || full_my < 0-extra_height
1717 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1718 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1719 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1720 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1724 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1726 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1729 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1732 // chroma offset when predicting from a field of opposite parity
1733 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1734 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1736 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1737 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1740 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1741 src_cb= s->edge_emu_buffer;
1743 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1746 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1747 src_cr= s->edge_emu_buffer;
1749 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation for one partition: runs list-0 prediction
 * with the 'put' functions, then (for bi-prediction) switches to the 'avg'
 * functions for list 1 so the results are averaged into the destination.
 * NOTE(review): the qpix_op reassignment and some braces are elided in this
 * excerpt. */
1752 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1753 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1754 int x_offset, int y_offset,
1755 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1756 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1757 int list0, int list1){
1758 MpegEncContext * const s = &h->s;
1759 qpel_mc_func *qpix_op= qpix_put;
1760 h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to this partition, then convert to picture coords */
1762 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1763 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1764 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1765 x_offset += 8*s->mb_x;
1766 y_offset += 8*(s->mb_y >> MB_FIELD);
1769 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1770 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1771 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1772 qpix_op, chroma_op);
/* second direction averages on top of the first */
1775 chroma_op= chroma_avg;
1779 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1780 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1781 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1782 qpix_op, chroma_op);
/* Weighted motion compensation for one partition.  For bi-prediction both
 * directions are rendered (list 1 into the obmc scratchpad), then combined
 * with either implicit weights (use_weight == 2) or explicit luma/chroma
 * weights and offsets.  For uni-prediction the single direction is rendered
 * and then weighted in place.  NOTE(review): some branch lines are elided
 * in this excerpt. */
1786 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1787 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1788 int x_offset, int y_offset,
1789 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1790 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1791 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1792 int list0, int list1){
1793 MpegEncContext * const s = &h->s;
1795 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1796 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1797 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1798 x_offset += 8*s->mb_x;
1799 y_offset += 8*(s->mb_y >> MB_FIELD);
1802 /* don't optimize for luma-only case, since B-frames usually
1803 * use implicit weights => chroma too. */
1804 uint8_t *tmp_cb = s->obmc_scratchpad;
1805 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1806 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1807 int refn0 = h->ref_cache[0][ scan8[n] ];
1808 int refn1 = h->ref_cache[1][ scan8[n] ];
1810 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1811 dest_y, dest_cb, dest_cr,
1812 x_offset, y_offset, qpix_put, chroma_put);
1813 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1814 tmp_y, tmp_cb, tmp_cr,
1815 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights depend only on the two reference indices
 * and always sum to 64 */
1817 if(h->use_weight == 2){
1818 int weight0 = h->implicit_weight[refn0][refn1];
1819 int weight1 = 64 - weight0;
1820 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1821 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1822 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1824 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1825 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1826 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1827 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1828 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1829 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1830 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1831 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1832 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional prediction, weighted in place */
1835 int list = list1 ? 1 : 0;
1836 int refn = h->ref_cache[list][ scan8[n] ];
1837 Picture *ref= &h->ref_list[list][refn];
1838 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1839 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1840 qpix_put, chroma_put);
1842 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1843 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1844 if(h->use_weight_chroma){
1845 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1846 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1847 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1848 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch motion compensation for one partition: use the weighted path for
 * explicit weighting (use_weight==1) or implicit bi-prediction whose weight
 * differs from the trivial 32/32 split; otherwise use the unweighted path.
 * NOTE(review): the 'else' line between the two calls is elided in this
 * excerpt. */
1853 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1854 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1855 int x_offset, int y_offset,
1856 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1857 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1858 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1859 int list0, int list1){
1860 if((h->use_weight==2 && list0 && list1
1861 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1862 || h->use_weight==1)
1863 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1864 x_offset, y_offset, qpix_put, chroma_put,
1865 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1867 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1868 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1871 static inline void prefetch_motion(H264Context *h, int list){
1872 /* fetch pixels for estimated mv 4 macroblocks ahead
1873 * optimized for 64byte cache lines */
1874 MpegEncContext * const s = &h->s;
1875 const int refn = h->ref_cache[list][scan8[0]];
/* derive the estimated luma/chroma addresses from the cached 16x16 MV
 * (NOTE(review): the refn validity check is elided in this excerpt) */
1877 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1878 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1879 uint8_t **src= h->ref_list[list][refn].data;
1880 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1881 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1882 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1883 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Top-level inter motion compensation for one macroblock: dispatches on the
 * macroblock partitioning (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions)
 * and calls mc_part() with the function pointers sized for each partition.
 * Prefetches list-0 reference pixels before and list-1 after.
 * NOTE(review): the 8x8 loop header and sub-loop lines are elided in this
 * excerpt. */
1887 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1888 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1889 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1890 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1891 MpegEncContext * const s = &h->s;
1892 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1893 const int mb_type= s->current_picture.mb_type[mb_xy];
1895 assert(IS_INTER(mb_type));
1897 prefetch_motion(h, 0);
1899 if(IS_16X16(mb_type)){
1900 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1901 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1902 &weight_op[0], &weight_avg[0],
1903 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1904 }else if(IS_16X8(mb_type)){
1905 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1906 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1907 &weight_op[1], &weight_avg[1],
1908 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1909 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1910 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1911 &weight_op[1], &weight_avg[1],
1912 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1913 }else if(IS_8X16(mb_type)){
1914 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1915 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1916 &weight_op[2], &weight_avg[2],
1917 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1918 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1919 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1920 &weight_op[2], &weight_avg[2],
1921 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8: each 8x8 block has its own sub-partitioning */
1925 assert(IS_8X8(mb_type));
1928 const int sub_mb_type= h->sub_mb_type[i];
1930 int x_offset= (i&1)<<2;
1931 int y_offset= (i&2)<<1;
1933 if(IS_SUB_8X8(sub_mb_type)){
1934 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1935 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1936 &weight_op[3], &weight_avg[3],
1937 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1938 }else if(IS_SUB_8X4(sub_mb_type)){
1939 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1940 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1941 &weight_op[4], &weight_avg[4],
1942 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1943 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1944 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1945 &weight_op[4], &weight_avg[4],
1946 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1947 }else if(IS_SUB_4X8(sub_mb_type)){
1948 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1949 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1950 &weight_op[5], &weight_avg[5],
1951 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1952 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1953 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1954 &weight_op[5], &weight_avg[5],
1955 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1958 assert(IS_SUB_4X4(sub_mb_type));
1960 int sub_x_offset= x_offset + 2*(j&1);
1961 int sub_y_offset= y_offset + (j&2);
1962 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1963 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1964 &weight_op[6], &weight_avg[6],
1965 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1971 prefetch_motion(h, 1);
/* One-time initialization of the static CAVLC tables (coeff_token,
 * total_zeros, run_before and their chroma-DC variants); guarded by the
 * 'done' flag so repeated calls are no-ops.  NOTE(review): the done-check
 * and some loop headers are elided in this excerpt. */
1974 static void decode_init_vlc(void){
1975 static int done = 0;
1981 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1982 &chroma_dc_coeff_token_len [0], 1, 1,
1983 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1986 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1987 &coeff_token_len [i][0], 1, 1,
1988 &coeff_token_bits[i][0], 1, 1, 1);
1992 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1993 &chroma_dc_total_zeros_len [i][0], 1, 1,
1994 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1996 for(i=0; i<15; i++){
1997 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1998 &total_zeros_len [i][0], 1, 1,
1999 &total_zeros_bits[i][0], 1, 1, 1);
2003 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2004 &run_len [i][0], 1, 1,
2005 &run_bits[i][0], 1, 1, 1);
/* run_before for zeros_left > 6 uses a dedicated wider table */
2007 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2008 &run_len [6][0], 1, 1,
2009 &run_bits[6][0], 1, 1, 1);
/*
 * free_tables(): release all per-context tables allocated by alloc_tables()
 * and the per-thread buffers allocated by context_init().
 * Safe to call on partially-allocated state: av_freep() nulls each pointer.
 */
2013 static void free_tables(H264Context *h){
2016 av_freep(&h->intra4x4_pred_mode);
2017 av_freep(&h->chroma_pred_mode_table);
2018 av_freep(&h->cbp_table);
2019 av_freep(&h->mvd_table[0]);
2020 av_freep(&h->mvd_table[1]);
2021 av_freep(&h->direct_table);
2022 av_freep(&h->non_zero_count);
2023 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base, so it is only cleared, not freed */
2024 h->slice_table= NULL;
2026 av_freep(&h->mb2b_xy);
2027 av_freep(&h->mb2b8_xy);
2029 for(i = 0; i < MAX_SPS_COUNT; i++)
2030 av_freep(h->sps_buffers + i);
2032 for(i = 0; i < MAX_PPS_COUNT; i++)
2033 av_freep(h->pps_buffers + i);
/* per-thread scratch/border buffers live on each thread context */
2035 for(i = 0; i < h->s.avctx->thread_count; i++) {
2036 hx = h->thread_context[i];
2038 av_freep(&hx->top_borders[1]);
2039 av_freep(&hx->top_borders[0]);
2040 av_freep(&hx->s.obmc_scratchpad);
2041 av_freep(&hx->s.allocated_edge_emu_buffer);
/*
 * init_dequant8_coeff_table(): precompute the 8x8 dequantization tables for
 * all 52 QP values from the PPS 8x8 scaling matrices.  If both matrices are
 * identical, table 1 aliases table 0 instead of being recomputed.
 * 'transpose' compensates for the coefficient ordering of a non-C idct8.
 */
2045 static void init_dequant8_coeff_table(H264Context *h){
2047 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2048 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2049 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2051 for(i=0; i<2; i++ ){
2052 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2053 h->dequant8_coeff[1] = h->dequant8_buffer[0];
/* qp decomposed as shift*6 + idx; entry = init[idx] * scaling_matrix << shift */
2057 for(q=0; q<52; q++){
2058 int shift = ff_div6[q];
2059 int idx = ff_rem6[q];
2061 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2062 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2063 h->pps.scaling_matrix8[i][x]) << shift;
/*
 * init_dequant4_coeff_table(): precompute the six 4x4 dequantization tables
 * (Intra/Inter x Y/Cb/Cr) for all 52 QPs from the PPS scaling matrices.
 * Tables with identical scaling matrices share one buffer via aliasing.
 */
2068 static void init_dequant4_coeff_table(H264Context *h){
2070 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2071 for(i=0; i<6; i++ ){
2072 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* alias an earlier table when the matrices match (loop header for j missing in listing) */
2074 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2075 h->dequant4_coeff[i] = h->dequant4_buffer[j];
/* shift carries the extra +2 used by the 4x4 idct normalization */
2082 for(q=0; q<52; q++){
2083 int shift = ff_div6[q] + 2;
2084 int idx = ff_rem6[q];
2086 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2087 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2088 h->pps.scaling_matrix4[i][x]) << shift;
/*
 * init_dequant_tables(): build the 4x4 (and, when the PPS enables the 8x8
 * transform, the 8x8) dequant tables.  With SPS transform_bypass, the QP 0
 * entries are forced to the neutral scale 1<<6 so lossless blocks pass
 * through unscaled.  (Loop headers over i/x are missing from this listing.)
 */
2093 static void init_dequant_tables(H264Context *h){
2095 init_dequant4_coeff_table(h);
2096 if(h->pps.transform_8x8_mode)
2097 init_dequant8_coeff_table(h);
2098 if(h->sps.transform_bypass){
2101 h->dequant4_coeff[i][0][x] = 1<<6;
2102 if(h->pps.transform_8x8_mode)
2105 h->dequant8_coeff[i][0][x] = 1<<6;
2112 * needs width/height
/*
 * alloc_tables(): allocate the picture-size-dependent decoder tables
 * (intra pred modes, nnz counts, slice table, cbp, CABAC mvd/direct tables)
 * and the mb->b / mb->b8 index remap tables.  Requires s->mb_width/mb_height.
 * CHECKED_ALLOCZ jumps to an error label (not visible in this listing) on
 * failure; free_tables() then cleans up.  Returns 0 on success.
 */
2114 static int alloc_tables(H264Context *h){
2115 MpegEncContext * const s = &h->s;
/* one extra macroblock row of slack above the picture */
2116 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2119 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2121 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2123 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* mvd/direct tables are only needed by the CABAC decoder */
2125 if( h->pps.cabac ) {
2126 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2127 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2129 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so out-of-picture neighbours are valid */
2132 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2133 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2135 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2136 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2137 for(y=0; y<s->mb_height; y++){
2138 for(x=0; x<s->mb_width; x++){
2139 const int mb_xy= x + y*s->mb_stride;
2140 const int b_xy = 4*x + 4*y*h->b_stride;
2141 const int b8_xy= 2*x + 2*y*h->b8_stride;
2143 h->mb2b_xy [mb_xy]= b_xy;
2144 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is sized from linesize, so it is deferred to frame_start() */
2148 s->obmc_scratchpad = NULL;
2150 if(!h->dequant4_coeff[0])
2151 init_dequant_tables(h);
2160 * Mimic alloc_tables(), but for every context thread.
/*
 * clone_tables(): share the big per-picture tables of 'src' with the slice
 * thread context 'dst' (pointer copies only -- no ownership transfer; the
 * master context frees them).  Per-thread buffers (obmc_scratchpad) are
 * reset so each thread allocates its own in frame_start().
 */
2162 static void clone_tables(H264Context *dst, H264Context *src){
2163 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2164 dst->non_zero_count = src->non_zero_count;
2165 dst->slice_table = src->slice_table;
2166 dst->cbp_table = src->cbp_table;
2167 dst->mb2b_xy = src->mb2b_xy;
2168 dst->mb2b8_xy = src->mb2b8_xy;
2169 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2170 dst->mvd_table[0] = src->mvd_table[0];
2171 dst->mvd_table[1] = src->mvd_table[1];
2172 dst->direct_table = src->direct_table;
2174 dst->s.obmc_scratchpad = NULL;
2175 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2180 * Allocate buffers which are not shared amongst multiple threads.
/*
 * context_init(): allocate buffers that are NOT shared between slice threads
 * (top border rows for deblocking, edge emulation buffer).  Returns 0 on
 * success; on allocation failure CHECKED_ALLOCZ reaches the -1 return and
 * free_tables() is expected to clean up.
 */
2182 static int context_init(H264Context *h){
2183 MpegEncContext * const s = &h->s;
/* 16 luma + 8 Cb + 8 Cr bytes per macroblock column, two rows for MBAFF */
2185 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2186 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2188 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2189 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2190 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
2191 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
2194 return -1; // free_tables will clean up for us
/*
 * common_init(): initialization shared by all entry points -- copy picture
 * geometry from the AVCodecContext, set decoder defaults and seed the
 * scaling matrices with the flat value 16 (no scaling) until SPS/PPS arrive.
 */
2197 static void common_init(H264Context *h){
2198 MpegEncContext * const s = &h->s;
2200 s->width = s->avctx->width;
2201 s->height = s->avctx->height;
2202 s->codec_id= s->avctx->codec->id;
2204 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS applied yet" sentinel for the dequant tables */
2206 h->dequant_coeff_pps= -1;
2207 s->unrestricted_mv=1;
2208 s->decode=1; //FIXME
2210 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2211 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/*
 * decode_init(): AVCodec init callback.  Sets up the MpegEncContext for
 * H.264 decoding and detects avcC-style ("MP4 format") extradata by its
 * leading version byte 1.  (Several lines, including the return, are
 * missing from this listing.)
 */
2214 static int decode_init(AVCodecContext *avctx){
2215 H264Context *h= avctx->priv_data;
2216 MpegEncContext * const s = &h->s;
2218 MPV_decode_defaults(s);
2223 s->out_format = FMT_H264;
2224 s->workaround_bugs= avctx->workaround_bugs;
2227 // s->decode_mb= ff_h263_decode_mb;
2228 s->quarter_sample = 1;
2230 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata starting with 1 => avcC (length-prefixed NALs) instead of Annex B */
2234 if(avctx->extradata_size > 0 && avctx->extradata &&
2235 *(char *)avctx->extradata == 1){
2242 h->thread_context[0] = h;
/*
 * frame_start(): per-frame setup -- start the MPV frame and error
 * resilience, reset key_frame (IDR markings are OR'd in later), precompute
 * the per-block destination offsets for frame and field (MBAFF) layouts,
 * and lazily allocate the per-thread bipred scratchpad (needs linesize).
 */
2246 static int frame_start(H264Context *h){
2247 MpegEncContext * const s = &h->s;
2250 if(MPV_frame_start(s, s->avctx) < 0)
2252 ff_er_frame_start(s);
2254 * MPV_frame_start uses pict_type to derive key_frame.
2255 * This is incorrect for H.264; IDR markings must be used.
2256 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2257 * See decode_nal_units().
2259 s->current_picture_ptr->key_frame= 0;
2261 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame layout; [24..47]: field layout (doubled stride) */
2263 for(i=0; i<16; i++){
2264 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2265 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2268 h->block_offset[16+i]=
2269 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2270 h->block_offset[24+16+i]=
2271 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2274 /* can't be in alloc_tables because linesize isn't known there.
2275 * FIXME: redo bipred weight to not require extra buffer? */
2276 for(i = 0; i < s->avctx->thread_count; i++)
2277 if(!h->thread_context[i]->s.obmc_scratchpad)
2278 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2280 /* some macroblocks will be accessed before they're available */
2281 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2282 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2284 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/*
 * backup_mb_border(): save this macroblock's bottom row (luma + chroma) into
 * h->top_borders[0] and its right column into h->left_border, so the next
 * MB row/column can deblock against unfiltered pixels.
 */
2288 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2289 MpegEncContext * const s = &h->s;
2293 src_cb -= uvlinesize;
2294 src_cr -= uvlinesize;
2296 // There are two lines saved, the line above the top macroblock of a pair,
2297 // and the line above the bottom macroblock
2298 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2299 for(i=1; i<17; i++){
2300 h->left_border[i]= src_y[15+i* linesize];
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2304 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma planes are skipped entirely in grayscale-only decoding */
2306 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2307 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2308 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2310 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2311 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2313 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2314 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/*
 * xchg_mb_border(): swap (xchg=1) or restore (xchg=0) the saved unfiltered
 * border pixels around the current macroblock so intra prediction sees
 * unfiltered neighbours while the picture keeps the deblocked ones.
 * With deblocking_filter==2 only same-slice neighbours participate.
 * NOTE(review): the XCHG macro body (original lines 2340-2345) is missing
 * from this listing.
 */
2318 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2319 MpegEncContext * const s = &h->s;
2326 if(h->deblocking_filter == 2) {
2327 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2328 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2329 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2331 deblock_left = (s->mb_x > 0);
2332 deblock_top = (s->mb_y > 0);
/* step back to the top-left neighbour pixel */
2335 src_y -= linesize + 1;
2336 src_cb -= uvlinesize + 1;
2337 src_cr -= uvlinesize + 1;
2339 #define XCHG(a,b,t,xchg)\
2346 for(i = !deblock_top; i<17; i++){
2347 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2352 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2353 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2354 if(s->mb_x+1 < s->mb_width){
2355 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2359 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2361 for(i = !deblock_top; i<9; i++){
2362 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2363 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2367 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2368 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/*
 * backup_pair_border(): MBAFF variant of backup_mb_border() -- save the two
 * bottom rows of a macroblock pair into top_borders[0]/[1] and the 32-pixel
 * right column (plus chroma) into left_border.
 */
2373 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2374 MpegEncContext * const s = &h->s;
2377 src_y -= 2 * linesize;
2378 src_cb -= 2 * uvlinesize;
2379 src_cr -= 2 * uvlinesize;
2381 // There are two lines saved, the line above the top macroblock of a pair,
2382 // and the line above the bottom macroblock
2383 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2384 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2385 for(i=2; i<34; i++){
2386 h->left_border[i]= src_y[15+i* linesize];
2389 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2390 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2391 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2392 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2394 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2395 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2396 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2397 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2398 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2399 for(i=2; i<18; i++){
2400 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2401 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2403 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2404 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2405 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2406 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/*
 * xchg_pair_border(): MBAFF variant of xchg_mb_border() -- swap/restore the
 * saved unfiltered borders around a whole macroblock pair (two border rows,
 * 32-pixel columns).  NOTE(review): the XCHG macro body (original lines
 * 2424-2429) is missing from this listing.
 */
2410 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2411 MpegEncContext * const s = &h->s;
2414 int deblock_left = (s->mb_x > 0);
2415 int deblock_top = (s->mb_y > 1);
2417 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2419 src_y -= 2 * linesize + 1;
2420 src_cb -= 2 * uvlinesize + 1;
2421 src_cr -= 2 * uvlinesize + 1;
2423 #define XCHG(a,b,t,xchg)\
2430 for(i = (!deblock_top)<<1; i<34; i++){
2431 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2437 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2439 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2440 if(s->mb_x+1 < s->mb_width){
2441 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2442 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2446 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2448 for(i = (!deblock_top) << 1; i<18; i++){
2449 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2450 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2454 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2455 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2456 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2457 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/*
 * hl_decode_mb_internal(): reconstruct one macroblock -- destination setup,
 * intra prediction or motion compensation, residual idct+add for luma and
 * chroma, then in-loop deblocking.  'simple' is a compile-time flag: when 1,
 * all MBAFF/PCM/grayscale/SVQ3 paths are compiled out (fast path).
 * NOTE(review): this numbered listing has gaps (missing else/brace lines),
 * so several branch boundaries below are not visible.
 */
2462 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2463 MpegEncContext * const s = &h->s;
2464 const int mb_x= s->mb_x;
2465 const int mb_y= s->mb_y;
2466 const int mb_xy= mb_x + mb_y*s->mb_stride;
2467 const int mb_type= s->current_picture.mb_type[mb_xy];
2468 uint8_t *dest_y, *dest_cb, *dest_cr;
2469 int linesize, uvlinesize /*dct_offset*/;
2471 int *block_offset = &h->block_offset[0];
2472 const unsigned int bottom = mb_y & 1;
2473 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2474 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2475 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2477 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2478 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2479 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2481 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2482 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: doubled strides, field block offsets, odd rows shifted up */
2484 if (!simple && MB_FIELD) {
2485 linesize = h->mb_linesize = s->linesize * 2;
2486 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2487 block_offset = &h->block_offset[24];
2488 if(mb_y&1){ //FIXME move out of this func?
2489 dest_y -= s->linesize*15;
2490 dest_cb-= s->uvlinesize*7;
2491 dest_cr-= s->uvlinesize*7;
/* remap ref indices so fields of the opposite parity compare correctly */
2495 for(list=0; list<h->list_count; list++){
2496 if(!USES_LIST(mb_type, list))
2498 if(IS_16X16(mb_type)){
2499 int8_t *ref = &h->ref_cache[list][scan8[0]];
2500 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2502 for(i=0; i<16; i+=4){
2503 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2504 int ref = h->ref_cache[list][scan8[i]];
2506 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
2512 linesize = h->mb_linesize = s->linesize;
2513 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2514 // dct_offset = s->linesize * 16;
/* pick idct/add functions once per MB depending on transform mode */
2517 if(transform_bypass){
2519 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2520 }else if(IS_8x8DCT(mb_type)){
2521 idct_dc_add = s->dsp.h264_idct8_dc_add;
2522 idct_add = s->dsp.h264_idct8_add;
2524 idct_dc_add = s->dsp.h264_idct_dc_add;
2525 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: expose unfiltered neighbour pixels of the pair for prediction */
2528 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2529 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2530 int mbt_y = mb_y&~1;
2531 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2532 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2533 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2534 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* PCM macroblock: raw samples are copied straight from h->mb */
2537 if (!simple && IS_INTRA_PCM(mb_type)) {
2540 // The pixels are stored in h->mb array in the same order as levels,
2541 // copy them in output in the correct order.
2542 for(i=0; i<16; i++) {
2543 for (y=0; y<4; y++) {
2544 for (x=0; x<4; x++) {
2545 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2549 for(i=16; i<16+4; i++) {
2550 for (y=0; y<4; y++) {
2551 for (x=0; x<4; x++) {
2552 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2556 for(i=20; i<20+4; i++) {
2557 for (y=0; y<4; y++) {
2558 for (x=0; x<4; x++) {
2559 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra macroblock: border xchg, chroma pred, then 4x4/8x8/16x16 luma pred */
2564 if(IS_INTRA(mb_type)){
2565 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2566 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2568 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2569 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2570 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2573 if(IS_INTRA4x4(mb_type)){
2574 if(simple || !s->encoding){
2575 if(IS_8x8DCT(mb_type)){
2576 for(i=0; i<16; i+=4){
2577 uint8_t * const ptr= dest_y + block_offset[i];
2578 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2579 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2580 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2581 (h->topright_samples_available<<i)&0x4000, linesize);
2583 if(nnz == 1 && h->mb[i*16])
2584 idct_dc_add(ptr, h->mb + i*16, linesize);
2586 idct_add(ptr, h->mb + i*16, linesize);
2590 for(i=0; i<16; i++){
2591 uint8_t * const ptr= dest_y + block_offset[i];
2593 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* replicate the last available pixel when the top-right block is missing */
2596 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2597 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2598 assert(mb_y || linesize <= block_offset[i]);
2599 if(!topright_avail){
2600 tr= ptr[3 - linesize]*0x01010101;
2601 topright= (uint8_t*) &tr;
2603 topright= ptr + 4 - linesize;
2607 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2608 nnz = h->non_zero_count_cache[ scan8[i] ];
2611 if(nnz == 1 && h->mb[i*16])
2612 idct_dc_add(ptr, h->mb + i*16, linesize);
2614 idct_add(ptr, h->mb + i*16, linesize);
2616 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2621 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2623 if(!transform_bypass)
2624 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2626 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2628 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2629 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: full motion compensation */
2631 hl_motion(h, dest_y, dest_cb, dest_cr,
2632 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2633 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2634 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add the luma residual (intra16x16 keeps DC separate, hence the split) */
2638 if(!IS_INTRA4x4(mb_type)){
2640 if(IS_INTRA16x16(mb_type)){
2641 for(i=0; i<16; i++){
2642 if(h->non_zero_count_cache[ scan8[i] ])
2643 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2644 else if(h->mb[i*16])
2645 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2648 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2649 for(i=0; i<16; i+=di){
2650 int nnz = h->non_zero_count_cache[ scan8[i] ];
2652 if(nnz==1 && h->mb[i*16])
2653 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2655 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2660 for(i=0; i<16; i++){
2661 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2662 uint8_t * const ptr= dest_y + block_offset[i];
2663 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: DC dequant+idct first, then the 2x(2x2) AC blocks */
2669 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2670 uint8_t *dest[2] = {dest_cb, dest_cr};
2671 if(transform_bypass){
2672 idct_add = idct_dc_add = s->dsp.add_pixels4;
2674 idct_add = s->dsp.h264_idct_add;
2675 idct_dc_add = s->dsp.h264_idct_dc_add;
2676 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2677 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2680 for(i=16; i<16+8; i++){
2681 if(h->non_zero_count_cache[ scan8[i] ])
2682 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2683 else if(h->mb[i*16])
2684 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2687 for(i=16; i<16+8; i++){
2688 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2689 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2690 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: MBAFF filters the whole pair once the bottom MB is decoded */
2696 if(h->deblocking_filter) {
2697 if (!simple && FRAME_MBAFF) {
2698 //FIXME try deblocking one mb at a time?
2699 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2700 const int mb_y = s->mb_y - 1;
2701 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2702 const int mb_xy= mb_x + mb_y*s->mb_stride;
2703 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2704 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2705 if (!bottom) return;
2706 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2707 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2708 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2710 if(IS_INTRA(mb_type_top | mb_type_bottom))
2711 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2713 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2717 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2718 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2719 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2720 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2721 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2724 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2725 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2726 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2727 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2728 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2730 tprintf(h->s.avctx, "call filter_mb\n");
2731 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2732 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2733 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2739 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast-path macroblock reconstruction: MBAFF/GRAY/PCM/SVQ3 checks compiled out. */
2741 static void hl_decode_mb_simple(H264Context *h){
2742 hl_decode_mb_internal(h, 1);
2746 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow-path macroblock reconstruction: handles interlacing, PCM, gray, SVQ3. */
2748 static void av_noinline hl_decode_mb_complex(H264Context *h){
2749 hl_decode_mb_internal(h, 0);
/*
 * hl_decode_mb(): dispatch macroblock reconstruction to the simple or
 * complex variant depending on features in use for this MB/stream.
 * (The condition using is_complex is partially missing from this listing.)
 */
2752 static void hl_decode_mb(H264Context *h){
2753 MpegEncContext * const s = &h->s;
2754 const int mb_x= s->mb_x;
2755 const int mb_y= s->mb_y;
2756 const int mb_xy= mb_x + mb_y*s->mb_stride;
2757 const int mb_type= s->current_picture.mb_type[mb_xy];
2758 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2764 hl_decode_mb_complex(h);
2765 else hl_decode_mb_simple(h);
/*
 * pic_as_field(): rewrite a frame Picture in place so its data pointers and
 * linesizes address only the requested field (top or bottom parity).
 * NOTE(review): pic->reference is reassigned on every loop iteration;
 * harmless but it only needs setting once.
 */
2768 static void pic_as_field(Picture *pic, const int parity){
2770 for (i = 0; i < 4; ++i) {
2771 if (parity == PICT_BOTTOM_FIELD)
2772 pic->data[i] += pic->linesize[i];
2773 pic->reference = parity;
2774 pic->linesize[i] *= 2;
/*
 * split_field_copy(): if 'src' is a reference for the given parity, copy it
 * into 'dest' as a single field (via pic_as_field) and bump pic_id by
 * id_add.  Returns 1 when a field was emitted, 0 otherwise.
 * (The guarded copy and return lines are partially missing from this listing.)
 */
2778 static int split_field_copy(Picture *dest, Picture *src,
2779 int parity, int id_add){
2780 int match = !!(src->reference & parity);
2784 pic_as_field(dest, parity);
2786 dest->pic_id += id_add;
2793 * Split one reference list into field parts, interleaving by parity
2794 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2795 * set to look at the actual start of data for that field.
2797 * @param dest output list
2798 * @param dest_len maximum number of fields to put in dest
2799 * @param src the source reference list containing fields and/or field pairs
2800 * (aka short_ref/long_ref, or
2801 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2802 * @param src_len number of Picture's in source (pairs and unmatched fields)
2803 * @param parity the parity of the picture being decoded/needing
2804 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2805 * @return number of fields placed in dest
/*
 * split_field_half_ref_list(): interleave fields from 'src' into 'dest',
 * alternating between same-parity and opposite-parity candidates as per
 * H.264 8.2.4.2.5.  same_parity flips whenever the preferred side fails to
 * produce a field.  Returns the number of fields written (return line is
 * missing from this listing).
 */
2807 static int split_field_half_ref_list(Picture *dest, int dest_len,
2808 Picture *src, int src_len, int parity){
2809 int same_parity = 1;
2815 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2816 if (same_parity && same_i < src_len) {
2817 field_output = split_field_copy(dest + out_i, src + same_i,
2819 same_parity = !field_output;
2822 } else if (opp_i < src_len) {
2823 field_output = split_field_copy(dest + out_i, src + opp_i,
2824 PICT_FRAME - parity, 0);
2825 same_parity = field_output;
2837 * Split the reference frame list into a reference field list.
2838 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2839 * The input list contains both reference field pairs and
2840 * unmatched reference fields; it is ordered as spec describes
2841 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2842 * unmatched field pairs are also present. Conceptually this is equivalent
2843 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2845 * @param dest output reference list where ordered fields are to be placed
2846 * @param dest_len max number of fields to place at dest
2847 * @param src source reference list, as described above
2848 * @param src_len number of pictures (pairs and unmatched fields) in src
2849 * @param parity parity of field being currently decoded
2850 * (one of PICT_{TOP,BOTTOM}_FIELD)
2851 * @param long_i index into src array that holds first long reference picture,
2852 * or src_len if no long refs present.
/*
 * split_field_ref_list(): run split_field_half_ref_list() separately over
 * the short-term prefix [0,long_i) and the long-term tail [long_i,src_len),
 * so long refs never interleave with short refs.  Returns total fields
 * placed (return line missing from this listing).
 */
2854 static int split_field_ref_list(Picture *dest, int dest_len,
2855 Picture *src, int src_len,
2856 int parity, int long_i){
2858 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2862 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2863 src_len - long_i, parity);
2868 * fills the default_ref_list.
/*
 * fill_default_ref_list(): build the default reference picture lists
 * (H.264 8.2.4.2).  For B slices, short refs are sorted by POC around the
 * current picture (descending below for L0, ascending above for L1); for
 * P slices they are taken in frame_num order.  Long-term refs are appended,
 * and for field pictures the frame lists are split into field lists.
 * Fixes vs. previous revision: removed two stray empty statements
 * ("pic_id= i;;") and corrected the List1 debug tprintf, which printed
 * default_ref_list[0]'s data pointer while iterating list 1.
 */
2870 static int fill_default_ref_list(H264Context *h){
2871 MpegEncContext * const s = &h->s;
2873 int smallest_poc_greater_than_current = -1;
2875 Picture sorted_short_ref[32];
2876 Picture field_entry_list[2][32];
2877 Picture *frame_list[2];
/* field decode: build frame lists in scratch, split into fields at the end */
2879 if (FIELD_PICTURE) {
2880 structure_sel = PICT_FRAME;
2881 frame_list[0] = field_entry_list[0];
2882 frame_list[1] = field_entry_list[1];
2885 frame_list[0] = h->default_ref_list[0];
2886 frame_list[1] = h->default_ref_list[1];
2889 if(h->slice_type==B_TYPE){
2896 /* sort frame according to poc in B slice */
2897 for(out_i=0; out_i<h->short_ref_count; out_i++){
2899 int best_poc=INT_MAX;
2901 for(i=0; i<h->short_ref_count; i++){
2902 const int poc= h->short_ref[i]->poc;
2903 if(poc > limit && poc < best_poc){
2909 assert(best_i != INT_MIN);
2912 sorted_short_ref[out_i]= *h->short_ref[best_i];
2913 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
2914 if (-1 == smallest_poc_greater_than_current) {
2915 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2916 smallest_poc_greater_than_current = out_i;
2921 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2923 // find the largest poc
2924 for(list=0; list<2; list++){
2927 int step= list ? -1 : 1;
/* walk sorted refs outward from the current POC, direction per list */
2929 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2931 while(j<0 || j>= h->short_ref_count){
2932 if(j != -99 && step == (list ? -1 : 1))
2935 j= smallest_poc_greater_than_current + (step>>1);
2937 sel = sorted_short_ref[j].reference | structure_sel;
2938 if(sel != PICT_FRAME) continue;
2939 frame_list[list][index ]= sorted_short_ref[j];
2940 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2942 short_len[list] = index;
/* append long-term refs; their pic_id is the long-term index */
2944 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2946 if(h->long_ref[i] == NULL) continue;
2947 sel = h->long_ref[i]->reference | structure_sel;
2948 if(sel != PICT_FRAME) continue;
2950 frame_list[ list ][index ]= *h->long_ref[i];
2951 frame_list[ list ][index++].pic_id= i;
2955 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2956 // swap the two first elements of L1 when
2957 // L0 and L1 are identical
2958 Picture temp= frame_list[1][0];
2959 frame_list[1][0] = frame_list[1][1];
2960 frame_list[1][1] = temp;
2965 for(list=0; list<2; list++){
2967 len[list] = split_field_ref_list(h->default_ref_list[list],
2971 s->picture_structure,
2974 if(len[list] < h->ref_count[ list ])
2975 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* P/SP slice: single list, short refs in stored order then long refs */
2982 for(i=0; i<h->short_ref_count; i++){
2984 sel = h->short_ref[i]->reference | structure_sel;
2985 if(sel != PICT_FRAME) continue;
2986 frame_list[0][index ]= *h->short_ref[i];
2987 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2990 for(i = 0; i < 16; i++){
2992 if(h->long_ref[i] == NULL) continue;
2993 sel = h->long_ref[i]->reference | structure_sel;
2994 if(sel != PICT_FRAME) continue;
2995 frame_list[0][index ]= *h->long_ref[i];
2996 frame_list[0][index++].pic_id= i;
3000 index = split_field_ref_list(h->default_ref_list[0],
3001 h->ref_count[0], frame_list[0],
3002 index, s->picture_structure,
3005 if(index < h->ref_count[0])
3006 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3009 for (i=0; i<h->ref_count[0]; i++) {
3010 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3012 if(h->slice_type==B_TYPE){
3013 for (i=0; i<h->ref_count[1]; i++) {
3014 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3021 static void print_short_term(H264Context *h);
3022 static void print_long_term(H264Context *h);
3025 * Extract structure information about the picture described by pic_num in
3026 * the current decoding context (frame or field). Note that pic_num is
3027 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3028 * @param pic_num picture number for which to extract structure information
3029 * @param structure one of PICT_XXX describing structure of picture
3031 * @return frame number (short term) or long term index of picture
3032 * described by pic_num
3034 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3035 MpegEncContext * const s = &h->s;
/* Default: the picture referred to has the same structure as the one
 * currently being decoded. */
3037 *structure = s->picture_structure;
3040 /* opposite field */
/* NOTE(review): the guard selecting this branch is elided in this extract;
 * presumably the parity of pic_num chooses same vs. opposite field when
 * decoding a field picture — confirm against the full source. */
3041 *structure ^= PICT_FRAME;
/* Parse the ref_pic_list_reordering() slice-header syntax and permute
 * h->ref_list[] accordingly, starting from the default reference lists. */
3048 static int decode_ref_pic_list_reordering(H264Context *h){
3049 MpegEncContext * const s = &h->s;
3050 int list, index, pic_structure;
3052 print_short_term(h);
/* Intra slices carry no reference lists: nothing to reorder. */
3054 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3056 for(list=0; list<h->list_count; list++){
/* Start from the default list; reordering commands then modify it in place. */
3057 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3059 if(get_bits1(&s->gb)){
/* pred tracks the running predicted pic num (picNumLXPred in the spec). */
3060 int pred= h->curr_pic_num;
3062 for(index=0; ; index++){
3063 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3064 unsigned int pic_id;
3066 Picture *ref = NULL;
/* idc 3 terminates the reordering command list. */
3068 if(reordering_of_pic_nums_idc==3)
3071 if(index >= h->ref_count[list]){
3072 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3076 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term reference, addressed by a signed pic-num delta. */
3077 if(reordering_of_pic_nums_idc<2){
3078 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3081 if(abs_diff_pic_num > h->max_pic_num){
3082 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3086 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3087 else pred+= abs_diff_pic_num;
/* modular wrap — max_pic_num is a power of two */
3088 pred &= h->max_pic_num - 1;
3090 frame_num = pic_num_extract(h, pred, &pic_structure);
/* Search the short-term list for that frame_num, newest entries first. */
3092 for(i= h->short_ref_count-1; i>=0; i--){
3093 ref = h->short_ref[i];
3094 assert(ref->reference);
3095 assert(!ref->long_ref);
3096 if(ref->data[0] != NULL &&
3097 ref->frame_num == frame_num &&
3098 (ref->reference & pic_structure) &&
3099 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc 2: long-term reference, addressed directly by long_term_pic_idx. */
3106 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3108 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3111 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3114 ref = h->long_ref[long_idx];
3115 assert(!(ref && !ref->reference));
3116 if(ref && (ref->reference & pic_structure)){
3117 ref->pic_id= pic_id;
3118 assert(ref->long_ref);
/* No matching reference found: zero the slot rather than leave stale data. */
3126 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3127 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Shift following entries down and insert the reordered picture at 'index'. */
3129 for(i=index; i+1<h->ref_count[list]; i++){
3130 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3133 for(; i > index; i--){
3134 h->ref_list[list][i]= h->ref_list[list][i-1];
3136 h->ref_list[list][index]= *ref;
3138 pic_as_field(&h->ref_list[list][index], pic_structure);
3142 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* Replace any still-empty slots with the current picture so later code never
 * dereferences a NULL frame pointer. */
3148 for(list=0; list<h->list_count; list++){
3149 for(index= 0; index < h->ref_count[list]; index++){
3150 if(!h->ref_list[list][index].data[0])
3151 h->ref_list[list][index]= s->current_picture;
/* Temporal direct mode needs the distance scale factors precomputed. */
3155 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3156 direct_dist_scale_factor(h);
3157 direct_ref_list_init(h);
/* Build per-field reference entries (stored at 16+2*i / 16+2*i+1) from each
 * frame reference so MBAFF macroblocks can address individual fields, and
 * duplicate the corresponding prediction weights and offsets. */
3161 static void fill_mbaff_ref_list(H264Context *h){
3163 for(list=0; list<2; list++){ //FIXME try list_count
3164 for(i=0; i<h->ref_count[list]; i++){
3165 Picture *frame = &h->ref_list[list][i];
3166 Picture *field = &h->ref_list[list][16+2*i];
/* A field picture skips every other line of the frame: double the stride. */
3169 field[0].linesize[j] <<= 1;
3170 field[0].reference = PICT_TOP_FIELD;
3171 field[1] = field[0];
/* The bottom field starts one (frame) line below the top field. */
3173 field[1].data[j] += frame->linesize[j];
3174 field[1].reference = PICT_BOTTOM_FIELD;
/* Both field entries inherit the frame's explicit weights and offsets. */
3176 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3177 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3179 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3180 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* Mirror the implicit weight table into the field entries on both axes. */
3184 for(j=0; j<h->ref_count[1]; j++){
3185 for(i=0; i<h->ref_count[0]; i++)
3186 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3187 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3188 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table(): explicit per-reference luma/chroma weights and
 * offsets for weighted prediction. h->use_weight / h->use_weight_chroma are
 * raised only when some value differs from the identity defaults. */
3192 static int pred_weight_table(H264Context *h){
3193 MpegEncContext * const s = &h->s;
3195 int luma_def, chroma_def;
3198 h->use_weight_chroma= 0;
3199 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3200 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* Identity weight is 1.0 in fixed point, i.e. 1 << denom. */
3201 luma_def = 1<<h->luma_log2_weight_denom;
3202 chroma_def = 1<<h->chroma_log2_weight_denom;
3204 for(list=0; list<2; list++){
3205 for(i=0; i<h->ref_count[list]; i++){
3206 int luma_weight_flag, chroma_weight_flag;
3208 luma_weight_flag= get_bits1(&s->gb);
3209 if(luma_weight_flag){
3210 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3211 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* Only flag weighting as in use when it differs from identity. */
3212 if( h->luma_weight[list][i] != luma_def
3213 || h->luma_offset[list][i] != 0)
/* Flag absent: fall back to identity weight / zero offset. */
3216 h->luma_weight[list][i]= luma_def;
3217 h->luma_offset[list][i]= 0;
3220 chroma_weight_flag= get_bits1(&s->gb);
3221 if(chroma_weight_flag){
3224 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3225 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3226 if( h->chroma_weight[list][i][j] != chroma_def
3227 || h->chroma_offset[list][i][j] != 0)
3228 h->use_weight_chroma= 1;
3233 h->chroma_weight[list][i][j]= chroma_def;
3234 h->chroma_offset[list][i][j]= 0;
/* A list-1 weight table is only transmitted for B slices. */
3238 if(h->slice_type != B_TYPE) break;
3240 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Derive implicit bi-prediction weights for every (list0, list1) reference
 * pair from their POC distances to the current picture. */
3244 static void implicit_weight_table(H264Context *h){
3245 MpegEncContext * const s = &h->s;
3247 int cur_poc = s->current_picture_ptr->poc;
/* Single references equidistant from the current picture: implicit
 * weighting degenerates to the plain 1:1 average, so disable it. */
3249 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3250 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3252 h->use_weight_chroma= 0;
3257 h->use_weight_chroma= 2;
3258 h->luma_log2_weight_denom= 5;
3259 h->chroma_log2_weight_denom= 5;
3261 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3262 int poc0 = h->ref_list[0][ref0].poc;
3263 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3264 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor are the spec's fixed-point derivation.
 * NOTE(review): the td != 0 guard appears to be on an elided line here;
 * the division by td below relies on it — confirm in the full source. */
3265 int td = av_clip(poc1 - poc0, -128, 127);
3267 int tb = av_clip(cur_poc - poc0, -128, 127);
3268 int tx = (16384 + (FFABS(td) >> 1)) / td;
3269 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range factors fall back to the equal weight of 32 (out of 64). */
3270 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3271 h->implicit_weight[ref0][ref1] = 32;
3273 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3275 h->implicit_weight[ref0][ref1] = 32;
3281 * Mark a picture as no longer needed for reference. The refmask
3282 * argument allows unreferencing of individual fields or the whole frame.
3283 * If the picture becomes entirely unreferenced, but is being held for
3284 * display purposes, it is marked as such.
3285 * @param refmask mask of fields to unreference; the mask is bitwise
3286 * anded with the reference marking of pic
3287 * @return non-zero if pic becomes entirely unreferenced (except possibly
3288 * for display purposes) zero if one of the fields remains in
3291 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* Keep only the reference bits allowed by refmask; a non-zero result means
 * at least one field of the picture is still referenced. */
3293 if (pic->reference &= refmask) {
/* Fully unreferenced: if the picture is still queued for display, mark it
 * DELAYED_PIC_REF so its buffer is not recycled before it is output. */
3296 if(pic == h->delayed_output_pic)
3297 pic->reference=DELAYED_PIC_REF;
3299 for(i = 0; h->delayed_pic[i]; i++)
3300 if(pic == h->delayed_pic[i]){
3301 pic->reference=DELAYED_PIC_REF;
3310 * instantaneous decoder refresh.
3312 static void idr(H264Context *h){
/* An IDR picture invalidates every previously decoded reference: drop all
 * long-term entries first, then all short-term entries. */
3315 for(i=0; i<16; i++){
3316 if (h->long_ref[i] != NULL) {
3317 unreference_pic(h, h->long_ref[i], 0);
3318 h->long_ref[i]= NULL;
3321 h->long_ref_count=0;
3323 for(i=0; i<h->short_ref_count; i++){
3324 unreference_pic(h, h->short_ref[i], 0);
3325 h->short_ref[i]= NULL;
3327 h->short_ref_count=0;
3330 /* forget old pics after a seek */
3331 static void flush_dpb(AVCodecContext *avctx){
3332 H264Context *h= avctx->priv_data;
/* Drop the reference marking of everything queued for delayed output. */
3334 for(i=0; i<16; i++) {
3335 if(h->delayed_pic[i])
3336 h->delayed_pic[i]->reference= 0;
3337 h->delayed_pic[i]= NULL;
3339 if(h->delayed_output_pic)
3340 h->delayed_output_pic->reference= 0;
3341 h->delayed_output_pic= NULL;
3343 if(h->s.current_picture_ptr)
3344 h->s.current_picture_ptr->reference= 0;
/* After a seek we can no longer be waiting for the second field of a pair. */
3345 h->s.first_field= 0;
3346 ff_mpeg_flush(avctx);
3350 * Find a Picture in the short term reference list by frame number.
3351 * @param frame_num frame number to search for
3352 * @param idx the index into h->short_ref where returned picture is found
3353 * undefined if no picture found.
3354 * @return pointer to the found picture, or NULL if no pic with the provided
3355 * frame number is found
3357 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3358 MpegEncContext * const s = &h->s;
/* Linear scan of the short-term list for a matching frame_num; *idx gets
 * the position of the first match (index 0 is a valid result). */
3361 for(i=0; i<h->short_ref_count; i++){
3362 Picture *pic= h->short_ref[i];
3363 if(s->avctx->debug&FF_DEBUG_MMCO)
3364 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3365 if(pic->frame_num == frame_num) {
3374 * Remove a picture from the short term reference list by its index in
3375 * that list. This does no checking on the provided index; it is assumed
3376 * to be valid. Other list entries are shifted down.
3377 * @param i index into h->short_ref of picture to remove.
3379 static void remove_short_at_index(H264Context *h, int i){
3380 assert(i > 0 && i < h->short_ref_count);
3381 h->short_ref[i]= NULL;
3382 if (--h->short_ref_count)
3383 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3388 * @return the removed picture or NULL if an error occurs
3390 static Picture * remove_short(H264Context *h, int frame_num){
3391 MpegEncContext * const s = &h->s;
3395 if(s->avctx->debug&FF_DEBUG_MMCO)
3396 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
/* Locate by frame_num, then delete by index (later entries shift down). */
3398 pic = find_short(h, frame_num, &i);
3400 remove_short_at_index(h, i);
3406 * Remove a picture from the long term reference list by its index in
3407 * that list. This does no checking on the provided index; it is assumed
3408 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3409 * @param i index into h->long_ref of picture to remove.
3411 static void remove_long_at_index(H264Context *h, int i){
/* Long-term slots are addressed directly by index, so just clear the slot;
 * unlike the short-term list, other entries keep their positions. */
3412 h->long_ref[i]= NULL;
3413 h->long_ref_count--;
3418 * @return the removed picture or NULL if an error occurs
3420 static Picture * remove_long(H264Context *h, int i){
/* Fetch the slot's occupant, then clear the slot. NOTE(review): the guard
 * for an empty slot is on an elided line here; presumably the slot is only
 * cleared when non-NULL — confirm in the full source. */
3423 pic= h->long_ref[i];
3425 remove_long_at_index(h, i);
3431 * print short term list
3433 static void print_short_term(H264Context *h) {
/* Debug-only dump; gated on FF_DEBUG_MMCO so it is silent in normal use. */
3435 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3436 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3437 for(i=0; i<h->short_ref_count; i++){
3438 Picture *pic= h->short_ref[i];
3439 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3445 * print long term list
3447 static void print_long_term(H264Context *h) {
/* Debug-only dump of all 16 long-term slots; gated on FF_DEBUG_MMCO. */
3449 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3450 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3451 for(i = 0; i < 16; i++){
3452 Picture *pic= h->long_ref[i];
3454 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3461 * Executes the reference picture marking (memory management control operations).
/* Apply the slice's memory management control operations (MMCOs) to the
 * short/long term reference lists, then fall back to sliding-window handling
 * for the current picture when no MMCO assigned it a reference slot. */
3463 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3464 MpegEncContext * const s = &h->s;
3466 int current_ref_assigned=0;
3469 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3470 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3472 for(i=0; i<mmco_count; i++){
3473 int structure, frame_num, unref_pic;
3474 if(s->avctx->debug&FF_DEBUG_MMCO)
3475 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3477 switch(mmco[i].opcode){
/* Mark a short-term picture (or one of its fields) unused for reference. */
3478 case MMCO_SHORT2UNUSED:
3479 if(s->avctx->debug&FF_DEBUG_MMCO)
3480 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3481 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3482 pic = find_short(h, frame_num, &j);
/* Only drop the list entry once both fields are unreferenced. */
3484 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3485 remove_short_at_index(h, j);
3486 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3487 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* Move a short-term picture to a given long-term index. */
3489 case MMCO_SHORT2LONG:
3490 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3491 h->long_ref[mmco[i].long_arg]->frame_num ==
3492 mmco[i].short_pic_num / 2) {
3493 /* do nothing, we've already moved this field pair. */
3495 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* Evict whatever previously occupied the target long-term slot. */
3497 pic= remove_long(h, mmco[i].long_arg);
3498 if(pic) unreference_pic(h, pic, 0);
3500 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3501 if (h->long_ref[ mmco[i].long_arg ]){
3502 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3503 h->long_ref_count++;
/* Mark a long-term picture (or one of its fields) unused for reference. */
3507 case MMCO_LONG2UNUSED:
3508 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3509 pic = h->long_ref[j];
3511 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3512 remove_long_at_index(h, j);
3513 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3514 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* (MMCO_LONG case, label elided) assign current picture to long-term idx. */
3518 if (FIELD_PICTURE && !s->first_field) {
3519 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3520 /* Just mark second field as referenced */
3522 } else if (s->current_picture_ptr->reference) {
3523 /* First field in pair is in short term list or
3524 * at a different long term index.
3525 * This is not allowed; see 7.4.3, notes 2 and 3.
3526 * Report the problem and keep the pair where it is,
3527 * and mark this field valid.
3529 av_log(h->s.avctx, AV_LOG_ERROR,
3530 "illegal long term reference assignment for second "
3531 "field in complementary field pair (first field is "
3532 "short term or has non-matching long index)\n");
3538 pic= remove_long(h, mmco[i].long_arg);
3539 if(pic) unreference_pic(h, pic, 0);
3541 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3542 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3543 h->long_ref_count++;
3546 s->current_picture_ptr->reference |= s->picture_structure;
3547 current_ref_assigned=1;
/* Shrink the maximum long-term index: drop entries above the new limit. */
3549 case MMCO_SET_MAX_LONG:
3550 assert(mmco[i].long_arg <= 16);
3551 // just remove the long term which index is greater than new max
3552 for(j = mmco[i].long_arg; j<16; j++){
3553 pic = remove_long(h, j);
3554 if (pic) unreference_pic(h, pic, 0);
/* (reset case, label elided) flush all short and long term references. */
3558 while(h->short_ref_count){
3559 pic= remove_short(h, h->short_ref[0]->frame_num);
3560 if(pic) unreference_pic(h, pic, 0);
3562 for(j = 0; j < 16; j++) {
3563 pic= remove_long(h, j);
3564 if(pic) unreference_pic(h, pic, 0);
/* Second field of a pair whose first field is already referenced: validate
 * the pair instead of inserting the picture a second time. */
3571 if (!current_ref_assigned && FIELD_PICTURE &&
3572 !s->first_field && s->current_picture_ptr->reference) {
3574 /* Second field of complementary field pair; the first field of
3575 * which is already referenced. If short referenced, it
3576 * should be first entry in short_ref. If not, it must exist
3577 * in long_ref; trying to put it on the short list here is an
3578 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3580 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3581 /* Just mark the second field valid */
3582 s->current_picture_ptr->reference = PICT_FRAME;
3583 } else if (s->current_picture_ptr->long_ref) {
3584 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3585 "assignment for second field "
3586 "in complementary field pair "
3587 "(first field is long term)\n");
3590 * First field in reference, but not in any sensible place on our
3591 * reference lists. This shouldn't happen unless reference
3592 * handling somewhere else is wrong.
3596 current_ref_assigned = 1;
/* Default (sliding window): push the current picture onto the front of the
 * short-term list. */
3599 if(!current_ref_assigned){
3600 pic= remove_short(h, s->current_picture_ptr->frame_num);
3602 unreference_pic(h, pic, 0);
3603 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3606 if(h->short_ref_count)
3607 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3609 h->short_ref[0]= s->current_picture_ptr;
3610 h->short_ref[0]->long_ref=0;
3611 h->short_ref_count++;
3612 s->current_picture_ptr->reference |= s->picture_structure;
3615 print_short_term(h);
/* Parse dec_ref_pic_marking(): collect MMCO commands into h->mmco[], or
 * synthesize a sliding-window removal when the reference buffer is full. */
3620 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3621 MpegEncContext * const s = &h->s;
3624 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3625 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: -1 here means the IDR stays short term. */
3626 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3627 if(h->mmco[0].long_arg == -1)
3630 h->mmco[0].opcode= MMCO_LONG;
3634 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3635 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3636 MMCOOpcode opcode= get_ue_golomb(gb);
3638 h->mmco[i].opcode= opcode;
3639 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num. */
3640 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3641 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3642 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3646 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3647 unsigned int long_arg= get_ue_golomb(gb);
/* Field pictures may address individual fields, hence the 32 limit for
 * LONG2UNUSED; all other opcodes index the 16 long-term frame slots. */
3648 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3649 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3652 h->mmco[i].long_arg= long_arg;
3655 if(opcode > (unsigned)MMCO_LONG){
3656 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3659 if(opcode == MMCO_END)
/* Sliding window: buffer full, so schedule removal of the oldest
 * short-term reference (both of its fields when field coding). */
3664 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3666 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3667 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3668 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3669 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3671 if (FIELD_PICTURE) {
3672 h->mmco[0].short_pic_num *= 2;
3673 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3674 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/* Derive the picture order count (POC) of the current picture for all three
 * poc_type modes and store the per-field and frame values. */
3685 static int init_poc(H264Context *h){
3686 MpegEncContext * const s = &h->s;
3687 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3690 if(h->nal_unit_type == NAL_IDR_SLICE){
3691 h->frame_num_offset= 0;
/* frame_num wrapped since the previous picture: advance the offset. */
3693 if(h->frame_num < h->prev_frame_num)
3694 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3696 h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: POC LSBs are transmitted; reconstruct the MSBs here. */
3699 if(h->sps.poc_type==0){
3700 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3702 if(h->nal_unit_type == NAL_IDR_SLICE){
/* Detect LSB wrap in either direction and step the MSBs accordingly. */
3707 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3708 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3709 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3710 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3712 h->poc_msb = h->prev_poc_msb;
3713 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3715 field_poc[1] = h->poc_msb + h->poc_lsb;
3716 if(s->picture_structure == PICT_FRAME)
3717 field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC derived from frame_num plus SPS-signalled offsets. */
3718 }else if(h->sps.poc_type==1){
3719 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3722 if(h->sps.poc_cycle_length != 0)
3723 abs_frame_num = h->frame_num_offset + h->frame_num;
3727 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3730 expected_delta_per_poc_cycle = 0;
3731 for(i=0; i < h->sps.poc_cycle_length; i++)
3732 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3734 if(abs_frame_num > 0){
3735 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3736 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3738 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3739 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3740 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3744 if(h->nal_ref_idc == 0)
3745 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3747 field_poc[0] = expectedpoc + h->delta_poc[0];
3748 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3750 if(s->picture_structure == PICT_FRAME)
3751 field_poc[1] += h->delta_poc[1];
/* poc_type 2 (branch label elided): POC follows decoding order,
 * 2*frame order for references, one less for non-reference pictures. */
3754 if(h->nal_unit_type == NAL_IDR_SLICE){
3757 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3758 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* Store per-field POCs; a frame picture gets the minimum of both fields. */
3764 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3765 s->current_picture_ptr->field_poc[0]= field_poc[0];
3766 s->current_picture_ptr->poc = field_poc[0];
3768 if(s->picture_structure != PICT_TOP_FIELD) {
3769 s->current_picture_ptr->field_poc[1]= field_poc[1];
3770 s->current_picture_ptr->poc = field_poc[1];
3772 if(!FIELD_PICTURE || !s->first_field) {
3773 Picture *cur = s->current_picture_ptr;
3774 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3782 * initialize scan tables
3784 static void init_scan_tables(H264Context *h){
3785 MpegEncContext * const s = &h->s;
/* The C IDCT uses the standard coefficient order; optimized IDCTs expect a
 * permuted order, so the scan tables are transposed to match. */
3787 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3788 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3789 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3791 for(i=0; i<16; i++){
/* Swap the two 2-bit halves of each 4x4 scan position. */
3792 #define T(x) (x>>2) | ((x<<2) & 0xF)
3793 h->zigzag_scan[i] = T(zigzag_scan[i]);
3794 h-> field_scan[i] = T( field_scan[i]);
3798 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3799 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3800 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3801 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3802 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3804 for(i=0; i<64; i++){
/* Swap row and column (3 bits each) of each 8x8 scan position. */
3805 #define T(x) (x>>3) | ((x&7)<<3)
3806 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3807 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3808 h->field_scan8x8[i] = T(field_scan8x8[i]);
3809 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* In lossless (transform bypass) mode the *_q0 tables must stay in the
 * unpermuted order, independent of the IDCT in use. */
3813 if(h->sps.transform_bypass){ //FIXME same ugly
3814 h->zigzag_scan_q0 = zigzag_scan;
3815 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3816 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3817 h->field_scan_q0 = field_scan;
3818 h->field_scan8x8_q0 = field_scan8x8;
3819 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3821 h->zigzag_scan_q0 = h->zigzag_scan;
3822 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3823 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3824 h->field_scan_q0 = h->field_scan;
3825 h->field_scan8x8_q0 = h->field_scan8x8;
3826 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3831 * Replicates H264 "master" context to thread contexts.
3833 static void clone_slice(H264Context *dst, H264Context *src)
/* Shallow-copy the per-picture decoding state (current picture, strides,
 * POC bookkeeping, reference lists, dequant tables) from the master context
 * into a worker-thread context. */
3835 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3836 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3837 dst->s.current_picture = src->s.current_picture;
3838 dst->s.linesize = src->s.linesize;
3839 dst->s.uvlinesize = src->s.uvlinesize;
3840 dst->s.first_field = src->s.first_field;
3842 dst->prev_poc_msb = src->prev_poc_msb;
3843 dst->prev_poc_lsb = src->prev_poc_lsb;
3844 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3845 dst->prev_frame_num = src->prev_frame_num;
3846 dst->short_ref_count = src->short_ref_count;
3848 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3849 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3850 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3851 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3853 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3854 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3858 * decodes a slice header.
3859 * this will also call MPV_common_init() and frame_start() as needed
3861 * @param h h264context
3862 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3864 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3866 static int decode_slice_header(H264Context *h, H264Context *h0){
3867 MpegEncContext * const s = &h->s;
3868 MpegEncContext * const s0 = &h0->s;
3869 unsigned int first_mb_in_slice;
3870 unsigned int pps_id;
3871 int num_ref_idx_active_override_flag;
3872 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3873 unsigned int slice_type, tmp, i;
3874 int default_ref_list_done = 0;
3875 int last_pic_structure;
3877 s->dropable= h->nal_ref_idc == 0;
3879 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3880 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3881 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3883 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3884 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3887 first_mb_in_slice= get_ue_golomb(&s->gb);
3889 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3890 h0->current_slice = 0;
3891 if (!s0->first_field)
3892 s->current_picture_ptr= NULL;
3895 slice_type= get_ue_golomb(&s->gb);
3897 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3902 h->slice_type_fixed=1;
3904 h->slice_type_fixed=0;
3906 slice_type= slice_type_map[ slice_type ];
3907 if (slice_type == I_TYPE
3908 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3909 default_ref_list_done = 1;
3911 h->slice_type= slice_type;
3913 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3915 pps_id= get_ue_golomb(&s->gb);
3916 if(pps_id>=MAX_PPS_COUNT){
3917 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3920 if(!h0->pps_buffers[pps_id]) {
3921 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3924 h->pps= *h0->pps_buffers[pps_id];
3926 if(!h0->sps_buffers[h->pps.sps_id]) {
3927 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3930 h->sps = *h0->sps_buffers[h->pps.sps_id];
3932 if(h == h0 && h->dequant_coeff_pps != pps_id){
3933 h->dequant_coeff_pps = pps_id;
3934 init_dequant_tables(h);
3937 s->mb_width= h->sps.mb_width;
3938 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3940 h->b_stride= s->mb_width*4;
3941 h->b8_stride= s->mb_width*2;
3943 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3944 if(h->sps.frame_mbs_only_flag)
3945 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3947 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3949 if (s->context_initialized
3950 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3952 return -1; // width / height changed during parallelized decoding
3956 if (!s->context_initialized) {
3958 return -1; // we cant (re-)initialize context during parallel decoding
3959 if (MPV_common_init(s) < 0)
3963 init_scan_tables(h);
3966 for(i = 1; i < s->avctx->thread_count; i++) {
3968 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3969 memcpy(c, h, sizeof(MpegEncContext));
3970 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3973 init_scan_tables(c);
3977 for(i = 0; i < s->avctx->thread_count; i++)
3978 if(context_init(h->thread_context[i]) < 0)
3981 s->avctx->width = s->width;
3982 s->avctx->height = s->height;
3983 s->avctx->sample_aspect_ratio= h->sps.sar;
3984 if(!s->avctx->sample_aspect_ratio.den)
3985 s->avctx->sample_aspect_ratio.den = 1;
3987 if(h->sps.timing_info_present_flag){
3988 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3989 if(h->x264_build > 0 && h->x264_build < 44)
3990 s->avctx->time_base.den *= 2;
3991 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3992 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3996 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3999 h->mb_aff_frame = 0;
4000 last_pic_structure = s0->picture_structure;
4001 if(h->sps.frame_mbs_only_flag){
4002 s->picture_structure= PICT_FRAME;
4004 if(get_bits1(&s->gb)) { //field_pic_flag
4005 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4007 s->picture_structure= PICT_FRAME;
4008 h->mb_aff_frame = h->sps.mb_aff;
4012 if(h0->current_slice == 0){
4013 /* See if we have a decoded first field looking for a pair... */
4014 if (s0->first_field) {
4015 assert(s0->current_picture_ptr);
4016 assert(s0->current_picture_ptr->data[0]);
4017 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4019 /* figure out if we have a complementary field pair */
4020 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4022 * Previous field is unmatched. Don't display it, but let it
4023 * remain for reference if marked as such.
4025 s0->current_picture_ptr = NULL;
4026 s0->first_field = FIELD_PICTURE;
4029 if (h->nal_ref_idc &&
4030 s0->current_picture_ptr->reference &&
4031 s0->current_picture_ptr->frame_num != h->frame_num) {
4033 * This and previous field were reference, but had
4034 * different frame_nums. Consider this field first in
4035 * pair. Throw away previous field except for reference
4038 s0->first_field = 1;
4039 s0->current_picture_ptr = NULL;
4042 /* Second field in complementary pair */
4043 s0->first_field = 0;
4048 /* Frame or first field in a potentially complementary pair */
4049 assert(!s0->current_picture_ptr);
4050 s0->first_field = FIELD_PICTURE;
4053 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4054 s0->first_field = 0;
4061 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4063 assert(s->mb_num == s->mb_width * s->mb_height);
4064 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4065 first_mb_in_slice >= s->mb_num){
4066 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4069 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4070 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4071 if (s->picture_structure == PICT_BOTTOM_FIELD)
4072 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4073 assert(s->mb_y < s->mb_height);
4075 if(s->picture_structure==PICT_FRAME){
4076 h->curr_pic_num= h->frame_num;
4077 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4079 h->curr_pic_num= 2*h->frame_num + 1;
4080 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4083 if(h->nal_unit_type == NAL_IDR_SLICE){
4084 get_ue_golomb(&s->gb); /* idr_pic_id */
4087 if(h->sps.poc_type==0){
4088 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4090 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4091 h->delta_poc_bottom= get_se_golomb(&s->gb);
4095 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4096 h->delta_poc[0]= get_se_golomb(&s->gb);
4098 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4099 h->delta_poc[1]= get_se_golomb(&s->gb);
4104 if(h->pps.redundant_pic_cnt_present){
4105 h->redundant_pic_count= get_ue_golomb(&s->gb);
4108 //set defaults, might be overridden a few lines later
4109 h->ref_count[0]= h->pps.ref_count[0];
4110 h->ref_count[1]= h->pps.ref_count[1];
4112 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4113 if(h->slice_type == B_TYPE){
4114 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4115 if(FIELD_OR_MBAFF_PICTURE && h->direct_spatial_mv_pred)
4116 av_log(h->s.avctx, AV_LOG_ERROR, "Interlaced pictures + spatial direct mode is not implemented\n");
4118 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4120 if(num_ref_idx_active_override_flag){
4121 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4122 if(h->slice_type==B_TYPE)
4123 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4125 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4126 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4127 h->ref_count[0]= h->ref_count[1]= 1;
4131 if(h->slice_type == B_TYPE)
4138 if(!default_ref_list_done){
4139 fill_default_ref_list(h);
4142 if(decode_ref_pic_list_reordering(h) < 0)
4145 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4146 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4147 pred_weight_table(h);
4148 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4149 implicit_weight_table(h);
4154 decode_ref_pic_marking(h0, &s->gb);
4157 fill_mbaff_ref_list(h);
4159 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4160 tmp = get_ue_golomb(&s->gb);
4162 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4165 h->cabac_init_idc= tmp;
4168 h->last_qscale_diff = 0;
4169 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4171 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4175 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4176 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4177 //FIXME qscale / qp ... stuff
4178 if(h->slice_type == SP_TYPE){
4179 get_bits1(&s->gb); /* sp_for_switch_flag */
4181 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4182 get_se_golomb(&s->gb); /* slice_qs_delta */
4185 h->deblocking_filter = 1;
4186 h->slice_alpha_c0_offset = 0;
4187 h->slice_beta_offset = 0;
4188 if( h->pps.deblocking_filter_parameters_present ) {
4189 tmp= get_ue_golomb(&s->gb);
4191 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4194 h->deblocking_filter= tmp;
4195 if(h->deblocking_filter < 2)
4196 h->deblocking_filter^= 1; // 1<->0
4198 if( h->deblocking_filter ) {
4199 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4200 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4204 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4205 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4206 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4207 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4208 h->deblocking_filter= 0;
4210 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4211 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4212 /* Cheat slightly for speed:
4213 Dont bother to deblock across slices */
4214 h->deblocking_filter = 2;
4216 h0->max_contexts = 1;
4217 if(!h0->single_decode_warning) {
4218 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4219 h0->single_decode_warning = 1;
4222 return 1; // deblocking switched inside frame
4227 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4228 slice_group_change_cycle= get_bits(&s->gb, ?);
4231 h0->last_slice_type = slice_type;
4232 h->slice_num = ++h0->current_slice;
4234 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4235 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4237 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4238 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4240 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4242 av_get_pict_type_char(h->slice_type),
4243 pps_id, h->frame_num,
4244 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4245 h->ref_count[0], h->ref_count[1],
4247 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4249 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * Reads a CAVLC level_prefix code: the number of leading zero bits before
 * the next 1 bit in the bitstream (a plain unary code, H.264 spec 9.2.2.1).
 * NOTE(review): this listing is elided — the local declarations of buf/log
 * and the final return statement are not visible here.
 */
4259 static inline int get_level_prefix(GetBitContext *gb){
// Open a raw bit-reader view and peek up to 32 bits without consuming them.
4263 OPEN_READER(re, gb);
4264 UPDATE_CACHE(re, gb);
4265 buf=GET_CACHE(re, gb);
// Position of the first set bit: log = count of leading zeros + 1.
4267 log= 32 - av_log2(buf);
4269 print_bin(buf>>(32-log), log);
4270 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix including its terminating 1 bit, then close the reader.
4273 LAST_SKIP_BITS(re, gb, log);
4274 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * every sub-macroblock partition must be 8x8 (and, for direct sub-MBs,
 * direct_8x8_inference_flag must be set).
 * NOTE(review): the surrounding loop over i and the return statements are
 * elided from this listing; only the per-partition test is visible.
 */
4279 static inline int get_dct8x8_allowed(H264Context *h){
// Reject if any 8x8 partition is further split, or is direct without
// direct_8x8_inference_flag.
4282 if(!IS_SUB_8X8(h->sub_mb_type[i])
4283 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4290 * decodes a residual block.
4291 * @param n block index
4292 * @param scantable scantable
4293 * @param max_coeff number of coefficients in the block
4294 * @return <0 if an error occured
// CAVLC residual decoding (H.264 spec 9.2): coeff_token -> trailing-one
// signs -> level codes -> total_zeros -> run_before, writing dequantized
// (or raw, when qmul==NULL path applies) coefficients into block[] in
// scantable order.  NOTE(review): this listing is elided; several closing
// braces, else-branches and error returns are not visible.
4296 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4297 MpegEncContext * const s = &h->s;
4298 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4300 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4302 //FIXME put trailing_onex into the context
// --- coeff_token: joint code for (total_coeff, trailing_ones). Table
// choice depends on the predicted non-zero count of the neighbours.
4304 if(n == CHROMA_DC_BLOCK_INDEX){
4305 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4306 total_coeff= coeff_token>>2;
4308 if(n == LUMA_DC_BLOCK_INDEX){
4309 total_coeff= pred_non_zero_count(h, 0);
4310 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4311 total_coeff= coeff_token>>2;
4313 total_coeff= pred_non_zero_count(h, n);
4314 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4315 total_coeff= coeff_token>>2;
4316 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4320 //FIXME set last_non_zero?
// Corrupt stream: more coefficients signalled than fit in the block.
4324 if(total_coeff > (unsigned)max_coeff) {
4325 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4329 trailing_ones= coeff_token&3;
4330 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4331 assert(total_coeff<=16);
// --- trailing ones: only a sign bit each (0 -> +1, 1 -> -1).
4333 for(i=0; i<trailing_ones; i++){
4334 level[i]= 1 - 2*get_bits1(gb);
// --- first non-trailing-one level (special suffix_length rules).
4338 int level_code, mask;
4339 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4340 int prefix= get_level_prefix(gb);
4342 //first coefficient has suffix_length equal to 0 or 1
4343 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4345 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4347 level_code= (prefix<<suffix_length); //part
4348 }else if(prefix==14){
4350 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4352 level_code= prefix + get_bits(gb, 4); //part
4353 }else if(prefix==15){
4354 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4355 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4357 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// If fewer than 3 trailing ones, magnitudes start at 2 in the code space.
4361 if(trailing_ones < 3) level_code += 2;
// Branchless un-zigzag of the sign: even codes positive, odd negative.
4366 mask= -(level_code&1);
4367 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4370 //remaining coefficients have suffix_length > 0
4371 for(;i<total_coeff;i++) {
4372 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4373 prefix = get_level_prefix(gb);
4375 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4376 }else if(prefix==15){
4377 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4379 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4382 mask= -(level_code&1);
4383 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// Adapt suffix_length upward once the magnitude exceeds its threshold.
4384 if(level_code > suffix_limit[suffix_length])
// --- total_zeros: only coded when the block is not completely full.
4389 if(total_coeff == max_coeff)
4392 if(n == CHROMA_DC_BLOCK_INDEX)
4393 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4395 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- scatter levels into block[] in scan order, interleaving run_before
// codes.  Two copies: raw (no qmul) and dequantizing (with qmul).
4398 coeff_num = zeros_left + total_coeff - 1;
4399 j = scantable[coeff_num];
4401 block[j] = level[0];
4402 for(i=1;i<total_coeff;i++) {
4405 else if(zeros_left < 7){
4406 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4408 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4410 zeros_left -= run_before;
4411 coeff_num -= 1 + run_before;
4412 j= scantable[ coeff_num ];
// Dequantizing variant: (level * qmul[j] + 32) >> 6 rounds to nearest.
4417 block[j] = (level[0] * qmul[j] + 32)>>6;
4418 for(i=1;i<total_coeff;i++) {
4421 else if(zeros_left < 7){
4422 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4424 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4426 zeros_left -= run_before;
4427 coeff_num -= 1 + run_before;
4428 j= scantable[ coeff_num ];
4430 block[j]= (level[i] * qmul[j] + 32)>>6;
// Sanity check: runs must not consume more zeros than were signalled.
4435 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for an MBAFF macroblock pair whose flag
 * is not present in the bitstream (e.g. both MBs skipped): inherit the
 * interlaced-ness of the left neighbour if it is in the same slice,
 * otherwise of the top neighbour, otherwise default (frame coding).
 */
4442 static void predict_field_decoding_flag(H264Context *h){
4443 MpegEncContext * const s = &h->s;
4444 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Chained conditional: left neighbour's mb_type, else top neighbour's.
// NOTE(review): the final ": 0" fallback line is elided from this listing.
4445 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4446 ? s->current_picture.mb_type[mb_xy-1]
4447 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4448 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4450 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4454 * decodes a P_SKIP or B_SKIP macroblock
// Skipped MBs carry no residual and no explicit motion: P_SKIP motion is
// predicted by pred_pskip_motion(), B_SKIP uses spatial/temporal direct
// prediction.  NOTE(review): listing is elided (mb_type init, closing
// braces and the MBAFF condition around line 4464 are not visible).
4456 static void decode_mb_skip(H264Context *h){
4457 MpegEncContext * const s = &h->s;
4458 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// No coded coefficients anywhere in this MB.
4461 memset(h->non_zero_count[mb_xy], 0, 16);
4462 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4465 mb_type|= MB_TYPE_INTERLACED;
4467 if( h->slice_type == B_TYPE )
4469 // just for fill_caches. pred_direct_motion will set the real mb_type
4470 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4472 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4473 pred_direct_motion(h, &mb_type);
4474 mb_type|= MB_TYPE_SKIP;
// P_SKIP path: single 16x16 list-0 partition with predicted MV.
4479 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4481 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4482 pred_pskip_motion(h, &mx, &my);
4483 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4484 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit motion/type/qp for this MB and remember the skip for context.
4487 write_back_motion(h, mb_type);
4488 s->current_picture.mb_type[mb_xy]= mb_type;
4489 s->current_picture.qscale_table[mb_xy]= s->qscale;
4490 h->slice_table[ mb_xy ]= h->slice_num;
4491 h->prev_mb_skipped= 1;
4495 * decodes a macroblock
4496 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Full CAVLC macroblock decode: skip-run handling, mb_type, intra pred
// modes or inter motion, coded_block_pattern, mb_qp_delta and residuals.
// NOTE(review): this listing is heavily elided — many braces, else
// branches, loop headers and error returns are missing between the
// numbered lines; comments below only annotate what is visible.
4498 static int decode_mb_cavlc(H264Context *h){
4499 MpegEncContext * const s = &h->s;
4500 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4501 int partition_count;
4502 unsigned int mb_type, cbp;
4503 int dct8x8_allowed= h->pps.transform_8x8_mode;
4505 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4507 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4508 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run: number of consecutive skipped MBs (P/B slices only).
4510 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4511 if(s->mb_skip_run==-1)
4512 s->mb_skip_run= get_ue_golomb(&s->gb);
4514 if (s->mb_skip_run--) {
// In MBAFF the field flag is read on the top MB of a pair; when it is
// still inside a skip run it must be predicted instead.
4515 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4516 if(s->mb_skip_run==0)
4517 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4519 predict_field_decoding_flag(h);
4526 if( (s->mb_y&1) == 0 )
4527 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4529 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4531 h->prev_mb_skipped= 0;
// --- mb_type: ue(v), remapped through per-slice-type tables; large
// values in P/B slices fall through to the intra range.
4533 mb_type= get_ue_golomb(&s->gb);
4534 if(h->slice_type == B_TYPE){
4536 partition_count= b_mb_type_info[mb_type].partition_count;
4537 mb_type= b_mb_type_info[mb_type].type;
4540 goto decode_intra_mb;
4542 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4544 partition_count= p_mb_type_info[mb_type].partition_count;
4545 mb_type= p_mb_type_info[mb_type].type;
4548 goto decode_intra_mb;
4551 assert(h->slice_type == I_TYPE);
4554 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra MB: cbp and 16x16 pred mode are implied by the type index.
4558 cbp= i_mb_type_info[mb_type].cbp;
4559 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4560 mb_type= i_mb_type_info[mb_type].type;
4564 mb_type |= MB_TYPE_INTERLACED;
4566 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: byte-aligned raw samples, copied straight into h->mb.
4568 if(IS_INTRA_PCM(mb_type)){
4571 // We assume these blocks are very rare so we do not optimize it.
4572 align_get_bits(&s->gb);
4574 // The pixels are stored in the same order as levels in h->mb array.
4575 for(y=0; y<16; y++){
4576 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4577 for(x=0; x<16; x++){
4578 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4579 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4583 const int index= 256 + 4*(y&3) + 32*(y>>2);
4585 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4586 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4590 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4592 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4593 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4597 // In deblocking, the quantizer is 0
4598 s->current_picture.qscale_table[mb_xy]= 0;
4599 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4600 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4601 // All coeffs are present
4602 memset(h->non_zero_count[mb_xy], 16, 16);
4604 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs address references per field: double the counts here,
// halved back at the end of the function.
4609 h->ref_count[0] <<= 1;
4610 h->ref_count[1] <<= 1;
4613 fill_caches(h, mb_type, 0);
// --- intra prediction mode decoding.
4616 if(IS_INTRA(mb_type)){
4618 // init_top_left_availability(h);
4619 if(IS_INTRA4x4(mb_type)){
4622 if(dct8x8_allowed && get_bits1(&s->gb)){
4623 mb_type |= MB_TYPE_8x8DCT;
4627 // fill_intra4x4_pred_table(h);
// Per 4x4 (or 8x8) block: 1 bit "use predicted mode", else 3-bit
// remaining mode adjusted against the prediction.
4628 for(i=0; i<16; i+=di){
4629 int mode= pred_intra_mode(h, i);
4631 if(!get_bits1(&s->gb)){
4632 const int rem_mode= get_bits(&s->gb, 3);
4633 mode = rem_mode + (rem_mode >= mode);
4637 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4639 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4641 write_back_intra_pred_mode(h);
4642 if( check_intra4x4_pred_mode(h) < 0)
4645 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4646 if(h->intra16x16_pred_mode < 0)
4650 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4653 h->chroma_pred_mode= pred_mode;
// --- inter, 8x8 partitions: sub_mb_type per 8x8 block.
4654 }else if(partition_count==4){
4655 int i, j, sub_partition_count[4], list, ref[2][4];
4657 if(h->slice_type == B_TYPE){
4659 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4660 if(h->sub_mb_type[i] >=13){
4661 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4664 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4665 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4667 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4668 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4669 pred_direct_motion(h, &mb_type);
4670 h->ref_cache[0][scan8[4]] =
4671 h->ref_cache[1][scan8[4]] =
4672 h->ref_cache[0][scan8[12]] =
4673 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4676 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4678 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4679 if(h->sub_mb_type[i] >=4){
4680 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4683 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4684 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list / per 8x8 block (te(v), skipped for direct).
4688 for(list=0; list<h->list_count; list++){
4689 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4691 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4692 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4693 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4695 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4707 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition, added to pred_motion().
4709 for(list=0; list<h->list_count; list++){
4711 if(IS_DIRECT(h->sub_mb_type[i])) {
4712 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4715 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4716 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4718 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4719 const int sub_mb_type= h->sub_mb_type[i];
4720 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4721 for(j=0; j<sub_partition_count[i]; j++){
4723 const int index= 4*i + block_width*j;
4724 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4725 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4726 mx += get_se_golomb(&s->gb);
4727 my += get_se_golomb(&s->gb);
4728 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV over the cache cells the sub-partition covers.
4730 if(IS_SUB_8X8(sub_mb_type)){
4732 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4734 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4735 }else if(IS_SUB_8X4(sub_mb_type)){
4736 mv_cache[ 1 ][0]= mx;
4737 mv_cache[ 1 ][1]= my;
4738 }else if(IS_SUB_4X8(sub_mb_type)){
4739 mv_cache[ 8 ][0]= mx;
4740 mv_cache[ 8 ][1]= my;
4742 mv_cache[ 0 ][0]= mx;
4743 mv_cache[ 0 ][1]= my;
4746 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- direct 16x16 MB: all motion inferred.
4752 }else if(IS_DIRECT(mb_type)){
4753 pred_direct_motion(h, &mb_type);
4754 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- inter, 1 or 2 partitions (16x16 / 16x8 / 8x16).
4756 int list, mx, my, i;
4757 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4758 if(IS_16X16(mb_type)){
4759 for(list=0; list<h->list_count; list++){
4761 if(IS_DIR(mb_type, 0, list)){
4762 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4763 if(val >= h->ref_count[list]){
4764 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4768 val= LIST_NOT_USED&0xFF;
4769 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4771 for(list=0; list<h->list_count; list++){
4773 if(IS_DIR(mb_type, 0, list)){
4774 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4775 mx += get_se_golomb(&s->gb);
4776 my += get_se_golomb(&s->gb);
4777 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4779 val= pack16to32(mx,my);
4782 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4785 else if(IS_16X8(mb_type)){
4786 for(list=0; list<h->list_count; list++){
4789 if(IS_DIR(mb_type, i, list)){
4790 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4791 if(val >= h->ref_count[list]){
4792 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4796 val= LIST_NOT_USED&0xFF;
4797 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4800 for(list=0; list<h->list_count; list++){
4803 if(IS_DIR(mb_type, i, list)){
4804 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4805 mx += get_se_golomb(&s->gb);
4806 my += get_se_golomb(&s->gb);
4807 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4809 val= pack16to32(mx,my);
4812 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4816 assert(IS_8X16(mb_type));
4817 for(list=0; list<h->list_count; list++){
4820 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4821 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4822 if(val >= h->ref_count[list]){
4823 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4827 val= LIST_NOT_USED&0xFF;
4828 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4831 for(list=0; list<h->list_count; list++){
4834 if(IS_DIR(mb_type, i, list)){
4835 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4836 mx += get_se_golomb(&s->gb);
4837 my += get_se_golomb(&s->gb);
4838 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4840 val= pack16to32(mx,my);
4843 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4849 if(IS_INTER(mb_type))
4850 write_back_motion(h, mb_type);
// --- coded_block_pattern (explicit except for intra 16x16).
4852 if(!IS_INTRA16x16(mb_type)){
4853 cbp= get_ue_golomb(&s->gb);
4855 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4859 if(IS_INTRA4x4(mb_type))
4860 cbp= golomb_to_intra4x4_cbp[cbp];
4862 cbp= golomb_to_inter_cbp[cbp];
// transform_size_8x8_flag for inter MBs with luma coefficients.
4866 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4867 if(get_bits1(&s->gb))
4868 mb_type |= MB_TYPE_8x8DCT;
4870 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: mb_qp_delta then luma/chroma coefficient blocks.
4872 if(cbp || IS_INTRA16x16(mb_type)){
4873 int i8x8, i4x4, chroma_idx;
4875 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4876 const uint8_t *scan, *scan8x8, *dc_scan;
4878 // fill_non_zero_count_cache(h);
4880 if(IS_INTERLACED(mb_type)){
4881 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4882 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4883 dc_scan= luma_dc_field_scan;
4885 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4886 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4887 dc_scan= luma_dc_zigzag_scan;
4890 dquant= get_se_golomb(&s->gb);
4892 if( dquant > 25 || dquant < -26 ){
4893 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4897 s->qscale += dquant;
4898 if(((unsigned)s->qscale) > 51){
4899 if(s->qscale<0) s->qscale+= 52;
4900 else s->qscale-= 52;
4903 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4904 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra 16x16: separate DC block, then 15-coefficient AC blocks.
4905 if(IS_INTRA16x16(mb_type)){
4906 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4907 return -1; //FIXME continue if partitioned and other return -1 too
4910 assert((cbp&15) == 0 || (cbp&15) == 15);
4913 for(i8x8=0; i8x8<4; i8x8++){
4914 for(i4x4=0; i4x4<4; i4x4++){
4915 const int index= i4x4 + 4*i8x8;
4916 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4922 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-16x16 luma: per-8x8 as signalled by cbp, 4x4 or 8x8 transform.
4925 for(i8x8=0; i8x8<4; i8x8++){
4926 if(cbp & (1<<i8x8)){
4927 if(IS_8x8DCT(mb_type)){
4928 DCTELEM *buf = &h->mb[64*i8x8];
4930 for(i4x4=0; i4x4<4; i4x4++){
4931 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4932 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4935 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4936 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4938 for(i4x4=0; i4x4<4; i4x4++){
4939 const int index= i4x4 + 4*i8x8;
4941 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4947 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4948 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: 2x2 DC blocks (no dequant table), then AC blocks if signalled.
4954 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4955 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4961 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4962 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4963 for(i4x4=0; i4x4<4; i4x4++){
4964 const int index= 16 + 4*chroma_idx + i4x4;
4965 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// No chroma AC coded: clear the chroma non-zero-count cache entries.
4971 uint8_t * const nnz= &h->non_zero_count_cache[0];
4972 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4973 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// cbp==0, not intra16x16: nothing coded at all in this MB.
4976 uint8_t * const nnz= &h->non_zero_count_cache[0];
4977 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4978 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4979 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4981 s->current_picture.qscale_table[mb_xy]= s->qscale;
4982 write_back_non_zero_count(h);
// Undo the MBAFF field ref_count doubling from the top of the function.
4985 h->ref_count[0] >>= 1;
4986 h->ref_count[1] >>= 1;
/**
 * CABAC-decodes mb_field_decoding_flag for the current MB pair.
 * The context (0..2) counts how many of the left/top neighbouring MB
 * pairs, when in the same slice, are field (interlaced) coded.
 */
4992 static int decode_cabac_field_decoding_flag(H264Context *h) {
4993 MpegEncContext * const s = &h->s;
4994 const int mb_x = s->mb_x;
// Address the top MB of the pair (mb_y rounded down to even).
4995 const int mb_y = s->mb_y & ~1;
4996 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4997 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4999 unsigned int ctx = 0;
5001 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5004 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
// States 70..72 are the mb_field_decoding_flag contexts.
5008 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * CABAC-decodes an intra mb_type.
 * @param ctx_base first context index for this slice type's intra tree
 * @param intra_slice nonzero in I slices (neighbour-based first context)
 * @return 0 for I_4x4, 25 for I_PCM, otherwise 1..24 selecting an
 *         I_16x16 variant (cbp_luma, cbp_chroma and pred mode encoded
 *         into the index).
 */
5011 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5012 uint8_t *state= &h->cabac_state[ctx_base];
5016 MpegEncContext * const s = &h->s;
5017 const int mba_xy = h->left_mb_xy[0];
5018 const int mbb_xy = h->top_mb_xy;
// In intra slices the first bin's context depends on whether the
// neighbours are themselves non-I4x4.
5020 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5022 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5024 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5025 return 0; /* I4x4 */
5028 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5029 return 0; /* I4x4 */
// The terminate bin distinguishes I_PCM from I_16x16.
5032 if( get_cabac_terminate( &h->cabac ) )
5033 return 25; /* PCM */
5035 mb_type = 1; /* I16x16 */
5036 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5037 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5038 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5039 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5040 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * CABAC-decodes mb_type for the current macroblock.
 * Dispatches by slice type: I slices go straight to the intra tree,
 * P slices use contexts 14..16, B slices contexts 27.. with a
 * neighbour-derived context for the B_Direct_16x16 bin.
 * NOTE(review): several closing braces and the SI/SP fallback return are
 * elided from this listing.
 */
5044 static int decode_cabac_mb_type( H264Context *h ) {
5045 MpegEncContext * const s = &h->s;
5047 if( h->slice_type == I_TYPE ) {
5048 return decode_cabac_intra_mb_type(h, 3, 1);
5049 } else if( h->slice_type == P_TYPE ) {
// First bin: inter (0) vs intra (1) MB.
5050 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5052 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5053 /* P_L0_D16x16, P_8x8 */
5054 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5056 /* P_L0_D8x16, P_L0_D16x8 */
5057 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// Intra MB inside a P slice: offset 5 past the 5 P types.
5060 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5062 } else if( h->slice_type == B_TYPE ) {
5063 const int mba_xy = h->left_mb_xy[0];
5064 const int mbb_xy = h->top_mb_xy;
// ctx counts non-direct neighbours in the same slice (0..2).
5068 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5070 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5073 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5074 return 0; /* B_Direct_16x16 */
5076 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5077 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four more bins select among the bi-predictive / split 16x8 types.
5080 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5081 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5082 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5083 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5085 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5086 else if( bits == 13 ) {
// Intra MB inside a B slice: offset 23 past the 23 B types.
5087 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5088 } else if( bits == 14 )
5089 return 11; /* B_L1_L0_8x16 */
5090 else if( bits == 15 )
5091 return 22; /* B_8x8 */
// One final bin resolves the remaining 8x16/16x8 combinations.
5093 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5094 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5096 /* TODO SI/SP frames? */
/**
 * CABAC-decodes mb_skip_flag for the MB at (mb_x, mb_y).
 * The context counts left/top neighbours (same slice) that are NOT
 * skipped; MBAFF needs special neighbour addressing across MB pairs.
 * NOTE(review): listing is elided — mba_xy initialisation and the
 * B-slice context offset applied after line 5132 are not visible.
 */
5101 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5102 MpegEncContext * const s = &h->s;
5106 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5107 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// Pick the left neighbour from the matching field of its pair.
5110 && h->slice_table[mba_xy] == h->slice_num
5111 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5112 mba_xy += s->mb_stride;
5114 mbb_xy = mb_xy - s->mb_stride;
5116 && h->slice_table[mbb_xy] == h->slice_num
5117 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5118 mbb_xy -= s->mb_stride;
5120 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// Non-MBAFF: plain left / top addressing (stride doubled for fields).
5122 int mb_xy = mb_x + mb_y*s->mb_stride;
5124 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5127 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5129 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5132 if( h->slice_type == B_TYPE )
5134 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * CABAC-decodes an intra 4x4 prediction mode.
 * First bin: "use the predicted mode"; otherwise 3 fixed-context bins
 * form rem_intra4x4_pred_mode, bumped past the predicted mode.
 * @param pred_mode the mode predicted from the neighbours
 */
5137 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5140 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// Three bins, LSB first, all sharing context 69.
5143 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5144 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5145 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Skip over the predicted mode (it is coded implicitly by the first bin).
5147 if( mode >= pred_mode )
/**
 * CABAC-decodes intra_chroma_pred_mode (0..3) as a truncated unary code.
 * The first bin's context (0..2) counts neighbours in the same slice
 * with a nonzero chroma prediction mode.
 */
5153 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5154 const int mba_xy = h->left_mb_xy[0];
5155 const int mbb_xy = h->top_mb_xy;
5159 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5160 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5163 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5166 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// Remaining bins use the fixed context 64+3.
5169 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5171 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * CABAC-decodes the 4-bit luma coded_block_pattern, one bin per 8x8
 * block.  Each bin's context is derived from the corresponding bits of
 * the left/top neighbours' cbp (or of the bits already decoded for this
 * MB).  NOTE(review): the final "return cbp;" is elided from this listing.
 */
5177 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5178 int cbp_b, cbp_a, ctx, cbp = 0;
// -1 marks "neighbour unavailable": its bit tests then read as set.
5180 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5181 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5183 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5184 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5185 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5186 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5187 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5188 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5189 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5190 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * CABAC-decodes the chroma coded_block_pattern (0: none, 1: DC only,
 * 2: DC+AC) with two bins whose contexts come from the neighbours'
 * chroma cbp values.
 */
5193 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
// Neighbour chroma cbp lives in bits 4..5 of the stored cbp.
5197 cbp_a = (h->left_cbp>>4)&0x03;
5198 cbp_b = (h-> top_cbp>>4)&0x03;
5201 if( cbp_a > 0 ) ctx++;
5202 if( cbp_b > 0 ) ctx += 2;
5203 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin (contexts 4..7) distinguishes DC-only from DC+AC.
5207 if( cbp_a == 2 ) ctx++;
5208 if( cbp_b == 2 ) ctx += 2;
5209 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * CABAC-decodes mb_qp_delta: a unary bin string (contexts 60..) whose
 * count is mapped back to a signed delta (odd -> positive, even ->
 * negative).  NOTE(review): listing is elided — the positive-value
 * return path is not visible here.
 */
5211 static int decode_cabac_mb_dqp( H264Context *h) {
// First bin's context depends on whether the previous MB had dqp != 0.
5215 if( h->last_qscale_diff != 0 )
5218 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5224 if(val > 102) //prevent infinite loop
5231 return -(val + 1)/2;
/**
 * CABAC-decodes a P-slice sub_mb_type (0: 8x8, 1: 8x4, 2: 4x8, 3: 4x4)
 * from up to three bins with contexts 21..23.
 * NOTE(review): the return statements between the bins are elided from
 * this listing.
 */
5233 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5234 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5236 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5238 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * CABAC-decodes a B-slice sub_mb_type (0: B_Direct_8x8 through 12:
 * B_Bi_4x4) using contexts 36..39 in a small binary tree.
 */
5242 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5244 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5245 return 0; /* B_Direct_8x8 */
5246 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5247 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5249 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5250 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5251 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining types: two bins (context 39) added onto the running base.
5254 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5255 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * CABAC-decodes transform_size_8x8_flag; the context (399..401) is
 * offset by how many neighbouring MBs already use the 8x8 transform.
 */
5259 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5260 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decode a reference index for block n of list `list` (unary
 * binarization over contexts 54+).  The initial context is derived
 * from the cached left/top reference indices; interior lines deriving
 * ctx and counting the unary bins are elided in this excerpt.
 */
5263 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5264 int refa = h->ref_cache[list][scan8[n] - 1];
5265 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices a neighbour coded as direct does not raise the ctx */
5269 if( h->slice_type == B_TYPE) {
5270 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5272 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5281 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* cap at 32 so a corrupt stream cannot run the unary loop away */
5287 if(ref >= 32 /*h->ref_list[list]*/){
5288 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5289 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decode one motion-vector-difference component (l: 0 = x, 1 = y) for
 * block n of list `list`.  UEG3 binarization: the first-bin context is
 * chosen from the summed neighbour |mvd|, a bounded unary prefix
 * follows, large values escape to a bypass-coded exp-golomb suffix,
 * and the sign is bypass coded last.  Several interior lines (ctx
 * selection, accumulation) are elided in this excerpt.
 */
5295 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5296 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5297 abs( h->mvd_cache[list][scan8[n] - 8][l] );
/* mvd_x and mvd_y use distinct context sets (bases 40 and 47) */
5298 int ctxbase = (l == 0) ? 40 : 47;
5303 else if( amvd > 32 )
5308 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, at most 9 context-coded bins */
5313 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-golomb escape suffix, bypass coded */
5321 while( get_cabac_bypass( &h->cabac ) ) {
5325 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5330 if( get_cabac_bypass( &h->cabac ) )
/* bypass-coded sign applied to the magnitude */
5334 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Context increment for coded_block_flag of a block of category `cat`
 * (0: luma DC, 1/2: luma AC / 4x4, 3: chroma DC, 4: chroma AC) and
 * index idx.  Built from the non-zero information of the left and top
 * neighbour blocks, offset by 4*cat to address the per-category
 * context set (combining lines are elided in this excerpt).
 */
5337 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* cat 0: the luma-DC flag is cached in bit 8 of the neighbour CBP */
5342 nza = h->left_cbp&0x100;
5343 nzb = h-> top_cbp&0x100;
5344 } else if( cat == 1 || cat == 2 ) {
5345 nza = h->non_zero_count_cache[scan8[idx] - 1];
5346 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5347 } else if( cat == 3 ) {
/* chroma-DC flags are cached in bits 6..7 of the neighbour CBP */
5348 nza = (h->left_cbp>>(6+idx))&0x01;
5349 nzb = (h-> top_cbp>>(6+idx))&0x01;
5352 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5353 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5362 return ctx + 4 * cat;
/* Context increments for last_significant_coeff_flag of an 8x8 block,
 * indexed by coefficient scan position (used by decode_cabac_residual
 * via DECODE_SIGNIFICANCE for the 64-coefficient case). */
5365 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5366 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5367 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5368 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5369 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decode one CABAC-coded residual block into `block`.
 * @param cat      block category, see the list further down
 * @param n        block index within the MB (meaning depends on cat)
 * @param scantable coefficient scan (zigzag or field)
 * @param qmul     dequant table, or NULL for DC blocks which are
 *                 dequantized later
 * @param max_coeff number of coefficients (4/15/16/64)
 * Updates non_zero_count_cache and cbp_table as a side effect.
 * Structure: coded_block_flag, then the significance map
 * (significant/last flags), then levels in reverse scan order.
 * NOTE(review): several original lines (declarations, braces, #else
 * branches) are elided in this excerpt.
 */
5372 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5373 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context-set bases, indexed [field MB][cat] */
5374 static const int significant_coeff_flag_offset[2][6] = {
5375 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5376 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5378 static const int last_coeff_flag_offset[2][6] = {
5379 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5380 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5382 static const int coeff_abs_level_m1_offset[6] = {
5383 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position significance-context increments for 8x8 blocks,
 * [0] = frame coding, [1] = field coding */
5385 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5386 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5387 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5388 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5389 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5390 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5391 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5392 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5393 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5399 int coeff_count = 0;
5402 int abslevelgt1 = 0;
5404 uint8_t *significant_coeff_ctx_base;
5405 uint8_t *last_coeff_ctx_base;
5406 uint8_t *abs_level_m1_ctx_base;
/* work on a local copy of the CABAC state so the compiler can keep it
 * in registers through the hot loops; written back on every exit */
5409 #define CABAC_ON_STACK
5411 #ifdef CABAC_ON_STACK
5414 cc.range = h->cabac.range;
5415 cc.low = h->cabac.low;
5416 cc.bytestream= h->cabac.bytestream;
5418 #define CC &h->cabac
5422 /* cat: 0-> DC 16x16 n = 0
5423 * 1-> AC 16x16 n = luma4x4idx
5424 * 2-> Luma4x4 n = luma4x4idx
5425 * 3-> DC Chroma n = iCbCr
5426 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5427 * 5-> Luma8x8 n = 4 * luma8x8idx
5430 /* read coded block flag */
5432 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients in this block: clear the nnz cache and bail out */
5433 if( cat == 1 || cat == 2 )
5434 h->non_zero_count_cache[scan8[n]] = 0;
5436 h->non_zero_count_cache[scan8[16+n]] = 0;
5437 #ifdef CABAC_ON_STACK
5438 h->cabac.range = cc.range ;
5439 h->cabac.low = cc.low ;
5440 h->cabac.bytestream= cc.bytestream;
5446 significant_coeff_ctx_base = h->cabac_state
5447 + significant_coeff_flag_offset[MB_FIELD][cat];
5448 last_coeff_ctx_base = h->cabac_state
5449 + last_coeff_flag_offset[MB_FIELD][cat];
5450 abs_level_m1_ctx_base = h->cabac_state
5451 + coeff_abs_level_m1_offset[cat];
/* significance map: per position a significant_coeff_flag and, when
 * set, a last_significant_coeff_flag; records positions in index[].
 * (8x8 blocks below use position-dependent context tables and an
 * optional x86 asm fast path.) */
5454 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5455 for(last= 0; last < coefs; last++) { \
5456 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5457 if( get_cabac( CC, sig_ctx )) { \
5458 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5459 index[coeff_count++] = last; \
5460 if( get_cabac( CC, last_ctx ) ) { \
5466 if( last == max_coeff -1 ) {\
5467 index[coeff_count++] = last;\
5469 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5470 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5471 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5473 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5475 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5477 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5480 assert(coeff_count > 0);
/* record "this block has coefficients", per category */
5483 h->cbp_table[mb_xy] |= 0x100;
5484 else if( cat == 1 || cat == 2 )
5485 h->non_zero_count_cache[scan8[n]] = coeff_count;
5487 h->cbp_table[mb_xy] |= 0x40 << n;
5489 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5492 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* decode levels in reverse scan order: coeff_abs_level_minus1 with
 * contexts tracking how many trailing ones / >1 levels were seen,
 * escaping to bypass exp-golomb for large magnitudes */
5495 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5496 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5497 int j= scantable[index[coeff_count]];
5499 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1; qmul == NULL marks DC blocks dequantized later */
5501 block[j] = get_cabac_bypass_sign( CC, -1);
5503 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
5509 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5510 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5514 if( coeff_abs >= 15 ) {
/* exp-golomb escape suffix, bypass coded */
5516 while( get_cabac_bypass( CC ) ) {
5522 coeff_abs += coeff_abs + get_cabac_bypass( CC );
/* bypass-coded sign, then (for non-DC) dequantize in place */
5528 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5529 else block[j] = coeff_abs;
5531 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5532 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* flush the local CABAC state back into the context */
5538 #ifdef CABAC_ON_STACK
5539 h->cabac.range = cc.range ;
5540 h->cabac.low = cc.low ;
5541 h->cabac.bytestream= cc.bytestream;
/**
 * Compute h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * The default (frame) neighbours are directly above and to the left;
 * the MBAFF branch re-derives them from the MB-pair indices because a
 * field-coded pair neighbours a frame-coded pair differently.
 * (Several conditional lines are elided in this excerpt.)
 */
5546 static inline void compute_mb_neighbors(H264Context *h)
5548 MpegEncContext * const s = &h->s;
5549 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5550 h->top_mb_xy = mb_xy - s->mb_stride;
5551 h->left_mb_xy[0] = mb_xy - 1;
5553 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5554 const int top_pair_xy = pair_xy - s->mb_stride;
5555 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5556 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5557 const int curr_mb_frame_flag = !MB_FIELD;
5558 const int bottom = (s->mb_y & 1);
5560 ? !curr_mb_frame_flag // bottom macroblock
5561 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5563 h->top_mb_xy -= s->mb_stride;
5565 if (left_mb_frame_flag != curr_mb_frame_flag) {
5566 h->left_mb_xy[0] = pair_xy - 1;
5568 } else if (FIELD_PICTURE) {
5569 h->top_mb_xy -= s->mb_stride;
5575 * decodes a macroblock
5576 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decode one macroblock from the CABAC bitstream: skip flag, MBAFF
 * field flag, mb_type, prediction info (intra modes or refs+mvds),
 * coded_block_pattern, qp delta and the residual blocks.
 * @return 0 on success, -1 on a decode error.
 * NOTE(review): many original lines (braces, else branches, local
 * declarations) are elided in this excerpt; comments annotate the
 * visible control flow only.
 */
5578 static int decode_mb_cabac(H264Context *h) {
5579 MpegEncContext * const s = &h->s;
5580 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5581 int mb_type, partition_count, cbp = 0;
5582 int dct8x8_allowed= h->pps.transform_8x8_mode;
5584 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5586 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* skip flags only exist in P/B (inter-capable) slices */
5587 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5589 /* a skipped mb needs the aff flag from the following mb */
5590 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5591 predict_field_decoding_flag(h);
5592 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5593 skip = h->next_mb_skipped;
5595 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5596 /* read skip flags */
5598 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5599 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5600 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5601 if(h->next_mb_skipped)
5602 predict_field_decoding_flag(h);
5604 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5609 h->cbp_table[mb_xy] = 0;
5610 h->chroma_pred_mode_table[mb_xy] = 0;
5611 h->last_qscale_diff = 0;
5618 if( (s->mb_y&1) == 0 )
5620 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5622 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5624 h->prev_mb_skipped = 0;
/* decode mb_type and map it through the per-slice-type info tables */
5626 compute_mb_neighbors(h);
5627 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5628 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5632 if( h->slice_type == B_TYPE ) {
5634 partition_count= b_mb_type_info[mb_type].partition_count;
5635 mb_type= b_mb_type_info[mb_type].type;
5638 goto decode_intra_mb;
5640 } else if( h->slice_type == P_TYPE ) {
5642 partition_count= p_mb_type_info[mb_type].partition_count;
5643 mb_type= p_mb_type_info[mb_type].type;
5646 goto decode_intra_mb;
5649 assert(h->slice_type == I_TYPE);
5651 partition_count = 0;
5652 cbp= i_mb_type_info[mb_type].cbp;
5653 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5654 mb_type= i_mb_type_info[mb_type].type;
5657 mb_type |= MB_TYPE_INTERLACED;
5659 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: copy the raw samples and restart the CABAC decoder after */
5661 if(IS_INTRA_PCM(mb_type)) {
5665 // We assume these blocks are very rare so we do not optimize it.
5666 // FIXME The two following lines get the bitstream position in the cabac
5667 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5668 ptr= h->cabac.bytestream;
5669 if(h->cabac.low&0x1) ptr--;
5671 if(h->cabac.low&0x1FF) ptr--;
5674 // The pixels are stored in the same order as levels in h->mb array.
5675 for(y=0; y<16; y++){
5676 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5677 for(x=0; x<16; x++){
5678 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5679 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5683 const int index= 256 + 4*(y&3) + 32*(y>>2);
5685 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5686 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5690 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5692 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5693 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* re-initialize the arithmetic decoder after the raw bytes */
5697 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5699 // All blocks are present
5700 h->cbp_table[mb_xy] = 0x1ef;
5701 h->chroma_pred_mode_table[mb_xy] = 0;
5702 // In deblocking, the quantizer is 0
5703 s->current_picture.qscale_table[mb_xy]= 0;
5704 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5705 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5706 // All coeffs are present
5707 memset(h->non_zero_count[mb_xy], 16, 16);
5708 s->current_picture.mb_type[mb_xy]= mb_type;
/* ref_count doubling presumably matches the MBAFF halving at the end
 * of this function — verify against the elided condition */
5713 h->ref_count[0] <<= 1;
5714 h->ref_count[1] <<= 1;
5717 fill_caches(h, mb_type, 0);
/* intra: decode the 4x4/16x16 luma and chroma prediction modes */
5719 if( IS_INTRA( mb_type ) ) {
5721 if( IS_INTRA4x4( mb_type ) ) {
5722 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5723 mb_type |= MB_TYPE_8x8DCT;
5724 for( i = 0; i < 16; i+=4 ) {
5725 int pred = pred_intra_mode( h, i );
5726 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5727 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5730 for( i = 0; i < 16; i++ ) {
5731 int pred = pred_intra_mode( h, i );
5732 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5734 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5737 write_back_intra_pred_mode(h);
5738 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5740 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5741 if( h->intra16x16_pred_mode < 0 ) return -1;
5743 h->chroma_pred_mode_table[mb_xy] =
5744 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5746 pred_mode= check_intra_pred_mode( h, pred_mode );
5747 if( pred_mode < 0 ) return -1;
5748 h->chroma_pred_mode= pred_mode;
/* 8x8 partitions: sub_mb_type, then refs, then mvds per sub-block */
5749 } else if( partition_count == 4 ) {
5750 int i, j, sub_partition_count[4], list, ref[2][4];
5752 if( h->slice_type == B_TYPE ) {
5753 for( i = 0; i < 4; i++ ) {
5754 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5755 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5756 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5758 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5759 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5760 pred_direct_motion(h, &mb_type);
5761 h->ref_cache[0][scan8[4]] =
5762 h->ref_cache[1][scan8[4]] =
5763 h->ref_cache[0][scan8[12]] =
5764 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5765 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5766 for( i = 0; i < 4; i++ )
5767 if( IS_DIRECT(h->sub_mb_type[i]) )
5768 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5772 for( i = 0; i < 4; i++ ) {
5773 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5774 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5775 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices, one per 8x8 sub-block and list */
5779 for( list = 0; list < h->list_count; list++ ) {
5780 for( i = 0; i < 4; i++ ) {
5781 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5782 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5783 if( h->ref_count[list] > 1 )
5784 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5790 h->ref_cache[list][ scan8[4*i]+1 ]=
5791 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5796 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences per sub-partition */
5798 for(list=0; list<h->list_count; list++){
5800 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5801 if(IS_DIRECT(h->sub_mb_type[i])){
5802 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5806 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5807 const int sub_mb_type= h->sub_mb_type[i];
5808 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5809 for(j=0; j<sub_partition_count[i]; j++){
5812 const int index= 4*i + block_width*j;
5813 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5814 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5815 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5817 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5818 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5819 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate mv/mvd into all cache cells the sub-partition covers */
5821 if(IS_SUB_8X8(sub_mb_type)){
5823 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5825 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5828 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5830 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5831 }else if(IS_SUB_8X4(sub_mb_type)){
5832 mv_cache[ 1 ][0]= mx;
5833 mv_cache[ 1 ][1]= my;
5835 mvd_cache[ 1 ][0]= mx - mpx;
5836 mvd_cache[ 1 ][1]= my - mpy;
5837 }else if(IS_SUB_4X8(sub_mb_type)){
5838 mv_cache[ 8 ][0]= mx;
5839 mv_cache[ 8 ][1]= my;
5841 mvd_cache[ 8 ][0]= mx - mpx;
5842 mvd_cache[ 8 ][1]= my - mpy;
5844 mv_cache[ 0 ][0]= mx;
5845 mv_cache[ 0 ][1]= my;
5847 mvd_cache[ 0 ][0]= mx - mpx;
5848 mvd_cache[ 0 ][1]= my - mpy;
5851 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5852 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5853 p[0] = p[1] = p[8] = p[9] = 0;
5854 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* B_Direct_16x16: all motion inferred, mvds are zero */
5858 } else if( IS_DIRECT(mb_type) ) {
5859 pred_direct_motion(h, &mb_type);
5860 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5861 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5862 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5864 int list, mx, my, i, mpx, mpy;
/* whole-MB partitions: 16x16, 16x8 or 8x16 — refs then mvds */
5865 if(IS_16X16(mb_type)){
5866 for(list=0; list<h->list_count; list++){
5867 if(IS_DIR(mb_type, 0, list)){
5868 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5869 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5871 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5873 for(list=0; list<h->list_count; list++){
5874 if(IS_DIR(mb_type, 0, list)){
5875 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5877 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5878 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5879 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5881 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5882 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5884 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5887 else if(IS_16X8(mb_type)){
5888 for(list=0; list<h->list_count; list++){
5890 if(IS_DIR(mb_type, i, list)){
5891 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5892 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5894 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5897 for(list=0; list<h->list_count; list++){
5899 if(IS_DIR(mb_type, i, list)){
5900 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5901 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5902 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5903 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5905 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5906 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5908 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5909 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5914 assert(IS_8X16(mb_type));
5915 for(list=0; list<h->list_count; list++){
5917 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5918 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5919 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5921 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5924 for(list=0; list<h->list_count; list++){
5926 if(IS_DIR(mb_type, i, list)){
5927 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5928 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5929 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5931 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5932 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5933 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5935 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5936 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5943 if( IS_INTER( mb_type ) ) {
5944 h->chroma_pred_mode_table[mb_xy] = 0;
5945 write_back_motion( h, mb_type );
/* coded_block_pattern: explicit for everything but Intra16x16 */
5948 if( !IS_INTRA16x16( mb_type ) ) {
5949 cbp = decode_cabac_mb_cbp_luma( h );
5950 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5953 h->cbp_table[mb_xy] = h->cbp = cbp;
5955 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5956 if( decode_cabac_mb_transform_size( h ) )
5957 mb_type |= MB_TYPE_8x8DCT;
5959 s->current_picture.mb_type[mb_xy]= mb_type;
/* residuals: pick scan order, decode qp delta, then each block */
5961 if( cbp || IS_INTRA16x16( mb_type ) ) {
5962 const uint8_t *scan, *scan8x8, *dc_scan;
5963 const uint32_t *qmul;
5966 if(IS_INTERLACED(mb_type)){
5967 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5968 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5969 dc_scan= luma_dc_field_scan;
5971 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5972 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5973 dc_scan= luma_dc_zigzag_scan;
5976 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5977 if( dqp == INT_MIN ){
5978 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* qp wraps modulo 52 per the spec */
5982 if(((unsigned)s->qscale) > 51){
5983 if(s->qscale<0) s->qscale+= 52;
5984 else s->qscale-= 52;
5986 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5987 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5989 if( IS_INTRA16x16( mb_type ) ) {
5991 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5992 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5995 qmul = h->dequant4_coeff[0][s->qscale];
5996 for( i = 0; i < 16; i++ ) {
5997 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5998 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6001 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* luma 4x4 / 8x8 residuals, gated by the low 4 cbp bits */
6005 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6006 if( cbp & (1<<i8x8) ) {
6007 if( IS_8x8DCT(mb_type) ) {
6008 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6009 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6011 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6012 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6013 const int index = 4*i8x8 + i4x4;
6014 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6016 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6017 //STOP_TIMER("decode_residual")
6021 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6022 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC then AC blocks, gated by the high cbp bits */
6029 for( c = 0; c < 2; c++ ) {
6030 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6031 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6037 for( c = 0; c < 2; c++ ) {
6038 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6039 for( i = 0; i < 4; i++ ) {
6040 const int index = 16 + 4 * c + i;
6041 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6042 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6046 uint8_t * const nnz= &h->non_zero_count_cache[0];
6047 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6048 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual data at all: clear the whole nnz cache */
6051 uint8_t * const nnz= &h->non_zero_count_cache[0];
6052 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6053 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6054 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6055 h->last_qscale_diff = 0;
6058 s->current_picture.qscale_table[mb_xy]= s->qscale;
6059 write_back_non_zero_count(h);
/* undo the ref_count doubling done above (MBAFF path) */
6062 h->ref_count[0] >>= 1;
6063 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge.  bS < 4 edges go through the DSP
 * normal filter with per-4-line tc0 clipping; bS == 4 (intra MB edge,
 * so all four bS values agree) uses the in-place strong filter below.
 * alpha/beta thresholds come from qp plus the slice offsets.
 */
6070 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6072 const int index_a = qp + h->slice_alpha_c0_offset;
6073 const int alpha = (alpha_table+52)[index_a];
6074 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 marks "no filtering" for the DSP routine */
6079 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6080 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6082 /* 16px edge length, because bS=4 is triggered by being at
6083 * the edge of an intra MB, so all 4 bS are the same */
6084 for( d = 0; d < 16; d++ ) {
6085 const int p0 = pix[-1];
6086 const int p1 = pix[-2];
6087 const int p2 = pix[-3];
6089 const int q0 = pix[0];
6090 const int q1 = pix[1];
6091 const int q2 = pix[2];
6093 if( FFABS( p0 - q0 ) < alpha &&
6094 FFABS( p1 - p0 ) < beta &&
6095 FFABS( q1 - q0 ) < beta ) {
/* strong (3-tap-deep) filtering only for small p0/q0 steps */
6097 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6098 if( FFABS( p2 - p0 ) < beta)
6100 const int p3 = pix[-4];
6102 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6103 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6104 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6107 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6109 if( FFABS( q2 - q0 ) < beta)
6111 const int q3 = pix[3];
6113 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6114 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6115 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6118 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6122 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6123 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6125 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblock one vertical chroma edge: bS < 4 uses the normal chroma DSP
 * filter (note the tc0+1 convention expected by the chroma routine),
 * bS == 4 uses the intra (strong) chroma filter.
 */
6131 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6133 const int index_a = qp + h->slice_alpha_c0_offset;
6134 const int alpha = (alpha_table+52)[index_a];
6135 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6140 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6141 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6143 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock a vertical luma edge of an MBAFF macroblock, one pixel row
 * at a time: with mixed field/frame neighbours each row can have its
 * own bS and qp, so the shared DSP edge filters cannot be used.
 * qp[2] holds the per-neighbour quantizers; bS has 8 entries.
 */
6147 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6149 for( i = 0; i < 16; i++, pix += stride) {
6155 int bS_index = (i >> 1);
6158 bS_index |= (i & 1);
6161 if( bS[bS_index] == 0 ) {
/* row -> qp selection differs between field and frame coding */
6165 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6166 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6167 alpha = (alpha_table+52)[index_a];
6168 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (tc0-clipped) filtering for bS < 4 */
6170 if( bS[bS_index] < 4 ) {
6171 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6172 const int p0 = pix[-1];
6173 const int p1 = pix[-2];
6174 const int p2 = pix[-3];
6175 const int q0 = pix[0];
6176 const int q1 = pix[1];
6177 const int q2 = pix[2];
6179 if( FFABS( p0 - q0 ) < alpha &&
6180 FFABS( p1 - p0 ) < beta &&
6181 FFABS( q1 - q0 ) < beta ) {
6185 if( FFABS( p2 - p0 ) < beta ) {
6186 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6189 if( FFABS( q2 - q0 ) < beta ) {
6190 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6194 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6195 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6196 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6197 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra-edge) filtering, same maths as
 * filter_mb_edgev but applied per row */
6200 const int p0 = pix[-1];
6201 const int p1 = pix[-2];
6202 const int p2 = pix[-3];
6204 const int q0 = pix[0];
6205 const int q1 = pix[1];
6206 const int q2 = pix[2];
6208 if( FFABS( p0 - q0 ) < alpha &&
6209 FFABS( p1 - p0 ) < beta &&
6210 FFABS( q1 - q0 ) < beta ) {
6212 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6213 if( FFABS( p2 - p0 ) < beta)
6215 const int p3 = pix[-4];
6217 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6218 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6219 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6222 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6224 if( FFABS( q2 - q0 ) < beta)
6226 const int q3 = pix[3];
6228 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6229 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6230 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6233 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6237 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6238 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6240 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock a vertical chroma edge of an MBAFF macroblock, one pixel
 * row at a time (8 chroma rows), mirroring filter_mb_mbaff_edgev:
 * per-row bS and qp because the neighbours may mix field and frame
 * coding.  Chroma only ever touches p0/q0.
 */
6245 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6247 for( i = 0; i < 8; i++, pix += stride) {
6255 if( bS[bS_index] == 0 ) {
6259 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6260 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6261 alpha = (alpha_table+52)[index_a];
6262 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filtering; chroma uses tc0 + 1 as its clip */
6264 if( bS[bS_index] < 4 ) {
6265 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6266 const int p0 = pix[-1];
6267 const int p1 = pix[-2];
6268 const int q0 = pix[0];
6269 const int q1 = pix[1];
6271 if( FFABS( p0 - q0 ) < alpha &&
6272 FFABS( p1 - p0 ) < beta &&
6273 FFABS( q1 - q0 ) < beta ) {
6274 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6276 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6277 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6278 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filtering */
6281 const int p0 = pix[-1];
6282 const int p1 = pix[-2];
6283 const int q0 = pix[0];
6284 const int q1 = pix[1];
6286 if( FFABS( p0 - q0 ) < alpha &&
6287 FFABS( p1 - p0 ) < beta &&
6288 FFABS( q1 - q0 ) < beta ) {
6290 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6291 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6292 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock one horizontal luma edge — the vertical-direction analogue
 * of filter_mb_edgev: pixel offsets are in units of `stride`
 * (pix_next) instead of 1.  bS < 4 delegates to the DSP filter;
 * bS == 4 runs the strong in-place filter.
 */
6298 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6300 const int index_a = qp + h->slice_alpha_c0_offset;
6301 const int alpha = (alpha_table+52)[index_a];
6302 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6303 const int pix_next = stride;
/* tc[i] = -1 marks "no filtering" for the DSP routine */
6308 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6309 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6311 /* 16px edge length, see filter_mb_edgev */
6312 for( d = 0; d < 16; d++ ) {
6313 const int p0 = pix[-1*pix_next];
6314 const int p1 = pix[-2*pix_next];
6315 const int p2 = pix[-3*pix_next];
6316 const int q0 = pix[0];
6317 const int q1 = pix[1*pix_next];
6318 const int q2 = pix[2*pix_next];
6320 if( FFABS( p0 - q0 ) < alpha &&
6321 FFABS( p1 - p0 ) < beta &&
6322 FFABS( q1 - q0 ) < beta ) {
6324 const int p3 = pix[-4*pix_next];
6325 const int q3 = pix[ 3*pix_next];
/* strong (3-pixel-deep) filtering only for small p0/q0 steps */
6327 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6328 if( FFABS( p2 - p0 ) < beta) {
6330 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6331 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6332 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6335 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6337 if( FFABS( q2 - q0 ) < beta) {
6339 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6340 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6341 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6344 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6348 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6349 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6351 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblock one horizontal chroma edge — the vertical-direction
 * analogue of filter_mb_edgecv: bS < 4 uses the normal chroma DSP
 * filter (tc0+1 convention), bS == 4 the intra (strong) variant.
 */
6358 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6360 const int index_a = qp + h->slice_alpha_c0_offset;
6361 const int alpha = (alpha_table+52)[index_a];
6362 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6367 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6368 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6370 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock.
 * Handles only the common progressive, single-chroma-qp case; anything
 * unusual (picture-edge MBs, missing DSP strength function, differing
 * chroma QP tables, slice-boundary filtering with deblocking_filter==2)
 * falls back to the general filter_mb().
 * Edge QPs are the averages of the two adjacent MB QPs, per the spec.
 * NOTE(review): this listing elides interior lines (closing braces,
 * some declarations such as mb_xy/mb_type, and the tail of the function). */
6374 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6375 MpegEncContext * const s = &h->s;
/* in a bottom-field picture the first MB row is row 1, not row 0 */
6376 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6378 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6380 mb_xy = mb_x + mb_y*s->mb_stride;
/* any condition the fast path cannot handle -> delegate to filter_mb() */
6382 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6383 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6384 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6385 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6388 assert(!FRAME_MBAFF);
6390 mb_type = s->current_picture.mb_type[mb_xy];
/* current, left and top MB quantizers (luma and chroma) */
6391 qp = s->current_picture.qscale_table[mb_xy];
6392 qp0 = s->current_picture.qscale_table[mb_xy-1];
6393 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6394 qpc = get_chroma_qp( h, 0, qp );
6395 qpc0 = get_chroma_qp( h, 0, qp0 );
6396 qpc1 = get_chroma_qp( h, 0, qp1 );
/* edge QP = rounded average of the two neighbouring MB QPs */
6397 qp0 = (qp + qp0 + 1) >> 1;
6398 qp1 = (qp + qp1 + 1) >> 1;
6399 qpc0 = (qpc + qpc0 + 1) >> 1;
6400 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP, alpha/beta are zero and the filter is a no-op: skip MB */
6401 qp_thresh = 15 - h->slice_alpha_c0_offset;
6402 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6403 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs always filter with fixed strengths: 4 on MB edges (3 for the
 * top edge of field pictures), 3 on internal edges */
6406 if( IS_INTRA(mb_type) ) {
6407 int16_t bS4[4] = {4,4,4,4};
6408 int16_t bS3[4] = {3,3,3,3};
6409 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every other internal edge exists */
6410 if( IS_8x8DCT(mb_type) ) {
6411 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6412 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6413 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6414 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6416 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6417 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6418 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6419 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6420 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6421 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6422 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6423 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: only MB edges and the middle edge (2px granularity) */
6425 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6426 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6427 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6428 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6429 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6430 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6431 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6432 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MBs: compute bS per 4px segment with the DSP helper, 8-byte
 * aligned so it can be read as one uint64_t per edge */
6435 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6436 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6438 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
/* all 8x8 blocks coded -> every existing internal edge gets bS=2 */
6440 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge: how the partition shape limits which edges can have
 * motion-based bS (see h264_loop_filter_strength) */
6442 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6443 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6444 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6445 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6447 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6448 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6449 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6450 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS=4 on the shared MB edge (3 in field pics) */
6452 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6453 bSv[0][0] = 0x0004000400040004ULL;
6454 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6455 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): run the edge filter only when some bS is non-zero;
 * edge 0 (the MB boundary) uses the averaged qp0/qp1, internal edges qp */
6457 #define FILTER(hv,dir,edge)\
6458 if(bSv[dir][edge]) {\
6459 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6461 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6462 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6468 } else if( IS_8x8DCT(mb_type) ) {
/* General (slow-path) deblocking of one macroblock: computes the boundary
 * strength bS for every 4-pixel edge segment (intra -> 3/4, coded
 * coefficients -> 2, mv/ref mismatch -> 1, else 0) and runs the edge
 * filters on luma and both chroma planes.  Handles MBAFF, field pictures
 * and slice-boundary rules that filter_mb_fast() cannot.
 * NOTE(review): interior lines of this listing are elided (several
 * declarations, braces and else-branches are not visible). */
6487 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6488 MpegEncContext * const s = &h->s;
6489 const int mb_xy= mb_x + mb_y*s->mb_stride;
6490 const int mb_type = s->current_picture.mb_type[mb_xy];
/* vertical mv difference threshold: 2 (in field units) for interlaced MBs */
6491 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6492 int first_vertical_edge_done = 0;
6494 /* FIXME: A given frame may occupy more than one position in
6495 * the reference list. So ref2frm should be populated with
6496 * frame numbers, not indices. */
/* maps ref_cache values (-2 offset for PART_NOT_AVAILABLE/-1) to frames */
6497 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6498 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6500 //for sufficiently low qp, filtering wouldn't do anything
6501 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6503 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6504 int qp = s->current_picture.qscale_table[mb_xy];
6506 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6507 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: left edge against a pair of opposite-parity MBs */
6513 // left mb is in picture
6514 && h->slice_table[mb_xy-1] != 255
6515 // and current and left pair do not have the same interlaced type
6516 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6517 // and left mb is in the same slice if deblocking_filter == 2
6518 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6519 /* First vertical edge is different in MBAFF frames
6520 * There are 8 different bS to compute and 2 different Qp
6522 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6523 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6528 int mb_qp, mbn0_qp, mbn1_qp;
6530 first_vertical_edge_done = 1;
6532 if( IS_INTRA(mb_type) )
6533 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6535 for( i = 0; i < 8; i++ ) {
/* field MB: 4 segments per left neighbour; frame MB: alternate rows */
6536 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6538 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6540 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6541 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6542 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two QP sets, one per left neighbour of the MB pair (luma/cb/cr) */
6549 mb_qp = s->current_picture.qscale_table[mb_xy];
6550 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6551 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6552 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6553 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6554 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6555 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6556 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6557 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6558 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6559 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6560 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6561 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6564 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6565 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6566 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6567 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6568 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6570 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6571 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbour across the MB boundary in this direction */
6574 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6575 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table==255 marks "outside picture": skip the MB-boundary edge */
6576 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6578 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6579 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6580 // how often to recheck mv-based bS when iterating between edges
6581 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6582 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6583 // how often to recheck mv-based bS when iterating along each edge
6584 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6586 if (first_vertical_edge_done) {
6588 first_vertical_edge_done = 0;
/* deblocking_filter==2: do not filter across slice boundaries */
6591 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6594 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6595 && !IS_INTERLACED(mb_type)
6596 && IS_INTERLACED(mbm_type)
6598 // This is a special case in the norm where the filtering must
6599 // be done twice (one each of the field) even if we are in a
6600 // frame macroblock.
6602 static const int nnz_idx[4] = {4,5,6,3};
6603 unsigned int tmp_linesize = 2 * linesize;
6604 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6605 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter the top MB edge once against each field of the MB pair above */
6610 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6611 if( IS_INTRA(mb_type) ||
6612 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6613 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6615 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6616 for( i = 0; i < 4; i++ ) {
6617 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6618 mbn_nnz[nnz_idx[i]] != 0 )
6624 // Do not use s->qscale as luma quantizer because it has not the same
6625 // value in IPCM macroblocks.
6626 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6627 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6628 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6629 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6630 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6631 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6632 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6633 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* normal path: iterate the (up to 4) edges in this direction */
6640 for( edge = start; edge < edges; edge++ ) {
6641 /* mbn_xy: neighbor macroblock */
6642 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6643 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: odd internal edges do not exist */
6647 if( (edge&1) && IS_8x8DCT(mb_type) )
6650 if( IS_INTRA(mb_type) ||
6651 IS_INTRA(mbn_type) ) {
6654 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6655 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6664 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* partition shape says mvs cannot change across this edge -> bS from mv=0 */
6669 if( edge & mask_edge ) {
6670 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6673 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6674 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge shares one mv/ref pair: compute bS once for all 4 segments */
6677 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6678 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6679 int bn_idx= b_idx - (dir ? 8:1);
6681 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6682 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6683 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6684 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6686 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* otherwise derive bS per 4-pixel segment */
6692 for( i = 0; i < 4; i++ ) {
6693 int x = dir == 0 ? edge : i;
6694 int y = dir == 0 ? i : edge;
6695 int b_idx= 8 + 4 + x + 8*y;
6696 int bn_idx= b_idx - (dir ? 8:1);
/* coded coefficients on either side -> bS = 2 */
6698 if( h->non_zero_count_cache[b_idx] != 0 ||
6699 h->non_zero_count_cache[bn_idx] != 0 ) {
/* ref or mv mismatch -> bS = 1 */
6705 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6706 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6707 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6708 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* all-zero bS: nothing to filter on this edge */
6716 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6721 // Do not use s->qscale as luma quantizer because it has not the same
6722 // value in IPCM macroblocks.
6723 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6724 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6725 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6726 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6728 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
/* chroma has half resolution: only even edges exist there */
6729 if( (edge&1) == 0 ) {
6730 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6731 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6732 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6733 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6736 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6737 if( (edge&1) == 0 ) {
6738 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6739 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6740 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6741 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of one slice.
 * Three decode loops: CABAC (with per-slice context-state initialisation),
 * CAVLC, and a legacy decode_mb() path.  Each loop walks MBs in raster
 * order, reports errors/completion to the error-resilience layer via
 * ff_er_add_slice(), and draws finished MB rows.
 * Returns 0 on normal slice end, -1 on error (per the visible ff_er paths).
 * NOTE(review): many interior lines of this listing are elided. */
6748 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6749 MpegEncContext * const s = &h->s;
/* with data partitioning, only AC end/error events apply to this pass */
6750 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6754 if( h->pps.cabac ) {
/* CABAC payload starts byte-aligned after the slice header */
6758 align_get_bits( &s->gb );
6761 ff_init_cabac_states( &h->cabac);
6762 ff_init_cabac_decoder( &h->cabac,
6763 s->gb.buffer + get_bits_count(&s->gb)/8,
6764 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6765 /* calculate pre-state */
/* initialise all 460 CABAC contexts from the slice QP per the spec tables */
6766 for( i= 0; i < 460; i++ ) {
6768 if( h->slice_type == I_TYPE )
6769 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6771 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack state as (probability index << 1) | MPS bit */
6774 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6776 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6781 int ret = decode_mb_cabac(h);
6783 //STOP_TIMER("decode_mb_cabac")
6785 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well before moving on */
6787 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6790 if(ret>=0) ret = decode_mb_cabac(h);
6792 if(ret>=0) hl_decode_mb(h);
6795 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond the slice end indicates corrupt input */
6797 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6798 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6799 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6803 if( ++s->mb_x >= s->mb_width ) {
6805 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6807 if(FIELD_OR_MBAFF_PICTURE) {
6812 if( eos || s->mb_y >= s->mb_height ) {
6813 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6814 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6821 int ret = decode_mb_cavlc(h);
6823 if(ret>=0) hl_decode_mb(h);
6825 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6827 ret = decode_mb_cavlc(h);
6829 if(ret>=0) hl_decode_mb(h);
6834 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6835 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6840 if(++s->mb_x >= s->mb_width){
6842 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6844 if(FIELD_OR_MBAFF_PICTURE) {
6847 if(s->mb_y >= s->mb_height){
6848 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exactly consumed bitstream -> clean slice end; otherwise report error */
6850 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6851 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6855 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mid-row slice end check (skip_run must be exhausted first) */
6862 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6863 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6864 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6865 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6869 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* legacy decode_mb() loop (note: uses s->gb directly, not &s->gb) */
6878 for(;s->mb_y < s->mb_height; s->mb_y++){
6879 for(;s->mb_x < s->mb_width; s->mb_x++){
6880 int ret= decode_mb(h);
6885 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6886 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6891 if(++s->mb_x >= s->mb_width){
6893 if(++s->mb_y >= s->mb_height){
6894 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6895 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6899 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6906 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
/* tail of the legacy loop: exact bit consumption = clean end, else error */
6907 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6908 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6912 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6919 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6922 return -1; //not reached
/* Parse an SEI "unregistered user data" payload.
 * Reads up to sizeof(user_data)-1 bytes; scans the text (after the 16-byte
 * UUID) for an x264 version banner and records the build number, which is
 * used elsewhere for bug workarounds.  Remaining payload bytes are skipped.
 * NOTE(review): the NUL-termination of user_data and the return statement
 * are in elided lines of this listing. */
6925 static int decode_unregistered_user_data(H264Context *h, int size){
6926 MpegEncContext * const s = &h->s;
/* 16-byte UUID + up to 256 bytes of payload text */
6927 uint8_t user_data[16+256];
6933 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6934 user_data[i]= get_bits(&s->gb, 8);
/* detect the x264 encoder banner; +16 skips the UUID prefix */
6938 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6939 if(e==1 && build>=0)
6940 h->x264_build= build;
6942 if(s->avctx->debug & FF_DEBUG_BUGS)
6943 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes beyond what fit in the local buffer */
6946 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: iterate (type, size, payload) messages until the
 * bitstream is nearly exhausted.  Type and size both use the H.264
 * "accumulate 255s, terminate on the first byte < 255" coding.
 * Only unregistered user data is interpreted; other payloads are skipped. */
6951 static int decode_sei(H264Context *h){
6952 MpegEncContext * const s = &h->s;
6954 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payload type: sum of 0xFF bytes plus final byte */
6959 type+= show_bits(&s->gb, 8);
6960 }while(get_bits(&s->gb, 8) == 255);
/* payload size, same coding */
6964 size+= show_bits(&s->gb, 8);
6965 }while(get_bits(&s->gb, 8) == 255);
6969 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled payload types are skipped wholesale */
6973 skip_bits(&s->gb, 8*size);
6976 //FIXME check bits here
6977 align_get_bits(&s->gb);
/* Parse hrd_parameters() (H.264 Annex E).  All fields are read to keep the
 * bitstream position correct but none are stored — the decoder does not
 * use HRD timing information. */
6983 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6984 MpegEncContext * const s = &h->s;
6986 cpb_count = get_ue_golomb(&s->gb) + 1;
6987 get_bits(&s->gb, 4); /* bit_rate_scale */
6988 get_bits(&s->gb, 4); /* cpb_size_scale */
6989 for(i=0; i<cpb_count; i++){
6990 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6991 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6992 get_bits1(&s->gb); /* cbr_flag */
6994 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6995 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6996 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6997 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse vui_parameters() (H.264 Annex E).  Stores sample aspect ratio,
 * timing info and bitstream-restriction data (num_reorder_frames) in the
 * SPS; other fields are consumed and discarded.
 * NOTE(review): the return statements are in elided lines of this listing. */
7000 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7001 MpegEncContext * const s = &h->s;
7002 int aspect_ratio_info_present_flag;
7003 unsigned int aspect_ratio_idc;
7004 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7006 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7008 if( aspect_ratio_info_present_flag ) {
7009 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit numerator/denominator follow */
7010 if( aspect_ratio_idc == EXTENDED_SAR ) {
7011 sps->sar.num= get_bits(&s->gb, 16);
7012 sps->sar.den= get_bits(&s->gb, 16);
/* otherwise look up one of the predefined aspect ratios */
7013 }else if(aspect_ratio_idc < 14){
7014 sps->sar= pixel_aspect[aspect_ratio_idc];
7016 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7023 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7025 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7026 get_bits1(&s->gb); /* overscan_appropriate_flag */
7029 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7030 get_bits(&s->gb, 3); /* video_format */
7031 get_bits1(&s->gb); /* video_full_range_flag */
7032 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7033 get_bits(&s->gb, 8); /* colour_primaries */
7034 get_bits(&s->gb, 8); /* transfer_characteristics */
7035 get_bits(&s->gb, 8); /* matrix_coefficients */
7039 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7040 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7041 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7044 sps->timing_info_present_flag = get_bits1(&s->gb);
7045 if(sps->timing_info_present_flag){
7046 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7047 sps->time_scale = get_bits_long(&s->gb, 32);
7048 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7051 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7052 if(nal_hrd_parameters_present_flag)
7053 decode_hrd_parameters(h, sps);
7054 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7055 if(vcl_hrd_parameters_present_flag)
7056 decode_hrd_parameters(h, sps);
7057 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7058 get_bits1(&s->gb); /* low_delay_hrd_flag */
7059 get_bits1(&s->gb); /* pic_struct_present_flag */
7061 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7062 if(sps->bitstream_restriction_flag){
7063 unsigned int num_reorder_frames;
7064 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7065 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7066 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7067 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7068 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7069 num_reorder_frames= get_ue_golomb(&s->gb);
7070 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* DPB cannot exceed 16 frames; reject corrupt/oversized values */
7072 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7073 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7077 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list (4x4 or 8x8) from the bitstream.
 * If the list is absent, copy fallback_list (the prediction rule from the
 * spec); if the first delta yields 0, use the JVT default list; otherwise
 * decode delta-coded values in zigzag order, repeating the last value once
 * next becomes 0.
 * NOTE(review): the else/if lines around 7090/7092 and the closing braces
 * are elided in this listing. */
7083 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7084 const uint8_t *jvt_list, const uint8_t *fallback_list){
7085 MpegEncContext * const s = &h->s;
/* deltas are relative to the previous value, starting from 8 */
7086 int i, last = 8, next = 8;
7087 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7088 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7089 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7091 for(i=0;i<size;i++){
7093 next = (last + get_se_golomb(&s->gb)) & 0xff;
7094 if(!i && !next){ /* matrix not written, we use the preset one */
7095 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value for the remaining entries" */
7098 last = factors[scan[i]] = next ? next : last;
/* Parse the scaling-matrix block of an SPS or PPS.
 * Fallback rule per the spec: a PPS inherits from the SPS matrices when the
 * SPS transmitted any, otherwise from the flat/JVT defaults; within a
 * parameter set, each chroma list falls back to the previous list. */
7102 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7103 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7104 MpegEncContext * const s = &h->s;
/* PPS with an SPS that carried matrices -> inherit from SPS */
7105 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7106 const uint8_t *fallback[4] = {
7107 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7108 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7109 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7110 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7112 if(get_bits1(&s->gb)){
7113 sps->scaling_matrix_present |= is_sps;
7114 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7115 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7116 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7117 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7118 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7119 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in SPS or when the PPS enables the 8x8 transform */
7120 if(is_sps || pps->transform_8x8_mode){
7121 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7122 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7124 } else if(fallback_sps) {
/* no matrices in this PPS: copy the SPS ones wholesale */
7125 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7126 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7131 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7134 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7135 const size_t size, const char *name)
/* reject out-of-range parameter-set ids before touching the array */
7138 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
/* lazily allocate the slot; zero-filled so absent fields read as 0 */
7143 vec[id] = av_mallocz(size);
7145 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL and store it in h->sps_buffers[sps_id].
 * Validates POC configuration, reference-frame count and picture dimensions;
 * high-profile extras (chroma format, bit depths, scaling matrices) are
 * parsed when profile_idc >= 100.
 * NOTE(review): several interior lines (returns, some flag reads around
 * cropping) are elided in this listing. */
7150 static inline int decode_seq_parameter_set(H264Context *h){
7151 MpegEncContext * const s = &h->s;
7152 int profile_idc, level_idc;
7153 unsigned int sps_id, tmp, mb_width, mb_height;
7157 profile_idc= get_bits(&s->gb, 8);
7158 get_bits1(&s->gb); //constraint_set0_flag
7159 get_bits1(&s->gb); //constraint_set1_flag
7160 get_bits1(&s->gb); //constraint_set2_flag
7161 get_bits1(&s->gb); //constraint_set3_flag
7162 get_bits(&s->gb, 4); // reserved
7163 level_idc= get_bits(&s->gb, 8);
7164 sps_id= get_ue_golomb(&s->gb);
7166 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7170 sps->profile_idc= profile_idc;
7171 sps->level_idc= level_idc;
/* high profile adds chroma format / bit depth / scaling matrix fields */
7173 if(sps->profile_idc >= 100){ //high profile
7174 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7175 get_bits1(&s->gb); //residual_color_transform_flag
7176 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7177 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7178 sps->transform_bypass = get_bits1(&s->gb);
7179 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7181 sps->scaling_matrix_present = 0;
7183 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7184 sps->poc_type= get_ue_golomb(&s->gb);
7186 if(sps->poc_type == 0){ //FIXME #define
7187 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7188 } else if(sps->poc_type == 1){//FIXME #define
7189 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7190 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7191 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7192 tmp= get_ue_golomb(&s->gb);
/* bound check before filling the fixed-size offset array */
7194 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7195 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7198 sps->poc_cycle_length= tmp;
7200 for(i=0; i<sps->poc_cycle_length; i++)
7201 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7202 }else if(sps->poc_type != 2){
7203 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7207 tmp= get_ue_golomb(&s->gb);
7208 if(tmp > MAX_PICTURE_COUNT-2){
7209 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7211 sps->ref_frame_count= tmp;
7212 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7213 mb_width= get_ue_golomb(&s->gb) + 1;
7214 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb dimensions against integer overflow before the size check */
7215 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7216 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7217 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7220 sps->mb_width = mb_width;
7221 sps->mb_height= mb_height;
7223 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7224 if(!sps->frame_mbs_only_flag)
7225 sps->mb_aff= get_bits1(&s->gb);
7229 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7231 #ifndef ALLOW_INTERLACE
7233 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7235 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7236 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7238 sps->crop= get_bits1(&s->gb);
7240 sps->crop_left = get_ue_golomb(&s->gb);
7241 sps->crop_right = get_ue_golomb(&s->gb);
7242 sps->crop_top = get_ue_golomb(&s->gb);
7243 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping is only partially implemented in this decoder */
7244 if(sps->crop_left || sps->crop_top){
7245 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7251 sps->crop_bottom= 0;
7254 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7255 if( sps->vui_parameters_present_flag )
7256 decode_vui_parameters(h, sps);
7258 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7259 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7260 sps_id, sps->profile_idc, sps->level_idc,
7262 sps->ref_frame_count,
7263 sps->mb_width, sps->mb_height,
7264 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7265 sps->direct_8x8_inference_flag ? "8B8" : "",
7266 sps->crop_left, sps->crop_right,
7267 sps->crop_top, sps->crop_bottom,
7268 sps->vui_parameters_present_flag ? "VUI" : ""
/* Precompute the chroma QP lookup table for one chroma plane (t) from the
 * luma QP plus the PPS chroma_qp_index_offset, clipped to [0,51].
 * NOTE(review): loop bound is 255 but the table appears to have 256
 * entries (i & 0xff) — entry 255 looks uninitialized; off-by-one to
 * confirm against the table's declared size before changing. */
7275 build_qp_table(PPS *pps, int t, int index)
7278 for(i = 0; i < 255; i++)
7279 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL and store it in h->pps_buffers[pps_id].
 * Validates the referenced sps_id and reference counts, flags FMO as
 * unsupported, parses optional trailing fields (8x8 transform, scaling
 * matrices, second chroma qp offset) only if bits remain, and builds the
 * chroma QP lookup tables.
 * NOTE(review): interior lines (returns, the flat-matrix else branch) are
 * elided in this listing; the large table below is spec pseudo-code kept
 * as a comment for the unimplemented FMO slice-group syntax. */
7282 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7283 MpegEncContext * const s = &h->s;
7284 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7287 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7291 tmp= get_ue_golomb(&s->gb);
/* the PPS must reference an SPS that has already been decoded */
7292 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7293 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7298 pps->cabac= get_bits1(&s->gb);
7299 pps->pic_order_present= get_bits1(&s->gb);
7300 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7301 if(pps->slice_group_count > 1 ){
7302 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7303 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7304 switch(pps->mb_slice_group_map_type){
7307 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7308 | run_length[ i ] |1 |ue(v) |
7313 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7315 | top_left_mb[ i ] |1 |ue(v) |
7316 | bottom_right_mb[ i ] |1 |ue(v) |
7324 | slice_group_change_direction_flag |1 |u(1) |
7325 | slice_group_change_rate_minus1 |1 |ue(v) |
7330 | slice_group_id_cnt_minus1 |1 |ue(v) |
7331 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7333 | slice_group_id[ i ] |1 |u(v) |
7338 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7339 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* at most 32 reference indices per list; reset to 1/1 on overflow */
7340 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7341 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7342 pps->ref_count[0]= pps->ref_count[1]= 1;
7346 pps->weighted_pred= get_bits1(&s->gb);
7347 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7348 pps->init_qp= get_se_golomb(&s->gb) + 26;
7349 pps->init_qs= get_se_golomb(&s->gb) + 26;
7350 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7351 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7352 pps->constrained_intra_pred= get_bits1(&s->gb);
7353 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7355 pps->transform_8x8_mode= 0;
7356 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default to flat (all 16) scaling matrices */
7357 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7358 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional RBSP extension fields exist only if bits remain */
7360 if(get_bits_count(&s->gb) < bit_length){
7361 pps->transform_8x8_mode= get_bits1(&s->gb);
7362 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7363 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7365 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7368 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
/* only build a second table (and mark the planes as differing) if needed */
7369 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7370 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7371 h->pps.chroma_qp_diff= 1;
7373 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7375 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7376 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7377 pps_id, pps->sps_id,
7378 pps->cabac ? "CABAC" : "CAVLC",
7379 pps->slice_group_count,
7380 pps->ref_count[0], pps->ref_count[1],
7381 pps->weighted_pred ? "weighted" : "",
7382 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7383 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7384 pps->constrained_intra_pred ? "CONSTR" : "",
7385 pps->redundant_pic_cnt_present ? "REDU" : "",
7386 pps->transform_8x8_mode ? "8x8DCT" : ""
7394 * Call decode_slice() for each context.
7396 * @param h h264 master context
7397 * @param context_count number of contexts to execute
/**
 * Run decode_slice() for each slice-decoding context.
 * With a single context the slice is decoded directly; with several,
 * the per-thread contexts are seeded with the master's error-resilience
 * settings, dispatched via avctx->execute(), and afterwards position and
 * error state are pulled back from the last context into the master.
 *
 * NOTE(review): this excerpt has interior lines elided (declarations of
 * i/hx and the else-branch braces are not visible); comments describe
 * only the code shown.
 *
 * @param h             h264 master context
 * @param context_count number of contexts to execute
 */
7399 static void execute_decode_slices(H264Context *h, int context_count){
7400 MpegEncContext * const s = &h->s;
7401 AVCodecContext * const avctx= s->avctx;
7405 if(context_count == 1) {
7406 decode_slice(avctx, h);
/* parallel path: seed every secondary context before dispatch */
7408 for(i = 1; i < context_count; i++) {
7409 hx = h->thread_context[i];
7410 hx->s.error_resilience = avctx->error_resilience;
7411 hx->s.error_count = 0;
7414 avctx->execute(avctx, (void *)decode_slice,
7415 (void **)h->thread_context, NULL, context_count);
7417 /* pull back stuff from slices to master context */
7418 hx = h->thread_context[context_count - 1];
7419 s->mb_x = hx->s.mb_x;
7420 s->mb_y = hx->s.mb_y;
7421 s->dropable = hx->s.dropable;
7422 s->picture_structure = hx->s.picture_structure;
/* accumulate per-thread error counts into the master context */
7423 for(i = 1; i < context_count; i++)
7424 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each one to the
 * appropriate parser (slice header/data, SPS, PPS, SEI, ...).
 * Handles both AVC-style length-prefixed NALs (h->is_avc) and
 * Annex-B start-code-delimited NALs.  Completed slice contexts are
 * flushed through execute_decode_slices().
 *
 * NOTE(review): many interior lines (case labels, loop braces, local
 * declarations of i/buf_index/nalsize/err etc.) are elided from this
 * excerpt; comments describe only the visible code.
 *
 * @param h        h264 master context
 * @param buf      input bitstream buffer
 * @param buf_size size of buf in bytes
 */
7429 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7430 MpegEncContext * const s = &h->s;
7431 AVCodecContext * const avctx= s->avctx;
7433 H264Context *hx; ///< thread context
7434 int context_count = 0;
7436 h->max_contexts = avctx->thread_count;
/* debug dump of the first input bytes (presumably under a debug guard
 * elided from this excerpt -- av_log with NULL context is unusual) */
7439 for(i=0; i<50; i++){
7440 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* without CHUNKS mode every call starts a fresh picture */
7443 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7444 h->current_slice = 0;
7445 if (!s->first_field)
7446 s->current_picture_ptr= NULL;
7458 if(buf_index >= buf_size) break;
/* AVC: read the big-endian NAL length prefix (nal_length_size bytes) */
7460 for(i = 0; i < h->nal_length_size; i++)
7461 nalsize = (nalsize << 8) | buf[buf_index++];
7462 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7467 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7472 // start code prefix search
7473 for(; buf_index + 3 < buf_size; buf_index++){
7474 // This should always succeed in the first iteration.
7475 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7479 if(buf_index+3 >= buf_size) break;
7484 hx = h->thread_context[context_count];
/* unescape the NAL (strip emulation-prevention bytes) into ptr */
7486 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7487 if (ptr==NULL || dst_length < 0){
/* trim trailing zero bytes, then drop the rbsp_trailing_bits */
7490 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7492 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7494 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7495 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7498 if (h->is_avc && (nalsize != consumed))
7499 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7501 buf_index += consumed;
/* skip non-reference NALs entirely when hurry_up/skip_frame ask for it */
7503 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7504 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7509 switch(hx->nal_unit_type){
/* (IDR/slice case -- label elided) reject non-IDR slices mixed into an
 * IDR access unit, then reset reference state via idr() */
7511 if (h->nal_unit_type != NAL_IDR_SLICE) {
7512 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7515 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7517 init_get_bits(&hx->s.gb, ptr, bit_length);
7519 hx->inter_gb_ptr= &hx->s.gb;
7520 hx->s.data_partitioning = 0;
7522 if((err = decode_slice_header(hx, h)))
7525 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* only queue the slice for decoding if no skip policy discards it */
7526 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7527 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7528 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7529 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7530 && avctx->skip_frame < AVDISCARD_ALL)
/* (DPA case -- label elided) partition A carries the slice header */
7534 init_get_bits(&hx->s.gb, ptr, bit_length);
7536 hx->inter_gb_ptr= NULL;
7537 hx->s.data_partitioning = 1;
7539 err = decode_slice_header(hx, h);
/* (DPB case -- label elided) intra-coefficient partition */
7542 init_get_bits(&hx->intra_gb, ptr, bit_length);
7543 hx->intra_gb_ptr= &hx->intra_gb;
/* (DPC case -- label elided) inter-coefficient partition */
7546 init_get_bits(&hx->inter_gb, ptr, bit_length);
7547 hx->inter_gb_ptr= &hx->inter_gb;
/* all three partitions present and nothing tells us to skip -> decode */
7549 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7550 && s->context_initialized
7552 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7553 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7554 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7555 && avctx->skip_frame < AVDISCARD_ALL)
/* (SEI case -- label elided) */
7559 init_get_bits(&s->gb, ptr, bit_length);
/* (SPS case -- label elided) */
7563 init_get_bits(&s->gb, ptr, bit_length);
7564 decode_seq_parameter_set(h);
7566 if(s->flags& CODEC_FLAG_LOW_DELAY)
7569 if(avctx->has_b_frames < 2)
7570 avctx->has_b_frames= !s->low_delay;
/* (PPS case -- label elided) */
7573 init_get_bits(&s->gb, ptr, bit_length);
7575 decode_picture_parameter_set(h, bit_length);
7579 case NAL_END_SEQUENCE:
7580 case NAL_END_STREAM:
7581 case NAL_FILLER_DATA:
7583 case NAL_AUXILIARY_SLICE:
7586 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* flush a full batch of slice contexts */
7589 if(context_count == h->max_contexts) {
7590 execute_decode_slices(h, context_count);
7595 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7597 /* Slice could not be decoded in parallel mode, copy down
7598 * NAL unit stuff to context 0 and restart. Note that
7599 * rbsp_buffer is not transfered, but since we no longer
7600 * run in parallel mode this should not be an issue. */
7601 h->nal_unit_type = hx->nal_unit_type;
7602 h->nal_ref_idc = hx->nal_ref_idc;
/* decode any slices still pending after the main loop */
7608 execute_decode_slices(h, context_count);
7613 * returns the number of bytes consumed for building the current frame
/**
 * Return the number of bytes consumed for building the current frame.
 * In truncated mode the parser's last_index is subtracted so only the
 * bytes belonging to this frame are reported; pos is then clamped to
 * sane bounds.  NOTE(review): the final return statement is elided
 * from this excerpt.
 *
 * @param s        MpegEncContext holding flags and parse_context
 * @param pos      byte position reached in the input buffer
 * @param buf_size total size of the input buffer
 */
7615 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7616 if(s->flags&CODEC_FLAG_TRUNCATED){
7617 pos -= s->parse_context.last_index;
7618 if(pos<0) pos=0; // FIXME remove (unneeded?)
7622 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7623 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level libavcodec decode callback: decode one input packet into at
 * most one output picture.  Handles flush (buf_size == 0, drain the
 * delayed-picture queue), truncated-stream reassembly, one-time avcC
 * extradata parsing, NAL decoding, reference-picture marking, and
 * B-frame reordering of the delayed-output queue.
 *
 * NOTE(review): this excerpt has many interior lines elided (local
 * declarations, several braces, early returns); comments describe only
 * the visible code.
 *
 * @param avctx     codec context
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned
 * @param buf       input packet data
 * @param buf_size  input packet size in bytes
 * @return number of input bytes consumed (via get_consumed_bytes)
 */
7629 static int decode_frame(AVCodecContext *avctx,
7630 void *data, int *data_size,
7631 uint8_t *buf, int buf_size)
7633 H264Context *h = avctx->priv_data;
7634 MpegEncContext *s = &h->s;
7635 AVFrame *pict = data;
7638 s->flags= avctx->flags;
7639 s->flags2= avctx->flags2;
7641 /* no supplementary picture */
7642 if (buf_size == 0) {
/* flush: pop the lowest-poc picture from the delayed queue */
7646 //FIXME factorize this with the output code below
7647 out = h->delayed_pic[0];
7649 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7650 if(h->delayed_pic[i]->poc < out->poc){
7651 out = h->delayed_pic[i];
7655 for(i=out_idx; h->delayed_pic[i]; i++)
7656 h->delayed_pic[i] = h->delayed_pic[i+1];
7659 *data_size = sizeof(AVFrame);
7660 *pict= *(AVFrame*)out;
/* truncated streams: buffer input until a full frame is assembled */
7666 if(s->flags&CODEC_FLAG_TRUNCATED){
7667 int next= ff_h264_find_frame_end(h, buf, buf_size);
7669 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7671 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* one-time parse of avcC extradata (SPS/PPS) for AVC-in-container */
7674 if(h->is_avc && !h->got_avcC) {
7675 int i, cnt, nalsize;
7676 unsigned char *p = avctx->extradata;
7677 if(avctx->extradata_size < 7) {
7678 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7682 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7685 /* sps and pps in the avcC always have length coded with 2 bytes,
7686 so put a fake nal_length_size = 2 while parsing them */
7687 h->nal_length_size = 2;
7688 // Decode sps from avcC
7689 cnt = *(p+5) & 0x1f; // Number of sps
7691 for (i = 0; i < cnt; i++) {
7692 nalsize = AV_RB16(p) + 2;
7693 if(decode_nal_units(h, p, nalsize) < 0) {
7694 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7699 // Decode pps from avcC
7700 cnt = *(p++); // Number of pps
7701 for (i = 0; i < cnt; i++) {
7702 nalsize = AV_RB16(p) + 2;
7703 if(decode_nal_units(h, p, nalsize) != nalsize) {
7704 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7709 // Now store right nal length size, that will be use to parse all other nals
7710 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7711 // Do not reparse avcC
/* Annex-B extradata: parse it once before the first frame */
7715 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7716 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7720 buf_index=decode_nal_units(h, buf, buf_size);
7724 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7725 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7726 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* picture completed (or non-chunked mode): finish and maybe output */
7730 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7731 Picture *out = s->current_picture_ptr;
7732 Picture *cur = s->current_picture_ptr;
7733 Picture *prev = h->delayed_output_pic;
7734 int i, pics, cross_idr, out_of_order, out_idx;
7738 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7739 s->current_picture_ptr->pict_type= s->pict_type;
/* roll POC/frame_num state forward for the next picture */
7741 h->prev_frame_num_offset= h->frame_num_offset;
7742 h->prev_frame_num= h->frame_num;
7744 h->prev_poc_msb= h->poc_msb;
7745 h->prev_poc_lsb= h->poc_lsb;
7746 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7750 * FIXME: Error handling code does not seem to support interlaced
7751 * when slices span multiple rows
7752 * The ff_er_add_slice calls don't work right for bottom
7753 * fields; they cause massive erroneous error concealing
7754 * Error marking covers both fields (top and bottom).
7755 * This causes a mismatched s->error_count
7756 * and a bad error table. Further, the error count goes to
7757 * INT_MAX when called for bottom field, because mb_y is
7758 * past end by one (callers fault) and resync_mb_y != 0
7759 * causes problems for the first MB line, too.
7766 if (s->first_field) {
7767 /* Wait for second field. */
7771 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7772 /* Derive top_field_first from field pocs. */
7773 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7775 //FIXME do something with unavailable reference frames
7777 #if 0 //decode order
7778 *data_size = sizeof(AVFrame);
7780 /* Sort B-frames into display order */
7782 if(h->sps.bitstream_restriction_flag
7783 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7784 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7789 while(h->delayed_pic[pics]) pics++;
7791 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7793 h->delayed_pic[pics++] = cur;
7794 if(cur->reference == 0)
7795 cur->reference = DELAYED_PIC_REF;
/* detect whether the queue crosses an IDR/keyframe boundary */
7798 for(i=0; h->delayed_pic[i]; i++)
7799 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the lowest-poc delayed picture up to the next keyframe */
7802 out = h->delayed_pic[0];
7804 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7805 if(h->delayed_pic[i]->poc < out->poc){
7806 out = h->delayed_pic[i];
7810 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7811 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7813 else if(prev && pics <= s->avctx->has_b_frames)
/* grow the reorder delay when output would otherwise go backwards */
7815 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7817 ((!cross_idr && prev && out->poc > prev->poc + 2)
7818 || cur->pict_type == B_TYPE)))
7821 s->avctx->has_b_frames++;
7824 else if(out_of_order)
/* remove the output picture from the delayed queue */
7827 if(out_of_order || pics > s->avctx->has_b_frames){
7828 for(i=out_idx; h->delayed_pic[i]; i++)
7829 h->delayed_pic[i] = h->delayed_pic[i+1];
7835 *data_size = sizeof(AVFrame);
/* release the previous output's DELAYED_PIC_REF hold */
7836 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7837 prev->reference = 0;
7838 h->delayed_output_pic = out;
7842 *pict= *(AVFrame*)out;
7844 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7848 assert(pict->data[0] || !*data_size);
7849 ff_print_debug_info(s, pict);
7850 //printf("out %d\n", (int)pict->data[0]);
7853 /* Return the Picture timestamp as the frame number */
7854 /* we subtract 1 because it is added on utils.c */
7855 avctx->frame_number = s->picture_number - 1;
7857 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbours: a neighbour counts as available when it lies inside the
 * picture AND belongs to the same slice (slice_table match).
 * Index layout per the visible assignments: 0=top-left, 1=top,
 * 2=top-right, 3=left; 4 and 5 are hard-coded placeholders.
 * NOTE(review): the guard that presumably protects the top-row lookups
 * when s->mb_y == 0 is elided from this excerpt -- confirm against the
 * full source.
 */
7860 static inline void fill_mb_avail(H264Context *h){
7861 MpegEncContext * const s = &h->s;
7862 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7865 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7866 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7867 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7873 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7874 h->mb_avail[4]= 1; //FIXME move out
7875 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self-test body (normally compiled under a TEST guard; the
 * enclosing main() definition is elided from this excerpt).  Exercises
 * exp-Golomb read/write round-trips, the 4x4 (I)DCT, the quantizer,
 * and NAL escaping/unescaping.
 * NOTE(review): many interior lines (declarations, braces, STOP/START
 * timer pairs) are missing here; comments describe visible code only.
 */
7882 #define SIZE (COUNT*40)
7888 // int int_temp[10000];
7890 AVCodecContext avctx;
7892 dsputil_init(&dsp, &avctx);
/* --- unsigned exp-Golomb round-trip: write 0..COUNT-1, read back --- */
7894 init_put_bits(&pb, temp, SIZE);
7895 printf("testing unsigned exp golomb\n");
7896 for(i=0; i<COUNT; i++){
7898 set_ue_golomb(&pb, i);
7899 STOP_TIMER("set_ue_golomb");
7901 flush_put_bits(&pb);
7903 init_get_bits(&gb, temp, 8*SIZE);
7904 for(i=0; i<COUNT; i++){
7907 s= show_bits(&gb, 24);
7910 j= get_ue_golomb(&gb);
7912 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7915 STOP_TIMER("get_ue_golomb");
/* --- signed exp-Golomb round-trip, values centred on zero --- */
7919 init_put_bits(&pb, temp, SIZE);
7920 printf("testing signed exp golomb\n");
7921 for(i=0; i<COUNT; i++){
7923 set_se_golomb(&pb, i - COUNT/2);
7924 STOP_TIMER("set_se_golomb");
7926 flush_put_bits(&pb);
7928 init_get_bits(&gb, temp, 8*SIZE);
7929 for(i=0; i<COUNT; i++){
7932 s= show_bits(&gb, 24);
7935 j= get_se_golomb(&gb);
7936 if(j != i - COUNT/2){
7937 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7940 STOP_TIMER("get_se_golomb");
/* --- 4x4 (I)DCT error measurement on random blocks --- */
7943 printf("testing 4x4 (I)DCT\n");
7946 uint8_t src[16], ref[16];
7947 uint64_t error= 0, max_error=0;
7949 for(i=0; i<COUNT; i++){
7951 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7952 for(j=0; j<16; j++){
7953 ref[j]= random()%255;
7954 src[j]= random()%255;
7957 h264_diff_dct_c(block, src, ref, 4);
/* scale coefficients (per-position factors approximating dequant) */
7960 for(j=0; j<16; j++){
7961 // printf("%d ", block[j]);
7962 block[j]= block[j]*4;
7963 if(j&1) block[j]= (block[j]*4 + 2)/5;
7964 if(j&4) block[j]= (block[j]*4 + 2)/5;
7968 s->dsp.h264_idct_add(ref, block, 4);
7969 /* for(j=0; j<16; j++){
7970 printf("%d ", ref[j]);
/* accumulate absolute reconstruction error vs. the source block */
7974 for(j=0; j<16; j++){
7975 int diff= FFABS(src[j] - ref[j]);
7978 max_error= FFMAX(max_error, diff);
7981 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values --- */
7983 printf("testing quantizer\n");
7984 for(qp=0; qp<52; qp++){
7986 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escaping: encode then decode must round-trip exactly --- */
7990 printf("Testing NAL layer\n");
7992 uint8_t bitstream[COUNT];
7993 uint8_t nal[COUNT*2];
7995 memset(&h, 0, sizeof(H264Context));
7997 for(i=0; i<COUNT; i++){
/* random non-zero payload, then force some zero bytes in */
8005 for(j=0; j<COUNT; j++){
8006 bitstream[j]= (random() % 255) + 1;
8009 for(j=0; j<zeros; j++){
8010 int pos= random() % COUNT;
8011 while(bitstream[pos] == 0){
8020 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8022 printf("encoding failed\n");
8026 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8030 if(out_length != COUNT){
8031 printf("incorrect length %d %d\n", out_length, COUNT);
8035 if(consumed != nal_length){
8036 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8040 if(memcmp(bitstream, out, COUNT)){
8041 printf("mismatch\n");
8046 printf("Testing RBSP\n");
/**
 * Codec close callback: free the per-context RBSP unescape buffers and
 * the decoder's lookup tables.  NOTE(review): the MPV_common_end()
 * call and return statement present in the full source are elided
 * from this excerpt.
 *
 * @param avctx codec context whose priv_data is the H264Context
 */
8054 static int decode_end(AVCodecContext *avctx)
8056 H264Context *h = avctx->priv_data;
8057 MpegEncContext *s = &h->s;
8059 av_freep(&h->rbsp_buffer[0]);
8060 av_freep(&h->rbsp_buffer[1]);
8061 free_tables(h); //FIXME cleanup init stuff perhaps
8064 // memset(h, 0, sizeof(H264Context));
8070 AVCodec h264_decoder = {
8074 sizeof(H264Context),
8079 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,