2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
42 * Value of Picture.reference when Picture is not a reference picture, but
43 * is held for delayed output.
45 #define DELAYED_PIC_REF 4
/* File-scope VLC tables.
 * NOTE(review): going by the names these hold the coefficient-token,
 * total-zeros and run codes (with separate chroma-DC variants); where they
 * are initialised is not visible here -- confirm. */
47 static VLC coeff_token_vlc[4];
48 static VLC chroma_dc_coeff_token_vlc;
50 static VLC total_zeros_vlc[15];
51 static VLC chroma_dc_total_zeros_vlc[3];
53 static VLC run_vlc[6];
/* Prototypes of file-local (static) functions; their definitions are not
 * visible here. filter_mb and filter_mb_fast share one parameter list. */
56 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
57 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
58 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
59 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Combines two values into one 32-bit word: one operand is masked to its low
 * 16 bits and the other is shifted left by 16 and added on top.
 * With WORDS_BIGENDIAN defined, b supplies the low half and a the high half.
 * NOTE(review): the second return swaps the roles of a and b; presumably it is
 * the non-big-endian alternative, but the directive separating the two
 * returns is not visible here -- confirm.
 */
61 static av_always_inline uint32_t pack16to32(int a, int b){
62 #ifdef WORDS_BIGENDIAN
63 return (b&0xFFFF) + (a<<16);
65 return (a&0xFFFF) + (b<<16);
/* Lookup table: ff_rem6[i] == i % 6 for the 52 indices 0..51.
 * NOTE(review): the index is presumably a quantiser parameter -- confirm. */
69 const uint8_t ff_rem6[52]={
70 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table: ff_div6[i] == i / 6 (integer division) for the 52 indices
 * 0..51.
 * NOTE(review): the index is presumably a quantiser parameter -- confirm. */
73 const uint8_t ff_div6[52]={
74 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/*
 * Fills a small rectangle of memory with a repeated value using fixed-width
 * stores. Rows are addressed as p + r*stride on a byte pointer, and the
 * visible store groups write 2, 4, 8 or 16 bytes per row for rows 0..3.
 * NOTE(review): the conditions selecting between the store groups, any early
 * exits for rectangles shorter than four rows, and any rescaling of w/stride
 * by size are not visible here (callers in this file pass stride 8 for both
 * 1-byte and 4-byte elements) -- confirm before relying on units.
 * The existing parameter notes follow.
 */
80 * @param h height of the rectangle, should be a constant
81 * @param w width of the rectangle, should be a constant
82 * @param size the size of val (1 or 4), should be a constant
84 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
85 uint8_t *p= (uint8_t*)vp;
86 assert(size==1 || size==4);
// Sanity checks: the low bits of the destination address selected by
// FFMIN(w, STRIDE_ALIGN)-1 must be zero, and stride must have no bits in
// common with w-1 (i.e. be a multiple of w when w is a power of two).
92 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
93 assert((stride&(w-1))==0);
// 2-byte rows: a 1-byte val is replicated into both bytes (val*0x0101);
// with size 4 the value is used as given, narrowed to 16 bits.
95 const uint16_t v= size==4 ? val : val*0x0101;
96 *(uint16_t*)(p + 0*stride)= v;
98 *(uint16_t*)(p + 1*stride)= v;
100 *(uint16_t*)(p + 2*stride)= v;
101 *(uint16_t*)(p + 3*stride)= v;
// 4-byte rows: a 1-byte val is replicated into all four bytes
// (val*0x01010101); with size 4 the value is stored unchanged.
103 const uint32_t v= size==4 ? val : val*0x01010101;
104 *(uint32_t*)(p + 0*stride)= v;
106 *(uint32_t*)(p + 1*stride)= v;
108 *(uint32_t*)(p + 2*stride)= v;
109 *(uint32_t*)(p + 3*stride)= v;
111 //gcc can't optimize 64bit math on x86_32
112 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
// 64-bit build: val is duplicated into both 32-bit halves of a 64-bit word
// (0x0100000001 == 2^32 + 1) and written with one store per 8-byte row.
113 const uint64_t v= val*0x0100000001ULL;
114 *(uint64_t*)(p + 0*stride)= v;
116 *(uint64_t*)(p + 1*stride)= v;
118 *(uint64_t*)(p + 2*stride)= v;
119 *(uint64_t*)(p + 3*stride)= v;
// 16-byte rows with 64-bit stores: two stores per row at byte offsets 0 and 8.
121 const uint64_t v= val*0x0100000001ULL;
122 *(uint64_t*)(p + 0+0*stride)= v;
123 *(uint64_t*)(p + 8+0*stride)= v;
124 *(uint64_t*)(p + 0+1*stride)= v;
125 *(uint64_t*)(p + 8+1*stride)= v;
127 *(uint64_t*)(p + 0+2*stride)= v;
128 *(uint64_t*)(p + 8+2*stride)= v;
129 *(uint64_t*)(p + 0+3*stride)= v;
130 *(uint64_t*)(p + 8+3*stride)= v;
// 8-byte rows written as two 32-bit stores of val (offsets 0 and 4).
// NOTE(review): presumably the alternative to the 64-bit path above; the
// directive separating the two is not visible here.
132 *(uint32_t*)(p + 0+0*stride)= val;
133 *(uint32_t*)(p + 4+0*stride)= val;
135 *(uint32_t*)(p + 0+1*stride)= val;
136 *(uint32_t*)(p + 4+1*stride)= val;
138 *(uint32_t*)(p + 0+2*stride)= val;
139 *(uint32_t*)(p + 4+2*stride)= val;
140 *(uint32_t*)(p + 0+3*stride)= val;
141 *(uint32_t*)(p + 4+3*stride)= val;
// 16-byte rows written as four 32-bit stores of val (offsets 0, 4, 8, 12).
143 *(uint32_t*)(p + 0+0*stride)= val;
144 *(uint32_t*)(p + 4+0*stride)= val;
145 *(uint32_t*)(p + 8+0*stride)= val;
146 *(uint32_t*)(p +12+0*stride)= val;
147 *(uint32_t*)(p + 0+1*stride)= val;
148 *(uint32_t*)(p + 4+1*stride)= val;
149 *(uint32_t*)(p + 8+1*stride)= val;
150 *(uint32_t*)(p +12+1*stride)= val;
152 *(uint32_t*)(p + 0+2*stride)= val;
153 *(uint32_t*)(p + 4+2*stride)= val;
154 *(uint32_t*)(p + 8+2*stride)= val;
155 *(uint32_t*)(p +12+2*stride)= val;
156 *(uint32_t*)(p + 0+3*stride)= val;
157 *(uint32_t*)(p + 4+3*stride)= val;
158 *(uint32_t*)(p + 8+3*stride)= val;
159 *(uint32_t*)(p +12+3*stride)= val;
/**
 * Loads neighbour information for the current macroblock into the
 * per-macroblock caches of the H264Context.
 * From the visible statements this covers: the top/left neighbour indices
 * (h->top_mb_xy, h->left_mb_xy), the intra sample-availability masks, the
 * intra4x4 prediction-mode cache, the non-zero-count cache, top/left cbp
 * and, for inter or direct macroblocks, the mv, ref, mvd and direct caches.
 * @param h decoder context; the macroblock position is read from h->s.mb_x and h->s.mb_y
 * @param mb_type type flags of the current macroblock
 * @param for_deblock non-zero when the caches are prepared for deblocking
 *        NOTE(review): inferred from the name and the FIXME below -- confirm
 * NOTE(review): several branch heads, loop heads and closing braces of this
 * function are not visible here, so the comments below describe only the
 * statements that are shown.
 */
166 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
167 MpegEncContext * const s = &h->s;
168 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
169 int topleft_xy, top_xy, topright_xy, left_xy[2];
170 int topleft_type, top_type, topright_type, left_type[2];
174 //FIXME deblocking could skip the intra and nnz parts.
175 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
178 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default neighbour indices: top is one macroblock row up (the row offset is
// shifted left by FIELD_PICTURE), top-left and top-right are its horizontal
// neighbours, and both left entries are the macroblock immediately to the left.
180 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
181 topleft_xy = top_xy - 1;
182 topright_xy= top_xy + 1;
183 left_xy[1] = left_xy[0] = mb_xy-1;
// Pair-based neighbour selection. pair_xy addresses the even row of the
// current macroblock pair; each *_mb_frame_flag is 1 when the corresponding
// macroblock is not interlaced.
// NOTE(review): presumably guarded by FRAME_MBAFF; the guard is not visible here.
193 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
194 const int top_pair_xy = pair_xy - s->mb_stride;
195 const int topleft_pair_xy = top_pair_xy - 1;
196 const int topright_pair_xy = top_pair_xy + 1;
197 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
198 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
199 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
200 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
201 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
202 const int bottom = (s->mb_y & 1);
203 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// For top, top-left and top-right in turn: the index is moved one further
// macroblock row up when the test shown holds (the leading operand of each
// conditional is not visible here).
205 ? !curr_mb_frame_flag // bottom macroblock
206 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
208 top_xy -= s->mb_stride;
211 ? !curr_mb_frame_flag // bottom macroblock
212 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
214 topleft_xy -= s->mb_stride;
217 ? !curr_mb_frame_flag // bottom macroblock
218 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
220 topright_xy -= s->mb_stride;
// Left pair coded in the other frame/field mode than the current macroblock:
// restart both left entries from the even-row macroblock of the left pair.
222 if (left_mb_frame_flag != curr_mb_frame_flag) {
223 left_xy[1] = left_xy[0] = pair_xy - 1;
224 if (curr_mb_frame_flag) {
245 left_xy[1] += s->mb_stride;
// Remember the selected top and left neighbour indices in the context.
258 h->top_mb_xy = top_xy;
259 h->left_mb_xy[0] = left_xy[0];
260 h->left_mb_xy[1] = left_xy[1];
// Neighbour types gated on slice_table[] < 255; a neighbour that fails the
// test is given type 0.
// NOTE(review): presumably the for_deblock variant of the lookups further
// below -- confirm.
264 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
265 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
266 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
268 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// Expand the 16-bit word stored at non_zero_count[mb_xy][14] into 0/1 cache
// entries, bit i going to cache position scan8[i].
270 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
272 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
273 for(list=0; list<h->list_count; list++){
274 if(USES_LIST(mb_type,list)){
// Source is this macroblock's own motion data in the current picture,
// destination is the mv cache starting at scan8[0]; the loop advances the
// source by b_stride and the destination by 8 entries per row (the loop body
// is not visible here).
275 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
276 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
277 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
278 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
// Two reference indices are packed and multiplied by 0x0101, then written as
// one 32-bit store to each of two cache positions.
284 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
285 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
287 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
288 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
// Zero the 4x4 mv block and set every ref entry of the block to LIST_NOT_USED
// (presumably the branch taken when this macroblock does not use the list).
290 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
291 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// Neighbour types gated on the neighbour belonging to the current slice
// (slice_table[] == slice_num); otherwise type 0.
296 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
297 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
298 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
299 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
300 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra macroblock: start from fully-set availability masks, then clear bits
// for each neighbour that is not intra and is either missing (type 0) or
// excluded because constrained_intra_pred is set.
303 if(IS_INTRA(mb_type)){
304 h->topleft_samples_available=
305 h->top_samples_available=
306 h->left_samples_available= 0xFFFF;
307 h->topright_samples_available= 0xEEEA;
309 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
310 h->topleft_samples_available= 0xB3FF;
311 h->top_samples_available= 0x33FF;
312 h->topright_samples_available= 0x26EA;
315 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
316 h->topleft_samples_available&= 0xDF5F;
317 h->left_samples_available&= 0x5F5F;
321 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
322 h->topleft_samples_available&= 0x7FFF;
324 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
325 h->topright_samples_available&= 0xFBFF;
// Intra4x4: the cache entries 4..7 of row 0 come from the top neighbour's
// stored modes [4],[5],[6],[3]; the entries at column 3 come from the left
// neighbours via left_block[]. When a neighbour is not intra4x4 a substitute
// value (pred, assigned in lines not visible here) is written instead.
327 if(IS_INTRA4x4(mb_type)){
328 if(IS_INTRA4x4(top_type)){
329 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
330 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
331 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
332 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
335 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
340 h->intra4x4_pred_mode_cache[4+8*0]=
341 h->intra4x4_pred_mode_cache[5+8*0]=
342 h->intra4x4_pred_mode_cache[6+8*0]=
343 h->intra4x4_pred_mode_cache[7+8*0]= pred;
346 if(IS_INTRA4x4(left_type[i])){
347 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
348 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
351 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
356 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
357 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
372 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
// Non-zero counts along the top: copied from fixed entries of
// non_zero_count[top_xy] (the condition choosing between this copy and the
// fallback below is not visible here).
374 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
375 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
376 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
377 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
379 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
380 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
382 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
383 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
// Fallback for the same eight entries: 0 when CABAC is on and the current
// macroblock is not intra, 64 otherwise.
386 h->non_zero_count_cache[4+8*0]=
387 h->non_zero_count_cache[5+8*0]=
388 h->non_zero_count_cache[6+8*0]=
389 h->non_zero_count_cache[7+8*0]=
391 h->non_zero_count_cache[1+8*0]=
392 h->non_zero_count_cache[2+8*0]=
394 h->non_zero_count_cache[1+8*3]=
395 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Same pattern for the two left neighbours; the source entries are selected
// through left_block[].
399 for (i=0; i<2; i++) {
401 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
402 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
403 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
404 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
406 h->non_zero_count_cache[3+8*1 + 2*8*i]=
407 h->non_zero_count_cache[3+8*2 + 2*8*i]=
408 h->non_zero_count_cache[0+8*1 + 8*i]=
409 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Coded block patterns of the top and left neighbours, read from cbp_table.
// left_cbp keeps bits 0x1f0 of the first left neighbour and then receives one
// bit from each left neighbour at positions 1 and 3.
// NOTE(review): the values assigned in the IS_INTRA(mb_type) branches are not
// visible here.
416 h->top_cbp = h->cbp_table[top_xy];
417 } else if(IS_INTRA(mb_type)) {
424 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
425 } else if(IS_INTRA(mb_type)) {
431 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
434 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter or direct macroblock: fill the mv/ref cache entries surrounding the
// current block from the neighbours, once per reference list.
439 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
441 for(list=0; list<h->list_count; list++){
442 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
443 /*if(!h->mv_cache_clean[list]){
444 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
445 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
446 h->mv_cache_clean[list]= 1;
450 h->mv_cache_clean[list]= 0;
// Top neighbour uses this list: copy four mvs from its block row at offset
// 3*b_stride and two reference indices from its 8x8 row at offset b8_stride,
// each reference index covering two adjacent cache slots.
452 if(USES_LIST(top_type, list)){
453 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
454 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
455 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
456 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
457 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
458 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
459 h->ref_cache[list][scan8[0] + 0 - 1*8]=
460 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
461 h->ref_cache[list][scan8[0] + 2 - 1*8]=
462 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
// Otherwise the four mvs are zeroed; the refs become LIST_NOT_USED when a top
// macroblock exists (top_type != 0), else PART_NOT_AVAILABLE.
464 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
465 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
466 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
467 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
468 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left neighbours: two cache rows per left_xy[i], read from block column 3 of
// the neighbour at the rows selected by left_block[].
472 int cache_idx = scan8[0] - 1 + i*2*8;
473 if(USES_LIST(left_type[i], list)){
474 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
475 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
476 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
477 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
478 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
479 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
481 *(uint32_t*)h->mv_cache [list][cache_idx ]=
482 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
483 h->ref_cache[list][cache_idx ]=
484 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
488 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
// Top-left corner: one mv and ref from block column 3, row 3 of that neighbour.
491 if(USES_LIST(topleft_type, list)){
492 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
493 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
494 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
495 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
497 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
498 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Top-right corner: one mv and ref from block column 0, row 3 of that neighbour.
501 if(USES_LIST(topright_type, list)){
502 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
503 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
504 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
505 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
507 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
508 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
511 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// The slots one past blocks 5, 7 and 13 and the slots of blocks 4 and 12 are
// preset to PART_NOT_AVAILABLE with zero mvs.
514 h->ref_cache[list][scan8[5 ]+1] =
515 h->ref_cache[list][scan8[7 ]+1] =
516 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
517 h->ref_cache[list][scan8[4 ]] =
518 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
519 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
520 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
521 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
522 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
523 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
526 /* XXX beurk, Load mvd */
// Same top / left neighbour pattern as above, applied to the motion vector
// differences read from h->mvd_table; unused neighbours give zero.
// NOTE(review): the condition guarding this section is not visible here.
527 if(USES_LIST(top_type, list)){
528 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
529 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
530 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
531 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
532 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
534 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
535 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
536 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
537 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
539 if(USES_LIST(left_type[0], list)){
540 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
541 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
542 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
544 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
545 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
547 if(USES_LIST(left_type[1], list)){
548 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
549 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
550 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
552 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
553 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
555 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
556 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
557 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
558 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
559 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: clear the 4x4 direct_cache, then fill its top row and left column
// from the neighbours: 1 for a direct neighbour, the stored direct_table
// entry for an 8x8-partitioned one, 0 otherwise.
561 if(h->slice_type == B_TYPE){
562 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
564 if(IS_DIRECT(top_type)){
565 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
566 }else if(IS_8X8(top_type)){
567 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
568 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
569 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
571 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
574 if(IS_DIRECT(left_type[0]))
575 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
576 else if(IS_8X8(left_type[0]))
577 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
579 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
581 if(IS_DIRECT(left_type[1]))
582 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
583 else if(IS_8X8(left_type[1]))
584 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
586 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// Neighbour slot list: the top-left, four top, top-right and four left cache
// positions, each paired with the type of the macroblock it was loaded from.
// (The macro head that these continuation lines belong to is not visible here.)
592 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
593 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
594 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
595 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
596 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
597 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
598 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
599 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
600 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
601 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// MAP_F2F is defined twice below, both forms acting only on slots whose ref
// index is >= 0. First form (neighbour not interlaced): ref index doubled,
// vertical mv and mvd components halved. Second form (neighbour interlaced):
// ref index halved, vertical mv and mvd components doubled.
// NOTE(review): which form applies when is decided by lines not visible here.
603 #define MAP_F2F(idx, mb_type)\
604 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
605 h->ref_cache[list][idx] <<= 1;\
606 h->mv_cache[list][idx][1] /= 2;\
607 h->mvd_cache[list][idx][1] /= 2;\
612 #define MAP_F2F(idx, mb_type)\
613 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
614 h->ref_cache[list][idx] >>= 1;\
615 h->mv_cache[list][idx][1] <<= 1;\
616 h->mvd_cache[list][idx][1] <<= 1;\
626 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); // 0..2: how many of top_type / left_type[0] satisfy IS_8x8DCT
/**
 * Saves seven intra4x4 prediction modes of the current macroblock from the
 * cache into h->intra4x4_pred_mode[mb_xy].
 * Entries 0..3 come from cache indices 7+8*1 .. 7+8*4 and entries 4..6 from
 * cache indices 4+8*4 .. 6+8*4.
 * NOTE(review): presumably these are the right column and bottom row of the
 * block grid, i.e. the values later macroblocks read as left/top neighbours
 * -- confirm against the cache layout.
 */
629 static inline void write_back_intra_pred_mode(H264Context *h){
630 MpegEncContext * const s = &h->s;
631 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
633 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
634 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
635 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
636 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
637 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
638 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
639 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/*
 * Validates the cached intra4x4 prediction modes against neighbour
 * availability and rewrites them in place.
 * When bit 0x8000 of top_samples_available (resp. left_samples_available) is
 * clear, each affected cached mode is looked up in the top[] (resp. left[])
 * table and the cache entry is overwritten with the looked-up value; an error
 * is logged for modes that cannot be replaced.
 * NOTE(review): the loop heads, the condition that triggers the error (most
 * likely a negative table entry) and the return values are not visible here.
 * The existing description follows.
 */
643 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
645 static inline int check_intra4x4_pred_mode(H264Context *h){
646 MpegEncContext * const s = &h->s;
// Replacement tables indexed by the requested prediction mode.
647 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
648 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Top samples unavailable: remap the modes at cache positions scan8[0] + i.
651 if(!(h->top_samples_available&0x8000)){
653 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
655 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
658 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Left samples unavailable: remap the modes at cache positions scan8[0] + 8*i.
663 if(!(h->left_samples_available&0x8000)){
665 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
667 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
670 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
676 } //FIXME cleanup like next
/*
 * Validates a single intra prediction mode against neighbour availability.
 * Logs an error for an out-of-range mode, and for a mode that needs the top
 * or left samples when bit 0x8000 of the corresponding *_samples_available
 * mask is clear.
 * @param mode requested prediction mode
 * NOTE(review): the range test, the table lookups through top[] / left[] and
 * the return statements are not visible here; presumably the (possibly
 * substituted) mode is returned, or a negative value on error -- confirm.
 * The existing description follows.
 */
679 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
681 static inline int check_intra_pred_mode(H264Context *h, int mode){
682 MpegEncContext * const s = &h->s;
// Replacement tables indexed by the requested mode.
683 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
684 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
687 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
691 if(!(h->top_samples_available&0x8000)){
694 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
699 if(!(h->left_samples_available&0x8000)){
702 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/*
 * Predicts the intra4x4 mode of block n from the cached modes of its left
 * (index8 - 1) and top (index8 - 8) neighbours.
 * Returns DC_PRED when the smaller of the two neighbour modes is negative.
 * @param n block index, translated to a cache position through scan8[]
 * NOTE(review): the return for the non-negative case is not visible here;
 * presumably it is min -- confirm.
 * The existing description follows.
 */
711 * gets the predicted intra4x4 prediction mode.
713 static inline int pred_intra_mode(H264Context *h, int n){
714 const int index8= scan8[n];
715 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
716 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
717 const int min= FFMIN(left, top);
719 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
721 if(min<0) return DC_PRED;
/**
 * Saves non-zero coefficient counts of the current macroblock from the cache
 * into h->non_zero_count[mb_xy].
 * Entries 0..6 come from cache indices 7+8*1 .. 7+8*4 and 4+8*4 .. 6+8*4;
 * entries 7..9 and 10..12 come from two further groups of cache indices
 * (NOTE(review): presumably the two chroma planes -- confirm).
 * A 16-bit word is additionally stored at entry 14, with bit i set when the
 * cached count at scan8[i] is non-zero.
 */
725 static inline void write_back_non_zero_count(H264Context *h){
726 MpegEncContext * const s = &h->s;
727 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
729 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
730 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
731 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
732 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
733 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
734 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
735 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
737 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
738 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
739 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
741 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
742 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
743 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
746 // store all luma nnzs, for deblocking
// One flag bit per block index i (the loop head and the initialisation of v
// are not visible here); the packed word overlays entries 14 and 15.
749 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
750 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/*
 * Predicts the number of non-zero coefficients of block n from the cached
 * counts of its left (index8 - 1) and top (index8 - 8) neighbours.
 * NOTE(review): the line that computes i and the return statement are not
 * visible here. Presumably i is left + top, which the visible line turns
 * into a rounded-up average while it is below 64, and the traced value
 * i&31 is what gets returned -- confirm.
 * The existing description follows.
 */
755 * gets the predicted number of non zero coefficients.
756 * @param n block index
758 static inline int pred_non_zero_count(H264Context *h, int n){
759 const int index8= scan8[n];
760 const int left= h->non_zero_count_cache[index8 - 1];
761 const int top = h->non_zero_count_cache[index8 - 8];
// Halve with rounding up, but only while i is below 64.
764 if(i<64) i= (i+1)>>1;
766 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Selects the diagonal neighbour used for motion vector prediction.
 * Stores a pointer to the chosen motion vector in *C and returns the
 * reference index that goes with it. In the common path the top-right cache
 * entry (i - 8 + part_width) is used when its reference is not
 * PART_NOT_AVAILABLE; otherwise the top-left entry (i - 8 - 1) is used.
 * @param h decoder context
 * @param C receives a pointer to the selected motion vector
 * @param i cache position of the block (callers in this file pass scan8[n])
 * @param list reference list index
 * @param part_width partition width, added to the cache index to reach the
 *        top-right entry
 * NOTE(review): the special-case section in the middle is presumably
 * MBAFF-only; its guard and several of its conditions are not visible here.
 */
771 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
772 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
773 MpegEncContext *s = &h->s;
775 /* there is no consistent mapping of mvs to neighboring locations that will
776 * make mbaff happy, so we can't move all this logic to fill_caches */
// The special cases build their result in the spare cache slot scan8[0]-2,
// which is zeroed first and then handed out through *C.
778 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
780 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
781 *C = h->mv_cache[list][scan8[0]-2];
784 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
785 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
786 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV: read the mv of 4x4 block (X4, Y4) from the current picture
// into the spare slot, applying MV_OP to its vertical component, and return
// the matching reference index with REF_OP applied. Returns LIST_NOT_USED
// when the macroblock containing that block neither uses the list nor is
// 8x8-partitioned.
787 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
788 const int x4 = X4, y4 = Y4;\
789 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
790 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
791 return LIST_NOT_USED;\
792 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
793 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
794 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
795 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
797 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Top-right unavailable for a block in cache column 4 while the left
// neighbour is available: take the diagonal from the left macroblock, scaling
// the vertical mv and the ref index according to which side is interlaced.
800 if(topright_ref == PART_NOT_AVAILABLE
801 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
802 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
804 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
805 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
808 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
810 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
811 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Common path: prefer the top-right cache entry, else fall back to top-left.
817 if(topright_ref != PART_NOT_AVAILABLE){
818 *C= h->mv_cache[list][ i - 8 + part_width ];
821 tprintf(s->avctx, "topright MV not available\n");
823 *C= h->mv_cache[list][ i - 8 - 1 ];
824 return h->ref_cache[list][ i - 8 - 1 ];
/*
 * Predicts a motion vector for block n from three neighbours: A (left,
 * index8 - 1), B (top, index8 - 8) and C (diagonal, chosen by
 * fetch_diagonal_mv).
 * match_count is the number of neighbours whose reference index equals ref.
 * With more than one match the result is the component-wise median of A, B
 * and C. With exactly one match a single neighbour is selected. With none,
 * one special case is tested (top and diagonal both PART_NOT_AVAILABLE but
 * left available) before falling back to the median.
 * @param list reference list index
 * @param ref reference index the prediction is made for
 * NOTE(review): the assignments inside the single-match and special-case
 * branches are not visible here; presumably they copy the matching
 * neighbour (A in the special case) -- confirm.
 * The existing description follows.
 */
829 * gets the predicted MV.
830 * @param n the block index
831 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
832 * @param mx the x component of the predicted motion vector
833 * @param my the y component of the predicted motion vector
835 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
836 const int index8= scan8[n];
837 const int top_ref= h->ref_cache[list][ index8 - 8 ];
838 const int left_ref= h->ref_cache[list][ index8 - 1 ];
839 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
840 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
842 int diagonal_ref, match_count;
844 assert(part_width==1 || part_width==2 || part_width==4);
854 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
855 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
856 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
857 if(match_count > 1){ //most common
858 *mx= mid_pred(A[0], B[0], C[0]);
859 *my= mid_pred(A[1], B[1], C[1]);
860 }else if(match_count==1){
864 }else if(top_ref==ref){
872 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
876 *mx= mid_pred(A[0], B[0], C[0]);
877 *my= mid_pred(A[1], B[1], C[1]);
881 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/*
 * Directional motion vector prediction for a 16x8 partition.
 * One visible path inspects the top neighbour of block 0 (B, scan8[0] - 8),
 * the other the left neighbour of block 8 (A, scan8[8] - 1); the function
 * ends with the generic pred_motion() for a partition of width 4.
 * @param list reference list index
 * @param ref reference index the prediction is made for
 * NOTE(review): the branch on n and the early exits that use B or A directly
 * when their reference equals ref are not visible here -- confirm.
 * The existing description follows.
 */
885 * gets the directionally predicted 16x8 MV.
886 * @param n the block index
887 * @param mx the x component of the predicted motion vector
888 * @param my the y component of the predicted motion vector
890 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
892 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
893 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
895 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
903 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
904 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
906 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Generic prediction with a partition width of 4 blocks.
916 pred_motion(h, n, 4, list, ref, mx, my);
/*
 * Directional motion vector prediction for an 8x16 partition.
 * One visible path inspects the left neighbour of block 0 (A, scan8[0] - 1),
 * the other the diagonal neighbour of block 4 obtained from
 * fetch_diagonal_mv() with width 2; the function ends with the generic
 * pred_motion() for a partition of width 2.
 * @param list reference list index
 * @param ref reference index the prediction is made for
 * NOTE(review): the branch on n and the assignments taken when the inspected
 * neighbour's reference equals ref are not visible here -- confirm.
 * The existing description follows.
 */
920 * gets the directionally predicted 8x16 MV.
921 * @param n the block index
922 * @param mx the x component of the predicted motion vector
923 * @param my the y component of the predicted motion vector
925 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
927 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
928 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
930 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
941 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
943 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
945 if(diagonal_ref == ref){
// Generic prediction with a partition width of 2 blocks.
953 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Motion vector prediction for a skipped macroblock, list 0 only.
 * A special case is taken when the top or left neighbour is
 * PART_NOT_AVAILABLE, or when either neighbour has reference index 0
 * together with an all-zero motion vector; otherwise the generic
 * pred_motion() is used for block 0, width 4, list 0, ref 0.
 * @param mx receives the x component of the prediction
 * @param my receives the y component of the prediction
 * NOTE(review): the body of the special case is not visible here; presumably
 * it sets *mx and *my to 0 -- confirm.
 */
956 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
957 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
958 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
960 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
962 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
963 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
964 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
970 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes h->dist_scale_factor[i] for every list-0 reference i from picture
 * order counts:
 *   poc  = POC of the current picture
 *   poc1 = POC of the first list-1 reference
 *   poc0 = POC of list-0 reference i
 *   td   = clip(poc1 - poc0, -128, 127)
 *   tb   = clip(poc  - poc0, -128, 127)
 * When td is 0 the factor is 256; otherwise it is
 * clip((tb*tx + 32) >> 6, -1024, 1023) with tx = (16384 + (|td| >> 1)) / td.
 * Each factor is then duplicated into entries 2*i and 2*i+1 of
 * h->dist_scale_factor_field.
 * NOTE(review): any condition guarding the second loop is not visible here.
 */
975 static inline void direct_dist_scale_factor(H264Context * const h){
976 const int poc = h->s.current_picture_ptr->poc;
977 const int poc1 = h->ref_list[1][0].poc;
979 for(i=0; i<h->ref_count[0]; i++){
980 int poc0 = h->ref_list[0][i].poc;
981 int td = av_clip(poc1 - poc0, -128, 127);
982 if(td == 0 /* FIXME || pic0 is a long-term ref */){
983 h->dist_scale_factor[i] = 256;
985 int tb = av_clip(poc - poc0, -128, 127);
986 int tx = (16384 + (FFABS(td) >> 1)) / td;
987 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
// Field variant: both entries of a pair share the frame factor.
991 for(i=0; i<h->ref_count[0]; i++){
992 h->dist_scale_factor_field[2*i] =
993 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Records the current reference lists in the current picture and builds the
 * tables that map references of the first list-1 picture (ref1) onto
 * entries of the current lists.
 * Step 1: cur->ref_count[] and cur->ref_poc[][] are filled from
 *         h->ref_count[] and the POCs in h->ref_list[][].
 * Step 2: for each reference i recorded in ref1, map_col_to_list0[list][i]
 *         is set to the index j of the entry in h->ref_list[list] with the
 *         same POC, or left at 0 when none matches. A test on
 *         pict_type != B_TYPE || direct_spatial_mv_pred sits between steps 1
 *         and 2; NOTE(review): its body is not visible here, presumably an
 *         early return -- confirm.
 * Step 3: map_col_to_list0_field gets entries 2*j and 2*j+1 for each mapped j.
 * NOTE(review): any guard around step 3 is not visible here.
 */
997 static inline void direct_ref_list_init(H264Context * const h){
998 MpegEncContext * const s = &h->s;
999 Picture * const ref1 = &h->ref_list[1][0];
1000 Picture * const cur = s->current_picture_ptr;
// NOTE(review): the two conditional zeroings below are overwritten by the
// loop that follows, which assigns cur->ref_count[list] for both lists
// unconditionally -- confirm whether that is intended.
1002 if(cur->pict_type == I_TYPE)
1003 cur->ref_count[0] = 0;
1004 if(cur->pict_type != B_TYPE)
1005 cur->ref_count[1] = 0;
1006 for(list=0; list<2; list++){
1007 cur->ref_count[list] = h->ref_count[list];
1008 for(j=0; j<h->ref_count[list]; j++)
1009 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1011 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
// Match each reference recorded in ref1 to the current list by POC.
1013 for(list=0; list<2; list++){
1014 for(i=0; i<ref1->ref_count[list]; i++){
1015 const int poc = ref1->ref_poc[list][i];
1016 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1017 for(j=0; j<h->ref_count[list]; j++)
1018 if(h->ref_list[list][j].poc == poc){
1019 h->map_col_to_list0[list][i] = j;
// Field variant of the map: entry pair (2*i, 2*i+1) points to (2*j, 2*j+1).
1025 for(list=0; list<2; list++){
1026 for(i=0; i<ref1->ref_count[list]; i++){
1027 j = h->map_col_to_list0[list][i];
1028 h->map_col_to_list0_field[list][2*i] = 2*j;
1029 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/*
 * Derives motion vectors and reference indices for a direct-predicted
 * macroblock and stores them in h->mv_cache / h->ref_cache; *mb_type and
 * h->sub_mb_type[] are updated along the way.
 * Two modes are visible: spatial prediction from neighbouring blocks when
 * h->direct_spatial_mv_pred is set, otherwise temporal prediction that scales
 * the vectors of the co-located macroblock in h->ref_list[1][0].
 * NOTE(review): several short statements (declarations, else branches,
 * guards) are not visible in this listing; comments describe visible lines only.
 */
1035 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1036 MpegEncContext * const s = &h->s;
/* Position of this macroblock in macroblock, 8x8-block and 4x4-block units. */
1037 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1038 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1039 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* Co-located data taken from the first list-1 reference: type, vectors and reference indices for both lists. */
1040 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1041 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1042 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1043 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1044 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1045 const int is_b8x8 = IS_8X8(*mb_type);
1046 unsigned int sub_mb_type;
/* Co-located types that allow the whole macroblock to be handled as a single 16x16 unit below. */
1049 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Choose the partitioning of the direct macroblock from the co-located type. */
1050 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1051 /* FIXME save sub mb types from previous frames (or derive from MVs)
1052 * so we know exactly what block size to use */
1053 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1055 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1056 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1057 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1059 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1060 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
/* NOTE(review): the conditions guarding the next two flag updates are not visible here. */
1063 *mb_type |= MB_TYPE_DIRECT2;
1065 *mb_type |= MB_TYPE_INTERLACED;
1067 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1069 if(h->direct_spatial_mv_pred){
1074 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1076 /* ref = min(neighbors) */
1077 for(list=0; list<2; list++){
/* Reference indices of the left (-1), top (-8) and top-right (-8+4) neighbours. */
1078 int refa = h->ref_cache[list][scan8[0] - 1];
1079 int refb = h->ref_cache[list][scan8[0] - 8];
1080 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* Alternative third neighbour: top-left (-8-1); the guarding condition is not visible here. */
1082 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* A candidate replaces ref[list] when ref[list] is negative, or when the candidate is non-negative and smaller. */
1084 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1086 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* Neither list has a usable reference: use reference 0 with zero vectors in both lists. */
1092 if(ref[0] < 0 && ref[1] < 0){
1093 ref[0] = ref[1] = 0;
1094 mv[0][0] = mv[0][1] =
1095 mv[1][0] = mv[1][1] = 0;
/* Per list: either predict the vector for the chosen reference or zero it (the selecting test is not visible). */
1097 for(list=0; list<2; list++){
1099 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1101 mv[list][0] = mv[list][1] = 0;
/* Clear the prediction flag of a list; the list-0 case is guarded by ref[0] < 0, the guard of the list-1 case is not visible. */
1106 *mb_type &= ~MB_TYPE_P0L1;
1107 sub_mb_type &= ~MB_TYPE_P0L1;
1108 }else if(ref[0] < 0){
1109 *mb_type &= ~MB_TYPE_P0L0;
1110 sub_mb_type &= ~MB_TYPE_P0L0;
/* Whole macroblock handled as one 16x16 unit. */
1113 if(IS_16X16(*mb_type)){
1116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1117 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/*
 * Co-located block is inter, uses reference index 0 and its vector is within
 * +-1 in both components. The list-1 variant of the test only applies when
 * h->x264_build is 0 or greater than 33.
 */
1118 if(!IS_INTRA(mb_type_col)
1119 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1120 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1121 && (h->x264_build>33 || !h->x264_build)))){
/* a/b are the packed list-0/list-1 vectors; the tests deciding which assignments run are not visible. */
1123 a= pack16to32(mv[0][0],mv[0][1]);
1125 b= pack16to32(mv[1][0],mv[1][1]);
1127 a= pack16to32(mv[0][0],mv[0][1]);
1128 b= pack16to32(mv[1][0],mv[1][1]);
1130 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1131 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* Otherwise process the four 8x8 blocks one by one. */
1133 for(i8=0; i8<4; i8++){
1134 const int x8 = i8&1;
1135 const int y8 = i8>>1;
/* In an 8x8 macroblock only the sub-blocks flagged as direct are of interest here. */
1137 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1139 h->sub_mb_type[i8] = sub_mb_type;
/* Start from the spatially predicted vectors and references for this 8x8 block. */
1141 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1142 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1143 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1144 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Then zero vectors where the co-located block uses reference 0 and is (almost) stationary. */
1147 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1148 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1149 && (h->x264_build>33 || !h->x264_build)))){
1150 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1151 if(IS_SUB_8X8(sub_mb_type)){
/* x8*3 / y8*3 address 4x4 positions 0 or 3 inside the macroblock for this 8x8 block. */
1152 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1153 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1155 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* 4x4 granularity: test each co-located 4x4 vector separately. */
1160 for(i4=0; i4<4; i4++){
1161 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1162 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1164 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1166 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction ---- */
1172 }else{ /* direct temporal mv pred */
1173 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1174 const int *dist_scale_factor = h->dist_scale_factor;
/* Interlaced macroblocks use the field variants of the mapping and scale tables. */
1177 if(IS_INTERLACED(*mb_type)){
1178 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1179 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1180 dist_scale_factor = h->dist_scale_factor_field;
/* Current and co-located macroblock differ in frame/field coding. */
1182 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1183 /* FIXME assumes direct_8x8_inference == 1 */
/* Index of the top macroblock of the pair containing the current row (mb_y with its low bit cleared). */
1184 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1185 int mb_types_col[2];
1188 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1189 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1190 | (*mb_type & MB_TYPE_INTERLACED);
1191 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1193 if(IS_INTERLACED(*mb_type)){
1194 /* frame to field scaling */
1195 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* Move the co-located pointers up by one macroblock row; the guarding condition is not visible here. */
1198 l1ref0 -= 2*h->b8_stride;
1199 l1ref1 -= 2*h->b8_stride;
1200 l1mv0 -= 4*h->b_stride;
1201 l1mv1 -= 4*h->b_stride;
1205 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1206 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1208 *mb_type |= MB_TYPE_16x8;
1210 *mb_type |= MB_TYPE_8x8;
1212 /* field to frame scaling */
1213 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1214 * but in MBAFF, top and bottom POC are equal */
1215 int dy = (s->mb_y&1) ? 1 : 2;
1217 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1218 l1ref0 += dy*h->b8_stride;
1219 l1ref1 += dy*h->b8_stride;
1220 l1mv0 += 2*dy*h->b_stride;
1221 l1mv1 += 2*dy*h->b_stride;
1224 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1226 *mb_type |= MB_TYPE_16x16;
1228 *mb_type |= MB_TYPE_8x8;
/* Per 8x8 block: map the co-located reference and scale its vector (y_shift is set on lines not visible here). */
1231 for(i8=0; i8<4; i8++){
1232 const int x8 = i8&1;
1233 const int y8 = i8>>1;
1235 const int16_t (*l1mv)[2]= l1mv0;
1237 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1239 h->sub_mb_type[i8] = sub_mb_type;
/* The list-1 reference index is always 0 in temporal direct mode. */
1241 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* Intra co-located block: reference 0 and zero vectors in both lists. */
1242 if(IS_INTRA(mb_types_col[y8])){
1243 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1244 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1245 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* Translate the co-located reference index (list 0, or list 1 as the alternative) into a current list-0 index. */
1249 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1251 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1253 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1256 scale = dist_scale_factor[ref0];
1257 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1260 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
/* Vertical component converted with y_shift (presumably between frame and field units -- confirm). */
1261 int my_col = (mv_col[1]<<y_shift)/2;
/* List-0 vector = co-located vector * scale/256, rounded. */
1262 int mx = (scale * mv_col[0] + 128) >> 8;
1263 int my = (scale * my_col + 128) >> 8;
1264 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
/* List-1 vector = list-0 vector minus the co-located vector. */
1265 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1272 /* one-to-one mv scaling */
1274 if(IS_16X16(*mb_type)){
1277 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col)){
/* Prefer the co-located list-0 reference; use its list-1 reference when list 0 is unused (negative). */
1281 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1282 : map_col_to_list0[1][l1ref1[0]];
1283 const int scale = dist_scale_factor[ref0];
1284 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1286 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1287 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1289 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1290 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1292 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1293 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1294 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* Same scaling, applied per 8x8 block (and per 4x4 block when the sub type is not 8x8). */
1296 for(i8=0; i8<4; i8++){
1297 const int x8 = i8&1;
1298 const int y8 = i8>>1;
1300 const int16_t (*l1mv)[2]= l1mv0;
1302 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1304 h->sub_mb_type[i8] = sub_mb_type;
1305 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1306 if(IS_INTRA(mb_type_col)){
1307 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1308 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1309 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1313 ref0 = l1ref0[x8 + y8*h->b8_stride];
1315 ref0 = map_col_to_list0[0][ref0];
1317 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1320 scale = dist_scale_factor[ref0];
1322 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1323 if(IS_SUB_8X8(sub_mb_type)){
1324 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1325 int mx = (scale * mv_col[0] + 128) >> 8;
1326 int my = (scale * mv_col[1] + 128) >> 8;
1327 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1328 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1330 for(i4=0; i4<4; i4++){
1331 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1332 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1333 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1334 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1335 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1336 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/*
 * Copies the per-macroblock caches back into the picture-wide tables:
 * mv_cache -> current_picture.motion_val, mvd_cache -> h->mvd_table (CABAC
 * only), ref_cache -> current_picture.ref_index, and, for B slices with
 * CABAC, the direct flags of 8x8 sub-blocks -> h->direct_table.
 */
1343 static inline void write_back_motion(H264Context *h, int mb_type){
1344 MpegEncContext * const s = &h->s;
/* Position of this macroblock in 4x4-block (b_xy) and 8x8-block (b8_xy) units. */
1345 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1346 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* Macroblock does not use list 0: mark its four list-0 reference entries as LIST_NOT_USED. */
1349 if(!USES_LIST(mb_type, 0))
1350 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1352 for(list=0; list<h->list_count; list++){
1354 if(!USES_LIST(mb_type, list))
/* One row y of four vectors is copied as two 64-bit words (entries 0-1 and 2-3); the loop over y is not visible here. */
1358 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1359 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* The motion vector difference table is only maintained when CABAC is in use. */
1361 if( h->pps.cabac ) {
/* Skipped macroblocks get an all-zero 4x4 region in the table. */
1362 if(IS_SKIP(mb_type))
1363 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1366 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1367 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One reference index per 8x8 block, read at scan8[0], [4], [8] and [12]. */
1372 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1373 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1374 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1375 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1376 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B slices with CABAC: record which of sub-blocks 1..3 are direct (entry 0 is not written by the visible lines). */
1380 if(h->slice_type == B_TYPE && h->pps.cabac){
1381 if(IS_8X8(mb_type)){
1382 uint8_t *direct_table = &h->direct_table[b8_xy];
1383 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1384 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1385 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1391 * Decodes a network abstraction layer unit.
1392 * @param consumed is the number of bytes used as input
1393 * @param length is the length of the array
1394 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1395 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Reads the NAL header byte into h->nal_ref_idc / h->nal_unit_type, then scans
 * the payload for 00 00 0x byte patterns. Without any such pattern the input
 * is used as-is (*dst_length = length); otherwise the payload is copied into
 * h->rbsp_buffer[] with the escape bytes (00 00 03) removed, stopping at the
 * next start code. *consumed includes one byte for the header.
 */
1397 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1402 // src[0]&0x80; //forbidden bit
/* Header byte layout used here: bits 5-6 -> nal_ref_idc (after the shift), low 5 bits -> nal_unit_type. */
1403 h->nal_ref_idc= src[0]>>5;
1404 h->nal_unit_type= src[0]&0x1F;
/* Hex dump of the input. NOTE(review): the directive that presumably disables this is not visible in this listing. */
1408 for(i=0; i<length; i++)
1409 printf("%2X ", src[i]);
/* Look for the first 00 00 0x (x <= 3) pattern, stepping two bytes at a time and backing up by one when the previous byte was also zero. */
1411 for(i=0; i+1<length; i+=2){
1412 if(src[i]) continue;
1413 if(i>0 && src[i-1]==0) i--;
1414 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1416 /* startcode, so we must be past the end */
/* The scan reached the end without finding a pattern: nothing to unescape. */
1423 if(i>=length-1){ //no escaped 0
1424 *dst_length= length;
1425 *consumed= length+1; //+1 for the header
1429 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/*
 * NOTE(review): the result of av_fast_realloc() is stored straight into
 * h->rbsp_buffer[bufidx]; a NULL check is not visible in this listing --
 * confirm that one follows.
 */
1430 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1431 dst= h->rbsp_buffer[bufidx];
1437 //printf("decoding esc\n");
1440 //remove escapes (very rare 1:2^22)
1441 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1442 if(src[si+2]==3){ //escape
1447 }else //next start code
/* Ordinary byte: copy it through. */
1451 dst[di++]= src[si++];
1455 *consumed= si + 1;//+1 for the header
1456 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1461 * Identifies the exact end of the bitstream.
1462 * @return the length of the rbsp trailing, or 0 if damaged
/*
 * Examines the RBSP trailing data at src and returns an int.
 * NOTE(review): only the signature and the trace line are visible in this
 * listing; how v is obtained and what is returned must be checked in the
 * full source.
 */
1464 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1468 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1478 * IDCT-transforms the 16 DC values and dequantizes them.
1479 * @param qp quantization parameter
/*
 * Two-pass 4x4 transform of the luma DC coefficients stored inside block,
 * with dequantisation folded into the second pass: each result is multiplied
 * by qmul and rounded with (x + 128) >> 8.
 * The DC values sit at offsets built from x_offset/y_offset and multiples of
 * stride (stride is defined on a line not visible in this listing).
 * qp is not used by any visible line.
 */
1481 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1484 int temp[16]; //FIXME check if this is a good idea
1485 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1486 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1488 //memset(block, 64, 2*256);
/* First pass: butterflies over the four values at offset + stride*{0,1,4,5}; results go to temp (stores not visible). */
1491 const int offset= y_offset[i];
1492 const int z0= block[offset+stride*0] + block[offset+stride*4];
1493 const int z1= block[offset+stride*0] - block[offset+stride*4];
1494 const int z2= block[offset+stride*1] - block[offset+stride*5];
1495 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass: butterflies down column i of temp, then scale by qmul and write back. */
1504 const int offset= x_offset[i];
1505 const int z0= temp[4*0+i] + temp[4*2+i];
1506 const int z1= temp[4*0+i] - temp[4*2+i];
1507 const int z2= temp[4*1+i] - temp[4*3+i];
1508 const int z3= temp[4*1+i] + temp[4*3+i];
1510 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1511 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1512 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1513 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1519 * DCT-transforms the 16 DC values.
1520 * @param qp quantization parameter ??? FIXME
/*
 * Forward counterpart of the luma DC transform: the same two butterfly
 * passes over the DC positions inside block, with each result halved
 * (>> 1) instead of being multiplied by a dequantisation factor.
 * stride is defined on a line not visible in this listing.
 */
1522 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1523 // const int qmul= dequant_coeff[qp][0];
1525 int temp[16]; //FIXME check if this is a good idea
1526 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1527 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* First pass: butterflies over the four values at offset + stride*{0,1,4,5}; results go to temp (stores not visible). */
1530 const int offset= y_offset[i];
1531 const int z0= block[offset+stride*0] + block[offset+stride*4];
1532 const int z1= block[offset+stride*0] - block[offset+stride*4];
1533 const int z2= block[offset+stride*1] - block[offset+stride*5];
1534 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass: butterflies down column i of temp, halve and write back. */
1543 const int offset= x_offset[i];
1544 const int z0= temp[4*0+i] + temp[4*2+i];
1545 const int z1= temp[4*0+i] - temp[4*2+i];
1546 const int z2= temp[4*1+i] - temp[4*3+i];
1547 const int z3= temp[4*1+i] + temp[4*3+i];
1549 block[stride*0 +offset]= (z0 + z3)>>1;
1550 block[stride*2 +offset]= (z1 + z2)>>1;
1551 block[stride*8 +offset]= (z1 - z2)>>1;
1552 block[stride*10+offset]= (z0 - z3)>>1;
/*
 * 2x2 transform of the four chroma DC coefficients, combined with
 * dequantisation: every output is multiplied by qmul and shifted right by 7.
 * The four values are 16 elements apart horizontally (xStride) and 32 apart
 * vertically (stride) inside block. qp is not used by any visible line.
 * NOTE(review): the lines between the loads and the stores (which define e
 * and modify a, b, c) are not visible in this listing.
 */
1560 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1561 const int stride= 16*2;
1562 const int xStride= 16;
/* Load the 2x2 DC values. */
1565 a= block[stride*0 + xStride*0];
1566 b= block[stride*0 + xStride*1];
1567 c= block[stride*1 + xStride*0];
1568 d= block[stride*1 + xStride*1];
/* Store the transformed, dequantised values back in place. */
1575 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1576 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1577 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1578 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/*
 * 2x2 transform of the four chroma DC coefficients without any scaling.
 * The values are 16 elements apart horizontally (xStride) and 32 apart
 * vertically (stride) inside block.
 * NOTE(review): the lines between the loads and the stores (which define e
 * and modify a, b, c) are not visible in this listing.
 */
1582 static void chroma_dc_dct_c(DCTELEM *block){
1583 const int stride= 16*2;
1584 const int xStride= 16;
/* Load the 2x2 DC values. */
1587 a= block[stride*0 + xStride*0];
1588 b= block[stride*0 + xStride*1];
1589 c= block[stride*1 + xStride*0];
1590 d= block[stride*1 + xStride*1];
/* Store the transformed values back in place. */
1597 block[stride*0 + xStride*0]= (a+c);
1598 block[stride*0 + xStride*1]= (e+b);
1599 block[stride*1 + xStride*0]= (a-c);
1600 block[stride*1 + xStride*1]= (e-b);
1605 * gets the chroma qp.
/*
 * Looks up the chroma QP for luma qscale in table t of the active PPS.
 * qscale is masked with 0xff, so the table index is always in 0..255.
 */
1607 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1608 return h->pps.chroma_qp_table[t][qscale & 0xff];
1611 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1612 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/*
 * Quantises the coefficients of block with the table quant_coeff[qscale],
 * visiting them in scantable order, and returns last_non_zero.
 * intra selects the rounding bias (one third of the quantisation step for
 * intra, one sixth otherwise). Two DC variants are visible, one using
 * QUANT_SHIFT-2 with quant_coeff[qscale+18][0] and one using QUANT_SHIFT+1.
 * NOTE(review): the tests selecting between the DC variants (presumably on
 * separate_dc and qscale) and the stores of the results are not visible in
 * this listing.
 */
1613 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1615 const int * const quant_table= quant_coeff[qscale];
1616 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold2 is twice threshold1 so that a single unsigned compare can test |level| > threshold1. */
1617 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1618 const unsigned int threshold2= (threshold1<<1);
/* DC variant with a shift of QUANT_SHIFT-2. */
1624 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1625 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1626 const unsigned int dc_threshold2= (dc_threshold1<<1);
1628 int level= block[0]*quant_coeff[qscale+18][0];
/* True when level lies outside [-dc_threshold1, dc_threshold1] (negative sums wrap to large unsigned values). */
1629 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1631 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1634 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1637 // last_non_zero = i;
/* DC variant with a shift of QUANT_SHIFT+1. */
1642 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1643 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1644 const unsigned int dc_threshold2= (dc_threshold1<<1);
1646 int level= block[0]*quant_table[0];
1647 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1649 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1652 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1655 // last_non_zero = i;
/* General coefficients: scale, then keep only values whose magnitude exceeds threshold1. */
1668 const int j= scantable[i];
1669 int level= block[j]*quant_table[j];
1671 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1672 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1673 if(((unsigned)(level+threshold1))>threshold2){
1675 level= (bias + level)>>QUANT_SHIFT;
1678 level= (bias - level)>>QUANT_SHIFT;
1687 return last_non_zero;
/*
 * Motion-compensates one partition (block index n) from a single reference
 * picture pic into dest_y / dest_cb / dest_cr.
 * Luma uses qpix_op[], indexed by the quarter-sample phase of the vector;
 * chroma uses chroma_op with the eighth-sample phase. When the referenced
 * area reaches outside the picture (plus the allowed edge margin) the source
 * is first rebuilt in s->edge_emu_buffer by ff_emulated_edge_mc().
 * src_x_offset / src_y_offset are multiplied by 8 and added to the cached
 * vector (the vector is in quarter luma samples, see the >>2 below).
 */
1690 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1691 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1692 int src_x_offset, int src_y_offset,
1693 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1694 MpegEncContext * const s = &h->s;
1695 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1696 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* Index 0..15 into qpix_op built from the two fractional bits of each component. */
1697 const int luma_xy= (mx&3) + ((my&3)<<2);
1698 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1699 uint8_t * src_cb, * src_cr;
1700 int extra_width= h->emu_edge_width;
1701 int extra_height= h->emu_edge_height;
1703 const int full_mx= mx>>2;
1704 const int full_my= my>>2;
/* Picture size in luma samples; the height is halved for MBAFF or field pictures. */
1705 const int pic_width = 16*s->mb_width;
1706 const int pic_height = 16*s->mb_height >> (MB_MBAFF || FIELD_PICTURE);
1708 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* With a fractional component the usable margin shrinks by 3 samples. */
1711 if(mx&7) extra_width -= 3;
1712 if(my&7) extra_height -= 3;
/* Referenced block leaves the picture plus margin: read from an edge-emulated copy of a (16+5)x(16+5) area instead. */
1714 if( full_mx < 0-extra_width
1715 || full_my < 0-extra_height
1716 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1717 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1719 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1723 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* Second luma call, shifted by delta in both source and destination; its guard (presumably on square) is not visible here. */
1725 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* Gray-only decoding: chroma is skipped. */
1728 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1730 if(MB_MBAFF || FIELD_PICTURE){
1731 // chroma offset when predicting from a field of opposite parity
1732 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
1733 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
/* Chroma positions use the vector at 1/8-sample precision. */
1735 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1736 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
/* Edge emulation for Cb over a 9x9 area; the guarding test (presumably on emu) is not visible here. */
1739 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1740 src_cb= s->edge_emu_buffer;
1742 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Same for Cr; the emulation buffer is reused. */
1745 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1746 src_cr= s->edge_emu_buffer;
1748 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/*
 * Unweighted motion compensation of one partition. Starts with the "put"
 * operations and calls mc_dir_part() for the list-0 and the list-1 reference.
 * NOTE(review): the tests on list0/list1 and the switch of the luma operation
 * are not visible in this listing; only the switch of chroma_op to
 * chroma_avg between the two calls is.
 */
1751 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1752 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753 int x_offset, int y_offset,
1754 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756 int list0, int list1){
1757 MpegEncContext * const s = &h->s;
1758 qpel_mc_func *qpix_op= qpix_put;
1759 h264_chroma_mc_func chroma_op= chroma_put;
/* x_offset/y_offset are in chroma samples: doubled for the luma destination, used as-is for chroma. */
1761 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1762 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1763 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* Turn the offsets into picture coordinates (8 chroma samples per macroblock; the row is halved for MBAFF/field). */
1764 x_offset += 8*s->mb_x;
1765 y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
/* Prediction from the list-0 reference selected by ref_cache. */
1768 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1769 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1770 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1771 qpix_op, chroma_op);
/* A following list-1 prediction is averaged into the destination rather than overwriting it. */
1774 chroma_op= chroma_avg;
/* Prediction from the list-1 reference selected by ref_cache. */
1778 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1779 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1780 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1781 qpix_op, chroma_op);
/*
 * Weighted motion compensation of one partition.
 * Bi-predicted case: list 0 is predicted into the destination and list 1
 * into scratch buffers, then both are blended by luma_weight_avg /
 * chroma_weight_avg, using implicit weights when h->use_weight == 2 and the
 * explicit per-reference weights and offsets otherwise.
 * Single-list case: the prediction is made in place and then scaled by
 * luma_weight_op / chroma_weight_op.
 * NOTE(review): the test separating the two cases (presumably list0 && list1)
 * is not visible in this listing.
 */
1785 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1786 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 int x_offset, int y_offset,
1788 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1789 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1790 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1791 int list0, int list1){
1792 MpegEncContext * const s = &h->s;
/* x_offset/y_offset are in chroma samples: doubled for the luma destination, used as-is for chroma. */
1794 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1795 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1796 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1797 x_offset += 8*s->mb_x;
1798 y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
1801 /* don't optimize for luma-only case, since B-frames usually
1802 * use implicit weights => chroma too. */
/* Scratch layout: Cb at the start, Cr 8 bytes further, luma after 8 chroma lines. */
1803 uint8_t *tmp_cb = s->obmc_scratchpad;
1804 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1805 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1806 int refn0 = h->ref_cache[0][ scan8[n] ];
1807 int refn1 = h->ref_cache[1][ scan8[n] ];
/* List 0 goes to the destination, list 1 to the scratch buffers; both use the plain "put" operations. */
1809 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1810 dest_y, dest_cb, dest_cr,
1811 x_offset, y_offset, qpix_put, chroma_put);
1812 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1813 tmp_y, tmp_cb, tmp_cr,
1814 x_offset, y_offset, qpix_put, chroma_put);
/* Implicit weighting: the two weights sum to 64, the log2 denominator is 5 and the offset is 0. */
1816 if(h->use_weight == 2){
1817 int weight0 = h->implicit_weight[refn0][refn1];
1818 int weight1 = 64 - weight0;
1819 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1820 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1821 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* Explicit weighting: per-reference weights; the offsets of the two references are added together. */
1823 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1824 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1825 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1826 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1828 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1829 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1831 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Single-list prediction: list 1 if list1 is set, list 0 otherwise. */
1834 int list = list1 ? 1 : 0;
1835 int refn = h->ref_cache[list][ scan8[n] ];
1836 Picture *ref= &h->ref_list[list][refn];
1837 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1838 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put, chroma_put);
/* Apply the explicit weight and offset in place. */
1841 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1842 h->luma_weight[list][refn], h->luma_offset[list][refn]);
/* Chroma is only weighted when the stream enables chroma weights. */
1843 if(h->use_weight_chroma){
1844 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1845 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1846 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1847 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/*
 * Motion-compensates one partition, choosing between the weighted and the
 * standard path. The weighted path is used for explicit weighting
 * (h->use_weight == 1), or for implicit weighting (h->use_weight == 2) when
 * both lists are used and the implicit weight of this reference pair is not
 * 32 (with weights summing to 64, 32 is an equal split).
 * weight_op / weight_avg are arrays; entries 0 (luma) and 3 (chroma) are
 * passed on.
 */
1852 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1853 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1854 int x_offset, int y_offset,
1855 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1856 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1857 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1858 int list0, int list1){
1859 if((h->use_weight==2 && list0 && list1
1860 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1861 || h->use_weight==1)
1862 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1863 x_offset, y_offset, qpix_put, chroma_put,
1864 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
/* Otherwise: plain put/avg prediction. */
1866 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1867 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/*
 * Issues cache prefetches into the reference picture of block 0 of the given
 * list, at a position derived from that block's motion vector.
 * NOTE(review): a guard on refn (e.g. for an unused list) is not visible in
 * this listing -- confirm one exists before src is dereferenced.
 */
1870 static inline void prefetch_motion(H264Context *h, int list){
1871 /* fetch pixels for estimated mv 4 macroblocks ahead
1872 * optimized for 64byte cache lines */
1873 MpegEncContext * const s = &h->s;
1874 const int refn = h->ref_cache[list][scan8[0]];
/* Integer-sample position: vector >> 2 plus the macroblock origin (x is shifted by a further 8). */
1876 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1877 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1878 uint8_t **src= h->ref_list[list][refn].data;
/* Luma: the row offset varies with mb_x&3, and 64 bytes are added to look ahead. */
1879 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1880 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma: half-resolution position; the distance between the two chroma planes is passed as the stride argument. */
1881 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1882 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Inter prediction for the current macroblock: dispatches on the macroblock
 * partitioning (16x16, 16x8, 8x16, or 8x8 with per-sub-block types) and calls
 * mc_part() once per partition with the matching function-table entries.
 * Arguments passed to mc_part() per call: first block index, square flag,
 * chroma height, delta between the two luma halves, destination pointers,
 * x/y offset in chroma samples, put/avg functions, weight functions, and
 * whether list 0 / list 1 is used by that partition.
 */
1886 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1887 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1888 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1889 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1890 MpegEncContext * const s = &h->s;
1891 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1892 const int mb_type= s->current_picture.mb_type[mb_xy];
1894 assert(IS_INTER(mb_type));
/* Prefetch list-0 reference data before starting; list 1 is prefetched at the end. */
1896 prefetch_motion(h, 0);
/* One 16x16 partition. */
1898 if(IS_16X16(mb_type)){
1899 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1900 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1901 &weight_op[0], &weight_avg[0],
1902 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* Two 16x8 partitions: blocks 0 and 8, the second one 4 chroma rows lower. */
1903 }else if(IS_16X8(mb_type)){
1904 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1905 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1906 &weight_op[1], &weight_avg[1],
1907 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1908 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1909 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1910 &weight_op[1], &weight_avg[1],
1911 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* Two 8x16 partitions: blocks 0 and 4, the second one 4 chroma columns to the right. */
1912 }else if(IS_8X16(mb_type)){
1913 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1914 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1915 &weight_op[2], &weight_avg[2],
1916 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1917 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1918 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1919 &weight_op[2], &weight_avg[2],
1920 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* Four 8x8 sub-macroblocks, each with its own sub type (the loop over i and the definition of n are not visible here). */
1924 assert(IS_8X8(mb_type));
1927 const int sub_mb_type= h->sub_mb_type[i];
/* Offset of sub-macroblock i in chroma samples: 0 or 4 in each direction. */
1929 int x_offset= (i&1)<<2;
1930 int y_offset= (i&2)<<1;
1932 if(IS_SUB_8X8(sub_mb_type)){
1933 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1934 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1935 &weight_op[3], &weight_avg[3],
1936 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* Two 8x4 parts: blocks n and n+2, the second one 2 chroma rows lower. */
1937 }else if(IS_SUB_8X4(sub_mb_type)){
1938 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1939 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1940 &weight_op[4], &weight_avg[4],
1941 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1942 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1943 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1944 &weight_op[4], &weight_avg[4],
1945 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* Two 4x8 parts: blocks n and n+1, the second one 2 chroma columns to the right. */
1946 }else if(IS_SUB_4X8(sub_mb_type)){
1947 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1948 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1949 &weight_op[5], &weight_avg[5],
1950 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1951 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1952 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1953 &weight_op[5], &weight_avg[5],
1954 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* Four 4x4 parts: blocks n+j, offset by 2 chroma samples per step in each direction. */
1957 assert(IS_SUB_4X4(sub_mb_type));
1959 int sub_x_offset= x_offset + 2*(j&1);
1960 int sub_y_offset= y_offset + (j&2);
1961 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1962 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1963 &weight_op[6], &weight_avg[6],
1964 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1970 prefetch_motion(h, 1);
/*
 * Builds the static VLC tables of this file (coefficient tokens, total
 * zeros, runs) from the corresponding *_len / *_bits tables with init_vlc().
 * The static flag done presumably makes the initialisation run only once;
 * the test on it is not visible in this listing.
 */
1973 static void decode_init_vlc(void){
1974 static int done = 0;
/* Chroma DC coefficient tokens: 4*5 codes. */
1980 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1981 &chroma_dc_coeff_token_len [0], 1, 1,
1982 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* Coefficient tokens: 4*17 codes per table i (loop header not visible). */
1985 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1986 &coeff_token_len [i][0], 1, 1,
1987 &coeff_token_bits[i][0], 1, 1, 1);
/* Chroma DC total_zeros: 4 codes per table i (loop header not visible). */
1991 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1992 &chroma_dc_total_zeros_len [i][0], 1, 1,
1993 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
/* total_zeros: 15 tables of 16 codes. */
1995 for(i=0; i<15; i++){
1996 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1997 &total_zeros_len [i][0], 1, 1,
1998 &total_zeros_bits[i][0], 1, 1, 1);
/* run_before: 7 codes per table i (loop header not visible). */
2002 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2003 &run_len [i][0], 1, 1,
2004 &run_bits[i][0], 1, 1, 1);
/* The table at index 6 of run_len/run_bits has 16 codes and gets its own VLC. */
2006 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2007 &run_len [6][0], 1, 1,
2008 &run_bits[6][0], 1, 1, 1);
/* Release the per-stream tables of h, the stored SPS/PPS buffers and the
 * buffers owned by each thread context. Every pointer is released through
 * av_freep(). */
2012 static void free_tables(H264Context *h){
2015 av_freep(&h->intra4x4_pred_mode);
2016 av_freep(&h->chroma_pred_mode_table);
2017 av_freep(&h->cbp_table);
2018 av_freep(&h->mvd_table[0]);
2019 av_freep(&h->mvd_table[1]);
2020 av_freep(&h->direct_table);
2021 av_freep(&h->non_zero_count);
2022 av_freep(&h->slice_table_base);
/* slice_table is an offset into slice_table_base (see alloc_tables), so it is
 * only cleared here, never freed on its own */
2023 h->slice_table= NULL;
2025 av_freep(&h->mb2b_xy);
2026 av_freep(&h->mb2b8_xy);
2028 for(i = 0; i < MAX_SPS_COUNT; i++)
2029 av_freep(h->sps_buffers + i);
2031 for(i = 0; i < MAX_PPS_COUNT; i++)
2032 av_freep(h->pps_buffers + i);
/* per-thread buffers (allocated by context_init and frame_start).
 * NOTE(review): hx is dereferenced below; a guard for unset thread_context
 * entries may sit on a line that is not visible in this listing - confirm. */
2034 for(i = 0; i < h->s.avctx->thread_count; i++) {
2035 hx = h->thread_context[i];
2037 av_freep(&hx->top_borders[1]);
2038 av_freep(&hx->top_borders[0]);
2039 av_freep(&hx->s.obmc_scratchpad);
2040 av_freep(&hx->s.allocated_edge_emu_buffer);
/* Fill the 8x8 dequantisation tables h->dequant8_coeff[0..1] from
 * h->pps.scaling_matrix8 for all 52 qp values.
 * When the two 8x8 scaling matrices are byte-identical, table 1 simply aliases
 * buffer 0 instead of being computed again. */
2044 static void init_dequant8_coeff_table(H264Context *h){
/* a non-C idct8 implementation gets its coefficients stored transposed */
2046 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2047 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2048 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2050 for(i=0; i<2; i++ ){
2051 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2052 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2056 for(q=0; q<52; q++){
/* ff_div6 / ff_rem6 are lookup tables, presumably q/6 and q%6 - confirm */
2057 int shift = ff_div6[q];
2058 int idx = ff_rem6[q];
/* value = base coefficient * scaling matrix entry, scaled up by shift bits;
 * the destination index swaps row and column of x when transpose is set
 * (the loop over x is not visible in this listing) */
2060 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2061 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2062 h->pps.scaling_matrix8[i][x]) << shift;
/* Fill the six 4x4 dequantisation tables h->dequant4_coeff[0..5] from
 * h->pps.scaling_matrix4 for all 52 qp values.
 * A table whose scaling matrix is byte-identical to that of another index j
 * aliases buffer j instead of being computed again (the loop over j is not
 * visible in this listing). */
2067 static void init_dequant4_coeff_table(H264Context *h){
/* a non-C 4x4 idct implementation gets its coefficients stored transposed */
2069 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2070 for(i=0; i<6; i++ ){
2071 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2073 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2074 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2081 for(q=0; q<52; q++){
/* unlike the 8x8 variant, the shift carries an extra +2 */
2082 int shift = ff_div6[q] + 2;
2083 int idx = ff_rem6[q];
/* (x&1) + ((x>>2)&1) selects one of three base coefficients from the column
 * and row parity of x; the destination index swaps row and column of x when
 * transpose is set */
2085 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2086 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2087 h->pps.scaling_matrix4[i][x]) << shift;
/* Build all dequantisation tables for the active PPS: the 4x4 tables always,
 * the 8x8 tables only when pps.transform_8x8_mode is set. */
2092 static void init_dequant_tables(H264Context *h){
2094 init_dequant4_coeff_table(h);
2095 if(h->pps.transform_8x8_mode)
2096 init_dequant8_coeff_table(h);
/* with sps.transform_bypass the entries for qp index 0 are overwritten with
 * the constant 1<<6 (loop headers over i and x are not visible here) */
2097 if(h->sps.transform_bypass){
2100 h->dequant4_coeff[i][0][x] = 1<<6;
2101 if(h->pps.transform_8x8_mode)
2104 h->dequant8_coeff[i][0][x] = 1<<6;
2111 * needs width/height
/* Allocate the per-stream macroblock tables of h, sized from s->mb_stride and
 * s->mb_height, and fill the macroblock-to-block index maps.
 * Allocation goes through CHECKED_ALLOCZ; NOTE(review): its failure path and
 * the return statements of this function are not visible in this listing -
 * confirm what is returned on success and on failure. */
2113 static int alloc_tables(H264Context *h){
2114 MpegEncContext * const s = &h->s;
/* one extra macroblock row on top of mb_height */
2115 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2118 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2120 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2121 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* these tables are only allocated for CABAC streams */
2124 if( h->pps.cabac ) {
2125 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2126 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2127 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* every slice table byte starts as 0xFF; slice_table itself begins two rows
 * plus one entry into the base allocation, so entries above and to the left
 * of macroblock (0,0) can be indexed */
2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map each macroblock index to the index of its first 4x4 block (4 per
 * macroblock in each direction) and first 8x8 block (2 per direction) */
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
2138 const int mb_xy= x + y*s->mb_stride;
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
/* the scratchpad is allocated later, in frame_start, once linesize is known */
2147 s->obmc_scratchpad = NULL;
/* dequant tables are built here only if they have not been set up yet */
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
2159 * Mimic alloc_tables(), but for every context thread.
/* Make dst use the tables of src: only the pointers are copied, nothing is
 * allocated or duplicated here, so dst and src refer to the same memory.
 * slice_table_base is not copied, only the slice_table pointer derived from it.
 * @param dst context that receives the table pointers
 * @param src context whose tables are referenced */
2161 static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
/* the scratchpad is per context; frame_start allocates it when it is NULL */
2173 dst->s.obmc_scratchpad = NULL;
/* the prediction function table is initialised per context, using the codec
 * id of src */
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2179 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the buffers that belong to one context only: the two top border
 * line buffers and the edge emulation buffer.
 * @return -1 on the failure path shown at the end of the block; the success
 *         return is not visible in this listing. */
2181 static int context_init(H264Context *h){
2182 MpegEncContext * const s = &h->s;
/* per macroblock column: 16 luma + 8 Cb + 8 Cr bytes of saved border line */
2184 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2185 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2187 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2188 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2189 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
/* the usable pointer starts at the middle of the allocation, leaving the
 * first half as headroom below it */
2190 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
2193 return -1; // free_tables will clean up for us
/* Initialise the fields of h and h->s that are derived from the codec
 * context: picture size, codec id, intra prediction functions and default
 * scaling matrices. */
2196 static void common_init(H264Context *h){
2197 MpegEncContext * const s = &h->s;
2199 s->width = s->avctx->width;
2200 s->height = s->avctx->height;
2201 s->codec_id= s->avctx->codec->id;
2203 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 marks that no PPS has had its dequant tables built yet (presumably
 * compared against a PPS id elsewhere - confirm) */
2205 h->dequant_coeff_pps= -1;
2206 s->unrestricted_mv=1;
2207 s->decode=1; //FIXME
/* every scaling matrix entry defaults to 16 (flat matrices) */
2209 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2210 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Decoder init callback: sets MpegEncContext defaults for H.264 decoding and
 * registers h as thread context 0.
 * NOTE(review): several statements of this function, including its return,
 * are not visible in this listing. */
2213 static int decode_init(AVCodecContext *avctx){
2214 H264Context *h= avctx->priv_data;
2215 MpegEncContext * const s = &h->s;
2217 MPV_decode_defaults(s);
2222 s->out_format = FMT_H264;
2223 s->workaround_bugs= avctx->workaround_bugs;
2226 // s->decode_mb= ff_h263_decode_mb;
2227 s->quarter_sample = 1;
/* output is always planar YUV 4:2:0 */
2229 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata whose first byte is 1 gets special handling; presumably this
 * marks the avcC style of extradata - the branch body is not visible, confirm */
2233 if(avctx->extradata_size > 0 && avctx->extradata &&
2234 *(char *)avctx->extradata == 1){
/* the main context doubles as the first thread context */
2241 h->thread_context[0] = h;
/* Per-frame setup. Starts the MpegEncContext frame and error resilience,
 * clears key_frame on the current picture, and precomputes h->block_offset:
 * entries 0..23 use the frame line sizes (4 lines per block row), entries
 * 24..47 use a factor of 8 instead of 4, i.e. a doubled stride as needed for
 * field macroblocks. It then allocates obmc_scratchpad for every thread
 * context that has none yet, and resets slice_table to 0xFF bytes when
 * FRAME_MBAFF is set or more than one thread is used.
 * NOTE(review): the av_malloc() result for obmc_scratchpad is stored without a
 * visible NULL check - confirm that all users tolerate NULL or add a check.
 * NOTE(review): the return statements are not visible in this listing. */
2245 static int frame_start(H264Context *h){
2246 MpegEncContext * const s = &h->s;
2249 if(MPV_frame_start(s, s->avctx) < 0)
2251 ff_er_frame_start(s);
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
2255 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2256 * See decode_nal_units().
2258 s->current_picture_ptr->key_frame= 0;
2260 assert(s->linesize && s->uvlinesize);
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279 /* some macroblocks will be accessed before they're available */
2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2283 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Save the right column and the bottom row of the current macroblock into
 * h->left_border and h->top_borders[0][s->mb_x].
 * left_border layout used here: [0..16] luma, [17..25] Cb, [26..34] Cr; the
 * first entry of each group is taken from the previously saved top border
 * (its last sample), i.e. the corner sample.
 * top_borders layout per macroblock: bytes 0..15 luma, 16..23 Cb, 24..31 Cr.
 * Chroma is skipped when gray-only decoding is active and simple is zero.
 * NOTE(review): the 64-bit stores and loads go through casted uint8_t
 * pointers and therefore assume such accesses are acceptable on the target. */
2287 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2288 MpegEncContext * const s = &h->s;
/* move the chroma pointers one line up so that index i*uvlinesize with i>=1
 * addresses the rows of this macroblock */
2292 src_cb -= uvlinesize;
2293 src_cr -= uvlinesize;
/* NOTE(review): the next comment mentions two saved lines, but this function
 * only touches top_borders[0]; it looks carried over from the pair variant */
2295 // There are two lines saved, the line above the top macroblock of a pair,
2296 // and the line above the bottom macroblock
2297 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2298 for(i=1; i<17; i++){
2299 h->left_border[i]= src_y[15+i* linesize];
/* last luma row of the macroblock, copied as two 8-byte words */
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2305 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2306 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2307 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2309 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2310 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2312 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2313 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Exchange the samples left of and above the current macroblock in the
 * picture with the copies saved in h->left_border / h->top_borders[0].
 * The XCHG macro takes a final flag; its body is not visible in this listing,
 * presumably it swaps when the flag is set and copies one way otherwise -
 * confirm. Calls in hl_decode_mb_internal pass xchg=1 before intra prediction
 * and xchg=0 afterwards.
 * @param xchg   forwarded to XCHG for the left column and the first 8 luma
 *               bytes of the top row; the other top row accesses pass 1
 * @param simple nonzero skips the gray-only check and always handles chroma */
2317 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2318 MpegEncContext * const s = &h->s;
/* deblocking_filter == 2: a neighbour only counts when it has the same slice
 * table entry as the current macroblock; otherwise any neighbour inside the
 * picture counts */
2325 if(h->deblocking_filter == 2) {
2326 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2327 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2328 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2330 deblock_left = (s->mb_x > 0);
2331 deblock_top = (s->mb_y > 0);
/* step back to the sample above-left of the macroblock */
2334 src_y -= linesize + 1;
2335 src_cb -= uvlinesize + 1;
2336 src_cr -= uvlinesize + 1;
2338 #define XCHG(a,b,t,xchg)\
2345 for(i = !deblock_top; i<17; i++){
2346 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* top luma row: 16 bytes of this macroblock plus, when a right neighbour
 * exists, the first 8 bytes saved for the next macroblock column */
2351 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2352 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2353 if(s->mb_x+1 < s->mb_width){
2354 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2358 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2360 for(i = !deblock_top; i<9; i++){
2361 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2362 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2366 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2367 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Macroblock-pair variant of backup_mb_border: saves the right column (32
 * luma rows, 16 rows per chroma plane) and the last two rows of a macroblock
 * pair into h->left_border and h->top_borders[0] / h->top_borders[1].
 * left_border layout used here: [0..33] luma, [34..51] Cb, [52..69] Cr; the
 * first two entries of each group come from the previously saved top borders.
 * Chroma is skipped when gray-only decoding is active. */
2372 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2373 MpegEncContext * const s = &h->s;
/* start two lines above the pair so that row index i>=2 addresses the pair */
2376 src_y -= 2 * linesize;
2377 src_cb -= 2 * uvlinesize;
2378 src_cr -= 2 * uvlinesize;
2380 // There are two lines saved, the line above the top macroblock of a pair,
2381 // and the line above the bottom macroblock
2382 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2383 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2384 for(i=2; i<34; i++){
2385 h->left_border[i]= src_y[15+i* linesize];
/* second-to-last pair row goes to top_borders[0], last row to top_borders[1] */
2388 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2389 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2390 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2391 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2393 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2394 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2395 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2396 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2397 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2398 for(i=2; i<18; i++){
2399 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2400 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2402 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2403 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2404 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2405 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* Macroblock-pair variant of xchg_mb_border: exchanges the two rows above and
 * the column left of a macroblock pair with the copies kept in
 * h->left_border and h->top_borders[0] / h->top_borders[1].
 * Unlike xchg_mb_border there is no per-slice check here: left and top are
 * decided from the position only, and top needs mb_y > 1 because the pair
 * above occupies two macroblock rows.
 * The XCHG macro body is not visible in this listing.
 * @param xchg forwarded to XCHG for the left column and the first 8 luma
 *             bytes of each top row; the other accesses pass 1 */
2409 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2410 MpegEncContext * const s = &h->s;
2413 int deblock_left = (s->mb_x > 0);
2414 int deblock_top = (s->mb_y > 1);
2416 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* step back two lines and one column, to the corner above-left of the pair */
2418 src_y -= 2 * linesize + 1;
2419 src_cb -= 2 * uvlinesize + 1;
2420 src_cr -= 2 * uvlinesize + 1;
2422 #define XCHG(a,b,t,xchg)\
2429 for(i = (!deblock_top)<<1; i<34; i++){
2430 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* two top luma rows, plus the first 8 bytes of the right neighbour column
 * when one exists */
2435 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2437 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2439 if(s->mb_x+1 < s->mb_width){
2440 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2441 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2445 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2447 for(i = (!deblock_top) << 1; i<18; i++){
2448 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2449 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2453 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2454 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2455 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2456 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* Reconstruct the macroblock at (s->mb_x, s->mb_y) into s->current_picture:
 * intra prediction or motion compensation first, then the residual is added
 * through the selected idct functions, then the deblocking filter runs when
 * h->deblocking_filter is set.
 * @param simple nonzero when called from hl_decode_mb_simple; every branch
 *               guarded by !simple (field macroblocks, FRAME_MBAFF, intra
 *               PCM) is then skipped and the codec is treated as H.264.
 * NOTE(review): several short lines (declarations, else branches, closing
 * braces) are not visible in this listing, so the exact nesting of the
 * branches below cannot be confirmed from here. */
2461 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2462 MpegEncContext * const s = &h->s;
2463 const int mb_x= s->mb_x;
2464 const int mb_y= s->mb_y;
2465 const int mb_xy= mb_x + mb_y*s->mb_stride;
2466 const int mb_type= s->current_picture.mb_type[mb_xy];
2467 uint8_t *dest_y, *dest_cb, *dest_cr;
2468 int linesize, uvlinesize /*dct_offset*/;
2470 int *block_offset = &h->block_offset[0];
2471 const unsigned int bottom = mb_y & 1;
2472 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2473 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2474 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination of this macroblock: 16x16 luma, 8x8 per chroma plane */
2476 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2477 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2478 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2480 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2481 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: lines are interleaved, so strides are doubled and the
 * second half of block_offset (built with doubled strides in frame_start) is
 * used. For the bottom macroblock of a pair the destination is moved back by
 * 15 luma / 7 chroma lines, i.e. to the second line of the pair. */
2483 if (!simple && MB_FIELD) {
2484 linesize = h->mb_linesize = s->linesize * 2;
2485 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2486 block_offset = &h->block_offset[24];
2487 if(mb_y&1){ //FIXME move out of this func?
2488 dest_y -= s->linesize*15;
2489 dest_cb-= s->uvlinesize*7;
2490 dest_cr-= s->uvlinesize*7;
2494 for(list=0; list<h->list_count; list++){
2495 if(!USES_LIST(mb_type, list))
2497 if(IS_16X16(mb_type)){
2498 int8_t *ref = &h->ref_cache[list][scan8[0]];
/* note the precedence: + binds tighter than ^, so the value written is
 * (16 + *ref) ^ (s->mb_y & 1) */
2499 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2501 for(i=0; i<16; i+=4){
2502 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2503 int ref = h->ref_cache[list][scan8[i]];
2505 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
/* frame macroblock: plain strides */
2511 linesize = h->mb_linesize = s->linesize;
2512 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2513 // dct_offset = s->linesize * 16;
/* pick the luma residual functions: plain pixel addition when the transform
 * is bypassed, the 8x8 idct for 8x8-transform macroblocks, the 4x4 idct
 * otherwise */
2516 if(transform_bypass){
2518 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2519 }else if(IS_8x8DCT(mb_type)){
2520 idct_dc_add = s->dsp.h264_idct8_dc_add;
2521 idct_add = s->dsp.h264_idct8_add;
2523 idct_dc_add = s->dsp.h264_idct_dc_add;
2524 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra with deblocking: exchange the pair border before prediction,
 * once per pair (on the top macroblock, or on the bottom one if the
 * macroblock above it is not intra) */
2527 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2528 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2529 int mbt_y = mb_y&~1;
2530 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2531 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2532 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2533 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* intra PCM: samples are copied straight out of h->mb, no prediction and no
 * transform */
2536 if (!simple && IS_INTRA_PCM(mb_type)) {
2539 // The pixels are stored in h->mb array in the same order as levels,
2540 // copy them in output in the correct order.
2541 for(i=0; i<16; i++) {
2542 for (y=0; y<4; y++) {
2543 for (x=0; x<4; x++) {
2544 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2548 for(i=16; i<16+4; i++) {
2549 for (y=0; y<4; y++) {
2550 for (x=0; x<4; x++) {
2551 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2555 for(i=20; i<20+4; i++) {
2556 for (y=0; y<4; y++) {
2557 for (x=0; x<4; x++) {
2558 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra: the border is exchanged with xchg=1 before prediction and again
 * with xchg=0 after it (see the second xchg_mb_border call further down),
 * presumably so that prediction reads the saved, not yet deblocked
 * neighbour samples - confirm */
2563 if(IS_INTRA(mb_type)){
2564 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2565 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2567 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2568 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2569 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2572 if(IS_INTRA4x4(mb_type)){
2573 if(simple || !s->encoding){
/* 8x8 transform: four 8x8 luma blocks, each predicted and then immediately
 * reconstructed, since later blocks predict from earlier ones */
2574 if(IS_8x8DCT(mb_type)){
2575 for(i=0; i<16; i+=4){
2576 uint8_t * const ptr= dest_y + block_offset[i];
2577 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2578 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2579 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2580 (h->topright_samples_available<<i)&0x4000, linesize);
2582 if(nnz == 1 && h->mb[i*16])
2583 idct_dc_add(ptr, h->mb + i*16, linesize);
2585 idct_add(ptr, h->mb + i*16, linesize);
2589 for(i=0; i<16; i++){
2590 uint8_t * const ptr= dest_y + block_offset[i];
2592 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* these two modes read the top-right neighbour; when it is unavailable the
 * last sample of the row above is replicated into four bytes instead */
2595 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2596 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2597 assert(mb_y || linesize <= block_offset[i]);
2598 if(!topright_avail){
2599 tr= ptr[3 - linesize]*0x01010101;
2600 topright= (uint8_t*) &tr;
2602 topright= ptr + 4 - linesize;
2606 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2607 nnz = h->non_zero_count_cache[ scan8[i] ];
2610 if(nnz == 1 && h->mb[i*16])
2611 idct_dc_add(ptr, h->mb + i*16, linesize);
2613 idct_add(ptr, h->mb + i*16, linesize);
2615 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: whole-macroblock prediction, then the luma DC transform
 * (H.264 or SVQ3 flavour; the selecting condition is not visible here) */
2620 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2622 if(!transform_bypass)
2623 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2625 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2627 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2628 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensated prediction */
2630 hl_motion(h, dest_y, dest_cb, dest_cr,
2631 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2632 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2633 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for everything except intra 4x4, whose residual was already
 * added block by block above */
2637 if(!IS_INTRA4x4(mb_type)){
2639 if(IS_INTRA16x16(mb_type)){
2640 for(i=0; i<16; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ])
2642 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2643 else if(h->mb[i*16])
2644 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* step over four 4x4 indices at a time when the 8x8 transform is used */
2647 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2648 for(i=0; i<16; i+=di){
2649 int nnz = h->non_zero_count_cache[ scan8[i] ];
2651 if(nnz==1 && h->mb[i*16])
2652 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2654 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2659 for(i=0; i<16; i++){
2660 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2661 uint8_t * const ptr= dest_y + block_offset[i];
2662 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: blocks 16..19 go to Cb and 20..23 to Cr, selected through
 * (i&4)>>2; skipped in gray-only mode unless simple is set */
2668 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2669 uint8_t *dest[2] = {dest_cb, dest_cr};
2670 if(transform_bypass){
2671 idct_add = idct_dc_add = s->dsp.add_pixels4;
2673 idct_add = s->dsp.h264_idct_add;
2674 idct_dc_add = s->dsp.h264_idct_dc_add;
/* chroma DC: dequant table 1/2 for intra and 4/5 for inter, Cb then Cr */
2675 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2676 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2679 for(i=16; i<16+8; i++){
2680 if(h->non_zero_count_cache[ scan8[i] ])
2681 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2682 else if(h->mb[i*16])
2683 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2686 for(i=16; i<16+8; i++){
2687 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2688 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2689 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking. With FRAME_MBAFF both macroblocks of a pair are filtered
 * together once the bottom one has been decoded; otherwise the macroblock is
 * filtered right away with filter_mb_fast. */
2695 if(h->deblocking_filter) {
2696 if (!simple && FRAME_MBAFF) {
2697 //FIXME try deblocking one mb at a time?
2698 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
/* mb_y and mb_xy below shadow the function-level variables and refer to the
 * row above the current macroblock.
 * NOTE(review): mb_type_top / mb_type_bottom are read before the !bottom
 * early return, so for a top macroblock they index the previous row; this
 * is only safe if mb_type has padding above row 0 - confirm. */
2699 const int mb_y = s->mb_y - 1;
2700 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2701 const int mb_xy= mb_x + mb_y*s->mb_stride;
2702 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2703 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2704 if (!bottom) return;
2705 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2706 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2707 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2709 if(IS_INTRA(mb_type_top | mb_type_bottom))
2710 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2712 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
/* top macroblock of the pair: caches and chroma qp are reloaded for it
 * before filtering */
2716 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2717 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2718 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2719 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2720 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
/* bottom macroblock of the pair */
2723 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2724 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2725 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2726 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2727 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* non-MBAFF: save the unfiltered border first, then filter in place */
2729 tprintf(h->s.avctx, "call filter_mb\n");
2730 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2731 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2732 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2738 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Calls hl_decode_mb_internal with the constant simple=1. Because that
 * function is av_always_inline, the constant lets every !simple branch be
 * resolved at compile time. */
2740 static void hl_decode_mb_simple(H264Context *h){
2741 hl_decode_mb_internal(h, 1);
2745 * Process a macroblock; this handles edge cases, such as interlacing.
/* Calls hl_decode_mb_internal with the constant simple=0, keeping all
 * branches. Marked av_noinline, so this variant stays a separate function
 * instead of being merged into its caller. */
2747 static void av_noinline hl_decode_mb_complex(H264Context *h){
2748 hl_decode_mb_internal(h, 0);
/* Reconstruct the current macroblock, dispatching to the simple or the
 * complex variant of hl_decode_mb_internal.
 * NOTE(review): statements between the is_complex computation and the
 * dispatch are not visible in this listing. */
2751 static void hl_decode_mb(H264Context *h){
2752 MpegEncContext * const s = &h->s;
2753 const int mb_x= s->mb_x;
2754 const int mb_y= s->mb_y;
2755 const int mb_xy= mb_x + mb_y*s->mb_stride;
2756 const int mb_type= s->current_picture.mb_type[mb_xy];
/* complex covers exactly the cases that hl_decode_mb_internal guards with
 * !simple: MBAFF, field macroblocks, intra PCM, a codec other than H.264,
 * gray-only decoding and encoding */
2757 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2763 hl_decode_mb_complex(h);
2764 else hl_decode_mb_simple(h);
/* Turn a frame Picture into a view of one of its fields, in place: for each
 * of the four planes the line size is doubled so that every second line is
 * skipped.
 * The data pointer advance by one line is presumably applied only when
 * bottom is nonzero; the condition line is not visible in this listing -
 * confirm.
 * @param pic    picture to modify
 * @param bottom nonzero to select the bottom field */
2767 static void pic_as_field(Picture *pic, const int bottom){
2769 for (i = 0; i < 4; ++i) {
2771 pic->data[i] += pic->linesize[i];
2772 pic->linesize[i] *= 2;
/* Produce in dest the field of src that has the given parity.
 * match is 1 when src->reference has the parity bit set, 0 otherwise.
 * dest is converted to a field view with pic_as_field() and id_add is added
 * to its pic_id.
 * NOTE(review): the copy from src into dest, the condition on match and the
 * return statement are not visible in this listing; callers use the return
 * value as the number of fields written (0 or 1) - confirm. */
2776 static int split_field_copy(Picture *dest, Picture *src,
2777 int parity, int id_add){
2778 int match = !!(src->reference & parity);
2782 pic_as_field(dest, parity == PICT_BOTTOM_FIELD);
2784 dest->pic_id += id_add;
2791 * Split one reference list into field parts, interleaving by parity
2792 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2793 * set to look at the actual start of data for that field.
2795 * @param dest output list
2796 * @param dest_len maximum number of fields to put in dest
2797 * @param src the source reference list containing fields and/or field pairs
2798 * (aka short_ref/long_ref, or
2799 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2800 * @param src_len number of Picture's in source (pairs and unmatched fields)
2801 * @param parity the parity of the picture being decoded/needing
2802 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2803 * @return number of fields placed in dest
/* Interleave fields from src into dest, alternating between fields of the
 * requested parity and fields of the opposite parity, until dest_len fields
 * were written or the source is exhausted.
 * out_i only advances by the result of split_field_copy(), so a source entry
 * without a field of the wanted parity produces no output.
 * NOTE(review): the counters same_i / opp_i, their increments, the loop exit
 * and the return statement are not visible in this listing. */
2805 static int split_field_half_ref_list(Picture *dest, int dest_len,
2806 Picture *src, int src_len, int parity){
/* start with a field of the same parity as the current picture */
2807 int same_parity = 1;
2813 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2814 if (same_parity && same_i < src_len) {
2815 field_output = split_field_copy(dest + out_i, src + same_i,
/* after a same-parity field was written, switch to the opposite parity;
 * if nothing was written, keep looking for a same-parity field */
2817 same_parity = !field_output;
/* PICT_FRAME - parity is the opposite parity, assuming PICT_FRAME is the sum
 * of the two field flags - confirm */
2820 } else if (opp_i < src_len) {
2821 field_output = split_field_copy(dest + out_i, src + opp_i,
2822 PICT_FRAME - parity, 0);
2823 same_parity = field_output;
2835 * Split the reference frame list into a reference field list.
2836 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2837 * The input list contains both reference field pairs and
2838 * unmatched reference fields; it is ordered as spec describes
2839 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2840 * unmatched field pairs are also present. Conceptually this is equivalent
2841 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2843 * @param dest output reference list where ordered fields are to be placed
2844 * @param dest_len max number of fields to place at dest
2845 * @param src source reference list, as described above
2846 * @param src_len number of pictures (pairs and unmatched fields) in src
2847 * @param parity parity of field being currently decoded
2848 * (one of PICT_{TOP,BOTTOM}_FIELD)
2849 * @param long_i index into src array that holds first long reference picture,
2850 * or src_len if no long refs present.
/* Split a combined reference list into fields in two passes: entries
 * [0, long_i) of src first, then entries [long_i, src_len).
 * i counts the fields produced so far.
 * NOTE(review): as visible here, the second call passes dest and dest_len
 * unchanged; the lines between the two calls (which presumably advance dest
 * and reduce dest_len by i) and the return statement are not visible in this
 * listing - confirm. */
2852 static int split_field_ref_list(Picture *dest, int dest_len,
2853 Picture *src, int src_len,
2854 int parity, int long_i){
2856 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2860 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2861 src_len - long_i, parity);
2866 * fills the default_ref_list.
/* Build h->default_ref_list[0] (and [1] for B slices) from the short term and
 * long term reference pictures.
 * For field pictures the lists are first assembled as frame lists in the
 * local field_entry_list and then converted with split_field_ref_list();
 * otherwise they are written directly into h->default_ref_list.
 * Unused tail entries of each list are zeroed.
 * Review fix: the List1 trace line printed the data pointer of the list 0
 * entry; it now prints the list 1 entry it describes. Two stray double
 * semicolons were removed as well.
 * NOTE(review): several short lines (declarations, else branches, returns)
 * are not visible in this listing. */
2868 static int fill_default_ref_list(H264Context *h){
2869 MpegEncContext * const s = &h->s;
2871 int smallest_poc_greater_than_current = -1;
2873 Picture sorted_short_ref[32];
2874 Picture field_entry_list[2][32];
2875 Picture *frame_list[2];
/* with structure_sel == PICT_FRAME the test (reference | structure_sel) ==
 * PICT_FRAME below accepts every picture, so single fields are kept too */
2877 if (FIELD_PICTURE) {
2878 structure_sel = PICT_FRAME;
2879 frame_list[0] = field_entry_list[0];
2880 frame_list[1] = field_entry_list[1];
2883 frame_list[0] = h->default_ref_list[0];
2884 frame_list[1] = h->default_ref_list[1];
2887 if(h->slice_type==B_TYPE){
/* selection sort: each pass picks the smallest poc above the previous pick,
 * so sorted_short_ref ends up in ascending poc order */
2894 /* sort frame according to poc in B slice */
2895 for(out_i=0; out_i<h->short_ref_count; out_i++){
2897 int best_poc=INT_MAX;
2899 for(i=0; i<h->short_ref_count; i++){
2900 const int poc= h->short_ref[i]->poc;
2901 if(poc > limit && poc < best_poc){
2907 assert(best_i != INT_MIN);
2910 sorted_short_ref[out_i]= *h->short_ref[best_i];
2911 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
/* remember the first sorted index whose poc is not below the current one */
2912 if (-1 == smallest_poc_greater_than_current) {
2913 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2914 smallest_poc_greater_than_current = out_i;
2919 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2921 // find the largest poc
2922 for(list=0; list<2; list++){
/* the two lists walk sorted_short_ref in opposite directions around the
 * current poc; when j leaves the array the direction is flipped once */
2925 int step= list ? -1 : 1;
2927 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2929 while(j<0 || j>= h->short_ref_count){
2930 if(j != -99 && step == (list ? -1 : 1))
2933 j= smallest_poc_greater_than_current + (step>>1);
2935 sel = sorted_short_ref[j].reference | structure_sel;
2936 if(sel != PICT_FRAME) continue;
2937 frame_list[list][index ]= sorted_short_ref[j];
2938 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2940 short_len[list] = index;
/* long term references follow the short term ones; pic_id is the index */
2942 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2944 if(h->long_ref[i] == NULL) continue;
2945 sel = h->long_ref[i]->reference | structure_sel;
2946 if(sel != PICT_FRAME) continue;
2948 frame_list[ list ][index ]= *h->long_ref[i];
2949 frame_list[ list ][index++].pic_id= i;
2953 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2954 // swap the two first elements of L1 when
2955 // L0 and L1 are identical
2956 Picture temp= frame_list[1][0];
2957 frame_list[1][0] = frame_list[1][1];
2958 frame_list[1][1] = temp;
2963 for(list=0; list<2; list++){
2965 len[list] = split_field_ref_list(h->default_ref_list[list],
2969 s->picture_structure,
2972 if(len[list] < h->ref_count[ list ])
2973 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* non-B slices: short term references in their stored order, pic_id is the
 * frame number */
2980 for(i=0; i<h->short_ref_count; i++){
2982 sel = h->short_ref[i]->reference | structure_sel;
2983 if(sel != PICT_FRAME) continue;
2984 frame_list[0][index ]= *h->short_ref[i];
2985 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2988 for(i = 0; i < 16; i++){
2990 if(h->long_ref[i] == NULL) continue;
2991 sel = h->long_ref[i]->reference | structure_sel;
2992 if(sel != PICT_FRAME) continue;
2993 frame_list[0][index ]= *h->long_ref[i];
2994 frame_list[0][index++].pic_id= i;
2998 index = split_field_ref_list(h->default_ref_list[0],
2999 h->ref_count[0], frame_list[0],
3000 index, s->picture_structure,
3003 if(index < h->ref_count[0])
3004 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
/* trace output of the resulting lists */
3007 for (i=0; i<h->ref_count[0]; i++) {
3008 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3010 if(h->slice_type==B_TYPE){
3011 for (i=0; i<h->ref_count[1]; i++) {
3012 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
// Forward declarations of the two debug list printers; their definitions
// appear further down in this file.
3019 static void print_short_term(H264Context *h);
3020 static void print_long_term(H264Context *h);
3023 * Extract structure information about the picture described by pic_num in
3024 * the current decoding context (frame or field). Note that pic_num is
3025 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3026 * @param pic_num picture number for which to extract structure information
3027 * @param structure one of PICT_XXX describing structure of picture
3029 * @return frame number (short term) or long term index of picture
3030 * described by pic_num
3032 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3033 MpegEncContext * const s = &h->s;
// Default: report the structure of the picture currently being decoded.
3035 *structure = s->picture_structure;
// NOTE(review): the condition that selects the opposite-field case and the
// returned value are on lines that are missing from this listing.
3038 /* opposite field */
// XOR with PICT_FRAME switches *structure to the other field; presumably
// PICT_FRAME is the union of both field bits -- confirm against the header.
3039 *structure ^= PICT_FRAME;
// Reads the reference list reordering commands from the slice header
// bitstream (s->gb) and builds h->ref_list[] from h->default_ref_list[].
// Each command names one short term or long term picture, which is inserted
// at the current index while the entries behind it are shifted by one.
// NOTE(review): several short lines (braces, break/return statements, local
// declarations) are missing from this listing, so the error return paths are
// not visible here.
3046 static int decode_ref_pic_list_reordering(H264Context *h){
3047 MpegEncContext * const s = &h->s;
3048 int list, index, pic_structure;
3050 print_short_term(h);
3052 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3054 for(list=0; list<h->list_count; list++){
// Start from the default ordering, then apply the commands (if any) on top.
3055 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// One flag bit per list: when set, reordering commands follow.
3057 if(get_bits1(&s->gb)){
// Picture number prediction starts at the current picture number.
3058 int pred= h->curr_pic_num;
3060 for(index=0; ; index++){
3061 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3062 unsigned int pic_id;
3064 Picture *ref = NULL;
// idc 3 terminates the command loop (the statement taken is not visible here).
3066 if(reordering_of_pic_nums_idc==3)
3069 if(index >= h->ref_count[list]){
3070 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3074 if(reordering_of_pic_nums_idc<3){
// idc 0 and 1: short term picture, addressed by a difference to pred.
3075 if(reordering_of_pic_nums_idc<2){
3076 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3079 if(abs_diff_pic_num >= h->max_pic_num){
3080 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3084 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3085 else pred+= abs_diff_pic_num;
// Wrap pred into [0, max_pic_num); the mask relies on max_pic_num being a
// power of two.
3086 pred &= h->max_pic_num - 1;
3088 frame_num = pic_num_extract(h, pred, &pic_structure);
// Search the short term list from its last entry towards the first.
3090 for(i= h->short_ref_count-1; i>=0; i--){
3091 ref = h->short_ref[i];
3092 assert(ref->reference);
3093 assert(!ref->long_ref);
3094 if(ref->data[0] != NULL &&
3095 ref->frame_num == frame_num &&
3096 (ref->reference & pic_structure) &&
3097 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
// idc 2: long term picture, addressed directly by its index.
3104 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3106 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3109 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3112 ref = h->long_ref[long_idx];
3113 assert(!(ref && !ref->reference));
3114 if(ref && (ref->reference & pic_structure)){
3115 ref->pic_id= pic_id;
3116 assert(ref->long_ref);
3124 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3125 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Locate an existing copy of ref at or behind index (stopping at the last
// slot), then shift the entries in between by one and store ref at index.
3127 for(i=index; i+1<h->ref_count[list]; i++){
3128 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3131 for(; i > index; i--){
3132 h->ref_list[list][i]= h->ref_list[list][i-1];
3134 h->ref_list[list][index]= *ref;
// NOTE(review): the condition under which the entry is converted to a field
// view is not visible in this listing.
3136 int bot = pic_structure == PICT_BOTTOM_FIELD;
3137 pic_as_field(&h->ref_list[list][index], bot);
3141 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Any slot that still has no picture data is filled with the current picture.
3147 for(list=0; list<h->list_count; list++){
3148 for(index= 0; index < h->ref_count[list]; index++){
3149 if(!h->ref_list[list][index].data[0])
3150 h->ref_list[list][index]= s->current_picture;
// Temporal direct mode (B slice without spatial direct prediction) needs the
// distance scale factors.
3154 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3155 direct_dist_scale_factor(h);
3156 direct_ref_list_init(h);
// For every frame reference i of both lists, fills the slots 16+2*i and
// 16+2*i+1 of the same list with two field views of that frame and copies the
// matching weighted prediction parameters to those slots.
// NOTE(review): the declarations of list, i and j, the inner loops over j and
// the initial copy of the frame into field[0] are on lines missing from this
// listing.
3160 static void fill_mbaff_ref_list(H264Context *h){
3162 for(list=0; list<2; list++){ //FIXME try list_count
3163 for(i=0; i<h->ref_count[list]; i++){
3164 Picture *frame = &h->ref_list[list][i];
3165 Picture *field = &h->ref_list[list][16+2*i];
// A field view uses twice the line stride of the frame.
3168 field[0].linesize[j] <<= 1;
3169 field[1] = field[0];
// The second field view starts one frame line below the first one.
3171 field[1].data[j] += frame->linesize[j];
// Both field views inherit the explicit weights and offsets of the frame.
3173 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3174 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3176 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3177 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Replicate the implicit weights for the field slots as well.
// NOTE(review): implicit_weight_table() fills this table as [ref0][ref1],
// while here the first index is bounded by ref_count[1] and the second by
// ref_count[0] -- confirm that these bounds are intended.
3181 for(j=0; j<h->ref_count[1]; j++){
3182 for(i=0; i<h->ref_count[0]; i++)
3183 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3184 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3185 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parses the explicit weighted prediction table of a slice header: the two
// log2 weight denominators followed by per-reference luma and chroma weights
// and offsets. Sets h->use_weight_chroma (and, on lines partly missing from
// this listing, h->use_weight) when a parsed value differs from the default.
3189 static int pred_weight_table(H264Context *h){
3190 MpegEncContext * const s = &h->s;
3192 int luma_def, chroma_def;
3195 h->use_weight_chroma= 0;
3196 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3197 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is 1.0 expressed in the fixed point scale of the denominator.
// NOTE(review): no range check on the denominators is visible before they are
// used as shift counts -- confirm that oversized values cannot reach here.
3198 luma_def = 1<<h->luma_log2_weight_denom;
3199 chroma_def = 1<<h->chroma_log2_weight_denom;
3201 for(list=0; list<2; list++){
3202 for(i=0; i<h->ref_count[list]; i++){
3203 int luma_weight_flag, chroma_weight_flag;
3205 luma_weight_flag= get_bits1(&s->gb);
3206 if(luma_weight_flag){
3207 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3208 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3209 if( h->luma_weight[list][i] != luma_def
3210 || h->luma_offset[list][i] != 0)
// Default luma weight and zero offset; presumably the branch taken when
// luma_weight_flag is 0 (the else line is not visible in this listing).
3213 h->luma_weight[list][i]= luma_def;
3214 h->luma_offset[list][i]= 0;
3217 chroma_weight_flag= get_bits1(&s->gb);
3218 if(chroma_weight_flag){
3221 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3222 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3223 if( h->chroma_weight[list][i][j] != chroma_def
3224 || h->chroma_offset[list][i][j] != 0)
3225 h->use_weight_chroma= 1;
// Default chroma weight and zero offset, mirroring the luma defaults above.
3230 h->chroma_weight[list][i][j]= chroma_def;
3231 h->chroma_offset[list][i][j]= 0;
// Only B slices carry a table for the second list.
3235 if(h->slice_type != B_TYPE) break;
3237 h->use_weight= h->use_weight || h->use_weight_chroma;
// Derives the implicit bi-prediction weights h->implicit_weight[ref0][ref1]
// from the picture order counts of the current picture and of each pair of
// list 0 / list 1 references. Weights use a log2 denominator of 5 for luma
// and chroma.
3241 static void implicit_weight_table(H264Context *h){
3242 MpegEncContext * const s = &h->s;
3244 int cur_poc = s->current_picture_ptr->poc;
// Special case: one reference per list and the current picture lies exactly
// midway between them in POC.
3246 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3247 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3249 h->use_weight_chroma= 0;
3254 h->use_weight_chroma= 2;
3255 h->luma_log2_weight_denom= 5;
3256 h->chroma_log2_weight_denom= 5;
3258 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3259 int poc0 = h->ref_list[0][ref0].poc;
3260 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3261 int poc1 = h->ref_list[1][ref1].poc;
// td: clipped POC distance between the two references,
// tb: clipped POC distance from the list 0 reference to the current picture.
3262 int td = av_clip(poc1 - poc0, -128, 127);
3264 int tb = av_clip(cur_poc - poc0, -128, 127);
// NOTE(review): tx divides by td; the guard against td == 0 is presumably on
// a line missing from this listing -- confirm.
3265 int tx = (16384 + (FFABS(td) >> 1)) / td;
3266 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Scale factors outside [-64, 128] fall back to the equal weight 32.
3267 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3268 h->implicit_weight[ref0][ref1] = 32;
3270 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3272 h->implicit_weight[ref0][ref1] = 32;
3278 * Mark a picture as no longer needed for reference. The refmask
3279 * argument allows unreferencing of individual fields or the whole frame.
3280 * If the picture becomes entirely unreferenced, but is being held for
3281 * display purposes, it is marked as such.
3282 * @param refmask mask of fields to unreference; the mask is bitwise
3283 * anded with the reference marking of pic
3284 * @return non-zero if pic becomes entirely unreferenced (except possibly
3285 * for display purposes), zero if one of the fields remains in reference
3288 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// Keep only the reference bits selected by refmask; a non-zero result means
// that some part of the picture is still used for reference.
3290 if (pic->reference &= refmask) {
// The picture is no longer a reference. If it is still waiting to be output
// it is tagged with DELAYED_PIC_REF instead of being left at zero.
3293 if(pic == h->delayed_output_pic)
3294 pic->reference=DELAYED_PIC_REF;
// h->delayed_pic is scanned up to its first NULL entry.
3296 for(i = 0; h->delayed_pic[i]; i++)
3297 if(pic == h->delayed_pic[i]){
3298 pic->reference=DELAYED_PIC_REF;
3307 * instantaneous decoder refresh.
// Empties both reference lists: every long term and short term picture is
// passed to unreference_pic() with a zero mask and its list slot is cleared.
3309 static void idr(H264Context *h){
// All 16 long term slots are visited; the list may contain NULL holes.
3312 for(i=0; i<16; i++){
3313 if (h->long_ref[i] != NULL) {
3314 unreference_pic(h, h->long_ref[i], 0);
3315 h->long_ref[i]= NULL;
3318 h->long_ref_count=0;
// The short term list is dense, so only the first short_ref_count entries
// are visited.
3320 for(i=0; i<h->short_ref_count; i++){
3321 unreference_pic(h, h->short_ref[i], 0);
3322 h->short_ref[i]= NULL;
3324 h->short_ref_count=0;
3327 /* forget old pics after a seek */
// Drops every picture that is held for delayed output and clears the
// reference marking of the current picture.
// NOTE(review): any further calls made by this function are on lines missing
// from this listing.
3328 static void flush_dpb(AVCodecContext *avctx){
3329 H264Context *h= avctx->priv_data;
// Release all 16 delayed output slots.
3331 for(i=0; i<16; i++) {
3332 if(h->delayed_pic[i])
3333 h->delayed_pic[i]->reference= 0;
3334 h->delayed_pic[i]= NULL;
3336 if(h->delayed_output_pic)
3337 h->delayed_output_pic->reference= 0;
3338 h->delayed_output_pic= NULL;
3340 if(h->s.current_picture_ptr)
3341 h->s.current_picture_ptr->reference= 0;
3345 * Find a Picture in the short term reference list by frame number.
3346 * @param frame_num frame number to search for
3347 * @param idx the index into h->short_ref where returned picture is found
3348 * undefined if no picture found.
3349 * @return pointer to the found picture, or NULL if no pic with the provided
3350 * frame number is found
3352 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3353 MpegEncContext * const s = &h->s;
// Linear scan of the short term list, first entry first.
3356 for(i=0; i<h->short_ref_count; i++){
3357 Picture *pic= h->short_ref[i];
// With MMCO debugging enabled every visited entry is logged.
3358 if(s->avctx->debug&FF_DEBUG_MMCO)
3359 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
// NOTE(review): the statements executed on a match (storing the index and
// returning the picture) are on lines missing from this listing.
3360 if(pic->frame_num == frame_num) {
3369 * Remove a picture from the short term reference list by its index in
3370 * that list. This does no checking on the provided index; it is assumed
3371 * to be valid. Other list entries are shifted down.
3372 * @param i index into h->short_ref of picture to remove.
// Removes entry i from h->short_ref and closes the gap by moving the entries
// behind it down by one slot.
// Index 0 is a legal argument: remove_short() forwards the index reported by
// find_short(), and callers remove h->short_ref[0] by frame number, so the
// assertion must accept i == 0 (it previously required i > 0).
3374 static void remove_short_at_index(H264Context *h, int i){
3375 assert(i >= 0 && i < h->short_ref_count);
3376 h->short_ref[i]= NULL;
// After the decrement, (short_ref_count - i) entries remain behind slot i;
// nothing needs to be moved when the list has become empty.
3377 if (--h->short_ref_count)
3378 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3383 * @return the removed picture or NULL if an error occurs
// Looks up the short term reference with the given frame number and removes
// it from h->short_ref.
3385 static Picture * remove_short(H264Context *h, int frame_num){
3386 MpegEncContext * const s = &h->s;
3390 if(s->avctx->debug&FF_DEBUG_MMCO)
3391 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
// find_short() reports the list position through i.
3393 pic = find_short(h, frame_num, &i);
// NOTE(review): the check that pic is non-NULL before removal is presumably
// on a line missing from this listing -- confirm.
3395 remove_short_at_index(h, i);
3401 * Remove a picture from the long term reference list by its index in
3402 * that list. This does no checking on the provided index; it is assumed
3403 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3404 * @param i index into h->long_ref of picture to remove.
3406 static void remove_long_at_index(H264Context *h, int i){
// Long term slots are addressed by index, so the slot is simply cleared and
// no other entry moves.
3407 h->long_ref[i]= NULL;
// The count is decremented unconditionally; the caller must only pass the
// index of an occupied slot.
3408 h->long_ref_count--;
3413 * @return the removed picture or NULL if an error occurs
// Takes the picture stored in long term slot i out of h->long_ref.
3415 static Picture * remove_long(H264Context *h, int i){
3418 pic= h->long_ref[i];
// NOTE(review): the check that the slot is occupied and the return statement
// are on lines missing from this listing.
3420 remove_long_at_index(h, i);
3426 * print short term list
3428 static void print_short_term(H264Context *h) {
// Output is produced only when the FF_DEBUG_MMCO debug flag is set.
3430 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3431 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
// One line per entry: list index, frame number, POC and first data pointer.
3432 for(i=0; i<h->short_ref_count; i++){
3433 Picture *pic= h->short_ref[i];
3434 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3440 * print long term list
3442 static void print_long_term(H264Context *h) {
// Output is produced only when the FF_DEBUG_MMCO debug flag is set.
3444 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3445 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
// All 16 long term slots are visited.
3446 for(i = 0; i < 16; i++){
3447 Picture *pic= h->long_ref[i];
// NOTE(review): the guard that skips empty (NULL) slots is presumably on a
// line missing from this listing -- confirm.
3449 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3456 * Executes the reference picture marking (memory management control operations).
// Applies mmco_count memory management control operations from mmco[] to the
// short term and long term reference lists, then (unless the current picture
// was made a long term reference) inserts the current picture at the head of
// the short term list.
// NOTE(review): several case labels, break statements and the return are on
// lines missing from this listing.
3458 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3459 MpegEncContext * const s = &h->s;
3461 int current_is_long=0;
3464 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3465 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3467 for(i=0; i<mmco_count; i++){
// NOTE(review): this debug line reads h->mmco[i] while the switch below reads
// the mmco parameter; they only agree when the caller passes h->mmco.
3468 if(s->avctx->debug&FF_DEBUG_MMCO)
3469 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3471 switch(mmco[i].opcode){
// Drop one short term reference.
3472 case MMCO_SHORT2UNUSED:
3473 pic= remove_short(h, mmco[i].short_pic_num);
3475 unreference_pic(h, pic, 0);
3476 else if(s->avctx->debug&FF_DEBUG_MMCO)
3477 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// Turn a short term reference into a long term one; whatever occupied the
// target long term slot is released first.
3479 case MMCO_SHORT2LONG:
3480 pic= remove_long(h, mmco[i].long_arg);
3481 if(pic) unreference_pic(h, pic, 0);
3483 h->long_ref[ mmco[i].long_arg ]= remove_short(h, mmco[i].short_pic_num);
3484 if (h->long_ref[ mmco[i].long_arg ]){
3485 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3486 h->long_ref_count++;
// Drop one long term reference.
3489 case MMCO_LONG2UNUSED:
3490 pic= remove_long(h, mmco[i].long_arg);
3492 unreference_pic(h, pic, 0);
3493 else if(s->avctx->debug&FF_DEBUG_MMCO)
3494 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// Store the current picture in a long term slot (the case label for this
// branch is not visible in this listing).
3497 pic= remove_long(h, mmco[i].long_arg);
3498 if(pic) unreference_pic(h, pic, 0);
3500 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3501 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3502 h->long_ref_count++;
3506 case MMCO_SET_MAX_LONG:
3507 assert(mmco[i].long_arg <= 16);
3508 // just remove the long term which index is greater than new max
3509 for(j = mmco[i].long_arg; j<16; j++){
3510 pic = remove_long(h, j);
3511 if (pic) unreference_pic(h, pic, 0);
// Full reset: release every short term and every long term reference (the
// case label for this branch is not visible in this listing).
3515 while(h->short_ref_count){
3516 pic= remove_short(h, h->short_ref[0]->frame_num);
3517 if(pic) unreference_pic(h, pic, 0);
3519 for(j = 0; j < 16; j++) {
3520 pic= remove_long(h, j);
3521 if(pic) unreference_pic(h, pic, 0);
// The current picture becomes the newest short term reference. An entry that
// already carries the same frame number is removed and reported as an error.
3528 if(!current_is_long){
3529 pic= remove_short(h, s->current_picture_ptr->frame_num);
3531 unreference_pic(h, pic, 0);
3532 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// Make room at the head of the list.
3535 if(h->short_ref_count)
3536 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3538 h->short_ref[0]= s->current_picture_ptr;
3539 h->short_ref[0]->long_ref=0;
3540 h->short_ref_count++;
3543 print_short_term(h);
// Parses the reference picture marking part of a slice header from gb and
// stores the resulting operations in h->mmco[]. Three sources are handled:
// the two flag bits of an IDR slice, an explicit list of operations, and a
// sliding window fallback that drops the last short term reference.
// NOTE(review): the lines that set the operation count and the return values
// are missing from this listing.
3548 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3549 MpegEncContext * const s = &h->s;
3552 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// Each flag bit is mapped to -1 (bit clear) or 0 (bit set).
3553 s->broken_link= get_bits1(gb) -1;
3554 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3555 if(h->mmco[0].long_arg == -1)
3558 h->mmco[0].opcode= MMCO_LONG;
3562 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
// Read at most MAX_MMCO_COUNT operations.
3563 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3564 MMCOOpcode opcode= get_ue_golomb(gb);
3566 h->mmco[i].opcode= opcode;
// Short term operands are coded as a difference to the current frame number
// and wrapped to the frame number range.
3567 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3568 h->mmco[i].short_pic_num= (h->frame_num - get_ue_golomb(gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
3569 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3570 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3574 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3575 unsigned int long_arg= get_ue_golomb(gb);
3576 if(/*h->mmco[i].long_arg >= h->long_ref_count || h->long_ref[ h->mmco[i].long_arg ] == NULL*/ long_arg >= 16){
3577 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3580 h->mmco[i].long_arg= long_arg;
// Opcodes above MMCO_LONG are rejected.
3583 if(opcode > (unsigned)MMCO_LONG){
3584 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3587 if(opcode == MMCO_END)
3592 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// Sliding window: when the reference buffer is full, schedule removal of the
// last entry of the short term list.
// NOTE(review): no check that short_ref_count is non-zero is visible before
// short_ref[short_ref_count - 1] is read -- confirm.
3594 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
3595 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3596 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Computes the picture order count of the current picture for the three POC
// types signalled in the SPS and stores the result in
// s->current_picture_ptr->field_poc[] and ->poc.
// NOTE(review): declarations, else lines and the return statement are on
// lines missing from this listing.
3606 static int init_poc(H264Context *h){
3607 MpegEncContext * const s = &h->s;
3608 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset accumulates one max_frame_num for every wrap of frame_num
// and restarts at zero on an IDR slice.
3611 if(h->nal_unit_type == NAL_IDR_SLICE){
3612 h->frame_num_offset= 0;
3614 if(h->frame_num < h->prev_frame_num)
3615 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3617 h->frame_num_offset= h->prev_frame_num_offset;
// POC type 0: the LSBs are transmitted, the MSBs are tracked by detecting
// wraps of more than half the LSB range in either direction.
3620 if(h->sps.poc_type==0){
3621 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3623 if(h->nal_unit_type == NAL_IDR_SLICE){
3628 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3629 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3630 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3631 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3633 h->poc_msb = h->prev_poc_msb;
3634 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3636 field_poc[1] = h->poc_msb + h->poc_lsb;
3637 if(s->picture_structure == PICT_FRAME)
3638 field_poc[1] += h->delta_poc_bottom;
// POC type 1: the POC is predicted from the frame number using the cycle of
// offsets stored in the SPS, then corrected by the transmitted deltas.
3639 }else if(h->sps.poc_type==1){
3640 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3643 if(h->sps.poc_cycle_length != 0)
3644 abs_frame_num = h->frame_num_offset + h->frame_num;
3648 if(h->nal_ref_idc==0 && abs_frame_num > 0)
// Sum of all offsets of one complete cycle.
3651 expected_delta_per_poc_cycle = 0;
3652 for(i=0; i < h->sps.poc_cycle_length; i++)
3653 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// Whole cycles first, then the offsets of the partially completed cycle.
3655 if(abs_frame_num > 0){
3656 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3657 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3659 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3660 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3661 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3665 if(h->nal_ref_idc == 0)
3666 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3668 field_poc[0] = expectedpoc + h->delta_poc[0];
3669 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3671 if(s->picture_structure == PICT_FRAME)
3672 field_poc[1] += h->delta_poc[1];
// Remaining POC type: derived directly from the frame number; non reference
// pictures get the odd value just below.
3675 if(h->nal_unit_type == NAL_IDR_SLICE){
3678 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3679 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Store the POC of each field that is part of the current picture structure.
3685 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3686 s->current_picture_ptr->field_poc[0]= field_poc[0];
3687 s->current_picture_ptr->poc = field_poc[0];
3689 if(s->picture_structure != PICT_TOP_FIELD) {
3690 s->current_picture_ptr->field_poc[1]= field_poc[1];
3691 s->current_picture_ptr->poc = field_poc[1];
// Once both field values are available the picture POC is the smaller one.
3693 if(!FIELD_PICTURE || !s->first_field)
3694 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
3701 * initialize scan tables
// Fills the per-context scan tables. When the C reference IDCT is in use the
// constant tables are copied unchanged; otherwise every table entry is passed
// through the local T() macro. The *_q0 pointers select which set of tables
// is used, depending on h->sps.transform_bypass.
3703 static void init_scan_tables(H264Context *h){
3704 MpegEncContext * const s = &h->s;
3706 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3707 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3708 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3710 for(i=0; i<16; i++){
// T() swaps the low two bits of a 4 bit position with its high two bits.
3711 #define T(x) (x>>2) | ((x<<2) & 0xF)
3712 h->zigzag_scan[i] = T(zigzag_scan[i]);
3713 h-> field_scan[i] = T( field_scan[i]);
3717 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3718 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3719 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3720 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3721 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3723 for(i=0; i<64; i++){
// Same swap for 6 bit positions: low three bits and high three bits.
3724 #define T(x) (x>>3) | ((x&7)<<3)
3725 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3726 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3727 h->field_scan8x8[i] = T(field_scan8x8[i]);
3728 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// With transform bypass the *_q0 pointers refer to the constant tables,
// otherwise to the per-context tables filled above.
3732 if(h->sps.transform_bypass){ //FIXME same ugly
3733 h->zigzag_scan_q0 = zigzag_scan;
3734 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3735 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3736 h->field_scan_q0 = field_scan;
3737 h->field_scan8x8_q0 = field_scan8x8;
3738 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3740 h->zigzag_scan_q0 = h->zigzag_scan;
3741 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3742 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3743 h->field_scan_q0 = h->field_scan;
3744 h->field_scan8x8_q0 = h->field_scan8x8;
3745 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3750 * Replicates H264 "master" context to thread contexts.
// Copies the per-picture state listed below from src to dst: block offsets,
// current picture, line sizes, POC and frame number history, the reference
// lists and the dequantisation coefficient tables.
3752 static void clone_slice(H264Context *dst, H264Context *src)
3754 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3755 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3756 dst->s.current_picture = src->s.current_picture;
3757 dst->s.linesize = src->s.linesize;
3758 dst->s.uvlinesize = src->s.uvlinesize;
// Values that the POC and frame number derivation depends on.
3760 dst->prev_poc_msb = src->prev_poc_msb;
3761 dst->prev_poc_lsb = src->prev_poc_lsb;
3762 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3763 dst->prev_frame_num = src->prev_frame_num;
// NOTE(review): short_ref_count is copied but no copy of long_ref_count is
// visible in this listing -- confirm whether that is intended.
3764 dst->short_ref_count = src->short_ref_count;
// The arrays are copied whole; both contexts have the same type, so the
// sizes taken from dst and from src are identical.
3766 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3767 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3768 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3769 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3771 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3772 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3776 * decodes a slice header.
3777 * this will also call MPV_common_init() and frame_start() as needed
3779 * @param h h264context
3780 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3782 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
// Parses one slice header from s->gb in bitstream order: slice position and
// type, parameter set selection, picture geometry, frame number and picture
// structure, POC fields, reference counts and lists, weighting tables,
// marking operations, QP and deblocking parameters.
// NOTE(review): many short lines (braces, return statements, simple range
// checks) are missing from this listing, so most error exits are not visible.
3784 static int decode_slice_header(H264Context *h, H264Context *h0){
3785 MpegEncContext * const s = &h->s;
3786 unsigned int first_mb_in_slice;
3787 unsigned int pps_id;
3788 int num_ref_idx_active_override_flag;
3789 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3790 unsigned int slice_type, tmp, i;
3791 int default_ref_list_done = 0;
// A slice of a non reference NAL unit may be dropped.
3793 s->dropable= h->nal_ref_idc == 0;
3795 first_mb_in_slice= get_ue_golomb(&s->gb);
// In chunk mode a slice starting at macroblock 0 begins a new picture.
3797 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3798 h0->current_slice = 0;
3799 s->current_picture_ptr= NULL;
3802 slice_type= get_ue_golomb(&s->gb);
// NOTE(review): this message prints h->slice_type, which still holds the
// value of an earlier slice, not the slice_type that was just parsed.
3804 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3809 h->slice_type_fixed=1;
3811 h->slice_type_fixed=0;
3813 slice_type= slice_type_map[ slice_type ];
// The default reference list is not rebuilt for I slices nor when the type
// equals that of the previous slice of the same picture.
3814 if (slice_type == I_TYPE
3815 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3816 default_ref_list_done = 1;
3818 h->slice_type= slice_type;
3820 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// Select the PPS and, through it, the SPS; both are copied from the master
// context h0.
3822 pps_id= get_ue_golomb(&s->gb);
3823 if(pps_id>=MAX_PPS_COUNT){
3824 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3827 if(!h0->pps_buffers[pps_id]) {
3828 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3831 h->pps= *h0->pps_buffers[pps_id];
3833 if(!h0->sps_buffers[h->pps.sps_id]) {
3834 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3837 h->sps = *h0->sps_buffers[h->pps.sps_id];
// Dequantisation tables are rebuilt only by the master context and only when
// the PPS changes.
3839 if(h == h0 && h->dequant_coeff_pps != pps_id){
3840 h->dequant_coeff_pps = pps_id;
3841 init_dequant_tables(h);
// Picture size in macroblocks; the height is doubled unless the stream is
// frame_mbs_only.
3844 s->mb_width= h->sps.mb_width;
3845 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3847 h->b_stride= s->mb_width*4;
3848 h->b8_stride= s->mb_width*2;
// Visible size in pixels after cropping.
3850 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3851 if(h->sps.frame_mbs_only_flag)
3852 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3854 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3856 if (s->context_initialized
3857 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3859 return -1; // width / height changed during parallelized decoding
3863 if (!s->context_initialized) {
3865 return -1; // we cant (re-)initialize context during parallel decoding
3866 if (MPV_common_init(s) < 0)
3869 init_scan_tables(h);
// One extra context per additional thread: the MpegEncContext part is copied
// from h, the rest of the structure is zeroed.
// NOTE(review): no NULL check of the av_malloc() result is visible before it
// is written to -- confirm.
3872 for(i = 1; i < s->avctx->thread_count; i++) {
3874 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3875 memcpy(c, h, sizeof(MpegEncContext));
3876 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3879 init_scan_tables(c);
3883 for(i = 0; i < s->avctx->thread_count; i++)
3884 if(context_init(h->thread_context[i]) < 0)
// Export size, sample aspect ratio and time base to the codec context.
3887 s->avctx->width = s->width;
3888 s->avctx->height = s->height;
3889 s->avctx->sample_aspect_ratio= h->sps.sar;
3890 if(!s->avctx->sample_aspect_ratio.den)
3891 s->avctx->sample_aspect_ratio.den = 1;
3893 if(h->sps.timing_info_present_flag){
3894 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Streams tagged with an x264 build number below 44 get a doubled time base
// denominator.
3895 if(h->x264_build > 0 && h->x264_build < 44)
3896 s->avctx->time_base.den *= 2;
3897 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3898 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3902 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// Picture structure: frame, single field, or frame with MBAFF.
3905 h->mb_aff_frame = 0;
3906 if(h->sps.frame_mbs_only_flag){
3907 s->picture_structure= PICT_FRAME;
3909 if(get_bits1(&s->gb)) { //field_pic_flag
3910 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3911 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
3913 s->picture_structure= PICT_FRAME;
3914 h->mb_aff_frame = h->sps.mb_aff;
// The first slice of a picture starts a new frame.
3918 if(h0->current_slice == 0){
3919 if(frame_start(h) < 0)
3925 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
// Validate the slice start position and convert it to macroblock coordinates.
3927 assert(s->mb_num == s->mb_width * s->mb_height);
3928 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3929 first_mb_in_slice >= s->mb_num){
3930 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3933 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3934 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3935 if (s->picture_structure == PICT_BOTTOM_FIELD)
3936 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3937 assert(s->mb_y < s->mb_height);
// Picture numbers: frames use frame_num directly, fields use 2*frame_num+1
// with twice the range.
3939 if(s->picture_structure==PICT_FRAME){
3940 h->curr_pic_num= h->frame_num;
3941 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3943 h->curr_pic_num= 2*h->frame_num + 1;
3944 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3947 if(h->nal_unit_type == NAL_IDR_SLICE){
3948 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC syntax elements; which ones are present depends on the POC type.
3951 if(h->sps.poc_type==0){
3952 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3954 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3955 h->delta_poc_bottom= get_se_golomb(&s->gb);
3959 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3960 h->delta_poc[0]= get_se_golomb(&s->gb);
3962 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3963 h->delta_poc[1]= get_se_golomb(&s->gb);
3968 if(h->pps.redundant_pic_cnt_present){
3969 h->redundant_pic_count= get_ue_golomb(&s->gb);
3972 //set defaults, might be overridden a few lines later
3973 h->ref_count[0]= h->pps.ref_count[0];
3974 h->ref_count[1]= h->pps.ref_count[1];
3976 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
3977 if(h->slice_type == B_TYPE){
3978 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3979 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
3980 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
3982 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3984 if(num_ref_idx_active_override_flag){
3985 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3986 if(h->slice_type==B_TYPE)
3987 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// More than 32 references per list is rejected; the counts are reset to 1.
3989 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3990 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3991 h->ref_count[0]= h->ref_count[1]= 1;
3995 if(h->slice_type == B_TYPE)
4002 if(!default_ref_list_done){
4003 fill_default_ref_list(h);
4006 if(decode_ref_pic_list_reordering(h) < 0)
// Explicit weights are parsed from the bitstream, implicit weights are
// derived from the POCs.
4009 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4010 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4011 pred_weight_table(h);
4012 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4013 implicit_weight_table(h);
// Marking operations are stored in the master context h0.
4018 decode_ref_pic_marking(h0, &s->gb);
4021 fill_mbaff_ref_list(h);
4023 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4024 tmp = get_ue_golomb(&s->gb);
4026 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4029 h->cabac_init_idc= tmp;
// Slice QP is the PPS initial QP plus the signed delta from the header.
4032 h->last_qscale_diff = 0;
4033 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4035 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4039 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4040 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4041 //FIXME qscale / qp ... stuff
4042 if(h->slice_type == SP_TYPE){
4043 get_bits1(&s->gb); /* sp_for_switch_flag */
4045 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4046 get_se_golomb(&s->gb); /* slice_qs_delta */
// Deblocking defaults: enabled with zero offsets.
4049 h->deblocking_filter = 1;
4050 h->slice_alpha_c0_offset = 0;
4051 h->slice_beta_offset = 0;
4052 if( h->pps.deblocking_filter_parameters_present ) {
4053 tmp= get_ue_golomb(&s->gb);
4055 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4058 h->deblocking_filter= tmp;
4059 if(h->deblocking_filter < 2)
4060 h->deblocking_filter^= 1; // 1<->0
4062 if( h->deblocking_filter ) {
4063 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4064 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// The user supplied skip_loop_filter level can switch deblocking off.
4068 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4069 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4070 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4071 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4072 h->deblocking_filter= 0;
// Deblocking mode 1 with several contexts: in fast mode it is replaced by
// mode 2, otherwise decoding falls back to a single context.
4074 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4075 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4076 /* Cheat slightly for speed:
4077 Dont bother to deblock across slices */
4078 h->deblocking_filter = 2;
4080 h0->max_contexts = 1;
4081 if(!h0->single_decode_warning) {
4082 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4083 h0->single_decode_warning = 1;
4086 return 1; // deblocking switched inside frame
// NOTE(review): the get_bits() call below has a placeholder argument and is
// not valid C; presumably it sits in a disabled preprocessor block whose
// directives are missing from this listing -- confirm.
4091 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4092 slice_group_change_cycle= get_bits(&s->gb, ?);
// Book-keeping for the next slice of the same picture.
4095 h0->last_slice_type = slice_type;
4096 h->slice_num = ++h0->current_slice;
4098 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4099 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4101 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4102 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4104 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4106 av_get_pict_type_char(h->slice_type),
4107 pps_id, h->frame_num,
4108 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4109 h->ref_count[0], h->ref_count[1],
4111 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4113 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// In fast mode, non reference pictures use the 2-tap quarter pel functions
// instead of the H.264 ones.
4117 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
4118 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4119 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4121 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4122 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
// Reads a level prefix from the bitstream by inspecting the bit reader cache
// and consuming log bits.
// NOTE(review): the declarations and the return statement are on lines
// missing from this listing.
4131 static inline int get_level_prefix(GetBitContext *gb){
4135 OPEN_READER(re, gb);
4136 UPDATE_CACHE(re, gb);
4137 buf=GET_CACHE(re, gb);
// Assuming av_log2() yields the position of the highest set bit, log is the
// number of bits up to and including the first set bit of the cache.
4139 log= 32 - av_log2(buf);
// Debug output; presumably compiled only in trace builds (the enclosing
// preprocessor lines are not visible here).
4141 print_bin(buf>>(32-log), log);
4142 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4145 LAST_SKIP_BITS(re, gb, log);
4146 CLOSE_READER(re, gb);
// Inspects the entries of h->sub_mb_type to decide whether the 8x8
// transform may be used for the current macroblock.
// The test below is true for an entry that is not a sub 8x8 partition, or
// that is direct while sps.direct_8x8_inference_flag is not set.
// NOTE(review): the loop header and the statements executed on a match are
// not visible in this listing; presumably a match disallows the transform.
4151 static inline int get_dct8x8_allowed(H264Context *h){
4154 if(!IS_SUB_8X8(h->sub_mb_type[i])
4155 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4162 * decodes a residual block.
4163 * @param n block index
4164 * @param scantable scantable
4165 * @param max_coeff number of coefficients in the block
4166 * @return <0 if an error occurred
// Decodes the CAVLC coded coefficients of one residual block from gb and
// stores them in block.
// n selects the block: CHROMA_DC_BLOCK_INDEX and LUMA_DC_BLOCK_INDEX are
// handled specially, any other value is used as an index into scan8.
// scantable maps a coefficient number to its position in block.
// qmul holds the dequantisation factors, applied as
// (level * qmul[j] + 32) >> 6 in the second store variant below.
// max_coeff is the upper bound that total_coeff is checked against.
4168 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4169 MpegEncContext * const s = &h->s;
// Maps a predicted non-zero count (0..16) to one of the four
// coeff_token VLC tables.
4170 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4172 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4174 //FIXME put trailing_ones into the context
// coeff_token packs total_coeff in its upper bits (>>2) and the number of
// trailing ones in its low two bits (&3).
4176 if(n == CHROMA_DC_BLOCK_INDEX){
4177 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4178 total_coeff= coeff_token>>2;
// Luma DC: the VLC table is chosen from the predicted count of block 0.
4180 if(n == LUMA_DC_BLOCK_INDEX){
4181 total_coeff= pred_non_zero_count(h, 0);
4182 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4183 total_coeff= coeff_token>>2;
// Other blocks: predicted from block n, and the decoded count is written
// back to the non-zero count cache.
4185 total_coeff= pred_non_zero_count(h, n);
4186 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4187 total_coeff= coeff_token>>2;
4188 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4192 //FIXME set last_non_zero?
// More coefficients than the block can hold is reported as corruption.
4196 if(total_coeff > (unsigned)max_coeff) {
4197 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4201 trailing_ones= coeff_token&3;
4202 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4203 assert(total_coeff<=16);
// Each trailing one is +1 or -1, selected by a single sign bit.
4205 for(i=0; i<trailing_ones; i++){
4206 level[i]= 1 - 2*get_bits1(gb);
4210 int level_code, mask;
// suffix_length starts at 1 only for more than 10 coefficients with fewer
// than 3 trailing ones, otherwise at 0.
4211 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4212 int prefix= get_level_prefix(gb);
4214 //first coefficient has suffix_length equal to 0 or 1
4215 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4217 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4219 level_code= (prefix<<suffix_length); //part
4220 }else if(prefix==14){
4222 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4224 level_code= prefix + get_bits(gb, 4); //part
4225 }else if(prefix==15){
4226 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4227 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4229 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4233 if(trailing_ones < 3) level_code += 2;
// Convert level_code to a signed level: mask is -1 for odd codes, which
// negates the magnitude (2+level_code)>>1.
4238 mask= -(level_code&1);
4239 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4242 //remaining coefficients have suffix_length > 0
4243 for(;i<total_coeff;i++) {
// Thresholds on level_code, indexed by the current suffix_length.
4244 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4245 prefix = get_level_prefix(gb);
4247 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4248 }else if(prefix==15){
4249 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4251 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4254 mask= -(level_code&1);
4255 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4256 if(level_code > suffix_limit[suffix_length])
// A completely filled block is special-cased; otherwise the total number
// of zeros is read from the VLC table selected by total_coeff.
4261 if(total_coeff == max_coeff)
4264 if(n == CHROMA_DC_BLOCK_INDEX)
4265 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4267 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Coefficients are placed starting from the highest used scan position
// and moving towards lower positions by 1 + run_before each step.
4270 coeff_num = zeros_left + total_coeff - 1;
4271 j = scantable[coeff_num];
// First store variant: levels are written without dequantisation.
// NOTE(review): the condition choosing between the two variants is not
// visible in this listing.
4273 block[j] = level[0];
4274 for(i=1;i<total_coeff;i++) {
4277 else if(zeros_left < 7){
4278 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4280 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4282 zeros_left -= run_before;
4283 coeff_num -= 1 + run_before;
4284 j= scantable[ coeff_num ];
// Second store variant: levels are scaled by qmul with rounding.
4289 block[j] = (level[0] * qmul[j] + 32)>>6;
4290 for(i=1;i<total_coeff;i++) {
4293 else if(zeros_left < 7){
4294 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4296 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4298 zeros_left -= run_before;
4299 coeff_num -= 1 + run_before;
4300 j= scantable[ coeff_num ];
4302 block[j]= (level[i] * qmul[j] + 32)>>6;
// Error report for an inconsistent zero count (the guarding condition is
// not visible in this listing).
4307 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Predicts the field decoding flag of the current macroblock from its
// neighbours: the type of the left macroblock is used when it belongs to
// the current slice, otherwise the type of the macroblock one mb_stride
// above when that one belongs to the current slice.
// NOTE(review): the final fallback of the conditional expression is not
// visible in this listing.
4314 static void predict_field_decoding_flag(H264Context *h){
4315 MpegEncContext * const s = &h->s;
4316 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4317 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4318 ? s->current_picture.mb_type[mb_xy-1]
4319 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4320 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// Both flags are set to 1 when the chosen neighbour type is interlaced.
4322 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4326 * decodes a P_SKIP or B_SKIP macroblock
// Sets up a skipped macroblock: clears its non-zero counts, derives the
// motion (direct prediction for B slices, pred_pskip_motion otherwise),
// writes the motion back and records type, qscale and slice number for
// the macroblock.
4328 static void decode_mb_skip(H264Context *h){
4329 MpegEncContext * const s = &h->s;
4330 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// A skipped macroblock has no coefficients.
4333 memset(h->non_zero_count[mb_xy], 0, 16);
4334 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4337 mb_type|= MB_TYPE_INTERLACED;
4339 if( h->slice_type == B_TYPE )
4341 // just for fill_caches. pred_direct_motion will set the real mb_type
4342 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4344 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4345 pred_direct_motion(h, &mb_type);
4346 mb_type|= MB_TYPE_SKIP;
// Non B case: one 16x16 list 0 partition using reference 0 and the
// predicted skip motion vector for the whole 4x4 cache area.
4351 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4353 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4354 pred_pskip_motion(h, &mx, &my);
4355 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4356 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4359 write_back_motion(h, mb_type);
4360 s->current_picture.mb_type[mb_xy]= mb_type;
4361 s->current_picture.qscale_table[mb_xy]= s->qscale;
4362 h->slice_table[ mb_xy ]= h->slice_num;
4363 h->prev_mb_skipped= 1;
4367 * decodes a macroblock
4368 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Parses one CAVLC coded macroblock from s->gb: skip run handling,
// mb_type, I_PCM samples, intra prediction modes or inter reference
// indices and motion vectors, coded block pattern, dquant and finally the
// residual blocks via decode_residual.
// The visible error paths log a message; the residual error path returns -1.
// NOTE(review): many short lines (declarations, braces, else branches,
// returns) are not visible in this listing, so the comments below only
// describe the statements that can be seen.
4370 static int decode_mb_cavlc(H264Context *h){
4371 MpegEncContext * const s = &h->s;
4372 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4373 int partition_count;
4374 unsigned int mb_type, cbp;
4375 int dct8x8_allowed= h->pps.transform_8x8_mode;
4377 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4379 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4380 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non intra slices: a skip run is read once (when mb_skip_run is -1) and
// then counted down one macroblock at a time.
4382 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4383 if(s->mb_skip_run==-1)
4384 s->mb_skip_run= get_ue_golomb(&s->gb);
4386 if (s->mb_skip_run--) {
4387 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4388 if(s->mb_skip_run==0)
4389 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4391 predict_field_decoding_flag(h);
// The field decoding flag is read for even macroblock rows, otherwise it
// follows the picture structure.
4398 if( (s->mb_y&1) == 0 )
4399 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4401 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4403 h->prev_mb_skipped= 0;
// mb_type is an unsigned Exp-Golomb code interpreted through the table
// that matches the slice type.
4405 mb_type= get_ue_golomb(&s->gb);
4406 if(h->slice_type == B_TYPE){
4408 partition_count= b_mb_type_info[mb_type].partition_count;
4409 mb_type= b_mb_type_info[mb_type].type;
4412 goto decode_intra_mb;
4414 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4416 partition_count= p_mb_type_info[mb_type].partition_count;
4417 mb_type= p_mb_type_info[mb_type].type;
4420 goto decode_intra_mb;
4423 assert(h->slice_type == I_TYPE);
4426 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra types carry cbp and the 16x16 prediction mode in the table.
4430 cbp= i_mb_type_info[mb_type].cbp;
4431 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4432 mb_type= i_mb_type_info[mb_type].type;
4436 mb_type |= MB_TYPE_INTERLACED;
4438 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: raw 8 bit samples follow byte aligned and are copied into h->mb
// in the same layout that coefficient levels use.
4440 if(IS_INTRA_PCM(mb_type)){
4443 // We assume these blocks are very rare so we do not optimize it.
4444 align_get_bits(&s->gb);
4446 // The pixels are stored in the same order as levels in h->mb array.
4447 for(y=0; y<16; y++){
4448 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4449 for(x=0; x<16; x++){
4450 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4451 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
// First chroma plane starts at offset 256 in h->mb.
4455 const int index= 256 + 4*(y&3) + 32*(y>>2);
4457 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4458 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
// Second chroma plane starts at offset 256 + 64 in h->mb.
4462 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4464 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4465 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4469 // In deblocking, the quantizer is 0
4470 s->current_picture.qscale_table[mb_xy]= 0;
4471 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4472 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4473 // All coeffs are present
4474 memset(h->non_zero_count[mb_xy], 16, 16);
4476 s->current_picture.mb_type[mb_xy]= mb_type;
// The reference counts are doubled here and halved again at the end of
// this function (the guarding condition is not visible in this listing).
4481 h->ref_count[0] <<= 1;
4482 h->ref_count[1] <<= 1;
4485 fill_caches(h, mb_type, 0);
// Intra macroblocks: read and validate the prediction modes.
4488 if(IS_INTRA(mb_type)){
4490 // init_top_left_availability(h);
4491 if(IS_INTRA4x4(mb_type)){
4494 if(dct8x8_allowed && get_bits1(&s->gb)){
4495 mb_type |= MB_TYPE_8x8DCT;
4499 // fill_intra4x4_pred_table(h);
4500 for(i=0; i<16; i+=di){
4501 int mode= pred_intra_mode(h, i);
// A zero flag bit means the predicted mode is replaced: the 3 bit
// rem_mode is used, skipping over the predicted value.
4503 if(!get_bits1(&s->gb)){
4504 const int rem_mode= get_bits(&s->gb, 3);
4505 mode = rem_mode + (rem_mode >= mode);
4509 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4511 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4513 write_back_intra_pred_mode(h);
4514 if( check_intra4x4_pred_mode(h) < 0)
4517 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4518 if(h->intra16x16_pred_mode < 0)
4522 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4525 h->chroma_pred_mode= pred_mode;
// Four 8x8 partitions: each has its own sub macroblock type.
4526 }else if(partition_count==4){
4527 int i, j, sub_partition_count[4], list, ref[2][4];
4529 if(h->slice_type == B_TYPE){
4531 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4532 if(h->sub_mb_type[i] >=13){
4533 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4536 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4537 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// If any sub block is direct, run direct prediction and mark the cache
// entries at scan8[4] and scan8[12] as not available in both lists.
4539 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4540 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4541 pred_direct_motion(h, &mb_type);
4542 h->ref_cache[0][scan8[4]] =
4543 h->ref_cache[1][scan8[4]] =
4544 h->ref_cache[0][scan8[12]] =
4545 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4548 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4550 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4551 if(h->sub_mb_type[i] >=4){
4552 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4555 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4556 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices of the sub blocks, one list at a time.
4560 for(list=0; list<h->list_count; list++){
4561 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4563 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4564 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4565 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4567 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4579 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors of the sub blocks, one list at a time.
4581 for(list=0; list<h->list_count; list++){
4583 if(IS_DIRECT(h->sub_mb_type[i])) {
4584 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4587 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4588 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4590 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4591 const int sub_mb_type= h->sub_mb_type[i];
4592 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4593 for(j=0; j<sub_partition_count[i]; j++){
4595 const int index= 4*i + block_width*j;
4596 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// Final vector = prediction + signed Exp-Golomb difference per component.
4597 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4598 mx += get_se_golomb(&s->gb);
4599 my += get_se_golomb(&s->gb);
4600 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// The vector is replicated over the cache cells covered by the partition.
4602 if(IS_SUB_8X8(sub_mb_type)){
4604 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4606 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4607 }else if(IS_SUB_8X4(sub_mb_type)){
4608 mv_cache[ 1 ][0]= mx;
4609 mv_cache[ 1 ][1]= my;
4610 }else if(IS_SUB_4X8(sub_mb_type)){
4611 mv_cache[ 8 ][0]= mx;
4612 mv_cache[ 8 ][1]= my;
4614 mv_cache[ 0 ][0]= mx;
4615 mv_cache[ 0 ][1]= my;
4618 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// Direct macroblock: motion comes entirely from direct prediction.
4624 }else if(IS_DIRECT(mb_type)){
4625 pred_direct_motion(h, &mb_type);
4626 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4628 int list, mx, my, i;
4629 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16: one reference index and one vector per used list.
4630 if(IS_16X16(mb_type)){
4631 for(list=0; list<h->list_count; list++){
4633 if(IS_DIR(mb_type, 0, list)){
4634 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4635 if(val >= h->ref_count[list]){
4636 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4640 val= LIST_NOT_USED&0xFF;
4641 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4643 for(list=0; list<h->list_count; list++){
4645 if(IS_DIR(mb_type, 0, list)){
4646 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4647 mx += get_se_golomb(&s->gb);
4648 my += get_se_golomb(&s->gb);
4649 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4651 val= pack16to32(mx,my);
4654 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8: two partitions stacked vertically, 16*i cache cells apart.
4657 else if(IS_16X8(mb_type)){
4658 for(list=0; list<h->list_count; list++){
4661 if(IS_DIR(mb_type, i, list)){
4662 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4663 if(val >= h->ref_count[list]){
4664 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4668 val= LIST_NOT_USED&0xFF;
4669 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4672 for(list=0; list<h->list_count; list++){
4675 if(IS_DIR(mb_type, i, list)){
4676 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4677 mx += get_se_golomb(&s->gb);
4678 my += get_se_golomb(&s->gb);
4679 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4681 val= pack16to32(mx,my);
4684 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: two partitions side by side, 2*i cache cells apart.
4688 assert(IS_8X16(mb_type));
4689 for(list=0; list<h->list_count; list++){
4692 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4693 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4694 if(val >= h->ref_count[list]){
4695 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4699 val= LIST_NOT_USED&0xFF;
4700 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4703 for(list=0; list<h->list_count; list++){
4706 if(IS_DIR(mb_type, i, list)){
4707 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4708 mx += get_se_golomb(&s->gb);
4709 my += get_se_golomb(&s->gb);
4710 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4712 val= pack16to32(mx,my);
4715 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4721 if(IS_INTER(mb_type))
4722 write_back_motion(h, mb_type);
// Coded block pattern: read as a code number and mapped through the intra
// or inter table. Intra 16x16 already has its cbp from the type table.
4724 if(!IS_INTRA16x16(mb_type)){
4725 cbp= get_ue_golomb(&s->gb);
4727 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4731 if(IS_INTRA4x4(mb_type))
4732 cbp= golomb_to_intra4x4_cbp[cbp];
4734 cbp= golomb_to_inter_cbp[cbp];
// For non intra macroblocks with luma coefficients one bit selects the
// 8x8 transform when it is allowed.
4738 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4739 if(get_bits1(&s->gb))
4740 mb_type |= MB_TYPE_8x8DCT;
4742 s->current_picture.mb_type[mb_xy]= mb_type;
// Residual data is present when cbp is non zero or for intra 16x16.
4744 if(cbp || IS_INTRA16x16(mb_type)){
4745 int i8x8, i4x4, chroma_idx;
4747 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4748 const uint8_t *scan, *scan8x8, *dc_scan;
4750 // fill_non_zero_count_cache(h);
// Scan tables: field or zigzag variants, with separate q0 tables that are
// used when s->qscale is 0.
4752 if(IS_INTERLACED(mb_type)){
4753 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4754 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4755 dc_scan= luma_dc_field_scan;
4757 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4758 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4759 dc_scan= luma_dc_zigzag_scan;
// Quantiser delta: range checked, then qscale is wrapped into 0..51.
4762 dquant= get_se_golomb(&s->gb);
4764 if( dquant > 25 || dquant < -26 ){
4765 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4769 s->qscale += dquant;
4770 if(((unsigned)s->qscale) > 51){
4771 if(s->qscale<0) s->qscale+= 52;
4772 else s->qscale-= 52;
4775 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4776 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra 16x16: one luma DC block, then AC blocks with 15 coefficients
// each, starting at scan + 1.
4777 if(IS_INTRA16x16(mb_type)){
4778 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4779 return -1; //FIXME continue if partitioned and other return -1 too
4782 assert((cbp&15) == 0 || (cbp&15) == 15);
4785 for(i8x8=0; i8x8<4; i8x8++){
4786 for(i4x4=0; i4x4<4; i4x4++){
4787 const int index= i4x4 + 4*i8x8;
4788 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4794 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other types: each 8x8 luma quadrant is coded only if its cbp bit is set.
4797 for(i8x8=0; i8x8<4; i8x8++){
4798 if(cbp & (1<<i8x8)){
4799 if(IS_8x8DCT(mb_type)){
4800 DCTELEM *buf = &h->mb[64*i8x8];
// 8x8 transform: four interleaved 16 coefficient passes fill one buffer,
// and the four non zero counts are summed into the first cache cell.
4802 for(i4x4=0; i4x4<4; i4x4++){
4803 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4804 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4807 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4808 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4810 for(i4x4=0; i4x4<4; i4x4++){
4811 const int index= i4x4 + 4*i8x8;
4813 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// Uncoded quadrant: its four non zero counts are cleared.
4819 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4820 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC: one 4 coefficient block per plane, no qmul table is passed.
4826 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4827 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// Chroma AC: four 15 coefficient blocks per plane, dequantised with the
// table selected by plane, intra or inter, and the chroma qp.
4833 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4834 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4835 for(i4x4=0; i4x4<4; i4x4++){
4836 const int index= 16 + 4*chroma_idx + i4x4;
4837 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// No chroma AC: clear the chroma non zero counts.
4843 uint8_t * const nnz= &h->non_zero_count_cache[0];
4844 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4845 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: clear luma and chroma non zero counts.
4848 uint8_t * const nnz= &h->non_zero_count_cache[0];
4849 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4850 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4851 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4853 s->current_picture.qscale_table[mb_xy]= s->qscale;
4854 write_back_non_zero_count(h);
// Undo the earlier doubling of the reference counts.
4857 h->ref_count[0] >>= 1;
4858 h->ref_count[1] >>= 1;
// Decodes the field decoding flag with CABAC.
// The context index is built from the left macroblock pair (mba_xy) and
// the pair two rows above (mbb_xy): each is tested for belonging to the
// current slice and being interlaced. The decoded bin uses state 70 + ctx.
// NOTE(review): the statements inside the two tests are not visible in
// this listing; presumably each one increments ctx.
4864 static int decode_cabac_field_decoding_flag(H264Context *h) {
4865 MpegEncContext * const s = &h->s;
4866 const int mb_x = s->mb_x;
// Row of the top macroblock of the current pair.
4867 const int mb_y = s->mb_y & ~1;
4868 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4869 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4871 unsigned int ctx = 0;
4873 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4876 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4880 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes an intra macroblock type with CABAC using the states starting
// at ctx_base. intra_slice shifts which states are used for the later bins.
// Returns 0 for I4x4, 25 for PCM; otherwise mb_type starts at 1 (I16x16)
// and is increased by 12 for non zero luma cbp, by 4 or 8 for the chroma
// cbp and by the two final bins weighted 2 and 1.
// NOTE(review): the branch selecting between the two first bin variants
// and the final return are not visible in this listing.
4883 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4884 uint8_t *state= &h->cabac_state[ctx_base];
4888 MpegEncContext * const s = &h->s;
4889 const int mba_xy = h->left_mb_xy[0];
4890 const int mbb_xy = h->top_mb_xy;
// Neighbours in the same slice that are not I4x4 take part in the context
// selection of the first bin.
4892 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4894 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4896 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4897 return 0; /* I4x4 */
4900 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4901 return 0; /* I4x4 */
4904 if( get_cabac_terminate( &h->cabac ) )
4905 return 25; /* PCM */
4907 mb_type = 1; /* I16x16 */
4908 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4909 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4910 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4911 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4912 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// Decodes the macroblock type with CABAC according to the slice type.
// I slices delegate to decode_cabac_intra_mb_type with base 3.
// P slices decode the inter types with states 14..17, or an intra type
// with base 17 offset by 5.
// B slices select the first state from the neighbours and then build the
// type from up to six further bins; intra types use base 32 offset by 23.
// NOTE(review): some branch bodies and the fallback return are not visible
// in this listing.
4916 static int decode_cabac_mb_type( H264Context *h ) {
4917 MpegEncContext * const s = &h->s;
4919 if( h->slice_type == I_TYPE ) {
4920 return decode_cabac_intra_mb_type(h, 3, 1);
4921 } else if( h->slice_type == P_TYPE ) {
4922 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4924 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4925 /* P_L0_D16x16, P_8x8 */
4926 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4928 /* P_L0_D8x16, P_L0_D16x8 */
4929 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4932 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4934 } else if( h->slice_type == B_TYPE ) {
4935 const int mba_xy = h->left_mb_xy[0];
4936 const int mbb_xy = h->top_mb_xy;
// Neighbours in the same slice that are not direct take part in the
// context selection of the first bin.
4940 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4942 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4945 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4946 return 0; /* B_Direct_16x16 */
4948 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4949 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four bins form a 4 bit value, most significant bit first.
4952 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4953 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4954 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4955 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4957 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4958 else if( bits == 13 ) {
4959 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4960 } else if( bits == 14 )
4961 return 11; /* B_L1_L0_8x16 */
4962 else if( bits == 15 )
4963 return 22; /* B_8x8 */
// Remaining values need one more bin appended as the lowest bit.
4965 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4966 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4968 /* TODO SI/SP frames? */
// Decodes the skip flag of the macroblock at (mb_x, mb_y) with CABAC.
// The left (mba_xy) and top (mbb_xy) neighbour addresses are derived
// differently for MBAFF frames and for other pictures; neighbours that
// are in the current slice and not skipped take part in the context
// selection. The bin is decoded with state 11 + ctx.
// NOTE(review): several statements (ctx updates, the B slice adjustment
// and parts of the address computation) are not visible in this listing.
4973 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4974 MpegEncContext * const s = &h->s;
4978 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
// Address of the top macroblock of the current pair.
4979 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4982 && h->slice_table[mba_xy] == h->slice_num
4983 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4984 mba_xy += s->mb_stride;
4986 mbb_xy = mb_xy - s->mb_stride;
4988 && h->slice_table[mbb_xy] == h->slice_num
4989 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4990 mbb_xy -= s->mb_stride;
4992 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4994 int mb_xy = mb_x + mb_y*s->mb_stride;
// In field pictures the row above is two strides away.
4996 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4999 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5001 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5004 if( h->slice_type == B_TYPE )
5006 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// Decodes an intra 4x4 prediction mode with CABAC given the predicted
// mode pred_mode. A first bin (state 68) is tested, then three bins
// (state 69) form a 3 bit value, least significant bit first, which is
// compared against pred_mode.
// NOTE(review): the return statements and the adjustment made when
// mode >= pred_mode are not visible in this listing.
5009 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5012 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5015 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5016 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5017 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5019 if( mode >= pred_mode )
// Decodes the chroma prediction mode with CABAC.
// Left and top neighbours in the current slice with a non zero entry in
// chroma_pred_mode_table take part in the context selection of the first
// bin (state 64 + ctx); up to two further bins use state 64 + 3.
// NOTE(review): the ctx updates and the return values are not visible in
// this listing.
5025 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5026 const int mba_xy = h->left_mb_xy[0];
5027 const int mbb_xy = h->top_mb_xy;
5031 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5032 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5035 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5038 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5041 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5043 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Decodes the four luma bits of the coded block pattern with CABAC.
// cbp_a and cbp_b are the left and top cbp values, or -1 (all bits set)
// when the neighbour is not in the current slice. Each bit is decoded
// with state 73 + ctx, where ctx adds 1 if the block to the left is
// uncoded and 2 if the block above is uncoded; blocks inside the current
// macroblock use the bits already decoded into cbp.
// NOTE(review): the return statement is not visible in this listing.
5049 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5050 int cbp_b, cbp_a, ctx, cbp = 0;
5052 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5053 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5055 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5056 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5057 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5058 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5059 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5060 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5061 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5062 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// Decodes the chroma part of the coded block pattern with CABAC.
// cbp_a and cbp_b are bits 4..5 of the left and top cbp. The first bin
// uses a context built from neighbours with a value above 0, the second
// from neighbours with a value equal to 2; the final result of the second
// stage is 1 plus the decoded bin.
// NOTE(review): the ctx initialisations and the return taken when the
// first bin is 0 are not visible in this listing.
5065 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5069 cbp_a = (h->left_cbp>>4)&0x03;
5070 cbp_b = (h-> top_cbp>>4)&0x03;
5073 if( cbp_a > 0 ) ctx++;
5074 if( cbp_b > 0 ) ctx += 2;
5075 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5079 if( cbp_a == 2 ) ctx++;
5080 if( cbp_b == 2 ) ctx += 2;
5081 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// Decodes the quantiser delta with CABAC.
// Bins are read in a loop with state 60 + ctx; h->last_qscale_diff being
// non zero influences the initial context. The loop is bounded by the
// val > 102 check, and one of the results is -(val + 1)/2.
// NOTE(review): the loop body, the other return value and the condition
// choosing the sign are not visible in this listing.
5083 static int decode_cabac_mb_dqp( H264Context *h) {
5087 if( h->last_qscale_diff != 0 )
5090 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5096 if(val > 102) //prevent infinite loop
5103 return -(val + 1)/2;
// Decodes a P sub macroblock type with CABAC using up to three bins with
// the states 21, 22 and 23.
// NOTE(review): the values returned for each bin outcome are not visible
// in this listing.
5105 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5106 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5108 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5110 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// Decodes a B sub macroblock type with CABAC using the states 36..39.
// Returns 0 for B_Direct_8x8, 1 or 2 for B_L0_8x8 and B_L1_8x8, 11 or 12
// for B_L1_4x4 and B_Bi_4x4; the remaining types are built by adding two
// further bins weighted 2 and 1 to type.
// NOTE(review): the initial values assigned to type and the final return
// are not visible in this listing.
5114 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5116 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5117 return 0; /* B_Direct_8x8 */
5118 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5119 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5121 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5122 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5123 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5126 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5127 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// Decodes the transform size flag with CABAC: a single bin whose state is
// 399 plus h->neighbor_transform_size.
5131 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5132 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// Decodes a reference index for block n of the given list with CABAC.
// refa and refb are the cached references of the left and the upper
// neighbour; in B slices a neighbour only counts when its reference is
// above 0 and it is not marked in direct_cache. Bins are read in a loop
// with state 54 + ctx.
// On overflow (ref reaching 32) an error is logged and 0 is returned.
// NOTE(review): the ctx updates, the loop body and the normal return are
// not visible in this listing.
5135 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5136 int refa = h->ref_cache[list][scan8[n] - 1];
5137 int refb = h->ref_cache[list][scan8[n] - 8];
5141 if( h->slice_type == B_TYPE) {
5142 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5144 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5153 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5159 if(ref >= 32 /*h->ref_list[list]*/){
5160 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5161 return 0; //FIXME we should return -1 and check the return everywhere
// Decodes one motion vector difference component with CABAC.
// list and n select the block, l selects the component: l == 0 uses the
// states from 40, any other value the states from 47.
// amvd is the sum of the absolute differences of the left and the upper
// neighbour and drives the initial context selection. The magnitude is
// read with context coded bins while mvd < 9 and continued with bypass
// bins; the sign is applied by get_cabac_bypass_sign.
// NOTE(review): the context thresholds, the loop bodies and the overflow
// condition are not visible in this listing.
5167 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5168 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5169 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5170 int ctxbase = (l == 0) ? 40 : 47;
5175 else if( amvd > 32 )
5180 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5185 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5193 while( get_cabac_bypass( &h->cabac ) ) {
5197 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5202 if( get_cabac_bypass( &h->cabac ) )
5206 return get_cabac_bypass_sign( &h->cabac, -mvd );
// Computes the context index used for the coded block flag of block idx
// in category cat. nza and nzb describe the left and the upper neighbour:
// bit 0x100 of the neighbouring cbp in the first branch, cached non zero
// counts for cat 1 and 2, cbp bit 6 + idx for cat 3, and cached non zero
// counts of block 16 + idx in the last branch.
// The result is ctx + 4 * cat.
// NOTE(review): the first condition and the way ctx is derived from nza
// and nzb are not visible in this listing.
5209 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5214 nza = h->left_cbp&0x100;
5215 nzb = h-> top_cbp&0x100;
5216 } else if( cat == 1 || cat == 2 ) {
5217 nza = h->non_zero_count_cache[scan8[idx] - 1];
5218 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5219 } else if( cat == 3 ) {
5220 nza = (h->left_cbp>>(6+idx))&0x01;
5221 nzb = (h-> top_cbp>>(6+idx))&0x01;
5224 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5225 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5234 return ctx + 4 * cat;
// Context offsets for the last coefficient flag of 8x8 blocks, one entry
// per coefficient position 0..62. decode_cabac_residual indexes this table
// with the coefficient position in its 63 coefficient significance pass.
5237 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5238 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5239 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5240 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5241 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5244 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5245 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
5246 static const int significant_coeff_flag_offset[2][6] = {
5247 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5248 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5250 static const int last_coeff_flag_offset[2][6] = {
5251 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5252 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5254 static const int coeff_abs_level_m1_offset[6] = {
5255 227+0, 227+10, 227+20, 227+30, 227+39, 426
5257 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5258 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5259 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5260 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5261 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5262 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5263 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5264 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5265 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5271 int coeff_count = 0;
5274 int abslevelgt1 = 0;
5276 uint8_t *significant_coeff_ctx_base;
5277 uint8_t *last_coeff_ctx_base;
5278 uint8_t *abs_level_m1_ctx_base;
5281 #define CABAC_ON_STACK
5283 #ifdef CABAC_ON_STACK
5286 cc.range = h->cabac.range;
5287 cc.low = h->cabac.low;
5288 cc.bytestream= h->cabac.bytestream;
5290 #define CC &h->cabac
5294 /* cat: 0-> DC 16x16 n = 0
5295 * 1-> AC 16x16 n = luma4x4idx
5296 * 2-> Luma4x4 n = luma4x4idx
5297 * 3-> DC Chroma n = iCbCr
5298 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5299 * 5-> Luma8x8 n = 4 * luma8x8idx
5302 /* read coded block flag */
5304 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5305 if( cat == 1 || cat == 2 )
5306 h->non_zero_count_cache[scan8[n]] = 0;
5308 h->non_zero_count_cache[scan8[16+n]] = 0;
5309 #ifdef CABAC_ON_STACK
5310 h->cabac.range = cc.range ;
5311 h->cabac.low = cc.low ;
5312 h->cabac.bytestream= cc.bytestream;
5318 significant_coeff_ctx_base = h->cabac_state
5319 + significant_coeff_flag_offset[MB_FIELD][cat];
5320 last_coeff_ctx_base = h->cabac_state
5321 + last_coeff_flag_offset[MB_FIELD][cat];
5322 abs_level_m1_ctx_base = h->cabac_state
5323 + coeff_abs_level_m1_offset[cat];
5326 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5327 for(last= 0; last < coefs; last++) { \
5328 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5329 if( get_cabac( CC, sig_ctx )) { \
5330 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5331 index[coeff_count++] = last; \
5332 if( get_cabac( CC, last_ctx ) ) { \
5338 if( last == max_coeff -1 ) {\
5339 index[coeff_count++] = last;\
5341 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5342 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5343 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5345 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5347 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5349 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5352 assert(coeff_count > 0);
5355 h->cbp_table[mb_xy] |= 0x100;
5356 else if( cat == 1 || cat == 2 )
5357 h->non_zero_count_cache[scan8[n]] = coeff_count;
5359 h->cbp_table[mb_xy] |= 0x40 << n;
5361 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5364 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5367 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5368 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5369 int j= scantable[index[coeff_count]];
5371 if( get_cabac( CC, ctx ) == 0 ) {
5373 block[j] = get_cabac_bypass_sign( CC, -1);
5375 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
5381 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5382 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5386 if( coeff_abs >= 15 ) {
5388 while( get_cabac_bypass( CC ) ) {
5394 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5400 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5401 else block[j] = coeff_abs;
5403 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5404 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5410 #ifdef CABAC_ON_STACK
5411 h->cabac.range = cc.range ;
5412 h->cabac.low = cc.low ;
5413 h->cabac.bytestream= cc.bytestream;
/*
 * Computes the macroblock indices of the top and left neighbours of the
 * current macroblock (s->mb_x, s->mb_y) and stores them in h->top_mb_xy
 * and h->left_mb_xy[0].
 *
 * NOTE(review): this listing appears to omit short lines (braces and the
 * conditions guarding some of the branches below); the comments describe
 * only what the visible lines establish.
 */
5418 static inline void compute_mb_neighbors(H264Context *h)
5420 MpegEncContext * const s = &h->s;
5421 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
/* Default: the macroblock one row up and the macroblock one column left. */
5422 h->top_mb_xy = mb_xy - s->mb_stride;
5423 h->left_mb_xy[0] = mb_xy - 1;
/* pair_xy is the index of the upper macroblock of the current vertical
 * pair (mb_y with its low bit cleared); top_pair_xy is the row above it.
 * presumably this section only applies to MBAFF frames -- the guarding
 * condition is not visible here, TODO confirm. */
5425 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5426 const int top_pair_xy = pair_xy - s->mb_stride;
/* A "frame flag" is set when the corresponding macroblock is NOT interlaced. */
5427 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5428 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5429 const int curr_mb_frame_flag = !MB_FIELD;
/* bottom is 1 for the lower macroblock of a pair (odd mb_y). */
5430 const int bottom = (s->mb_y & 1);
5432 ? !curr_mb_frame_flag // bottom macroblock
5433 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
/* Move the top neighbour one further row up. */
5435 h->top_mb_xy -= s->mb_stride;
/* When the left pair and the current macroblock differ in frame/field
 * coding, the left neighbour is taken relative to the pair (pair_xy - 1)
 * rather than relative to this macroblock. */
5437 if (left_mb_frame_flag != curr_mb_frame_flag) {
5438 h->left_mb_xy[0] = pair_xy - 1;
/* Field pictures: the top neighbour is one further row up. */
5440 } else if (FIELD_PICTURE) {
5441 h->top_mb_xy -= s->mb_stride;
5447 * decodes a macroblock
5448 * @returns 0 if ok, a negative value if an error is noticed (the error paths visible in this function return -1)
/*
 * Parses the macroblock at (s->mb_x, s->mb_y) from the CABAC bitstream.
 * In order: skip flag (non-intra slices only), macroblock type, then either
 * raw I_PCM samples or the prediction data (intra prediction modes, or
 * reference indices and motion vectors), the coded block pattern, the QP
 * delta and the residual coefficients.  Results are written to the caches
 * in H264Context and to the per-macroblock tables indexed by mb_xy.
 * The error paths visible below return -1.
 *
 * NOTE(review): this listing appears to omit short lines (braces, else,
 * return statements, some declarations and loop headers); the comments
 * describe only what the visible lines establish and hedge the rest.
 */
5450 static int decode_mb_cabac(H264Context *h) {
5451 MpegEncContext * const s = &h->s;
5452 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5453 int mb_type, partition_count, cbp = 0;
/* Whether the 8x8 transform may be used; refined further down. */
5454 int dct8x8_allowed= h->pps.transform_8x8_mode;
5456 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5458 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* Skip handling applies only to slices that are neither I nor SI. */
5459 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5461 /* a skipped mb needs the aff flag from the following mb */
5462 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5463 predict_field_decoding_flag(h);
/* Lower macroblock of an MBAFF pair whose upper macroblock was skipped:
 * reuse the skip flag stored in h->next_mb_skipped instead of decoding it. */
5464 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5465 skip = h->next_mb_skipped;
5467 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5468 /* read skip flags */
/* Upper macroblock of an MBAFF pair: mark it as skipped and decode the
 * skip flag of the macroblock below (mb_y+1) into h->next_mb_skipped.
 * presumably this runs only when skip is set -- condition not visible. */
5470 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5471 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5472 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5473 if(h->next_mb_skipped)
5474 predict_field_decoding_flag(h);
5476 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Reset the per-macroblock CABAC context tables for this macroblock. */
5481 h->cbp_table[mb_xy] = 0;
5482 h->chroma_pred_mode_table[mb_xy] = 0;
5483 h->last_qscale_diff = 0;
5490 if( (s->mb_y&1) == 0 )
5492 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Field decoding flag derived from the picture structure
 * (presumably the non-MBAFF case -- the else is not visible). */
5494 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5496 h->prev_mb_skipped = 0;
/* Neighbour indices are computed before the macroblock type is decoded. */
5498 compute_mb_neighbors(h);
5499 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5500 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Translate the decoded index through the table for the current slice
 * type into a partition count and MB_TYPE_* flags; the B and P branches
 * can jump to decode_intra_mb (conditions not visible here). */
5504 if( h->slice_type == B_TYPE ) {
5506 partition_count= b_mb_type_info[mb_type].partition_count;
5507 mb_type= b_mb_type_info[mb_type].type;
5510 goto decode_intra_mb;
5512 } else if( h->slice_type == P_TYPE ) {
5514 partition_count= p_mb_type_info[mb_type].partition_count;
5515 mb_type= p_mb_type_info[mb_type].type;
5518 goto decode_intra_mb;
5521 assert(h->slice_type == I_TYPE);
/* Intra types: cbp and the 16x16 prediction mode come from the type table. */
5523 partition_count = 0;
5524 cbp= i_mb_type_info[mb_type].cbp;
5525 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5526 mb_type= i_mb_type_info[mb_type].type;
5529 mb_type |= MB_TYPE_INTERLACED;
/* Record which slice this macroblock belongs to. */
5531 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: samples are stored raw in the bytestream, bypassing CABAC. */
5533 if(IS_INTRA_PCM(mb_type)) {
5537 // We assume these blocks are very rare so we do not optimize it.
5538 // FIXME The two following lines get the bitstream position in the cabac
5539 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5540 ptr= h->cabac.bytestream;
5541 if(h->cabac.low&0x1) ptr--;
5543 if(h->cabac.low&0x1FF) ptr--;
5546 // The pixels are stored in the same order as levels in h->mb array.
/* Luma: 16 rows of 16 samples, scattered into the block layout of h->mb. */
5547 for(y=0; y<16; y++){
5548 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5549 for(x=0; x<16; x++){
5550 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5551 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
/* First chroma plane: stored starting at h->mb offset 256. */
5555 const int index= 256 + 4*(y&3) + 32*(y>>2);
5557 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5558 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* Second chroma plane: stored starting at h->mb offset 256 + 64. */
5562 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5564 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5565 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* Restart the CABAC decoder on the bytes following the raw samples. */
5569 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5571 // All blocks are present
5572 h->cbp_table[mb_xy] = 0x1ef;
5573 h->chroma_pred_mode_table[mb_xy] = 0;
5574 // In deblocking, the quantizer is 0
5575 s->current_picture.qscale_table[mb_xy]= 0;
5576 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5577 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5578 // All coeffs are present
5579 memset(h->non_zero_count[mb_xy], 16, 16);
5580 s->current_picture.mb_type[mb_xy]= mb_type;
/* Reference counts are doubled here and halved again at the end of the
 * function (presumably only for field macroblocks in MBAFF -- the
 * guarding condition is not visible, TODO confirm). */
5585 h->ref_count[0] <<= 1;
5586 h->ref_count[1] <<= 1;
5589 fill_caches(h, mb_type, 0);
/* ---- Intra prediction modes ---- */
5591 if( IS_INTRA( mb_type ) ) {
5593 if( IS_INTRA4x4( mb_type ) ) {
/* With the 8x8 transform, one mode is decoded per 8x8 block (i steps by 4)
 * and replicated over the 2x2 cache entries it covers. */
5594 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5595 mb_type |= MB_TYPE_8x8DCT;
5596 for( i = 0; i < 16; i+=4 ) {
5597 int pred = pred_intra_mode( h, i );
5598 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5599 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
/* Otherwise one mode is decoded for each of the 16 4x4 blocks. */
5602 for( i = 0; i < 16; i++ ) {
5603 int pred = pred_intra_mode( h, i );
5604 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5606 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5609 write_back_intra_pred_mode(h);
5610 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
/* Intra 16x16: validate the mode taken from the type table. */
5612 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5613 if( h->intra16x16_pred_mode < 0 ) return -1;
/* Chroma prediction mode: the raw decoded value is stored in the table,
 * the checked value in h->chroma_pred_mode. */
5615 h->chroma_pred_mode_table[mb_xy] =
5616 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5618 pred_mode= check_intra_pred_mode( h, pred_mode );
5619 if( pred_mode < 0 ) return -1;
5620 h->chroma_pred_mode= pred_mode;
/* ---- Four 8x8 partitions, each with its own sub-macroblock type ---- */
5621 } else if( partition_count == 4 ) {
5622 int i, j, sub_partition_count[4], list, ref[2][4];
5624 if( h->slice_type == B_TYPE ) {
5625 for( i = 0; i < 4; i++ ) {
5626 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5627 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5628 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* If any sub-block is direct, run direct prediction for the macroblock. */
5630 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5631 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5632 pred_direct_motion(h, &mb_type);
5633 h->ref_cache[0][scan8[4]] =
5634 h->ref_cache[1][scan8[4]] =
5635 h->ref_cache[0][scan8[12]] =
5636 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5637 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5638 for( i = 0; i < 4; i++ )
5639 if( IS_DIRECT(h->sub_mb_type[i]) )
5640 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
/* Sub-macroblock types decoded with the P-slice decoder and table. */
5644 for( i = 0; i < 4; i++ ) {
5645 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5646 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5647 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices: decoded only when the list holds more than one
 * reference; direct sub-blocks are skipped. */
5651 for( list = 0; list < h->list_count; list++ ) {
5652 for( i = 0; i < 4; i++ ) {
5653 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5654 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5655 if( h->ref_count[list] > 1 )
5656 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5662 h->ref_cache[list][ scan8[4*i]+1 ]=
5663 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5668 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors for every sub-partition of every 8x8 block. */
5670 for(list=0; list<h->list_count; list++){
5672 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5673 if(IS_DIRECT(h->sub_mb_type[i])){
5674 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5678 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5679 const int sub_mb_type= h->sub_mb_type[i];
5680 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5681 for(j=0; j<sub_partition_count[i]; j++){
5684 const int index= 4*i + block_width*j;
5685 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5686 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
/* mv = predicted vector (mpx, mpy) + decoded difference. */
5687 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5689 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5690 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5691 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the vector and its difference over the cache entries covered
 * by the sub-partition shape (offsets 1, 8 and 9 relative to the first). */
5693 if(IS_SUB_8X8(sub_mb_type)){
5695 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5697 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5700 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5702 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5703 }else if(IS_SUB_8X4(sub_mb_type)){
5704 mv_cache[ 1 ][0]= mx;
5705 mv_cache[ 1 ][1]= my;
5707 mvd_cache[ 1 ][0]= mx - mpx;
5708 mvd_cache[ 1 ][1]= my - mpy;
5709 }else if(IS_SUB_4X8(sub_mb_type)){
5710 mv_cache[ 8 ][0]= mx;
5711 mv_cache[ 8 ][1]= my;
5713 mvd_cache[ 8 ][0]= mx - mpx;
5714 mvd_cache[ 8 ][1]= my - mpy;
5716 mv_cache[ 0 ][0]= mx;
5717 mv_cache[ 0 ][1]= my;
5719 mvd_cache[ 0 ][0]= mx - mpx;
5720 mvd_cache[ 0 ][1]= my - mpy;
/* Zero the four mv and mvd cache entries of this 8x8 block, writing each
 * (x, y) pair as one 32-bit word (presumably the branch for a list this
 * sub-block does not use -- the else is not visible). */
5723 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5724 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5725 p[0] = p[1] = p[8] = p[9] = 0;
5726 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* ---- Direct macroblock: vectors are predicted, differences are zero ---- */
5730 } else if( IS_DIRECT(mb_type) ) {
5731 pred_direct_motion(h, &mb_type);
5732 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5733 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5734 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16, 16x8 and 8x16 partitions: all reference indices are decoded
 * first, then all motion vectors ---- */
5736 int list, mx, my, i, mpx, mpy;
5737 if(IS_16X16(mb_type)){
5738 for(list=0; list<h->list_count; list++){
5739 if(IS_DIR(mb_type, 0, list)){
5740 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5741 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5743 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5745 for(list=0; list<h->list_count; list++){
5746 if(IS_DIR(mb_type, 0, list)){
5747 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5749 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5750 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5751 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5753 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5754 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5756 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
/* 16x8: two partitions stacked vertically, cache offset 16*i each. */
5759 else if(IS_16X8(mb_type)){
5760 for(list=0; list<h->list_count; list++){
5762 if(IS_DIR(mb_type, i, list)){
5763 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5764 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5766 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5769 for(list=0; list<h->list_count; list++){
5771 if(IS_DIR(mb_type, i, list)){
5772 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5773 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5774 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5775 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5777 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5778 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5780 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5781 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
/* 8x16: two partitions side by side, cache offset 2*i each. */
5786 assert(IS_8X16(mb_type));
5787 for(list=0; list<h->list_count; list++){
5789 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5790 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5791 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5793 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5796 for(list=0; list<h->list_count; list++){
5798 if(IS_DIR(mb_type, i, list)){
5799 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5800 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5801 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5803 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5804 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5805 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5807 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5808 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* Inter macroblocks: clear the chroma prediction mode entry and store the
 * motion data from the caches. */
5815 if( IS_INTER( mb_type ) ) {
5816 h->chroma_pred_mode_table[mb_xy] = 0;
5817 write_back_motion( h, mb_type );
/* Coded block pattern: luma in the low 4 bits, chroma shifted up by 4.
 * Intra 16x16 keeps the cbp obtained from the type table above. */
5820 if( !IS_INTRA16x16( mb_type ) ) {
5821 cbp = decode_cabac_mb_cbp_luma( h );
5822 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5825 h->cbp_table[mb_xy] = h->cbp = cbp;
/* Non-intra macroblocks with coded luma read the transform size flag here. */
5827 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5828 if( decode_cabac_mb_transform_size( h ) )
5829 mb_type |= MB_TYPE_8x8DCT;
5831 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- Residual data ---- */
5833 if( cbp || IS_INTRA16x16( mb_type ) ) {
5834 const uint8_t *scan, *scan8x8, *dc_scan;
5835 const uint32_t *qmul;
/* Field scans for interlaced macroblocks, zigzag scans otherwise; the
 * *_q0 variants are selected when s->qscale is 0. */
5838 if(IS_INTERLACED(mb_type)){
5839 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5840 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5841 dc_scan= luma_dc_field_scan;
5843 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5844 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5845 dc_scan= luma_dc_zigzag_scan;
/* QP delta; INT_MIN signals a decoding failure. */
5848 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5849 if( dqp == INT_MIN ){
5850 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap s->qscale back into 0..51 by adding or subtracting 52. */
5854 if(((unsigned)s->qscale) > 51){
5855 if(s->qscale<0) s->qscale+= 52;
5856 else s->qscale-= 52;
5858 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5859 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Intra 16x16: luma DC block (cat 0), then 15 AC coefficients for each
 * of the 16 4x4 blocks (cat 1). */
5861 if( IS_INTRA16x16( mb_type ) ) {
5863 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5864 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5867 qmul = h->dequant4_coeff[0][s->qscale];
5868 for( i = 0; i < 16; i++ ) {
5869 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5870 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5873 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Other types: for each 8x8 luma block flagged in cbp, decode either one
 * 8x8 block (cat 5) or four 4x4 blocks (cat 2). */
5877 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5878 if( cbp & (1<<i8x8) ) {
5879 if( IS_8x8DCT(mb_type) ) {
5880 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5881 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5883 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5884 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5885 const int index = 4*i8x8 + i4x4;
5886 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5888 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5889 //STOP_TIMER("decode_residual")
/* Zero the four non-zero-count cache entries of this 8x8 block. */
5893 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5894 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (cat 3): one 4-coefficient block per chroma plane. */
5901 for( c = 0; c < 2; c++ ) {
5902 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5903 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* Chroma AC (cat 4): four 4x4 blocks per plane, 15 coefficients each. */
5909 for( c = 0; c < 2; c++ ) {
5910 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5911 for( i = 0; i < 4; i++ ) {
5912 const int index = 16 + 4 * c + i;
5913 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5914 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
/* Zero the chroma non-zero-count cache entries. */
5918 uint8_t * const nnz= &h->non_zero_count_cache[0];
5919 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5920 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* Zero every luma and chroma non-zero count and the stored QP delta
 * (presumably the branch taken when no residual is coded). */
5923 uint8_t * const nnz= &h->non_zero_count_cache[0];
5924 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5925 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5926 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5927 h->last_qscale_diff = 0;
/* Store the final QP and the non-zero counts for this macroblock. */
5930 s->current_picture.qscale_table[mb_xy]= s->qscale;
5931 write_back_non_zero_count(h);
/* Undo the doubling of the reference counts done above. */
5934 h->ref_count[0] >>= 1;
5935 h->ref_count[1] >>= 1;
/*
 * Deblocks one vertical luma edge: the samples read and written lie to the
 * left (pix[-1], pix[-2], ...) and right (pix[0], pix[1], ...) of pix.
 *
 * pix     first sample on the right-hand side of the edge
 * stride  passed through to the dsp filter function
 * bS      boundary strengths for the edge
 * qp      quantiser used to derive the alpha/beta/tc thresholds
 *
 * NOTE(review): this listing appears to omit short lines (declarations,
 * the bS test selecting between the two paths, else/braces, the per-row
 * pointer advance); comments describe only the visible lines.
 */
5942 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Thresholds are looked up with the slice offsets added to qp; the tables
 * are addressed from their 52nd entry. */
5944 const int index_a = qp + h->slice_alpha_c0_offset;
5945 const int alpha = (alpha_table+52)[index_a];
5946 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Normal filter via the dsp function: tc is taken from tc0_table for a
 * non-zero bS and set to -1 for bS == 0. */
5951 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5952 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5954 /* 16px edge length, because bS=4 is triggered by being at
5955 * the edge of an intra MB, so all 4 bS are the same */
5956 for( d = 0; d < 16; d++ ) {
5957 const int p0 = pix[-1];
5958 const int p1 = pix[-2];
5959 const int p2 = pix[-3];
5961 const int q0 = pix[0];
5962 const int q1 = pix[1];
5963 const int q2 = pix[2];
/* Filter only where the step across the edge is below alpha and the
 * gradients on both sides are below beta. */
5965 if( FFABS( p0 - q0 ) < alpha &&
5966 FFABS( p1 - p0 ) < beta &&
5967 FFABS( q1 - q0 ) < beta ) {
/* A small step across the edge enables the updates below that rewrite up
 * to three samples per side, each side gated by its own beta test. */
5969 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5970 if( FFABS( p2 - p0 ) < beta)
5972 const int p3 = pix[-4];
5974 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5975 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5976 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5979 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5981 if( FFABS( q2 - q0 ) < beta)
5983 const int q3 = pix[3];
5985 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5986 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5987 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5990 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Update that changes only p0 and q0. */
5994 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5995 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* NOTE(review): i is traced here but no assignment to it is visible in
 * this loop -- confirm it holds a defined value on this path. */
5997 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/*
 * Chroma counterpart of filter_mb_edgev: derives the thresholds from qp and
 * hands the edge to one of the dsp h264_h_loop_filter_chroma functions.
 * NOTE(review): the test that selects between the normal and the _intra
 * variant is not visible in this listing.
 */
6003 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6005 const int index_a = qp + h->slice_alpha_c0_offset;
6006 const int alpha = (alpha_table+52)[index_a];
6007 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc is the tc0_table value plus one for a non-zero bS, 0 for bS == 0. */
6012 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6013 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Variant that takes no tc array. */
6015 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Deblocks a vertical luma edge row by row, with eight boundary strengths
 * and two quantisers (qp[0], qp[1]) selected per row.  Samples left and
 * right of pix are filtered; pix advances by stride each row.
 *
 * NOTE(review): this listing appears to omit short lines (declarations,
 * the condition around the bS_index adjustment, continue/else/braces);
 * comments describe only the visible lines.
 */
6019 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6021 for( i = 0; i < 16; i++, pix += stride) {
/* Choose which of the eight bS values applies to this row. */
6027 int bS_index = (i >> 1);
6030 bS_index |= (i & 1);
/* bS == 0 (presumably the row is left unfiltered -- body not visible). */
6033 if( bS[bS_index] == 0 ) {
/* Field macroblocks use qp[0] for rows 0-7 and qp[1] for rows 8-15;
 * otherwise the quantiser alternates with the row parity. */
6037 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6038 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6039 alpha = (alpha_table+52)[index_a];
6040 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS 1..3: clipped filter using tc0 from the table. */
6042 if( bS[bS_index] < 4 ) {
6043 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6044 const int p0 = pix[-1];
6045 const int p1 = pix[-2];
6046 const int p2 = pix[-3];
6047 const int q0 = pix[0];
6048 const int q1 = pix[1];
6049 const int q2 = pix[2];
6051 if( FFABS( p0 - q0 ) < alpha &&
6052 FFABS( p1 - p0 ) < beta &&
6053 FFABS( q1 - q0 ) < beta ) {
/* p1 and q1 are adjusted, clipped to +-tc0, when their side passes the
 * beta test. */
6057 if( FFABS( p2 - p0 ) < beta ) {
6058 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6061 if( FFABS( q2 - q0 ) < beta ) {
6062 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* p0 and q0 move towards each other by a delta clipped to +-tc. */
6066 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6067 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6068 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6069 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Remaining case (bS not below 4): unclipped filter, same formulas as the
 * 16-row loop in filter_mb_edgev. */
6072 const int p0 = pix[-1];
6073 const int p1 = pix[-2];
6074 const int p2 = pix[-3];
6076 const int q0 = pix[0];
6077 const int q1 = pix[1];
6078 const int q2 = pix[2];
6080 if( FFABS( p0 - q0 ) < alpha &&
6081 FFABS( p1 - p0 ) < beta &&
6082 FFABS( q1 - q0 ) < beta ) {
6084 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6085 if( FFABS( p2 - p0 ) < beta)
6087 const int p3 = pix[-4];
6089 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6090 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6091 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6094 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6096 if( FFABS( q2 - q0 ) < beta)
6098 const int q3 = pix[3];
6100 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6101 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6102 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6105 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6109 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6110 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6112 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/*
 * Chroma counterpart of filter_mb_mbaff_edgev: eight rows, eight boundary
 * strengths, two quantisers.  Only p0 and q0 are modified.
 *
 * NOTE(review): this listing appears to omit short lines (declarations,
 * the computation of bS_index, continue/else/braces); comments describe
 * only the visible lines.
 */
6117 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6119 for( i = 0; i < 8; i++, pix += stride) {
/* bS == 0 (presumably the row is left unfiltered -- body not visible). */
6127 if( bS[bS_index] == 0 ) {
/* Field macroblocks use qp[0] for rows 0-3 and qp[1] for rows 4-7;
 * otherwise the quantiser alternates with the row parity. */
6131 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6132 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6133 alpha = (alpha_table+52)[index_a];
6134 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS 1..3: delta clipped to +-tc, where tc is the table value plus one. */
6136 if( bS[bS_index] < 4 ) {
6137 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6138 const int p0 = pix[-1];
6139 const int p1 = pix[-2];
6140 const int q0 = pix[0];
6141 const int q1 = pix[1];
6143 if( FFABS( p0 - q0 ) < alpha &&
6144 FFABS( p1 - p0 ) < beta &&
6145 FFABS( q1 - q0 ) < beta ) {
6146 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6148 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6149 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6150 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Remaining case (bS not below 4): unclipped weighted average. */
6153 const int p0 = pix[-1];
6154 const int p1 = pix[-2];
6155 const int q0 = pix[0];
6156 const int q1 = pix[1];
6158 if( FFABS( p0 - q0 ) < alpha &&
6159 FFABS( p1 - p0 ) < beta &&
6160 FFABS( q1 - q0 ) < beta ) {
6162 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6163 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6164 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/*
 * Deblocks one horizontal luma edge: the samples read and written lie
 * above (pix[-1*stride], ...) and below (pix[0], pix[1*stride], ...) pix.
 * Mirrors filter_mb_edgev with a sample distance of stride instead of 1.
 *
 * NOTE(review): this listing appears to omit short lines (declarations,
 * the bS test selecting between the two paths, else/braces, the per-column
 * pointer advance); comments describe only the visible lines.
 */
6170 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6172 const int index_a = qp + h->slice_alpha_c0_offset;
6173 const int alpha = (alpha_table+52)[index_a];
6174 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Distance in bytes between vertically adjacent samples. */
6175 const int pix_next = stride;
/* Normal filter via the dsp function: tc is taken from tc0_table for a
 * non-zero bS and set to -1 for bS == 0. */
6180 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6181 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6183 /* 16px edge length, see filter_mb_edgev */
6184 for( d = 0; d < 16; d++ ) {
6185 const int p0 = pix[-1*pix_next];
6186 const int p1 = pix[-2*pix_next];
6187 const int p2 = pix[-3*pix_next];
6188 const int q0 = pix[0];
6189 const int q1 = pix[1*pix_next];
6190 const int q2 = pix[2*pix_next];
6192 if( FFABS( p0 - q0 ) < alpha &&
6193 FFABS( p1 - p0 ) < beta &&
6194 FFABS( q1 - q0 ) < beta ) {
/* Unlike filter_mb_edgev, p3 and q3 are loaded as soon as the edge passes
 * the alpha/beta test, before the per-side tests. */
6196 const int p3 = pix[-4*pix_next];
6197 const int q3 = pix[ 3*pix_next];
6199 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6200 if( FFABS( p2 - p0 ) < beta) {
6202 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6203 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6204 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6207 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6209 if( FFABS( q2 - q0 ) < beta) {
6211 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6212 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6213 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6216 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Update that changes only p0 and q0. */
6220 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6221 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* NOTE(review): i and bS[i] are traced here but no assignment to i is
 * visible in this loop -- confirm it holds a defined, in-range value. */
6223 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/*
 * Chroma counterpart of filter_mb_edgeh: derives the thresholds from qp and
 * hands the edge to one of the dsp h264_v_loop_filter_chroma functions.
 * NOTE(review): the test that selects between the normal and the _intra
 * variant is not visible in this listing.
 */
6230 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6232 const int index_a = qp + h->slice_alpha_c0_offset;
6233 const int alpha = (alpha_table+52)[index_a];
6234 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc is the tc0_table value plus one for a non-zero bS, 0 for bS == 0. */
6239 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6240 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Variant that takes no tc array. */
6242 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Deblocks the macroblock at (mb_x, mb_y) using the dsp
 * h264_loop_filter_strength function to compute boundary strengths, and
 * calls filter_mb() for the cases listed in the first test below.
 *
 * img_y/img_cb/img_cr  sample pointers for the three planes of this
 *                      macroblock
 * linesize/uvlinesize  row strides of the luma and chroma planes
 *
 * NOTE(review): this listing appears to omit short lines (declarations,
 * return statements, else/braces and the FILTER(...) invocations);
 * comments describe only the visible lines.
 */
6246 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6247 MpegEncContext * const s = &h->s;
6249 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6251 mb_xy = mb_x + mb_y*s->mb_stride;
/* filter_mb() is called instead when the macroblock is in the first
 * column or row, when no strength function is available, when
 * pps.chroma_qp_diff is set, or when deblocking_filter == 2 and the top
 * or left neighbour belongs to a different slice. */
6253 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6254 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6255 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6256 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6259 assert(!FRAME_MBAFF);
/* qp: this macroblock; qp0/qp1 start as the left/top neighbour's value and
 * are then replaced by the rounded average with qp.  qpc* are the
 * corresponding chroma values (chroma index 0 only). */
6261 mb_type = s->current_picture.mb_type[mb_xy];
6262 qp = s->current_picture.qscale_table[mb_xy];
6263 qp0 = s->current_picture.qscale_table[mb_xy-1];
6264 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6265 qpc = get_chroma_qp( h, 0, qp );
6266 qpc0 = get_chroma_qp( h, 0, qp0 );
6267 qpc1 = get_chroma_qp( h, 0, qp1 );
6268 qp0 = (qp + qp0 + 1) >> 1;
6269 qp1 = (qp + qp1 + 1) >> 1;
6270 qpc0 = (qpc + qpc0 + 1) >> 1;
6271 qpc1 = (qpc + qpc1 + 1) >> 1;
/* All six quantisers at or below the threshold (presumably an early exit
 * because filtering would have no effect -- the body is not visible). */
6272 qp_thresh = 15 - h->slice_alpha_c0_offset;
6273 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6274 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra macroblock: strength 4 on the macroblock edges (edge 0), strength
 * 3 on the internal edges. */
6277 if( IS_INTRA(mb_type) ) {
6278 int16_t bS4[4] = {4,4,4,4};
6279 int16_t bS3[4] = {3,3,3,3};
/* With the 8x8 transform only luma edges 0 and 2 are filtered. */
6280 if( IS_8x8DCT(mb_type) ) {
6281 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6282 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6283 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6284 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6286 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6287 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6288 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6289 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6290 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6291 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6292 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6293 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: two vertical and two horizontal edges per plane, at sample
 * offsets 0 and 4. */
6295 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6296 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6297 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6298 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6299 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6300 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6301 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6302 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Non-intra: bS[dir][edge][4]; bSv views the four int16_t strengths of an
 * edge as one uint64_t so that a whole edge can be set or tested at once.
 * NOTE(review): this accesses int16_t storage through a uint64_t pointer;
 * confirm the build tolerates the aliasing. */
6305 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6306 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with the three low cbp bits set: edges 0 and 2 in both
 * directions get strength 2 on all four segments. */
6308 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6310 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* Otherwise the dsp function computes the strengths from the non-zero
 * counts, reference indices and motion vectors; the mask, step and edge
 * count arguments are derived from the partition shape, the transform
 * size and cbp. */
6312 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6313 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6314 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6315 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6317 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6318 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6319 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6320 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* An intra left or top neighbour forces strength 4 on edge 0 of that
 * direction. */
6322 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6323 bSv[0][0] = 0x0004000400040004ULL;
6324 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6325 bSv[1][0] = 0x0004000400040004ULL;
/* FILTER(hv, dir, edge): does nothing when all four strengths of the edge
 * are zero; otherwise filters the luma edge and the Cb/Cr edges.  Edge 0
 * uses the quantiser averaged with the neighbour (qp0/qp1, qpc0/qpc1),
 * other edges use qp/qpc. */
6327 #define FILTER(hv,dir,edge)\
6328 if(bSv[dir][edge]) {\
6329 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6331 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6332 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6338 } else if( IS_8x8DCT(mb_type) ) {
/*
 * Deblocking filter for one macroblock (general path).
 *
 * For the macroblock at (mb_x, mb_y) this derives, edge by edge, a boundary
 * strength array bS and a QP averaged with the neighbouring macroblock, then
 * calls filter_mb_edgev / filter_mb_edgeh on the luma plane img_y and
 * filter_mb_edgecv / filter_mb_edgech on the chroma planes img_cb / img_cr.
 * linesize and uvlinesize are the strides passed on to those helpers.
 *
 * Two MBAFF-specific paths are visible: a first vertical edge handled with
 * 8 bS values and 2 QPs, and a top horizontal edge that is filtered in two
 * passes with doubled strides.
 */
6357 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6358 MpegEncContext * const s = &h->s;
6359 const int mb_xy= mb_x + mb_y*s->mb_stride;
6360 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Threshold for the vertical MV difference test: 2 for interlaced MBs, 4 otherwise. */
6361 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6362 int first_vertical_edge_done = 0;
6364 /* FIXME: A given frame may occupy more than one position in
6365 * the reference list. So ref2frm should be populated with
6366 * frame numbers, not indices. */
6367 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6368 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6370 //for sufficiently low qp, filtering wouldn't do anything
6371 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6373 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6374 int qp = s->current_picture.qscale_table[mb_xy];
6376 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6377 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6383 // left mb is in picture
6384 && h->slice_table[mb_xy-1] != 255
6385 // and current and left pair do not have the same interlaced type
6386 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6387 // and left mb is in the same slice if deblocking_filter == 2
6388 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6389 /* First vertical edge is different in MBAFF frames
6390 * There are 8 different bS to compute and 2 different Qp
6392 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6393 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6398 int mb_qp, mbn0_qp, mbn1_qp;
6400 first_vertical_edge_done = 1;
6402 if( IS_INTRA(mb_type) )
6403 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6405 for( i = 0; i < 8; i++ ) {
6406 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6408 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6410 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6411 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6412 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Average the current MB's luma and chroma QPs with those of each of the
 * two left neighbours (index 0 and 1 of left_mb_xy), rounding up. */
6419 mb_qp = s->current_picture.qscale_table[mb_xy];
6420 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6421 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6422 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6423 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6424 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6425 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6426 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6427 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6428 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6429 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6430 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6431 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6434 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6435 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6436 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6437 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6438 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6440 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6441 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the macroblock across edge 0, i.e. the left one for dir 0 and
 * h->top_mb_xy for dir 1. */
6444 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6445 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* Begin at edge 1 instead of edge 0 when that neighbour's slice_table entry is 255. */
6446 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* A skipped 16x16 macroblock only has its outer edge considered. */
6448 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6449 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6450 // how often to recheck mv-based bS when iterating between edges
6451 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6452 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6453 // how often to recheck mv-based bS when iterating along each edge
6454 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6456 if (first_vertical_edge_done) {
6458 first_vertical_edge_done = 0;
6461 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6464 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6465 && !IS_INTERLACED(mb_type)
6466 && IS_INTERLACED(mbm_type)
6468 // This is a special case in the norm where the filtering must
6469 // be done twice (one each of the field) even if we are in a
6470 // frame macroblock.
6472 static const int nnz_idx[4] = {4,5,6,3};
/* Doubled strides: each pass of the j loop below starts at row j and
 * steps two picture rows at a time. */
6473 unsigned int tmp_linesize = 2 * linesize;
6474 unsigned int tmp_uvlinesize = 2 * uvlinesize;
/* Neighbour for pass 0 is two MB rows up; the loop advances it by one
 * MB row (mb_stride) for pass 1. */
6475 int mbn_xy = mb_xy - 2 * s->mb_stride;
6480 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6481 if( IS_INTRA(mb_type) ||
6482 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6483 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6485 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6486 for( i = 0; i < 4; i++ ) {
6487 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6488 mbn_nnz[nnz_idx[i]] != 0 )
6494 // Do not use s->qscale as luma quantizer because it has not the same
6495 // value in IPCM macroblocks.
6496 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6497 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6498 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6499 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6500 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6501 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6502 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6503 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6510 for( edge = start; edge < edges; edge++ ) {
6511 /* mbn_xy: neighbor macroblock */
6512 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6513 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6517 if( (edge&1) && IS_8x8DCT(mb_type) )
6520 if( IS_INTRA(mb_type) ||
6521 IS_INTRA(mbn_type) ) {
6524 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6525 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6534 bS[0] = bS[1] = bS[2] = bS[3] = value;
6539 if( edge & mask_edge ) {
6540 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6543 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6544 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6547 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
/* One ref/MV comparison decides all four bS values of this edge.
 * b_idx / bn_idx index the caches for the block on this side of the
 * edge and the one across it (one row is 8 entries, one column is 1). */
6548 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6549 int bn_idx= b_idx - (dir ? 8:1);
6551 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6552 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6553 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6554 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6556 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Per-block evaluation: each of the four positions along the edge is
 * tested on its own (non-zero coefficients first, then refs and MVs). */
6562 for( i = 0; i < 4; i++ ) {
6563 int x = dir == 0 ? edge : i;
6564 int y = dir == 0 ? i : edge;
6565 int b_idx= 8 + 4 + x + 8*y;
6566 int bn_idx= b_idx - (dir ? 8:1);
6568 if( h->non_zero_count_cache[b_idx] != 0 ||
6569 h->non_zero_count_cache[bn_idx] != 0 ) {
6575 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6576 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6577 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6578 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6586 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6591 // Do not use s->qscale as luma quantizer because it has not the same
6592 // value in IPCM macroblocks.
6593 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6594 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6595 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6596 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Luma edges sit every 4 samples; chroma is filtered only for even edge
 * indices, at an offset of 2*edge samples. */
6598 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6599 if( (edge&1) == 0 ) {
6600 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6601 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6602 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6603 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6606 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6607 if( (edge&1) == 0 ) {
6608 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6609 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6610 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6611 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/*
 * Decodes the macroblocks of one slice.
 *
 * When h->pps.cabac is set, the CABAC decoder and its context states are
 * initialised and macroblocks are read with decode_mb_cabac(); the other
 * visible loop reads them with decode_mb_cavlc(). Every successfully parsed
 * macroblock is reconstructed with hl_decode_mb(). With FRAME_MBAFF a second
 * macroblock is decoded right after the first one. The decoded range and its
 * status (end or error) are reported through ff_er_add_slice(), and finished
 * rows are handed to ff_draw_horiz_band().
 */
6618 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6619 MpegEncContext * const s = &h->s;
/* Status bits passed to ff_er_add_slice() are masked with this: only the
 * AC bits for partitioned frames, 0x7F otherwise. */
6620 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6624 if( h->pps.cabac ) {
/* CABAC data starts on a byte boundary. */
6628 align_get_bits( &s->gb );
6631 ff_init_cabac_states( &h->cabac);
/* Hand the CABAC decoder the rest of the buffer: current byte position
 * and the remaining length in bytes, rounded up. */
6632 ff_init_cabac_decoder( &h->cabac,
6633 s->gb.buffer + get_bits_count(&s->gb)/8,
6634 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6635 /* calculate pre-state */
6636 for( i= 0; i < 460; i++ ) {
/* pre = ((m * qscale) >> 4) + n clipped to 1..126, with (m, n) taken from
 * the I-slice table or the P/B table selected by cabac_init_idc. */
6638 if( h->slice_type == I_TYPE )
6639 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6641 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6644 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6646 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6651 int ret = decode_mb_cabac(h);
6653 //STOP_TIMER("decode_mb_cabac")
6655 if(ret>=0) hl_decode_mb(h);
6657 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6660 if(ret>=0) ret = decode_mb_cabac(h);
6662 if(ret>=0) hl_decode_mb(h);
6665 eos = get_cabac_terminate( &h->cabac );
/* A parse error, or a read position more than 2 bytes past the end of the
 * CABAC buffer, is reported as a damaged slice. */
6667 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6668 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6669 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6673 if( ++s->mb_x >= s->mb_width ) {
6675 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6677 if(FIELD_OR_MBAFF_PICTURE) {
6682 if( eos || s->mb_y >= s->mb_height ) {
6683 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6684 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6691 int ret = decode_mb_cavlc(h);
6693 if(ret>=0) hl_decode_mb(h);
6695 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6697 ret = decode_mb_cavlc(h);
6699 if(ret>=0) hl_decode_mb(h);
6704 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6705 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6710 if(++s->mb_x >= s->mb_width){
6712 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6714 if(FIELD_OR_MBAFF_PICTURE) {
6717 if(s->mb_y >= s->mb_height){
6718 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Ending exactly on the last bit is a clean slice end; the line after
 * covers the other case. */
6720 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6721 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6725 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Out of bits with no pending skip run: the slice is over. It is clean if
 * the position matches the size exactly, otherwise it is flagged as an error. */
6732 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6733 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6734 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6735 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6739 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below calls decode_mb(), passes s->gb by value and
 * contains `s->?gb`, which is not valid C. It is presumably compiled out by
 * a preprocessor guard that is not visible here -- confirm before touching. */
6748 for(;s->mb_y < s->mb_height; s->mb_y++){
6749 for(;s->mb_x < s->mb_width; s->mb_x++){
6750 int ret= decode_mb(h);
6755 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6756 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6761 if(++s->mb_x >= s->mb_width){
6763 if(++s->mb_y >= s->mb_height){
6764 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6765 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6769 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6776 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6777 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6778 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6782 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6789 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6792 return -1; //not reached
/*
 * Reads an unregistered user data SEI payload of `size` bytes from s->gb.
 *
 * At most sizeof(user_data)-1 bytes are copied into a local buffer. The text
 * starting at offset 16 is scanned for an "x264 - core <number>" banner; when
 * one number is parsed and it is non-negative it is stored in h->x264_build.
 * With FF_DEBUG_BUGS set, the text is also written to the debug log.
 */
6795 static int decode_unregistered_user_data(H264Context *h, int size){
6796 MpegEncContext * const s = &h->s;
6797 uint8_t user_data[16+256];
/* Copy byte by byte, bounded by both the buffer (one byte kept spare) and the payload size. */
6803 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6804 user_data[i]= get_bits(&s->gb, 8);
6808 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6809 if(e==1 && build>=0)
6810 h->x264_build= build;
6812 if(s->avctx->debug & FF_DEBUG_BUGS)
6813 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Presumably consumes payload bytes that did not fit in user_data -- the
 * enclosing loop header is not visible here, confirm. */
6816 skip_bits(&s->gb, 8);
/*
 * Parses SEI messages from s->gb for as long as more than 16 bits remain.
 *
 * For each message, the type and the size are each accumulated from 8-bit
 * values, continuing while the byte just read equals 255. Unregistered user
 * data goes to decode_unregistered_user_data(); a payload can also be
 * skipped as a whole (8*size bits). The reader is byte-aligned afterwards.
 */
6821 static int decode_sei(H264Context *h){
6822 MpegEncContext * const s = &h->s;
6824 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* show_bits() peeks at the byte that the get_bits() in the loop condition then consumes. */
6829 type+= show_bits(&s->gb, 8);
6830 }while(get_bits(&s->gb, 8) == 255);
6834 size+= show_bits(&s->gb, 8);
6835 }while(get_bits(&s->gb, 8) == 255);
6839 if(decode_unregistered_user_data(h, size) < 0)
6843 skip_bits(&s->gb, 8*size);
6846 //FIXME check bits here
6847 align_get_bits(&s->gb);
/*
 * Reads HRD parameters from s->gb.
 *
 * Every value is read only to advance the bit position: apart from
 * cpb_count, which bounds the loop, no result is kept and nothing visible
 * here is written to sps.
 */
6853 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6854 MpegEncContext * const s = &h->s;
6856 cpb_count = get_ue_golomb(&s->gb) + 1;
6857 get_bits(&s->gb, 4); /* bit_rate_scale */
6858 get_bits(&s->gb, 4); /* cpb_size_scale */
6859 for(i=0; i<cpb_count; i++){
6860 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6861 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6862 get_bits1(&s->gb); /* cbr_flag */
6864 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6865 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6866 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6867 get_bits(&s->gb, 5); /* time_offset_length */
/*
 * Parses the VUI parameters of a sequence parameter set from s->gb.
 *
 * Stored in sps: the sample aspect ratio (sar), the timing information
 * (timing_info_present_flag, num_units_in_tick, time_scale,
 * fixed_frame_rate_flag), bitstream_restriction_flag and num_reorder_frames.
 * All other syntax elements are read and discarded. HRD parameters, when
 * flagged, are consumed by decode_hrd_parameters().
 */
6870 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6871 MpegEncContext * const s = &h->s;
6872 int aspect_ratio_info_present_flag;
6873 unsigned int aspect_ratio_idc;
6874 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6876 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6878 if( aspect_ratio_info_present_flag ) {
6879 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16-bit numerator and denominator; idc
 * values below 14 index the pixel_aspect table. */
6880 if( aspect_ratio_idc == EXTENDED_SAR ) {
6881 sps->sar.num= get_bits(&s->gb, 16);
6882 sps->sar.den= get_bits(&s->gb, 16);
6883 }else if(aspect_ratio_idc < 14){
6884 sps->sar= pixel_aspect[aspect_ratio_idc];
6886 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6893 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6895 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6896 get_bits1(&s->gb); /* overscan_appropriate_flag */
6899 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6900 get_bits(&s->gb, 3); /* video_format */
6901 get_bits1(&s->gb); /* video_full_range_flag */
6902 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6903 get_bits(&s->gb, 8); /* colour_primaries */
6904 get_bits(&s->gb, 8); /* transfer_characteristics */
6905 get_bits(&s->gb, 8); /* matrix_coefficients */
6909 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6910 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6911 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6914 sps->timing_info_present_flag = get_bits1(&s->gb);
6915 if(sps->timing_info_present_flag){
6916 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6917 sps->time_scale = get_bits_long(&s->gb, 32);
6918 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* Both HRD blocks use the same parser; low_delay_hrd_flag follows only if
 * at least one of them was present. */
6921 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6922 if(nal_hrd_parameters_present_flag)
6923 decode_hrd_parameters(h, sps);
6924 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6925 if(vcl_hrd_parameters_present_flag)
6926 decode_hrd_parameters(h, sps);
6927 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6928 get_bits1(&s->gb); /* low_delay_hrd_flag */
6929 get_bits1(&s->gb); /* pic_struct_present_flag */
6931 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6932 if(sps->bitstream_restriction_flag){
6933 unsigned int num_reorder_frames;
6934 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6935 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6936 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6937 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6938 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6939 num_reorder_frames= get_ue_golomb(&s->gb);
6940 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* The value is held in a local and checked against 16 before it is copied into sps. */
6942 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6943 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6947 sps->num_reorder_frames= num_reorder_frames;
/*
 * Reads one scaling list of `size` entries into `factors`.
 *
 * If the leading flag bit is 0, the list is not present in the stream and
 * fallback_list is copied. Otherwise the entries are delta coded: each value
 * is the previous one plus a signed Exp-Golomb delta, taken modulo 256, and
 * is stored at the position given by the zigzag scan table (zigzag_scan for
 * size 16, zigzag_scan8x8 otherwise). A first value of 0 selects jvt_list
 * instead.
 */
6953 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6954 const uint8_t *jvt_list, const uint8_t *fallback_list){
6955 MpegEncContext * const s = &h->s;
6956 int i, last = 8, next = 8;
6957 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6958 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6959 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6961 for(i=0;i<size;i++){
6963 next = (last + get_se_golomb(&s->gb)) & 0xff;
6964 if(!i && !next){ /* matrix not written, we use the preset one */
6965 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* A next value of 0 means "repeat": the entry takes the last value instead. */
6968 last = factors[scan[i]] = next ? next : last;
/*
 * Reads the scaling matrices of an SPS (is_sps != 0) or a PPS into
 * scaling_matrix4 (six 4x4 lists) and scaling_matrix8 (two 8x8 lists).
 *
 * When parsing a PPS whose SPS carried matrices, the SPS lists serve as the
 * fallback for the first list of each group; otherwise the default_scaling
 * tables do. Within a group, each following list falls back to the list
 * decoded just before it. The 8x8 lists are read only for an SPS or when
 * pps->transform_8x8_mode is set. If the stream signals no matrices and the
 * SPS fallback applies, the SPS matrices are copied as a whole.
 */
6972 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6973 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6974 MpegEncContext * const s = &h->s;
6975 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6976 const uint8_t *fallback[4] = {
6977 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6978 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6979 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6980 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6982 if(get_bits1(&s->gb)){
/* Only an SPS can set the flag; parsing a PPS leaves it as it was. */
6983 sps->scaling_matrix_present |= is_sps;
6984 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6985 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6986 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6987 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6988 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6989 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6990 if(is_sps || pps->transform_8x8_mode){
6991 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6992 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
6994 } else if(fallback_sps) {
6995 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
6996 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7001 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/*
 * h    - context, used here only for logging through h->s.avctx
 * vec  - array of parameter-set pointers, indexed by id
 * id   - slot to look up; the log message below reports it as out of range
 *        (presumably when id >= max -- the check itself is not visible here)
 * max  - number of slots in vec
 * size - byte size of one structure; the slot is allocated zero-filled with
 *        av_mallocz(size)
 * name - short label ("sps" / "pps" at the call sites) used in log messages
 */
7004 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7005 const size_t size, const char *name)
7008 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7013 vec[id] = av_mallocz(size);
7015 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/*
 * Parses a sequence parameter set from s->gb.
 *
 * The SPS structure is obtained from alloc_parameter_set() using the sps_id
 * read from the stream, then filled field by field. Range checks visible
 * here: poc_cycle_length against the size of offset_for_ref_frame, the
 * reference frame count against MAX_PICTURE_COUNT-2, and the macroblock
 * dimensions against overflow. With FF_DEBUG_PICT_INFO set, a summary line
 * is logged at the end.
 */
7020 static inline int decode_seq_parameter_set(H264Context *h){
7021 MpegEncContext * const s = &h->s;
7022 int profile_idc, level_idc;
7023 unsigned int sps_id, tmp, mb_width, mb_height;
/* profile and level are read into locals first because the destination
 * SPS is only known once sps_id has been parsed. */
7027 profile_idc= get_bits(&s->gb, 8);
7028 get_bits1(&s->gb); //constraint_set0_flag
7029 get_bits1(&s->gb); //constraint_set1_flag
7030 get_bits1(&s->gb); //constraint_set2_flag
7031 get_bits1(&s->gb); //constraint_set3_flag
7032 get_bits(&s->gb, 4); // reserved
7033 level_idc= get_bits(&s->gb, 8);
7034 sps_id= get_ue_golomb(&s->gb);
7036 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7040 sps->profile_idc= profile_idc;
7041 sps->level_idc= level_idc;
/* Extra syntax for profile_idc >= 100: chroma format, bit depths, transform
 * bypass and scaling matrices. Only the last two are stored. */
7043 if(sps->profile_idc >= 100){ //high profile
7044 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7045 get_bits1(&s->gb); //residual_color_transform_flag
7046 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7047 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7048 sps->transform_bypass = get_bits1(&s->gb);
7049 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7051 sps->scaling_matrix_present = 0;
7053 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7054 sps->poc_type= get_ue_golomb(&s->gb);
7056 if(sps->poc_type == 0){ //FIXME #define
7057 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7058 } else if(sps->poc_type == 1){//FIXME #define
7059 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7060 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7061 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7062 tmp= get_ue_golomb(&s->gb);
/* Bound the cycle length by the capacity of offset_for_ref_frame before
 * the loop below writes into it. */
7064 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7065 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7068 sps->poc_cycle_length= tmp;
7070 for(i=0; i<sps->poc_cycle_length; i++)
7071 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7072 }else if(sps->poc_type != 2){
7073 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7077 tmp= get_ue_golomb(&s->gb);
7078 if(tmp > MAX_PICTURE_COUNT-2){
7079 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7081 sps->ref_frame_count= tmp;
7082 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7083 mb_width= get_ue_golomb(&s->gb) + 1;
7084 mb_height= get_ue_golomb(&s->gb) + 1;
/* Make sure 16*mb_width and 16*mb_height cannot overflow an int before the
 * pixel dimensions are validated. */
7085 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7086 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7087 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7090 sps->mb_width = mb_width;
7091 sps->mb_height= mb_height;
7093 sps->frame_mbs_only_flag= get_bits1(&s->gb);
/* mb_aff is present in the stream only when frame_mbs_only_flag is 0. */
7094 if(!sps->frame_mbs_only_flag)
7095 sps->mb_aff= get_bits1(&s->gb);
7099 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7101 #ifndef ALLOW_INTERLACE
7103 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7105 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7106 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7108 sps->crop= get_bits1(&s->gb);
7110 sps->crop_left = get_ue_golomb(&s->gb);
7111 sps->crop_right = get_ue_golomb(&s->gb);
7112 sps->crop_top = get_ue_golomb(&s->gb);
7113 sps->crop_bottom= get_ue_golomb(&s->gb);
/* Non-zero left/top cropping only produces a warning; the values are kept. */
7114 if(sps->crop_left || sps->crop_top){
7115 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7121 sps->crop_bottom= 0;
7124 sps->vui_parameters_present_flag= get_bits1(&s->gb);
/* NOTE(review): the int result of decode_vui_parameters() is not checked here. */
7125 if( sps->vui_parameters_present_flag )
7126 decode_vui_parameters(h, sps);
7128 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7129 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7130 sps_id, sps->profile_idc, sps->level_idc,
7132 sps->ref_frame_count,
7133 sps->mb_width, sps->mb_height,
7134 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7135 sps->direct_8x8_inference_flag ? "8B8" : "",
7136 sps->crop_left, sps->crop_right,
7137 sps->crop_top, sps->crop_bottom,
7138 sps->vui_parameters_present_flag ? "VUI" : ""
/*
 * Fills pps->chroma_qp_table[t]: entry i receives chroma_qp[] looked up at
 * i + index, clipped to the range 0..51.
 *
 * NOTE(review): the loop runs for i = 0..254, so entry 255 is never written
 * here, while decode_picture_parameter_set() copies 256 bytes of this table
 * with memcpy. Confirm whether the bound should be 256.
 */
7145 build_qp_table(PPS *pps, int t, int index)
7148 for(i = 0; i < 255; i++)
7149 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/*
 * Parses a picture parameter set from s->gb.
 *
 * The PPS structure is obtained from alloc_parameter_set() using the pps_id
 * read from the stream. bit_length is the payload length in bits: if bits
 * remain after the base fields, transform_8x8_mode, the scaling matrices and
 * the second chroma QP offset are read; otherwise the second offset mirrors
 * the first. The chroma QP lookup tables are built at the end, and a summary
 * line is logged when FF_DEBUG_PICT_INFO is set.
 */
7152 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7153 MpegEncContext * const s = &h->s;
7154 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7157 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* The referenced SPS must be within range and already present. */
7161 tmp= get_ue_golomb(&s->gb);
7162 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7163 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7168 pps->cabac= get_bits1(&s->gb);
7169 pps->pic_order_present= get_bits1(&s->gb);
7170 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* More than one slice group means FMO, which is reported as unsupported. */
7171 if(pps->slice_group_count > 1 ){
7172 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7173 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7174 switch(pps->mb_slice_group_map_type){
7177 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7178 | run_length[ i ] |1 |ue(v) |
7183 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7185 | top_left_mb[ i ] |1 |ue(v) |
7186 | bottom_right_mb[ i ] |1 |ue(v) |
7194 | slice_group_change_direction_flag |1 |u(1) |
7195 | slice_group_change_rate_minus1 |1 |ue(v) |
7200 | slice_group_id_cnt_minus1 |1 |ue(v) |
7201 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7203 | slice_group_id[ i ] |1 |u(v) |
7208 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7209 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Counts above 32 are logged and both counts are reset to 1. */
7210 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7211 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7212 pps->ref_count[0]= pps->ref_count[1]= 1;
7216 pps->weighted_pred= get_bits1(&s->gb);
7217 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7218 pps->init_qp= get_se_golomb(&s->gb) + 26;
7219 pps->init_qs= get_se_golomb(&s->gb) + 26;
7220 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7221 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7222 pps->constrained_intra_pred= get_bits1(&s->gb);
7223 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7225 pps->transform_8x8_mode= 0;
7226 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Defaults before the optional extension is parsed: every scaling factor is 16. */
7227 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7228 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7230 if(get_bits_count(&s->gb) < bit_length){
7231 pps->transform_8x8_mode= get_bits1(&s->gb);
7232 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7233 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7235 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
/* Table 1 is built separately only when the two offsets differ; otherwise
 * it is a copy of table 0. */
7238 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7239 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7240 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
/* NOTE(review): this sets the flag on h->pps, not on the pps being parsed --
 * confirm that this is intended. */
7241 h->pps.chroma_qp_diff= 1;
7243 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7245 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7246 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7247 pps_id, pps->sps_id,
7248 pps->cabac ? "CABAC" : "CAVLC",
7249 pps->slice_group_count,
7250 pps->ref_count[0], pps->ref_count[1],
7251 pps->weighted_pred ? "weighted" : "",
7252 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7253 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7254 pps->constrained_intra_pred ? "CONSTR" : "",
7255 pps->redundant_pic_cnt_present ? "REDU" : "",
7256 pps->transform_8x8_mode ? "8x8DCT" : ""
7264 * Call decode_slice() for each context.
7266 * @param h h264 master context
7267 * @param context_count number of contexts to execute
/*
 * A single context is decoded directly with decode_slice(avctx, h).
 * For several contexts, error_resilience is copied and error_count is reset
 * in contexts 1 and up, all contexts are run through avctx->execute(), and
 * afterwards the master context takes mb_x / mb_y from the last context and
 * adds up the error counts of contexts 1 and up.
 */
7269 static void execute_decode_slices(H264Context *h, int context_count){
7270 MpegEncContext * const s = &h->s;
7271 AVCodecContext * const avctx= s->avctx;
7275 if(context_count == 1) {
7276 decode_slice(avctx, h);
/* The loops start at 1, so index 0 of thread_context is left untouched here. */
7278 for(i = 1; i < context_count; i++) {
7279 hx = h->thread_context[i];
7280 hx->s.error_resilience = avctx->error_resilience;
7281 hx->s.error_count = 0;
7284 avctx->execute(avctx, (void *)decode_slice,
7285 (void **)h->thread_context, NULL, context_count);
7287 /* pull back stuff from slices to master context */
7288 hx = h->thread_context[context_count - 1];
7289 s->mb_x = hx->s.mb_x;
7290 s->mb_y = hx->s.mb_y;
7291 for(i = 1; i < context_count; i++)
7292 h->s.error_count += h->thread_context[i]->s.error_count;
7297 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7298 MpegEncContext * const s = &h->s;
7299 AVCodecContext * const avctx= s->avctx;
7301 H264Context *hx; ///< thread context
7302 int context_count = 0;
7304 h->max_contexts = avctx->thread_count;
7307 for(i=0; i<50; i++){
7308 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7311 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7312 h->current_slice = 0;
7313 s->current_picture_ptr= NULL;
7325 if(buf_index >= buf_size) break;
7327 for(i = 0; i < h->nal_length_size; i++)
7328 nalsize = (nalsize << 8) | buf[buf_index++];
7329 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7334 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7339 // start code prefix search
7340 for(; buf_index + 3 < buf_size; buf_index++){
7341 // This should always succeed in the first iteration.
7342 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7346 if(buf_index+3 >= buf_size) break;
7351 hx = h->thread_context[context_count];
7353 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7354 if (ptr==NULL || dst_length < 0){
7357 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7359 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7361 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7362 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7365 if (h->is_avc && (nalsize != consumed))
7366 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7368 buf_index += consumed;
7370 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7371 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7376 switch(hx->nal_unit_type){
7378 if (h->nal_unit_type != NAL_IDR_SLICE) {
7379 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7382 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7384 init_get_bits(&hx->s.gb, ptr, bit_length);
7386 hx->inter_gb_ptr= &hx->s.gb;
7387 hx->s.data_partitioning = 0;
7389 if((err = decode_slice_header(hx, h)))
7392 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7393 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7394 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7395 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7396 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7397 && avctx->skip_frame < AVDISCARD_ALL)
7401 init_get_bits(&hx->s.gb, ptr, bit_length);
7403 hx->inter_gb_ptr= NULL;
7404 hx->s.data_partitioning = 1;
7406 err = decode_slice_header(hx, h);
7409 init_get_bits(&hx->intra_gb, ptr, bit_length);
7410 hx->intra_gb_ptr= &hx->intra_gb;
7413 init_get_bits(&hx->inter_gb, ptr, bit_length);
7414 hx->inter_gb_ptr= &hx->inter_gb;
7416 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7417 && s->context_initialized
7419 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7420 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7421 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7422 && avctx->skip_frame < AVDISCARD_ALL)
7426 init_get_bits(&s->gb, ptr, bit_length);
7430 init_get_bits(&s->gb, ptr, bit_length);
7431 decode_seq_parameter_set(h);
7433 if(s->flags& CODEC_FLAG_LOW_DELAY)
7436 if(avctx->has_b_frames < 2)
7437 avctx->has_b_frames= !s->low_delay;
7440 init_get_bits(&s->gb, ptr, bit_length);
7442 decode_picture_parameter_set(h, bit_length);
7446 case NAL_END_SEQUENCE:
7447 case NAL_END_STREAM:
7448 case NAL_FILLER_DATA:
7450 case NAL_AUXILIARY_SLICE:
7453 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7456 if(context_count == h->max_contexts) {
7457 execute_decode_slices(h, context_count);
7462 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7464 /* Slice could not be decoded in parallel mode, copy down
7465 * NAL unit stuff to context 0 and restart. Note that
7466 * rbsp_buffer is not transferred, but since we no longer
7467 * run in parallel mode this should not be an issue. */
7468 h->nal_unit_type = hx->nal_unit_type;
7469 h->nal_ref_idc = hx->nal_ref_idc;
7475 execute_decode_slices(h, context_count);
7480 * returns the number of bytes consumed for building the current frame
/*
 * Turn the parsing position pos into the byte count reported for this call.
 *
 * s        - context supplying flags and parse_context.last_index
 * pos      - position reached while decoding the buffer
 * buf_size - size of the input buffer in bytes
 *
 * NOTE(review): original lines 7486-7488 and 7491 onwards are missing from
 * this listing. Presumably the CODEC_FLAG_TRUNCATED branch returns pos there
 * and the last two adjustments belong to the non-truncated path; confirm
 * against the complete file.
 */
7482 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7483 if(s->flags&CODEC_FLAG_TRUNCATED){
/* rebase pos by the parse context's last_index, never going below zero */
7484 pos -= s->parse_context.last_index;
7485 if(pos<0) pos=0; // FIXME remove (unneeded?)
/* a result of 0 is bumped to 1 so the caller always makes progress */
7489 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
/* when fewer than 10 bytes would be left over, report the whole buffer */
7490 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Decode one input buffer and, when a picture is selected for output,
 * copy it into the caller's frame.
 *
 * avctx     - codec context; priv_data holds the H264Context
 * data      - output storage, written through an AVFrame pointer
 * data_size - set to sizeof(AVFrame) when a picture is output
 * buf       - input bytes
 * buf_size  - number of input bytes; 0 selects the delayed-picture path
 *
 * The final return is get_consumed_bytes() applied to the result of
 * decode_nal_units(). One early "return 0" is visible; other early exits
 * are not part of this listing.
 *
 * NOTE(review): this listing omits a number of original lines (closing
 * braces, some returns, some local declarations and assignments). The
 * comments below describe only the statements that are shown.
 */
7496 static int decode_frame(AVCodecContext *avctx,
7497 void *data, int *data_size,
7498 uint8_t *buf, int buf_size)
7500 H264Context *h = avctx->priv_data;
7501 MpegEncContext *s = &h->s;
7502 AVFrame *pict = data;
/* mirror the caller's flag words into the MpegEncContext */
7505 s->flags= avctx->flags;
7506 s->flags2= avctx->flags2;
7508 /* no supplementary picture */
7509 if (buf_size == 0) {
7513 //FIXME factorize this with the output code below
/* start from the first delayed picture, then scan the following entries,
   stopping at the end of the list or at a key frame, keeping the lowest poc */
7514 out = h->delayed_pic[0];
7516 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7517 if(h->delayed_pic[i]->poc < out->poc){
7518 out = h->delayed_pic[i];
/* close the gap in the list; out_idx is presumably the index of the chosen
   picture (its assignment is not shown here) */
7522 for(i=out_idx; h->delayed_pic[i]; i++)
7523 h->delayed_pic[i] = h->delayed_pic[i+1];
/* hand the chosen picture to the caller */
7526 *data_size = sizeof(AVFrame);
7527 *pict= *(AVFrame*)out;
/* truncated input: locate the frame end and let ff_combine_frame() adjust
   buf and buf_size; a negative result takes a branch not shown here */
7533 if(s->flags&CODEC_FLAG_TRUNCATED){
7534 int next= ff_h264_find_frame_end(h, buf, buf_size);
7536 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7538 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* AVC-style stream whose avcC extradata has not been parsed yet */
7541 if(h->is_avc && !h->got_avcC) {
7542 int i, cnt, nalsize;
7543 unsigned char *p = avctx->extradata;
7544 if(avctx->extradata_size < 7) {
7545 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7549 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7552 /* sps and pps in the avcC always have length coded with 2 bytes,
7553 so put a fake nal_length_size = 2 while parsing them */
7554 h->nal_length_size = 2;
7555 // Decode sps from avcC
/* the low 5 bits of byte 5 give the SPS count */
7556 cnt = *(p+5) & 0x1f; // Number of sps
/* NOTE(review): apart from the "< 7" test above, no check of p against
   extradata_size is visible in this listing; verify that the SPS/PPS walk
   cannot run past the end of extradata. */
7558 for (i = 0; i < cnt; i++) {
/* each entry is a 16-bit big-endian length followed by the payload */
7559 nalsize = AV_RB16(p) + 2;
7560 if(decode_nal_units(h, p, nalsize) < 0) {
7561 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7566 // Decode pps from avcC
7567 cnt = *(p++); // Number of pps
7568 for (i = 0; i < cnt; i++) {
7569 nalsize = AV_RB16(p) + 2;
/* NOTE(review): the SPS loop treats only a negative result as failure,
   while this loop requires the result to equal nalsize; confirm which
   check is intended. */
7570 if(decode_nal_units(h, p, nalsize) != nalsize) {
7571 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7576 // Now store right nal length size, that will be used to parse all other nals
/* the low 2 bits of byte 4, plus one */
7577 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7578 // Do not reparse avcC
/* non-AVC stream: feed the extradata through the NAL decoder while
   frame_number is still 0 */
7582 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7583 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
/* decode the actual input buffer */
7587 buf_index=decode_nal_units(h, buf, buf_size);
/* outside chunk mode, having no current picture is an error unless frames
   are being skipped on purpose */
7591 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7592 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7593 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* picture finished: always outside chunk mode; in chunk mode only once
   mb_y has reached a non-zero mb_height */
7597 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7598 Picture *out = s->current_picture_ptr;
7599 Picture *cur = s->current_picture_ptr;
7600 Picture *prev = h->delayed_output_pic;
7601 int i, pics, cross_idr, out_of_order, out_idx;
7605 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7606 s->current_picture_ptr->pict_type= s->pict_type;
/* remember frame_num state for the next picture */
7608 h->prev_frame_num_offset= h->frame_num_offset;
7609 h->prev_frame_num= h->frame_num;
/* only a picture that is a reference for the current picture structure
   updates the saved POC values and runs reference marking */
7610 if(s->current_picture_ptr->reference & s->picture_structure){
7611 h->prev_poc_msb= h->poc_msb;
7612 h->prev_poc_lsb= h->poc_lsb;
7613 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7620 //FIXME do something with unavailable reference frames
7622 #if 0 //decode order
7623 *data_size = sizeof(AVFrame);
7625 /* Sort B-frames into display order */
/* raise has_b_frames to the reorder depth signalled in the SPS */
7627 if(h->sps.bitstream_restriction_flag
7628 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7629 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* count the delayed pictures, then append the current one */
7634 while(h->delayed_pic[pics]) pics++;
7636 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7638 h->delayed_pic[pics++] = cur;
/* a non-reference picture gets DELAYED_PIC_REF while it waits for output */
7639 if(cur->reference == 0)
7640 cur->reference = DELAYED_PIC_REF;
/* look for a key frame or a picture with poc 0 in the delayed list; the
   statement controlled by this test is not shown (presumably it sets
   cross_idr) */
7643 for(i=0; h->delayed_pic[i]; i++)
7644 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* same lowest-poc selection as in the buf_size == 0 path above */
7647 out = h->delayed_pic[0];
7649 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7650 if(h->delayed_pic[i]->poc < out->poc){
7651 out = h->delayed_pic[i];
/* the candidate precedes the previously output picture in poc order */
7655 out_of_order = !cross_idr && prev && out->poc < prev->poc;
/* chain of cases adjusting the delay; most of the controlled statements
   are not part of this listing */
7656 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7658 else if(prev && pics <= s->avctx->has_b_frames)
7660 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7662 ((!cross_idr && prev && out->poc > prev->poc + 2)
7663 || cur->pict_type == B_TYPE)))
7666 s->avctx->has_b_frames++;
7669 else if(out_of_order)
/* remove the chosen picture from the delayed list */
7672 if(out_of_order || pics > s->avctx->has_b_frames){
7673 for(i=out_idx; h->delayed_pic[i]; i++)
7674 h->delayed_pic[i] = h->delayed_pic[i+1];
7680 *data_size = sizeof(AVFrame);
/* drop the delayed-output hold on the previously output picture and
   remember the new one */
7681 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7682 prev->reference = 0;
7683 h->delayed_output_pic = out;
7687 *pict= *(AVFrame*)out;
7689 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7692 assert(pict->data[0] || !*data_size);
7693 ff_print_debug_info(s, pict);
7694 //printf("out %d\n", (int)pict->data[0]);
7697 /* Return the Picture timestamp as the frame number */
7698 /* we subtract 1 because it is added on utils.c */
7699 avctx->frame_number = s->picture_number - 1;
7701 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fill h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 * A neighbour counts as available when its h->slice_table entry equals
 * h->slice_num. Slots written here:
 *   0: (x-1, y-1)   1: (x, y-1)   2: (x+1, y-1)   3: (x-1, y)
 *   4: constant 1   5: constant 0
 *
 * NOTE(review): original lines 7707-7708 and 7712-7716 are missing from
 * this listing; presumably they guard the row-above lookups for the first
 * macroblock row. Confirm against the complete file.
 */
7704 static inline void fill_mb_avail(H264Context *h){
7705 MpegEncContext * const s = &h->s;
/* linear index of the current macroblock in slice_table */
7706 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* row above: the left and right neighbours also need a column bounds test */
7709 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7710 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7711 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* same row, left neighbour */
7717 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7718 h->mb_avail[4]= 1; //FIXME move out
7719 h->mb_avail[5]= 0; //FIXME move out
// Self-test code: round-trips exp-golomb codes, a 4x4 DCT/IDCT and the NAL
// escaping layer, printing a message on any mismatch.
// NOTE(review): the enclosing function header and most local declarations
// are missing from this listing; presumably this is the body of a test-only
// main(). Confirm against the complete file.
7726 #define SIZE (COUNT*40)
7732 // int int_temp[10000];
7734 AVCodecContext avctx;
7736 dsputil_init(&dsp, &avctx);
// write COUNT unsigned exp-golomb codes into temp
7738 init_put_bits(&pb, temp, SIZE);
7739 printf("testing unsigned exp golomb\n");
7740 for(i=0; i<COUNT; i++){
7742 set_ue_golomb(&pb, i);
7743 STOP_TIMER("set_ue_golomb");
7745 flush_put_bits(&pb);
// read them back; s keeps the next 24 bits for the mismatch message
7747 init_get_bits(&gb, temp, 8*SIZE);
7748 for(i=0; i<COUNT; i++){
7751 s= show_bits(&gb, 24);
7754 j= get_ue_golomb(&gb);
7756 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7759 STOP_TIMER("get_ue_golomb");
// same round trip for signed codes, using values centred on zero
7763 init_put_bits(&pb, temp, SIZE);
7764 printf("testing signed exp golomb\n");
7765 for(i=0; i<COUNT; i++){
7767 set_se_golomb(&pb, i - COUNT/2);
7768 STOP_TIMER("set_se_golomb");
7770 flush_put_bits(&pb);
7772 init_get_bits(&gb, temp, 8*SIZE);
7773 for(i=0; i<COUNT; i++){
7776 s= show_bits(&gb, 24);
7779 j= get_se_golomb(&gb);
7780 if(j != i - COUNT/2){
7781 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7784 STOP_TIMER("get_se_golomb");
// forward transform of src-ref, rescale, inverse transform added onto ref,
// then compare the result with src
7787 printf("testing 4x4 (I)DCT\n");
7790 uint8_t src[16], ref[16];
7791 uint64_t error= 0, max_error=0;
7793 for(i=0; i<COUNT; i++){
7795 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
// random()%255 yields values 0..254
7796 for(j=0; j<16; j++){
7797 ref[j]= random()%255;
7798 src[j]= random()%255;
7801 h264_diff_dct_c(block, src, ref, 4);
// scale every coefficient by 4, then by roughly 4/5 once for each of the
// index bits 1 and 4 that is set
7804 for(j=0; j<16; j++){
7805 // printf("%d ", block[j]);
7806 block[j]= block[j]*4;
7807 if(j&1) block[j]= (block[j]*4 + 2)/5;
7808 if(j&4) block[j]= (block[j]*4 + 2)/5;
7812 s->dsp.h264_idct_add(ref, block, 4);
7813 /* for(j=0; j<16; j++){
7814 printf("%d ", ref[j]);
7818 for(j=0; j<16; j++){
7819 int diff= FFABS(src[j] - ref[j]);
7822 max_error= FFMAX(max_error, diff);
7825 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7827 printf("testing quantizer\n");
7828 for(qp=0; qp<52; qp++){
7830 src1_block[i]= src2_block[i]= random()%255;
// NAL layer: build a random payload, encode it, decode it again and compare
7834 printf("Testing NAL layer\n");
7836 uint8_t bitstream[COUNT];
7837 uint8_t nal[COUNT*2];
7839 memset(&h, 0, sizeof(H264Context));
7841 for(i=0; i<COUNT; i++){
// fill with non-zero bytes first
7849 for(j=0; j<COUNT; j++){
7850 bitstream[j]= (random() % 255) + 1;
// then place zeros at random positions
7853 for(j=0; j<zeros; j++){
7854 int pos= random() % COUNT;
7855 while(bitstream[pos] == 0){
7864 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7866 printf("encoding failed\n");
7870 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
// decoded length, consumed length and payload must all match the input
7874 if(out_length != COUNT){
7875 printf("incorrect length %d %d\n", out_length, COUNT);
7879 if(consumed != nal_length){
7880 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7884 if(memcmp(bitstream, out, COUNT)){
7885 printf("mismatch\n");
7890 printf("Testing RBSP\n");
// Release decoder state held in the H264Context stored in avctx->priv_data.
// NOTE(review): original lines 7906-7907 and the return statement are
// missing from this listing, so any further teardown (for example one using
// s) and the return value cannot be described from here.
7898 static int decode_end(AVCodecContext *avctx)
7900 H264Context *h = avctx->priv_data;
7901 MpegEncContext *s = &h->s;
// free both rbsp buffers, then the decoder tables
7903 av_freep(&h->rbsp_buffer[0]);
7904 av_freep(&h->rbsp_buffer[1]);
7905 free_tables(h); //FIXME cleanup init stuff perhaps
7908 // memset(h, 0, sizeof(H264Context));
7914 AVCodec h264_decoder = {
7918 sizeof(H264Context),
7923 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,