2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
42 * Value of Picture.reference when Picture is not a reference picture, but
43 * is held for delayed output.
45 #define DELAYED_PIC_REF 4
47 static VLC coeff_token_vlc[4];
48 static VLC chroma_dc_coeff_token_vlc;
50 static VLC total_zeros_vlc[15];
51 static VLC chroma_dc_total_zeros_vlc[3];
53 static VLC run_vlc[6];
56 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
57 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
58 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
59 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
// Pack two 16-bit values into one 32-bit word; the operand order is swapped
// on big-endian hosts so the packed byte layout matches a pair of int16s
// stored consecutively in memory.
61 static av_always_inline uint32_t pack16to32(int a, int b){
62 #ifdef WORDS_BIGENDIAN
63 return (b&0xFFFF) + (a<<16);
65 return (a&0xFFFF) + (b<<16);
// Lookup tables over the H.264 QP range [0,51]: ff_rem6[qp] == qp % 6 and
// ff_div6[qp] == qp / 6, precomputed so dequantisation avoids div/mod.
69 const uint8_t ff_rem6[52]={
70 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
73 const uint8_t ff_div6[52]={
74 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
80 * @param h height of the rectangle, should be a constant
81 * @param w width of the rectangle, should be a constant
82 * @param size the size of val (1 or 4), should be a constant
// Fill a w x h rectangle at vp (stride bytes per row) with val, where size
// is the element size in bytes (1 or 4).  Rows are written with the widest
// aligned store available: 16-, 32- or 64-bit, unrolled for h up to 4.
84 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
85 uint8_t *p= (uint8_t*)vp;
86 assert(size==1 || size==4);
// destination must be aligned to min(w, STRIDE_ALIGN) and stride a multiple of w
92 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
93 assert((stride&(w-1))==0);
// 2-byte rows: a byte value is replicated into a uint16 when size==1
95 const uint16_t v= size==4 ? val : val*0x0101;
96 *(uint16_t*)(p + 0*stride)= v;
98 *(uint16_t*)(p + 1*stride)= v;
100 *(uint16_t*)(p + 2*stride)= v;
101 *(uint16_t*)(p + 3*stride)= v;
// 4-byte rows: replicate a byte value into all four bytes when size==1
103 const uint32_t v= size==4 ? val : val*0x01010101;
104 *(uint32_t*)(p + 0*stride)= v;
106 *(uint32_t*)(p + 1*stride)= v;
108 *(uint32_t*)(p + 2*stride)= v;
109 *(uint32_t*)(p + 3*stride)= v;
111 //gcc can't optimize 64bit math on x86_32
// use single 64-bit stores per 8 bytes only where 64-bit math is native
112 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
113 const uint64_t v= val*0x0100000001ULL;
114 *(uint64_t*)(p + 0*stride)= v;
116 *(uint64_t*)(p + 1*stride)= v;
118 *(uint64_t*)(p + 2*stride)= v;
119 *(uint64_t*)(p + 3*stride)= v;
// 16-byte rows: two 64-bit stores per row (offsets 0 and 8)
121 const uint64_t v= val*0x0100000001ULL;
122 *(uint64_t*)(p + 0+0*stride)= v;
123 *(uint64_t*)(p + 8+0*stride)= v;
124 *(uint64_t*)(p + 0+1*stride)= v;
125 *(uint64_t*)(p + 8+1*stride)= v;
127 *(uint64_t*)(p + 0+2*stride)= v;
128 *(uint64_t*)(p + 8+2*stride)= v;
129 *(uint64_t*)(p + 0+3*stride)= v;
130 *(uint64_t*)(p + 8+3*stride)= v;
// 32-bit fallback: two 32-bit stores per 8-byte row (offsets 0 and 4)
132 *(uint32_t*)(p + 0+0*stride)= val;
133 *(uint32_t*)(p + 4+0*stride)= val;
135 *(uint32_t*)(p + 0+1*stride)= val;
136 *(uint32_t*)(p + 4+1*stride)= val;
138 *(uint32_t*)(p + 0+2*stride)= val;
139 *(uint32_t*)(p + 4+2*stride)= val;
140 *(uint32_t*)(p + 0+3*stride)= val;
141 *(uint32_t*)(p + 4+3*stride)= val;
// 32-bit fallback: four 32-bit stores per 16-byte row (offsets 0,4,8,12)
143 *(uint32_t*)(p + 0+0*stride)= val;
144 *(uint32_t*)(p + 4+0*stride)= val;
145 *(uint32_t*)(p + 8+0*stride)= val;
146 *(uint32_t*)(p +12+0*stride)= val;
147 *(uint32_t*)(p + 0+1*stride)= val;
148 *(uint32_t*)(p + 4+1*stride)= val;
149 *(uint32_t*)(p + 8+1*stride)= val;
150 *(uint32_t*)(p +12+1*stride)= val;
152 *(uint32_t*)(p + 0+2*stride)= val;
153 *(uint32_t*)(p + 4+2*stride)= val;
154 *(uint32_t*)(p + 8+2*stride)= val;
155 *(uint32_t*)(p +12+2*stride)= val;
156 *(uint32_t*)(p + 0+3*stride)= val;
157 *(uint32_t*)(p + 4+3*stride)= val;
158 *(uint32_t*)(p + 8+3*stride)= val;
159 *(uint32_t*)(p +12+3*stride)= val;
// Fill the per-macroblock prediction caches (intra4x4 modes, non-zero
// coefficient counts, CBP, motion vectors / reference indices, mvd and
// direct-mode flags) from the top / top-left / top-right / left neighbour
// macroblocks.  for_deblock selects a reduced fill used by the deblocker.
166 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
167 MpegEncContext * const s = &h->s;
168 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
169 int topleft_xy, top_xy, topright_xy, left_xy[2];
170 int topleft_type, top_type, topright_type, left_type[2];
174 //FIXME deblocking could skip the intra and nnz parts.
175 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
178 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// default (non-MBAFF) neighbour macroblock indices
180 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
181 topleft_xy = top_xy - 1;
182 topright_xy= top_xy + 1;
183 left_xy[1] = left_xy[0] = mb_xy-1;
// MBAFF: neighbours must be derived from the macroblock *pair* above/left,
// adjusting for whether each pair is frame- or field-coded
193 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
194 const int top_pair_xy = pair_xy - s->mb_stride;
195 const int topleft_pair_xy = top_pair_xy - 1;
196 const int topright_pair_xy = top_pair_xy + 1;
197 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
198 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
199 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
200 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
201 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
202 const int bottom = (s->mb_y & 1);
203 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
205 ? !curr_mb_frame_flag // bottom macroblock
206 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
208 top_xy -= s->mb_stride;
211 ? !curr_mb_frame_flag // bottom macroblock
212 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
214 topleft_xy -= s->mb_stride;
217 ? !curr_mb_frame_flag // bottom macroblock
218 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
220 topright_xy -= s->mb_stride;
222 if (left_mb_frame_flag != curr_mb_frame_flag) {
223 left_xy[1] = left_xy[0] = pair_xy - 1;
224 if (curr_mb_frame_flag) {
245 left_xy[1] += s->mb_stride;
// expose the resolved neighbour indices for later users (e.g. deblocking)
258 h->top_mb_xy = top_xy;
259 h->left_mb_xy[0] = left_xy[0];
260 h->left_mb_xy[1] = left_xy[1];
264 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
265 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
266 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
// MBAFF inter deblock shortcut: rebuild the nnz flags (packed bitmask at
// non_zero_count[..][14]) and mv/ref caches from the per-picture tables
268 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
270 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
272 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
273 for(list=0; list<h->list_count; list++){
274 if(USES_LIST(mb_type,list)){
275 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
276 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
277 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
278 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
284 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
285 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
287 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
288 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
290 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
291 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// neighbour MB types, zeroed when the neighbour belongs to another slice
296 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
297 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
298 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
299 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
300 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// intra prediction: bitmasks describing which neighbour samples are usable;
// bits are cleared per missing / non-intra (with constrained intra) neighbour
303 if(IS_INTRA(mb_type)){
304 h->topleft_samples_available=
305 h->top_samples_available=
306 h->left_samples_available= 0xFFFF;
307 h->topright_samples_available= 0xEEEA;
309 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
310 h->topleft_samples_available= 0xB3FF;
311 h->top_samples_available= 0x33FF;
312 h->topright_samples_available= 0x26EA;
315 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
316 h->topleft_samples_available&= 0xDF5F;
317 h->left_samples_available&= 0x5F5F;
321 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
322 h->topleft_samples_available&= 0x7FFF;
324 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
325 h->topright_samples_available&= 0xFBFF;
// cache the intra4x4 prediction modes of the bottom row of the top
// neighbour and the right column of the left neighbours
327 if(IS_INTRA4x4(mb_type)){
328 if(IS_INTRA4x4(top_type)){
329 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
330 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
331 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
332 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
335 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
340 h->intra4x4_pred_mode_cache[4+8*0]=
341 h->intra4x4_pred_mode_cache[5+8*0]=
342 h->intra4x4_pred_mode_cache[6+8*0]=
343 h->intra4x4_pred_mode_cache[7+8*0]= pred;
346 if(IS_INTRA4x4(left_type[i])){
347 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
348 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
351 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
356 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
357 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
372 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
// non-zero-coefficient count cache: top neighbour row (64 = "unavailable")
374 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
375 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
376 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
377 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
379 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
380 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
382 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
383 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
386 h->non_zero_count_cache[4+8*0]=
387 h->non_zero_count_cache[5+8*0]=
388 h->non_zero_count_cache[6+8*0]=
389 h->non_zero_count_cache[7+8*0]=
391 h->non_zero_count_cache[1+8*0]=
392 h->non_zero_count_cache[2+8*0]=
394 h->non_zero_count_cache[1+8*3]=
395 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// non-zero-coefficient counts from the left neighbour columns
399 for (i=0; i<2; i++) {
401 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
402 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
403 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
404 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
406 h->non_zero_count_cache[3+8*1 + 2*8*i]=
407 h->non_zero_count_cache[3+8*2 + 2*8*i]=
408 h->non_zero_count_cache[0+8*1 + 8*i]=
409 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// neighbour coded-block-pattern values (from cbp_table)
416 h->top_cbp = h->cbp_table[top_xy];
417 } else if(IS_INTRA(mb_type)) {
424 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
425 } else if(IS_INTRA(mb_type)) {
431 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
434 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// inter / direct: fill motion-vector and reference-index caches from the
// top, left, top-left and top-right neighbours
439 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
441 for(list=0; list<h->list_count; list++){
442 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
443 /*if(!h->mv_cache_clean[list]){
444 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
445 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
446 h->mv_cache_clean[list]= 1;
450 h->mv_cache_clean[list]= 0;
452 if(USES_LIST(top_type, list)){
453 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
454 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
455 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
456 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
457 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
458 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
459 h->ref_cache[list][scan8[0] + 0 - 1*8]=
460 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
461 h->ref_cache[list][scan8[0] + 2 - 1*8]=
462 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
464 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
465 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
466 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
467 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
468 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
472 int cache_idx = scan8[0] - 1 + i*2*8;
473 if(USES_LIST(left_type[i], list)){
474 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
475 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
476 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
477 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
478 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
479 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
481 *(uint32_t*)h->mv_cache [list][cache_idx ]=
482 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
483 h->ref_cache[list][cache_idx ]=
484 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
488 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
491 if(USES_LIST(topleft_type, list)){
492 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
493 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
494 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
495 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
497 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
498 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
501 if(USES_LIST(topright_type, list)){
502 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
503 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
504 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
505 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
507 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
508 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// initialise the padding cache entries that only exist to keep the
// scan8-based addressing uniform
511 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
514 h->ref_cache[list][scan8[5 ]+1] =
515 h->ref_cache[list][scan8[7 ]+1] =
516 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
517 h->ref_cache[list][scan8[4 ]] =
518 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
519 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
520 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
521 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
522 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
523 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
526 /* XXX beurk, Load mvd */
527 if(USES_LIST(top_type, list)){
528 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
529 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
530 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
531 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
532 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
534 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
535 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
536 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
537 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
539 if(USES_LIST(left_type[0], list)){
540 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
541 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
542 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
544 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
545 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
547 if(USES_LIST(left_type[1], list)){
548 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
549 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
550 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
552 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
553 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
555 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
556 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
557 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
558 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
559 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: cache direct-mode flags of the neighbours for CABAC contexts
561 if(h->slice_type == B_TYPE){
562 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
564 if(IS_DIRECT(top_type)){
565 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
566 }else if(IS_8X8(top_type)){
567 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
568 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
569 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
571 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
574 if(IS_DIRECT(left_type[0]))
575 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
576 else if(IS_8X8(left_type[0]))
577 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
579 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
581 if(IS_DIRECT(left_type[1]))
582 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
583 else if(IS_8X8(left_type[1]))
584 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
586 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// apply MAP_F2F (defined below) to every cached neighbour position:
// top-left, top row, top-right and the left column
592 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
593 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
594 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
595 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
596 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
597 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
598 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
599 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
600 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
601 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// frame-coded neighbour: double the ref index, halve the vertical mv/mvd
603 #define MAP_F2F(idx, mb_type)\
604 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
605 h->ref_cache[list][idx] <<= 1;\
606 h->mv_cache[list][idx][1] /= 2;\
607 h->mvd_cache[list][idx][1] /= 2;\
612 #define MAP_F2F(idx, mb_type)\
613 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
614 h->ref_cache[list][idx] >>= 1;\
615 h->mv_cache[list][idx][1] <<= 1;\
616 h->mvd_cache[list][idx][1] <<= 1;\
626 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Copy the right column and bottom row of the intra4x4 prediction mode
// cache back into the per-macroblock table, where later macroblocks will
// read them as their left/top neighbour modes.
629 static inline void write_back_intra_pred_mode(H264Context *h){
630 MpegEncContext * const s = &h->s;
631 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
633 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
634 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
635 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
636 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
637 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
638 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
639 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
643 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Check the cached intra4x4 prediction modes against the available top/left
// samples and remap modes that need missing neighbours via the lookup
// tables below; a negative table entry means the mode is invalid here and
// an error is logged.
645 static inline int check_intra4x4_pred_mode(H264Context *h){
646 MpegEncContext * const s = &h->s;
647 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
648 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
651 if(!(h->top_samples_available&0x8000)){
653 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
655 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
658 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
663 if(!(h->left_samples_available&0x8000)){
665 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
667 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
670 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
676 } //FIXME cleanup like next
679 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Validate a whole-macroblock intra prediction mode against the available
// top/left samples, remapping DC-type modes via the tables below (negative
// entries signal an invalid mode, which is logged as an error).
681 static inline int check_intra_pred_mode(H264Context *h, int mode){
682 MpegEncContext * const s = &h->s;
683 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
684 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
687 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
691 if(!(h->top_samples_available&0x8000)){
694 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
699 if(!(h->left_samples_available&0x8000)){
702 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
711 * gets the predicted intra4x4 prediction mode.
// Return the predicted intra4x4 mode for block n: the minimum of the left
// and top cached modes, falling back to DC_PRED when a neighbour mode is
// negative (i.e. unavailable).
713 static inline int pred_intra_mode(H264Context *h, int n){
714 const int index8= scan8[n];
715 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
716 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
717 const int min= FFMIN(left, top);
719 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
721 if(min<0) return DC_PRED;
// Write the non-zero coefficient counts for this macroblock from the cache
// back to the per-macroblock table (the edge entries future neighbours
// read), plus a packed 16-bit luma nnz bitmask used by the deblocker.
725 static inline void write_back_non_zero_count(H264Context *h){
726 MpegEncContext * const s = &h->s;
727 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
729 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
730 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
731 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
732 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
733 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
734 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
735 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
737 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
738 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
739 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
741 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
742 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
743 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
746 // store all luma nnzs, for deblocking
749 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
750 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
755 * gets the predicted number of non zero coefficients.
756 * @param n block index
// Predict the number of non-zero coefficients for block n from the cached
// left and top counts (averaged, rounding up, when both are available).
758 static inline int pred_non_zero_count(H264Context *h, int n){
759 const int index8= scan8[n];
760 const int left= h->non_zero_count_cache[index8 - 1];
761 const int top = h->non_zero_count_cache[index8 - 8];
764 if(i<64) i= (i+1)>>1;
766 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetch the motion vector and reference index of the top-right (diagonal)
// neighbour of block i into *C, returning the reference index.  Falls back
// to the top-left neighbour when the top-right is unavailable.  The MBAFF
// branches handle neighbours with differing field/frame coding, rescaling
// the vertical mv component and reference index accordingly.
771 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
772 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
773 MpegEncContext *s = &h->s;
775 /* there is no consistent mapping of mvs to neighboring locations that will
776 * make mbaff happy, so we can't move all this logic to fill_caches */
778 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
780 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
781 *C = h->mv_cache[list][scan8[0]-2];
784 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
785 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
786 if(IS_INTERLACED(mb_types[topright_xy])){
787 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
788 const int x4 = X4, y4 = Y4;\
789 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
790 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
791 return LIST_NOT_USED;\
792 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
793 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
794 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
795 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
797 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// MBAFF: substitute the left neighbour when the top-right is unavailable
800 if(topright_ref == PART_NOT_AVAILABLE
801 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
802 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
804 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
805 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
808 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
810 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
811 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// default path: use the cached top-right, or top-left as fallback
817 if(topright_ref != PART_NOT_AVAILABLE){
818 *C= h->mv_cache[list][ i - 8 + part_width ];
821 tprintf(s->avctx, "topright MV not available\n");
823 *C= h->mv_cache[list][ i - 8 - 1 ];
824 return h->ref_cache[list][ i - 8 - 1 ];
829 * gets the predicted MV.
830 * @param n the block index
831 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
832 * @param mx the x component of the predicted motion vector
833 * @param my the y component of the predicted motion vector
// Median motion vector prediction: *mx/*my get the component-wise median
// of neighbours A (left), B (top) and C (diagonal).  If exactly one
// neighbour shares the target reference, that neighbour's mv is used
// directly; left-only is a further special case.
835 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
836 const int index8= scan8[n];
837 const int top_ref= h->ref_cache[list][ index8 - 8 ];
838 const int left_ref= h->ref_cache[list][ index8 - 1 ];
839 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
840 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
842 int diagonal_ref, match_count;
844 assert(part_width==1 || part_width==2 || part_width==4);
854 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
855 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
856 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
857 if(match_count > 1){ //most common
858 *mx= mid_pred(A[0], B[0], C[0]);
859 *my= mid_pred(A[1], B[1], C[1]);
860 }else if(match_count==1){
864 }else if(top_ref==ref){
// only the left neighbour exists: use A directly, otherwise take the median
872 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
876 *mx= mid_pred(A[0], B[0], C[0]);
877 *my= mid_pred(A[1], B[1], C[1]);
881 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
885 * gets the directionally predicted 16x8 MV.
886 * @param n the block index
887 * @param mx the x component of the predicted motion vector
888 * @param my the y component of the predicted motion vector
// Directional MV prediction for 16x8 partitions: the top neighbour is
// checked for the upper partition and the left neighbour for the lower
// one; otherwise the generic median predictor (pred_motion) is used.
890 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
892 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
893 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
895 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
903 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
904 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
906 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
916 pred_motion(h, n, 4, list, ref, mx, my);
920 * gets the directionally predicted 8x16 MV.
921 * @param n the block index
922 * @param mx the x component of the predicted motion vector
923 * @param my the y component of the predicted motion vector
// Directional MV prediction for 8x16 partitions: the left neighbour is
// checked for the left partition and the diagonal neighbour for the right
// one; otherwise the generic median predictor (pred_motion) is used.
925 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
927 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
928 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
930 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
941 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
943 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
945 if(diagonal_ref == ref){
953 pred_motion(h, n, 2, list, ref, mx, my);
// MV prediction for P-Skip macroblocks: the mv is zero when either the top
// or left neighbour is unavailable, or when either has reference 0 with a
// zero mv; otherwise the normal 16x16 median prediction is used.
956 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
957 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
958 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
960 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
962 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
963 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
964 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
970 pred_motion(h, 0, 4, 0, 0, mx, my);
// Precompute the temporal-direct distance scale factor for each list-0
// reference from POC distances (the tb/td/tx derivation); td==0 yields the
// neutral factor 256.  The per-field table mirrors the frame values.
975 static inline void direct_dist_scale_factor(H264Context * const h){
976 const int poc = h->s.current_picture_ptr->poc;
977 const int poc1 = h->ref_list[1][0].poc;
979 for(i=0; i<h->ref_count[0]; i++){
980 int poc0 = h->ref_list[0][i].poc;
981 int td = av_clip(poc1 - poc0, -128, 127);
982 if(td == 0 /* FIXME || pic0 is a long-term ref */){
983 h->dist_scale_factor[i] = 256;
985 int tb = av_clip(poc - poc0, -128, 127);
986 int tx = (16384 + (FFABS(td) >> 1)) / td;
987 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
991 for(i=0; i<h->ref_count[0]; i++){
992 h->dist_scale_factor_field[2*i] =
993 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
// Record this picture's reference counts and POCs, then (for B slices with
// temporal direct prediction) build map_col_to_list0: for each reference of
// the colocated picture (ref_list[1][0]), find the current-list entry with
// the same POC; index 0 stands in for references no longer present.
997 static inline void direct_ref_list_init(H264Context * const h){
998 MpegEncContext * const s = &h->s;
999 Picture * const ref1 = &h->ref_list[1][0];
1000 Picture * const cur = s->current_picture_ptr;
1002 if(cur->pict_type == I_TYPE)
1003 cur->ref_count[0] = 0;
1004 if(cur->pict_type != B_TYPE)
1005 cur->ref_count[1] = 0;
1006 for(list=0; list<2; list++){
1007 cur->ref_count[list] = h->ref_count[list];
1008 for(j=0; j<h->ref_count[list]; j++)
1009 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// spatial direct mode and non-B pictures don't need the colocated mapping
1011 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1013 for(list=0; list<2; list++){
1014 for(i=0; i<ref1->ref_count[list]; i++){
1015 const int poc = ref1->ref_poc[list][i];
1016 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1017 for(j=0; j<h->ref_count[list]; j++)
1018 if(h->ref_list[list][j].poc == poc){
1019 h->map_col_to_list0[list][i] = j;
// per-field mapping: each frame reference expands to two field references
1025 for(list=0; list<2; list++){
1026 for(i=0; i<ref1->ref_count[list]; i++){
1027 j = h->map_col_to_list0[list][i];
1028 h->map_col_to_list0_field[list][2*i] = 2*j;
1029 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives motion vectors and reference indices for B direct-mode
 * macroblocks (spatial or temporal), filling mv_cache/ref_cache and
 * fixing up *mb_type / sub_mb_type according to the co-located
 * macroblock of ref_list[1][0].
 * NOTE(review): many interior lines (else branches, loop variable
 * declarations, closing braces) are elided in this excerpt; comments
 * annotate only the visible statements.
 */
1035 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1036 MpegEncContext * const s = &h->s;
1037 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1038 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1039 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* co-located macroblock data from the first list-1 reference */
1040 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1041 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1042 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1043 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1044 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1045 const int is_b8x8 = IS_8X8(*mb_type);
1046 unsigned int sub_mb_type;
1049 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose the partition sizes used for direct prediction */
1050 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1051 /* FIXME save sub mb types from previous frames (or derive from MVs)
1052 * so we know exactly what block size to use */
1053 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1055 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1056 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1057 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1059 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1060 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1063 *mb_type |= MB_TYPE_DIRECT2;
1065 *mb_type |= MB_TYPE_INTERLACED;
1067 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1069 if(h->direct_spatial_mv_pred){
1074 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1076 /* ref = min(neighbors) */
1077 for(list=0; list<2; list++){
1078 int refa = h->ref_cache[list][scan8[0] - 1];
1079 int refb = h->ref_cache[list][scan8[0] - 8];
1080 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* C unavailable: substitute D (top-left) */
1082 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1084 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1086 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour reference in either list: zero refs and MVs */
1092 if(ref[0] < 0 && ref[1] < 0){
1093 ref[0] = ref[1] = 0;
1094 mv[0][0] = mv[0][1] =
1095 mv[1][0] = mv[1][1] = 0;
1097 for(list=0; list<2; list++){
1099 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1101 mv[list][0] = mv[list][1] = 0;
/* drop the list whose reference is invalid */
1106 *mb_type &= ~MB_TYPE_P0L1;
1107 sub_mb_type &= ~MB_TYPE_P0L1;
1108 }else if(ref[0] < 0){
1109 *mb_type &= ~MB_TYPE_P0L0;
1110 sub_mb_type &= ~MB_TYPE_P0L0;
/* 16x16 direct: fill the whole macroblock at once */
1113 if(IS_16X16(*mb_type)){
1116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1117 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* co-located block (near-)stationary => force zero MV (the x264_build
 * check works around an old x264 bug, presumably; see original commit) */
1118 if(!IS_INTRA(mb_type_col)
1119 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1120 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1121 && (h->x264_build>33 || !h->x264_build)))){
1123 a= pack16to32(mv[0][0],mv[0][1]);
1125 b= pack16to32(mv[1][0],mv[1][1]);
1127 a= pack16to32(mv[0][0],mv[0][1]);
1128 b= pack16to32(mv[1][0],mv[1][1]);
1130 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1131 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* per-8x8 direct: handle each sub-block separately */
1133 for(i8=0; i8<4; i8++){
1134 const int x8 = i8&1;
1135 const int y8 = i8>>1;
/* in B_8x8 macroblocks only DIRECT sub-blocks are touched */
1137 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1139 h->sub_mb_type[i8] = sub_mb_type;
1141 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1142 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1143 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1144 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero-out MVs where the co-located sub-block is (near-)stationary */
1147 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1148 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1149 && (h->x264_build>33 || !h->x264_build)))){
1150 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1151 if(IS_SUB_8X8(sub_mb_type)){
1152 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1153 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1155 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1160 for(i4=0; i4<4; i4++){
1161 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1162 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1164 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1166 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1172 }else{ /* direct temporal mv pred */
1173 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1174 const int *dist_scale_factor = h->dist_scale_factor;
/* field macroblocks use the per-field mapping and scale factors */
1177 if(IS_INTERLACED(*mb_type)){
1178 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1179 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1180 dist_scale_factor = h->dist_scale_factor_field;
/* cross-parity case: current and co-located MBs differ in frame/field
 * coding, so MVs must be re-scaled vertically while reading */
1182 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1183 /* FIXME assumes direct_8x8_inference == 1 */
1184 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1185 int mb_types_col[2];
1188 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1189 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1190 | (*mb_type & MB_TYPE_INTERLACED);
1191 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1193 if(IS_INTERLACED(*mb_type)){
1194 /* frame to field scaling */
1195 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* rewind pointers to the top of the co-located MB pair */
1198 l1ref0 -= 2*h->b8_stride;
1199 l1ref1 -= 2*h->b8_stride;
1200 l1mv0 -= 4*h->b_stride;
1201 l1mv1 -= 4*h->b_stride;
1205 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1206 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1208 *mb_type |= MB_TYPE_16x8;
1210 *mb_type |= MB_TYPE_8x8;
1212 /* field to frame scaling */
1213 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1214 * but in MBAFF, top and bottom POC are equal */
1215 int dy = (s->mb_y&1) ? 1 : 2;
1217 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1218 l1ref0 += dy*h->b8_stride;
1219 l1ref1 += dy*h->b8_stride;
1220 l1mv0 += 2*dy*h->b_stride;
1221 l1mv1 += 2*dy*h->b_stride;
1224 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1226 *mb_type |= MB_TYPE_16x16;
1228 *mb_type |= MB_TYPE_8x8;
/* per-8x8 temporal direct with vertical MV re-scaling (y_shift is
 * declared in elided lines) */
1231 for(i8=0; i8<4; i8++){
1232 const int x8 = i8&1;
1233 const int y8 = i8>>1;
1235 const int16_t (*l1mv)[2]= l1mv0;
1237 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1239 h->sub_mb_type[i8] = sub_mb_type;
/* list-1 ref is always 0 in temporal direct */
1241 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1242 if(IS_INTRA(mb_types_col[y8])){
1243 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1244 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1245 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1249 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1251 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1253 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1256 scale = dist_scale_factor[ref0];
1257 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1260 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
/* vertical component re-scaled for field/frame parity difference */
1261 int my_col = (mv_col[1]<<y_shift)/2;
1262 int mx = (scale * mv_col[0] + 128) >> 8;
1263 int my = (scale * my_col + 128) >> 8;
1264 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1265 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1272 /* one-to-one mv scaling */
1274 if(IS_16X16(*mb_type)){
1277 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col)){
/* pick ref/MV from whichever co-located list is valid */
1281 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1282 : map_col_to_list0[1][l1ref1[0]];
1283 const int scale = dist_scale_factor[ref0];
1284 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1286 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1287 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
/* mv1 = mv_l0 - mv_col, the list-1 counterpart of the scaled MV */
1289 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1290 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1292 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1293 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1294 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1296 for(i8=0; i8<4; i8++){
1297 const int x8 = i8&1;
1298 const int y8 = i8>>1;
1300 const int16_t (*l1mv)[2]= l1mv0;
1302 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1304 h->sub_mb_type[i8] = sub_mb_type;
1305 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1306 if(IS_INTRA(mb_type_col)){
1307 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1308 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1309 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1313 ref0 = l1ref0[x8 + y8*h->b8_stride];
1315 ref0 = map_col_to_list0[0][ref0];
1317 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1320 scale = dist_scale_factor[ref0];
1322 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1323 if(IS_SUB_8X8(sub_mb_type)){
1324 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1325 int mx = (scale * mv_col[0] + 128) >> 8;
1326 int my = (scale * mv_col[1] + 128) >> 8;
1327 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1328 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
/* 4x4 granularity: scale each co-located 4x4 MV individually */
1330 for(i4=0; i4<4; i4++){
1331 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1332 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1333 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1334 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1335 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1336 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock MV/ref caches back into the frame-wide
 * motion_val / ref_index / mvd tables of the current picture, and (for
 * CABAC B slices) records the per-8x8 direct flags.
 * NOTE(review): loop headers over y and some braces are elided in this
 * excerpt.
 */
1343 static inline void write_back_motion(H264Context *h, int mb_type){
1344 MpegEncContext * const s = &h->s;
1345 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1346 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* list 0 unused: mark the 2x2 ref_index block as LIST_NOT_USED */
1349 if(!USES_LIST(mb_type, 0))
1350 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1352 for(list=0; list<h->list_count; list++){
1354 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row from the cache */
1358 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1359 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC also needs the MV differences (zeroed for skipped MBs) */
1361 if( h->pps.cabac ) {
1362 if(IS_SKIP(mb_type))
1363 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1366 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1367 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1372 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1373 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1374 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1375 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1376 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1380 if(h->slice_type == B_TYPE && h->pps.cabac){
1381 if(IS_8X8(mb_type)){
1382 uint8_t *direct_table = &h->direct_table[b8_xy];
1383 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1384 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1385 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1391 * Decodes a network abstraction layer unit.
1392 * @param consumed is the number of bytes used as input
1393 * @param length is the length of the array
1394 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1395 * @returns decoded bytes, might be src+1 if no escapes
1397 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
/* parse the 1-byte NAL header */
1402 // src[0]&0x80; //forbidden bit
1403 h->nal_ref_idc= src[0]>>5;
1404 h->nal_unit_type= src[0]&0x1F;
1408 for(i=0; i<length; i++)
1409 printf("%2X ", src[i]);
/* scan for 00 00 0x (x<=3) patterns; stepping by 2 is safe because an
 * escape needs two consecutive zero bytes */
1411 for(i=0; i+1<length; i+=2){
1412 if(src[i]) continue;
1413 if(i>0 && src[i-1]==0) i--;
1414 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1416 /* startcode, so we must be past the end */
/* fast path: no emulation-prevention bytes, return input in place */
1423 if(i>=length-1){ //no escaped 0
1424 *dst_length= length;
1425 *consumed= length+1; //+1 for the header
1429 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1430 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1431 dst= h->rbsp_buffer[bufidx];
/* NOTE(review): rbsp_buffer may be NULL on OOM here — presumably checked
 * in elided lines; verify against the full source */
1437 //printf("decoding esc\n");
1440 //remove escapes (very rare 1:2^22)
1441 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1442 if(src[si+2]==3){ //escape
1447 }else //next start code
1451 dst[di++]= src[si++];
1455 *consumed= si + 1;//+1 for the header
1456 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1461 * identifies the exact end of the bitstream
1462 * @return the length of the trailing, or 0 if damaged
1464 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
/* v holds the last byte; the trailing rbsp_stop_one_bit search is in
 * lines elided from this excerpt */
1468 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1478 * idct tranforms the 16 dc values and dequantize them.
1479 * @param qp quantization parameter
1481 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1484 int temp[16]; //FIXME check if this is a good idea
/* offsets of the 4 luma DC positions per row/column within the 16x16
 * coefficient layout (stride declared in an elided line) */
1485 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1486 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1488 //memset(block, 64, 2*256);
/* horizontal pass of the 4x4 Hadamard transform into temp[] */
1491 const int offset= y_offset[i];
1492 const int z0= block[offset+stride*0] + block[offset+stride*4];
1493 const int z1= block[offset+stride*0] - block[offset+stride*4];
1494 const int z2= block[offset+stride*1] - block[offset+stride*5];
1495 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass + dequantization: scale by qmul, round, >>8 */
1504 const int offset= x_offset[i];
1505 const int z0= temp[4*0+i] + temp[4*2+i];
1506 const int z1= temp[4*0+i] - temp[4*2+i];
1507 const int z2= temp[4*1+i] - temp[4*3+i];
1508 const int z3= temp[4*1+i] + temp[4*3+i];
1510 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1511 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1512 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1513 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1519 * dct tranforms the 16 dc values.
1520 * @param qp quantization parameter ??? FIXME
1522 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1523 // const int qmul= dequant_coeff[qp][0];
1525 int temp[16]; //FIXME check if this is a good idea
/* same DC position layout as the dequant/idct counterpart above */
1526 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1527 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* forward Hadamard, horizontal pass */
1530 const int offset= y_offset[i];
1531 const int z0= block[offset+stride*0] + block[offset+stride*4];
1532 const int z1= block[offset+stride*0] - block[offset+stride*4];
1533 const int z2= block[offset+stride*1] - block[offset+stride*5];
1534 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass; result halved (>>1), no quantization here */
1543 const int offset= x_offset[i];
1544 const int z0= temp[4*0+i] + temp[4*2+i];
1545 const int z1= temp[4*0+i] - temp[4*2+i];
1546 const int z2= temp[4*1+i] - temp[4*3+i];
1547 const int z3= temp[4*1+i] + temp[4*3+i];
1549 block[stride*0 +offset]= (z0 + z3)>>1;
1550 block[stride*2 +offset]= (z1 + z2)>>1;
1551 block[stride*8 +offset]= (z1 - z2)>>1;
1552 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 transform and dequantization of the chroma DC coefficients
 * (in-place on the four DC positions of the chroma block).
 */
1560 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1561 const int stride= 16*2;
1562 const int xStride= 16;
/* load the 2x2 DC values (a b / c d); e is computed in elided lines */
1565 a= block[stride*0 + xStride*0];
1566 b= block[stride*0 + xStride*1];
1567 c= block[stride*1 + xStride*0];
1568 d= block[stride*1 + xStride*1];
/* butterfly + dequantize: scale by qmul, >>7 */
1575 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1576 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1577 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1578 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the chroma DC coefficients (no quantization);
 * encoder-side counterpart of chroma_dc_dequant_idct_c().
 */
1582 static void chroma_dc_dct_c(DCTELEM *block){
1583 const int stride= 16*2;
1584 const int xStride= 16;
/* load the 2x2 DC values; e is computed in elided lines */
1587 a= block[stride*0 + xStride*0];
1588 b= block[stride*0 + xStride*1];
1589 c= block[stride*1 + xStride*0];
1590 d= block[stride*1 + xStride*1];
/* butterfly only — quantization is done elsewhere */
1597 block[stride*0 + xStride*0]= (a+c);
1598 block[stride*0 + xStride*1]= (e+b);
1599 block[stride*1 + xStride*0]= (a-c);
1600 block[stride*1 + xStride*1]= (e-b);
1605 * gets the chroma qp.
/* t selects the chroma QP index offset table (Cb/Cr); the table lookup
 * applies the luma->chroma QP mapping from the PPS */
1607 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1608 return h->pps.chroma_qp_table[t][qscale & 0xff];
1611 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1612 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block of coefficients (encoder path).
 * Uses a dead-zone quantizer: bias = 1/3 (intra) or 1/6 (inter) of the
 * quantization step; coefficients inside the threshold window quantize
 * to zero.  separate_dc applies a wider/narrower shift to block[0].
 * @return index of the last nonzero coefficient (last_non_zero)
 */
1613 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1615 const int * const quant_table= quant_coeff[qscale];
1616 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold1/2 bracket the dead zone; the unsigned-compare trick below
 * tests |level| against the zone in one comparison */
1617 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1618 const unsigned int threshold2= (threshold1<<1);
/* DC handled separately with QUANT_SHIFT-2 (luma DC path) */
1624 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1625 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1626 const unsigned int dc_threshold2= (dc_threshold1<<1);
1628 int level= block[0]*quant_coeff[qscale+18][0];
1629 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1631 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1634 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1637 // last_non_zero = i;
/* alternate DC path with QUANT_SHIFT+1 (chroma DC, presumably) */
1642 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1643 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1644 const unsigned int dc_threshold2= (dc_threshold1<<1);
1646 int level= block[0]*quant_table[0];
1647 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1649 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1652 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1655 // last_non_zero = i;
/* AC coefficients in scan order */
1668 const int j= scantable[i];
1669 int level= block[j]*quant_table[j];
1671 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1672 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1673 if(((unsigned)(level+threshold1))>threshold2){
1675 level= (bias + level)>>QUANT_SHIFT;
1678 level= (bias - level)>>QUANT_SHIFT;
1687 return last_non_zero;
/**
 * Single-direction motion compensation for one partition: fetches the
 * quarter-pel luma and eighth-pel chroma prediction from @p pic into
 * dest_y/cb/cr, using edge emulation when the MV points outside the
 * reference picture.
 * @param square  nonzero if one qpix_op call covers the partition
 * @param delta   offset to the second half when !square
 * @param list    reference list (0 or 1), used for field-parity fixup
 */
1690 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1691 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1692 int src_x_offset, int src_y_offset,
1693 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1694 MpegEncContext * const s = &h->s;
/* MV in quarter-pel units, offset to absolute picture position */
1695 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1696 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1697 const int luma_xy= (mx&3) + ((my&3)<<2);
1698 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1699 uint8_t * src_cb, * src_cr;
1700 int extra_width= h->emu_edge_width;
1701 int extra_height= h->emu_edge_height;
1703 const int full_mx= mx>>2;
1704 const int full_my= my>>2;
1705 const int pic_width = 16*s->mb_width;
1706 const int pic_height = 16*s->mb_height >> (MB_MBAFF || FIELD_PICTURE);
1708 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
/* fractional MVs need 3 extra border pixels for the 6-tap filter */
1711 if(mx&7) extra_width -= 3;
1712 if(my&7) extra_height -= 3;
/* luma: emulate edges if the (filtered) source area leaves the picture */
1714 if( full_mx < 0-extra_width
1715 || full_my < 0-extra_height
1716 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1717 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1719 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1723 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1725 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1728 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1730 if(MB_MBAFF || FIELD_PICTURE){
1731 // chroma offset when predicting from a field of opposite parity
1732 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
1733 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1735 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1736 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
/* chroma Cb/Cr with the same edge emulation as needed */
1739 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1740 src_cb= s->edge_emu_buffer;
1742 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1745 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1746 src_cr= s->edge_emu_buffer;
1748 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted (standard) motion compensation for one partition: performs
 * a put from list 0 and/or list 1; when both lists are used the second
 * pass uses the avg operators so the two predictions are averaged.
 */
1751 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1752 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753 int x_offset, int y_offset,
1754 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756 int list0, int list1){
1757 MpegEncContext * const s = &h->s;
/* first direction writes (put); switched to avg below for the second */
1758 qpel_mc_func *qpix_op= qpix_put;
1759 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to this partition; offsets are in chroma units
 * (luma uses twice the offset) */
1761 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1762 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1763 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1764 x_offset += 8*s->mb_x;
1765 y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
1768 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1769 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1770 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1771 qpix_op, chroma_op);
/* bi-directional: second pass averages into the first prediction */
1774 chroma_op= chroma_avg;
1778 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1779 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1780 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1781 qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for one partition.
 * Bi-directional case: both predictions are fetched (the list-1 one into
 * the obmc scratchpad), then combined with implicit (use_weight == 2) or
 * explicit weights.  Uni-directional case: one prediction is fetched and
 * then scaled/offset in place with the per-reference weights.
 */
1785 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1786 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 int x_offset, int y_offset,
1788 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1789 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1790 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1791 int list0, int list1){
1792 MpegEncContext * const s = &h->s;
1794 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1795 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1796 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1797 x_offset += 8*s->mb_x;
1798 y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
1801 /* don't optimize for luma-only case, since B-frames usually
1802 * use implicit weights => chroma too. */
1803 uint8_t *tmp_cb = s->obmc_scratchpad;
1804 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1805 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1806 int refn0 = h->ref_cache[0][ scan8[n] ];
1807 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list-0 prediction directly into dest, list-1 into the scratchpad */
1809 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1810 dest_y, dest_cb, dest_cr,
1811 x_offset, y_offset, qpix_put, chroma_put);
1812 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1813 tmp_y, tmp_cb, tmp_cr,
1814 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights sum to 64, denominator 5 (>>6 total) */
1816 if(h->use_weight == 2){
1817 int weight0 = h->implicit_weight[refn0][refn1];
1818 int weight1 = 64 - weight0;
1819 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1820 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1821 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-reference weights and offsets from the slice */
1823 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1824 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1825 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1826 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1828 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1829 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1831 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional weighted prediction */
1834 int list = list1 ? 1 : 0;
1835 int refn = h->ref_cache[list][ scan8[n] ];
1836 Picture *ref= &h->ref_list[list][refn];
1837 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1838 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put, chroma_put);
1841 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1842 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1843 if(h->use_weight_chroma){
1844 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1845 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1846 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1847 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation.
 * Weighted MC is used for explicit weighting (use_weight == 1) and for
 * implicit bi-prediction whose weight differs from the trivial 32/32
 * split (which plain averaging already implements).
 */
1852 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1853 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1854 int x_offset, int y_offset,
1855 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1856 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1857 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1858 int list0, int list1){
1859 if((h->use_weight==2 && list0 && list1
1860 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1861 || h->use_weight==1)
1862 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1863 x_offset, y_offset, qpix_put, chroma_put,
1864 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1866 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1867 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1870 static inline void prefetch_motion(H264Context *h, int list){
1871 /* fetch pixels for estimated mv 4 macroblocks ahead
1872 * optimized for 64byte cache lines */
1873 MpegEncContext * const s = &h->s;
1874 const int refn = h->ref_cache[list][scan8[0]];
/* integer-pel position predicted from this MB's 16x16 MV; the +8/+64
 * fudge biases the prefetch a few MBs ahead along the scan */
1876 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1877 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1878 uint8_t **src= h->ref_list[list][refn].data;
1879 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1880 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb and Cr are prefetched together via their plane distance */
1881 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1882 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Inter-prediction driver for one macroblock: walks the partition tree
 * (16x16, 16x8, 8x16, or 8x8 with 8x8/8x4/4x8/4x4 sub-partitions) and
 * calls mc_part() for each piece with the matching DSP function sizes
 * and weight tables.  Prefetches list-0 data before and list-1 after.
 */
1886 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1887 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1888 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1889 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1890 MpegEncContext * const s = &h->s;
1891 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1892 const int mb_type= s->current_picture.mb_type[mb_xy];
1894 assert(IS_INTER(mb_type));
1896 prefetch_motion(h, 0);
1898 if(IS_16X16(mb_type)){
1899 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1900 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1901 &weight_op[0], &weight_avg[0],
1902 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1903 }else if(IS_16X8(mb_type)){
/* two 16x8 halves; delta=8 addresses the second 8-line strip */
1904 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1905 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1906 &weight_op[1], &weight_avg[1],
1907 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1908 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1909 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1910 &weight_op[1], &weight_avg[1],
1911 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1912 }else if(IS_8X16(mb_type)){
/* two 8x16 halves; delta is a row offset for the second filter pass */
1913 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1914 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1915 &weight_op[2], &weight_avg[2],
1916 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1917 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1918 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1919 &weight_op[2], &weight_avg[2],
1920 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1924 assert(IS_8X8(mb_type));
/* per-8x8: each sub-block carries its own sub_mb_type and partitioning */
1927 const int sub_mb_type= h->sub_mb_type[i];
1929 int x_offset= (i&1)<<2;
1930 int y_offset= (i&2)<<1;
1932 if(IS_SUB_8X8(sub_mb_type)){
1933 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1934 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1935 &weight_op[3], &weight_avg[3],
1936 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1937 }else if(IS_SUB_8X4(sub_mb_type)){
1938 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1939 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1940 &weight_op[4], &weight_avg[4],
1941 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1942 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1943 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1944 &weight_op[4], &weight_avg[4],
1945 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1946 }else if(IS_SUB_4X8(sub_mb_type)){
1947 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1948 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1949 &weight_op[5], &weight_avg[5],
1950 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1951 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1952 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1953 &weight_op[5], &weight_avg[5],
1954 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1957 assert(IS_SUB_4X4(sub_mb_type));
1959 int sub_x_offset= x_offset + 2*(j&1);
1960 int sub_y_offset= y_offset + (j&2);
1961 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1962 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1963 &weight_op[6], &weight_avg[6],
1964 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1970 prefetch_motion(h, 1);
/**
 * One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants) from the static length/bits
 * arrays.  Guarded by the 'done' flag so repeated calls are no-ops.
 */
1973 static void decode_init_vlc(void){
1974 static int done = 0;
1980 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1981 &chroma_dc_coeff_token_len [0], 1, 1,
1982 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* one coeff_token table per nC context */
1985 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1986 &coeff_token_len [i][0], 1, 1,
1987 &coeff_token_bits[i][0], 1, 1, 1);
1991 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1992 &chroma_dc_total_zeros_len [i][0], 1, 1,
1993 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1995 for(i=0; i<15; i++){
1996 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1997 &total_zeros_len [i][0], 1, 1,
1998 &total_zeros_bits[i][0], 1, 1, 1);
2002 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2003 &run_len [i][0], 1, 1,
2004 &run_bits[i][0], 1, 1, 1);
/* run_before for zeros_left > 6 uses its own wider table */
2006 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2007 &run_len [6][0], 1, 1,
2008 &run_bits[6][0], 1, 1, 1);
/**
 * Free every table allocated by alloc_tables()/context_init(), plus the
 * SPS/PPS buffers and the per-thread-context scratch buffers.
 * av_freep() NULLs the pointers, so a subsequent re-allocation is safe.
 */
2012 static void free_tables(H264Context *h){
2015 av_freep(&h->intra4x4_pred_mode);
2016 av_freep(&h->chroma_pred_mode_table);
2017 av_freep(&h->cbp_table);
2018 av_freep(&h->mvd_table[0]);
2019 av_freep(&h->mvd_table[1]);
2020 av_freep(&h->direct_table);
2021 av_freep(&h->non_zero_count);
2022 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (offset), so just clear it */
2023 h->slice_table= NULL;
2025 av_freep(&h->mb2b_xy);
2026 av_freep(&h->mb2b8_xy);
2028 for(i = 0; i < MAX_SPS_COUNT; i++)
2029 av_freep(h->sps_buffers + i);
2031 for(i = 0; i < MAX_PPS_COUNT; i++)
2032 av_freep(h->pps_buffers + i);
/* per-thread buffers allocated in context_init()/frame_start() */
2034 for(i = 0; i < h->s.avctx->thread_count; i++) {
2035 hx = h->thread_context[i];
2037 av_freep(&hx->top_borders[1]);
2038 av_freep(&hx->top_borders[0]);
2039 av_freep(&hx->s.obmc_scratchpad);
2040 av_freep(&hx->s.allocated_edge_emu_buffer);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values, scaled by
 * the PPS 8x8 scaling matrices. If both scaling matrices are identical the
 * two table pointers share one buffer to save memory.
 */
2044 static void init_dequant8_coeff_table(H264Context *h){
/* a transposed layout is used when a non-C IDCT is in service */
2046 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2047 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2048 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2050 for(i=0; i<2; i++ ){
/* intra/inter matrices equal -> alias buffer 0 and skip recomputation */
2051 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2052 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2056 for(q=0; q<52; q++){
/* qp = 6*shift + idx; dequant scale doubles every 6 QP steps */
2057 int shift = ff_div6[q];
2058 int idx = ff_rem6[q];
2060 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2061 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2062 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables (6 matrices: Intra/Inter Y,Cb,Cr)
 * for all 52 QP values, scaled by the PPS 4x4 scaling matrices. Matrices
 * that compare equal to an earlier one alias the earlier buffer.
 */
2067 static void init_dequant4_coeff_table(H264Context *h){
/* a transposed layout is used when a non-C IDCT is in service */
2069 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2070 for(i=0; i<6; i++ ){
2071 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an identical earlier matrix instead of recomputing it */
2073 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2074 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2081 for(q=0; q<52; q++){
/* qp = 6*shift + idx; extra +2 matches the 4x4 IDCT scaling */
2082 int shift = ff_div6[q] + 2;
2083 int idx = ff_rem6[q];
2085 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2086 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2087 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Build all dequant tables for the current PPS/SPS: always 4x4, 8x8 only
 * when the PPS enables transform_8x8_mode. With transform_bypass the
 * QP==0 entries are forced to the identity scale (1<<6) so lossless
 * blocks pass through unscaled.
 */
2092 static void init_dequant_tables(H264Context *h){
2094 init_dequant4_coeff_table(h);
2095 if(h->pps.transform_8x8_mode)
2096 init_dequant8_coeff_table(h);
2097 if(h->sps.transform_bypass){
/* identity dequant for the lossless (QP 0) path */
2100 h->dequant4_coeff[i][0][x] = 1<<6;
2101 if(h->pps.transform_8x8_mode)
2104 h->dequant8_coeff[i][0][x] = 1<<6;
2111 * needs width/height
/**
 * Allocate the per-stream tables shared by all slice threads
 * (prediction-mode, nnz, slice, cbp, CABAC mvd/direct tables and the
 * mb->b/b8 index maps). Returns via CHECKED_ALLOCZ's failure path on OOM
 * (the macro jumps to cleanup; free_tables() releases partial state).
 */
2113 static int alloc_tables(H264Context *h){
2114 MpegEncContext * const s = &h->s;
/* +1 row of macroblocks so edge accesses above row 0 stay in bounds */
2115 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2118 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2120 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2121 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* mvd/direct tables are only read by the CABAC decoder */
2124 if( h->pps.cabac ) {
2125 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2126 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2127 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 (0xFF) marks "no slice"; slice_table points past the guard rows */
2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map each macroblock index to its 4x4 (b) and 8x8 (b8) grid offsets */
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
2138 const int mb_xy= x + y*s->mb_stride;
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2147 s->obmc_scratchpad = NULL;
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
2159 * Mimic alloc_tables(), but for every context thread.
/* Share (not copy) the read-mostly tables of 'src' into a thread context
 * 'dst'; only obmc_scratchpad stays per-thread (allocated later). */
2161 static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
/* per-thread scratch; allocated on demand in frame_start() */
2173 dst->s.obmc_scratchpad = NULL;
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2179 * Allocate buffers which are not shared amongst multiple threads.
2181 static int context_init(H264Context *h){
2182 MpegEncContext * const s = &h->s;
/* 16+8+8 bytes per MB column: one luma row plus two chroma rows */
2184 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2185 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2187 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2188 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2189 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
/* point the working buffer past the first half (guard area) */
2190 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
2193 return -1; // free_tables will clean up for us
/**
 * Initialization shared by all entry points: copy dimensions/codec id from
 * the AVCodecContext, set up intra prediction, and default the scaling
 * matrices to flat 16 (no scaling) until a PPS overrides them.
 */
2196 static void common_init(H264Context *h){
2197 MpegEncContext * const s = &h->s;
2199 s->width = s->avctx->width;
2200 s->height = s->avctx->height;
2201 s->codec_id= s->avctx->codec->id;
2203 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS seen yet" sentinel for the dequant-table cache */
2205 h->dequant_coeff_pps= -1;
2206 s->unrestricted_mv=1;
2207 s->decode=1; //FIXME
/* flat default scaling lists per the spec until SPS/PPS provide them */
2209 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2210 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up the MpegEncContext defaults, output format
 * and pixel format, and detect AVC-style (length-prefixed, extradata
 * starting with 1) vs. Annex-B bitstreams.
 */
2213 static int decode_init(AVCodecContext *avctx){
2214 H264Context *h= avctx->priv_data;
2215 MpegEncContext * const s = &h->s;
2217 MPV_decode_defaults(s);
2222 s->out_format = FMT_H264;
2223 s->workaround_bugs= avctx->workaround_bugs;
2226 // s->decode_mb= ff_h263_decode_mb;
2227 s->quarter_sample = 1;
2229 avctx->pix_fmt= PIX_FMT_YUV420P;
/* avcC extradata begins with configurationVersion == 1 */
2233 if(avctx->extradata_size > 0 && avctx->extradata &&
2234 *(char *)avctx->extradata == 1){
/* thread context 0 is the main context itself */
2241 h->thread_context[0] = h;
/**
 * Per-frame setup: start the MPV frame and error resilience, reset
 * key_frame (IDR flags are OR'd in later), precompute the scan8-ordered
 * block offsets for both frame (4*linesize) and field (8*linesize) access,
 * and lazily allocate per-thread bipred scratchpads.
 */
2245 static int frame_start(H264Context *h){
2246 MpegEncContext * const s = &h->s;
2249 if(MPV_frame_start(s, s->avctx) < 0)
2251 ff_er_frame_start(s);
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
2255 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2256 * See decode_nal_units().
2258 s->current_picture_ptr->key_frame= 0;
2260 assert(s->linesize && s->uvlinesize);
/* block_offset[0..15]: frame-mode luma; [24+i]: field-mode (doubled stride) */
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets: [16..23] frame mode, [40..47] field mode */
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279 /* some macroblocks will be accessed before they're available */
2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2283 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Save the bottom row and right column of the just-decoded macroblock into
 * top_borders[]/left_border[] so the deblocking filter of the neighbouring
 * MBs can still see the unfiltered samples.
 */
2287 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2288 MpegEncContext * const s = &h->s;
2292 src_cb -= uvlinesize;
2293 src_cr -= uvlinesize;
2295 // There are two lines saved, the line above the top macroblock of a pair,
2296 // and the line above the bottom macroblock
/* left_border[0] keeps the top-left corner sample */
2297 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2298 for(i=1; i<17; i++){
2299 h->left_border[i]= src_y[15+i* linesize];
/* copy the MB's bottom luma row (16 bytes) as two uint64 stores */
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma borders only when not decoding gray-only */
2305 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2306 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2307 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2309 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2310 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2312 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2313 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchange (xchg!=0) or copy the saved unfiltered border samples with the
 * current MB edges, so intra prediction sees unfiltered neighbours while
 * the deblocked picture keeps the filtered ones. With
 * deblocking_filter==2 borders are only touched within the same slice.
 */
2317 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2318 MpegEncContext * const s = &h->s;
2325 if(h->deblocking_filter == 2) {
/* filter only across MBs of the same slice */
2326 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2327 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2328 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2330 deblock_left = (s->mb_x > 0);
2331 deblock_top = (s->mb_y > 0);
/* step back to the top-left neighbour sample */
2334 src_y -= linesize + 1;
2335 src_cb -= uvlinesize + 1;
2336 src_cr -= uvlinesize + 1;
2338 #define XCHG(a,b,t,xchg)\
/* left luma column (skip row 0 when the top border is not deblocked) */
2345 for(i = !deblock_top; i<17; i++){
2346 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2351 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2352 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* also swap the top-right neighbour's first 8 samples when it exists */
2353 if(s->mb_x+1 < s->mb_width){
2354 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2358 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2360 for(i = !deblock_top; i<9; i++){
2361 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2362 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2366 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2367 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): save the borders of a macroblock
 * *pair* (two rows in top_borders[0]/[1], 2x17 left luma samples).
 */
2372 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2373 MpegEncContext * const s = &h->s;
2376 src_y -= 2 * linesize;
2377 src_cb -= 2 * uvlinesize;
2378 src_cr -= 2 * uvlinesize;
2380 // There are two lines saved, the line above the top macroblock of a pair,
2381 // and the line above the bottom macroblock
2382 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2383 h->left_border[1]= h->top_borders[1][s->mb_x][15];
/* 32 luma rows of the pair (plus the two corner samples above) */
2384 for(i=2; i<34; i++){
2385 h->left_border[i]= src_y[15+i* linesize];
2388 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2389 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2390 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2391 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2393 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2394 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2395 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2396 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2397 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2398 for(i=2; i<18; i++){
2399 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2400 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2402 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2403 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2404 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2405 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): exchange/copy the saved unfiltered
 * borders of a macroblock pair with the picture edges, both top_borders
 * rows and the double-height left column.
 */
2409 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2410 MpegEncContext * const s = &h->s;
2413 int deblock_left = (s->mb_x > 0);
/* pairs occupy two MB rows, so the top neighbour needs mb_y > 1 */
2414 int deblock_top = (s->mb_y > 1);
2416 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2418 src_y -= 2 * linesize + 1;
2419 src_cb -= 2 * uvlinesize + 1;
2420 src_cr -= 2 * uvlinesize + 1;
2422 #define XCHG(a,b,t,xchg)\
2429 for(i = (!deblock_top)<<1; i<34; i++){
2430 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2435 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2437 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
/* and the top-right neighbour pair when not at the right picture edge */
2439 if(s->mb_x+1 < s->mb_width){
2440 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2441 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2445 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2447 for(i = (!deblock_top) << 1; i<18; i++){
2448 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2449 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2453 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2454 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2455 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2456 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT-add, then deblocking. 'simple' (compile-time constant via
 * the always_inline wrappers) disables the expensive paths: MBAFF/field
 * handling, intra-PCM, SVQ3 and gray-only decoding.
 */
2461 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2462 MpegEncContext * const s = &h->s;
2463 const int mb_x= s->mb_x;
2464 const int mb_y= s->mb_y;
2465 const int mb_xy= mb_x + mb_y*s->mb_stride;
2466 const int mb_type= s->current_picture.mb_type[mb_xy];
2467 uint8_t *dest_y, *dest_cb, *dest_cr;
2468 int linesize, uvlinesize /*dct_offset*/;
2470 int *block_offset = &h->block_offset[0];
2471 const unsigned int bottom = mb_y & 1;
/* lossless path and H.264-vs-SVQ3 selection (simple implies H.264) */
2472 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2473 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2474 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2476 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2477 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2478 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2480 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2481 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, field block offsets, odd field starts
 * one line down */
2483 if (!simple && MB_FIELD) {
2484 linesize = h->mb_linesize = s->linesize * 2;
2485 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2486 block_offset = &h->block_offset[24];
2487 if(mb_y&1){ //FIXME move out of this func?
2488 dest_y -= s->linesize*15;
2489 dest_cb-= s->uvlinesize*7;
2490 dest_cr-= s->uvlinesize*7;
/* rewrite ref cache values to distinguish field parity for MBAFF */
2494 for(list=0; list<h->list_count; list++){
2495 if(!USES_LIST(mb_type, list))
2497 if(IS_16X16(mb_type)){
2498 int8_t *ref = &h->ref_cache[list][scan8[0]];
2499 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2501 for(i=0; i<16; i+=4){
2502 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2503 int ref = h->ref_cache[list][scan8[i]];
2505 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
2511 linesize = h->mb_linesize = s->linesize;
2512 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2513 // dct_offset = s->linesize * 16;
/* choose residual add routines: bypass = plain pixel add, else 8x8 or
 * 4x4 IDCT variants */
2516 if(transform_bypass){
2518 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2519 }else if(IS_8x8DCT(mb_type)){
2520 idct_dc_add = s->dsp.h264_idct8_dc_add;
2521 idct_add = s->dsp.h264_idct8_add;
2523 idct_dc_add = s->dsp.h264_idct_dc_add;
2524 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: swap in the unfiltered pair borders before prediction */
2527 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2528 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2529 int mbt_y = mb_y&~1;
2530 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2531 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2532 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2533 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were parsed into h->mb; copy them straight out */
2536 if (!simple && IS_INTRA_PCM(mb_type)) {
2539 // The pixels are stored in h->mb array in the same order as levels,
2540 // copy them in output in the correct order.
2541 for(i=0; i<16; i++) {
2542 for (y=0; y<4; y++) {
2543 for (x=0; x<4; x++) {
2544 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2548 for(i=16; i<16+4; i++) {
2549 for (y=0; y<4; y++) {
2550 for (x=0; x<4; x++) {
2551 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2555 for(i=20; i<20+4; i++) {
2556 for (y=0; y<4; y++) {
2557 for (x=0; x<4; x++) {
2558 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra prediction (uses unfiltered neighbours via xchg_mb_border) */
2563 if(IS_INTRA(mb_type)){
2564 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2565 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2567 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2568 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2569 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2572 if(IS_INTRA4x4(mb_type)){
2573 if(simple || !s->encoding){
2574 if(IS_8x8DCT(mb_type)){
2575 for(i=0; i<16; i+=4){
2576 uint8_t * const ptr= dest_y + block_offset[i];
2577 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2578 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2579 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2580 (h->topright_samples_available<<i)&0x4000, linesize);
/* DC-only blocks use the cheaper dc_add */
2582 if(nnz == 1 && h->mb[i*16])
2583 idct_dc_add(ptr, h->mb + i*16, linesize);
2585 idct_add(ptr, h->mb + i*16, linesize);
2589 for(i=0; i<16; i++){
2590 uint8_t * const ptr= dest_y + block_offset[i];
2592 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* fake the missing top-right samples by replicating the corner */
2595 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2596 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2597 assert(mb_y || linesize <= block_offset[i]);
2598 if(!topright_avail){
2599 tr= ptr[3 - linesize]*0x01010101;
2600 topright= (uint8_t*) &tr;
2602 topright= ptr + 4 - linesize;
2606 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2607 nnz = h->non_zero_count_cache[ scan8[i] ];
2610 if(nnz == 1 && h->mb[i*16])
2611 idct_dc_add(ptr, h->mb + i*16, linesize);
2613 idct_add(ptr, h->mb + i*16, linesize);
2615 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* 16x16 intra: full-plane prediction + luma DC transform */
2620 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2622 if(!transform_bypass)
2623 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2625 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2627 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2628 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation */
2630 hl_motion(h, dest_y, dest_cb, dest_cr,
2631 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2632 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2633 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals (intra4x4 already added them above) */
2637 if(!IS_INTRA4x4(mb_type)){
2639 if(IS_INTRA16x16(mb_type)){
2640 for(i=0; i<16; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ])
2642 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2643 else if(h->mb[i*16])
2644 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2647 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2648 for(i=0; i<16; i+=di){
2649 int nnz = h->non_zero_count_cache[ scan8[i] ];
2651 if(nnz==1 && h->mb[i*16])
2652 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2654 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2659 for(i=0; i<16; i++){
2660 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2661 uint8_t * const ptr= dest_y + block_offset[i];
2662 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals: DC transform then per-block IDCT-add */
2668 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2669 uint8_t *dest[2] = {dest_cb, dest_cr};
2670 if(transform_bypass){
2671 idct_add = idct_dc_add = s->dsp.add_pixels4;
2673 idct_add = s->dsp.h264_idct_add;
2674 idct_dc_add = s->dsp.h264_idct_dc_add;
2675 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2676 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2679 for(i=16; i<16+8; i++){
2680 if(h->non_zero_count_cache[ scan8[i] ])
2681 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2682 else if(h->mb[i*16])
2683 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2686 for(i=16; i<16+8; i++){
2687 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2688 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2689 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: MBAFF filters a whole pair once the bottom MB is done */
2695 if(h->deblocking_filter) {
2696 if (!simple && FRAME_MBAFF) {
2697 //FIXME try deblocking one mb at a time?
2698 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2699 const int mb_y = s->mb_y - 1;
2700 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2701 const int mb_xy= mb_x + mb_y*s->mb_stride;
2702 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2703 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2704 if (!bottom) return;
2705 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2706 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2707 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2709 if(IS_INTRA(mb_type_top | mb_type_bottom))
2710 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2712 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2716 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2717 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
/* refresh chroma QP from the stored per-MB qscale before filtering */
2718 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2719 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2720 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2723 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2724 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2725 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2726 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2727 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2729 tprintf(h->s.avctx, "call filter_mb\n");
2730 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2731 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2732 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2738 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple==1 lets the always_inline body fold away the MBAFF/PCM/gray paths */
2740 static void hl_decode_mb_simple(H264Context *h){
2741 hl_decode_mb_internal(h, 1);
2745 * Process a macroblock; this handles edge cases, such as interlacing.
/* av_noinline keeps the rarely-needed full-featured copy out of the hot path */
2747 static void av_noinline hl_decode_mb_complex(H264Context *h){
2748 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the fast 'simple' variant unless
 * any feature requiring the complex path is active (MBAFF, field MBs,
 * I_PCM, non-H.264 codec, gray decoding, or encoding).
 */
2751 static void hl_decode_mb(H264Context *h){
2752 MpegEncContext * const s = &h->s;
2753 const int mb_x= s->mb_x;
2754 const int mb_y= s->mb_y;
2755 const int mb_xy= mb_x + mb_y*s->mb_stride;
2756 const int mb_type= s->current_picture.mb_type[mb_xy];
2757 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2763 hl_decode_mb_complex(h);
2764 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in place into a single-field view: double the
 * strides and, for the bottom field, start one line down.
 */
2767 static void pic_as_field(Picture *pic, const int bottom){
2769 for (i = 0; i < 4; ++i) {
2771 pic->data[i] += pic->linesize[i];
2772 pic->linesize[i] *= 2;
/**
 * If 'src' is referenced with the requested field parity, copy it to
 * 'dest' as a field picture (data pointers adjusted) and bump pic_id by
 * 'id_add'. Returns nonzero iff a field was emitted.
 */
2776 static int split_field_copy(Picture *dest, Picture *src,
2777 int parity, int id_add){
2778 int match = !!(src->reference & parity);
2782 pic_as_field(dest, parity == PICT_BOTTOM_FIELD);
2784 dest->pic_id += id_add;
2791 * Split one reference list into field parts, interleaving by parity
2792 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2793 * set to look at the actual start of data for that field.
2795 * @param dest output list
2796 * @param dest_len maximum number of fields to put in dest
2797 * @param src the source reference list containing fields and/or field pairs
2798 * (aka short_ref/long_ref, or
2799 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2800 * @param src_len number of Picture's in source (pairs and unmatched fields)
2801 * @param parity the parity of the picture being decoded/needing
2802 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2803 * @return number of fields placed in dest
2805 static int split_field_half_ref_list(Picture *dest, int dest_len,
2806 Picture *src, int src_len, int parity){
2807 int same_parity = 1;
/* alternate between same-parity and opposite-parity candidates, as the
 * spec's interleaving rule requires */
2813 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2814 if (same_parity && same_i < src_len) {
2815 field_output = split_field_copy(dest + out_i, src + same_i,
/* a miss on the same-parity side switches to the opposite parity */
2817 same_parity = !field_output;
2820 } else if (opp_i < src_len) {
2821 field_output = split_field_copy(dest + out_i, src + opp_i,
2822 PICT_FRAME - parity, 0);
2823 same_parity = field_output;
2835 * Split the reference frame list into a reference field list.
2836 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2837 * The input list contains both reference field pairs and
2838 * unmatched reference fields; it is ordered as spec describes
2839 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2840 * unmatched field pairs are also present. Conceptually this is equivalent
2841 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2843 * @param dest output reference list where ordered fields are to be placed
2844 * @param dest_len max number of fields to place at dest
2845 * @param src source reference list, as described above
2846 * @param src_len number of pictures (pairs and unmatched fields) in src
2847 * @param parity parity of field being currently decoded
2848 * (one of PICT_{TOP,BOTTOM}_FIELD)
2849 * @param long_i index into src array that holds first long reference picture,
2850 * or src_len if no long refs present.
2852 static int split_field_ref_list(Picture *dest, int dest_len,
2853 Picture *src, int src_len,
2854 int parity, int long_i){
/* first the short-term half [0, long_i), then the long-term remainder */
2856 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2860 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2861 src_len - long_i, parity);
2866 * fills the default_ref_list.
/* Build the default RefPicList0/1 per H.264 8.2.4.2: B slices get
 * POC-sorted lists (L0 descending below current POC, L1 ascending above),
 * P slices get frame_num order; field pictures are then split per parity. */
2868 static int fill_default_ref_list(H264Context *h){
2869 MpegEncContext * const s = &h->s;
2871 int smallest_poc_greater_than_current = -1;
2873 Picture sorted_short_ref[32];
2874 Picture field_entry_list[2][32];
2875 Picture *frame_list[2];
/* field decoding first collects whole frames, then splits into fields */
2877 if (FIELD_PICTURE) {
2878 structure_sel = PICT_FRAME;
2879 frame_list[0] = field_entry_list[0];
2880 frame_list[1] = field_entry_list[1];
2883 frame_list[0] = h->default_ref_list[0];
2884 frame_list[1] = h->default_ref_list[1];
2887 if(h->slice_type==B_TYPE){
2894 /* sort frame according to poc in B slice */
2895 for(out_i=0; out_i<h->short_ref_count; out_i++){
2897 int best_poc=INT_MAX;
/* selection sort: next-larger POC each pass */
2899 for(i=0; i<h->short_ref_count; i++){
2900 const int poc= h->short_ref[i]->poc;
2901 if(poc > limit && poc < best_poc){
2907 assert(best_i != INT_MIN);
2910 sorted_short_ref[out_i]= *h->short_ref[best_i];
2911 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
2912 if (-1 == smallest_poc_greater_than_current) {
2913 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2914 smallest_poc_greater_than_current = out_i;
2919 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2921 // find the largest poc
/* walk outward from the current POC: L0 goes backwards, L1 forwards */
2922 for(list=0; list<2; list++){
2925 int step= list ? -1 : 1;
2927 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2929 while(j<0 || j>= h->short_ref_count){
2930 if(j != -99 && step == (list ? -1 : 1))
2933 j= smallest_poc_greater_than_current + (step>>1);
2935 sel = sorted_short_ref[j].reference | structure_sel;
2936 if(sel != PICT_FRAME) continue;
2937 frame_list[list][index ]= sorted_short_ref[j];
2938 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2940 short_len[list] = index;
/* append long-term refs, pic_id = long-term index */
2942 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2944 if(h->long_ref[i] == NULL) continue;
2945 sel = h->long_ref[i]->reference | structure_sel;
2946 if(sel != PICT_FRAME) continue;
2948 frame_list[ list ][index ]= *h->long_ref[i];
2949 frame_list[ list ][index++].pic_id= i;;
2953 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2954 // swap the two first elements of L1 when
2955 // L0 and L1 are identical
2956 Picture temp= frame_list[1][0];
2957 frame_list[1][0] = frame_list[1][1];
2958 frame_list[1][1] = temp;
/* field pictures: turn the frame lists into parity-interleaved field lists */
2963 for(list=0; list<2; list++){
2965 len[list] = split_field_ref_list(h->default_ref_list[list],
2969 s->picture_structure,
2972 if(len[list] < h->ref_count[ list ])
2973 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* P/SP slices: short-term refs in existing (frame_num) order, then long */
2980 for(i=0; i<h->short_ref_count; i++){
2982 sel = h->short_ref[i]->reference | structure_sel;
2983 if(sel != PICT_FRAME) continue;
2984 frame_list[0][index ]= *h->short_ref[i];
2985 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2988 for(i = 0; i < 16; i++){
2990 if(h->long_ref[i] == NULL) continue;
2991 sel = h->long_ref[i]->reference | structure_sel;
2992 if(sel != PICT_FRAME) continue;
2993 frame_list[0][index ]= *h->long_ref[i];
2994 frame_list[0][index++].pic_id= i;;
2998 index = split_field_ref_list(h->default_ref_list[0],
2999 h->ref_count[0], frame_list[0],
3000 index, s->picture_structure,
3003 if(index < h->ref_count[0])
3004 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3007 for (i=0; i<h->ref_count[0]; i++) {
3008 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3010 if(h->slice_type==B_TYPE){
3011 for (i=0; i<h->ref_count[1]; i++) {
3012 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3019 static void print_short_term(H264Context *h);
3020 static void print_long_term(H264Context *h);
3023 * Extract structure information about the picture described by pic_num in
3024 * the current decoding context (frame or field). Note that pic_num is
3025 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3026 * @param pic_num picture number for which to extract structure information
3027 * @param structure one of PICT_XXX describing structure of picture
3029 * @return frame number (short term) or long term index of picture
3030 * described by pic_num
3032 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3033 MpegEncContext * const s = &h->s;
/* default: the picture has the same structure as the current picture */
3035 *structure = s->picture_structure;
/* NOTE(review): the field-decoding branch (parity test on pic_num and the
 * pic_num >>= 1 that converts a field pic_num to a frame number) appears
 * truncated in this listing — confirm against the complete source. */
3038 /* opposite field */
3039 *structure ^= PICT_FRAME;
/**
 * Parses the ref_pic_list_reordering() syntax of a slice header and applies
 * it to h->ref_list, starting from the default reference lists.
 * Consumes bits from s->gb. Returns 0 on success, <0 on bitstream errors
 * (several error paths are outside this truncated listing).
 */
3046 static int decode_ref_pic_list_reordering(H264Context *h){
3047 MpegEncContext * const s = &h->s;
3048 int list, index, pic_structure;
3050 print_short_term(h);
3052 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3054 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering commands permute/overwrite it */
3055 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3057 if(get_bits1(&s->gb)){
3058 int pred= h->curr_pic_num;
3060 for(index=0; ; index++){
3061 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3062 unsigned int pic_id;
3064 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
3066 if(reordering_of_pic_nums_idc==3)
3069 if(index >= h->ref_count[list]){
3070 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3074 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term picture addressed via abs_diff_pic_num */
3075 if(reordering_of_pic_nums_idc<2){
3076 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3079 if(abs_diff_pic_num >= h->max_pic_num){
3080 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3084 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3085 else pred+= abs_diff_pic_num;
/* modular wrap of picNum, per the spec (max_pic_num is a power of two) */
3086 pred &= h->max_pic_num - 1;
3088 frame_num = pic_num_extract(h, pred, &pic_structure);
3090 for(i= h->short_ref_count-1; i>=0; i--){
3091 ref = h->short_ref[i];
3092 assert(ref->reference);
3093 assert(!ref->long_ref);
3094 if(ref->data[0] != NULL &&
3095 ref->frame_num == frame_num &&
3096 (ref->reference & pic_structure) &&
3097 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc 2: long-term picture addressed via long_term_pic_idx */
3104 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3106 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3109 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3112 ref = h->long_ref[long_idx];
3113 assert(!(ref && !ref->reference));
3114 if(ref && (ref->reference & pic_structure)){
3115 ref->pic_id= pic_id;
3116 assert(ref->long_ref);
3124 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3125 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift the remaining entries down to make room at 'index' for 'ref' */
3127 for(i=index; i+1<h->ref_count[list]; i++){
3128 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3131 for(; i > index; i--){
3132 h->ref_list[list][i]= h->ref_list[list][i-1];
3134 h->ref_list[list][index]= *ref;
3136 int bot = pic_structure == PICT_BOTTOM_FIELD;
3137 pic_as_field(&h->ref_list[list][index], bot);
3141 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* replace any hole (missing reference) with the current picture so later
 * code never dereferences a NULL data[0] */
3147 for(list=0; list<h->list_count; list++){
3148 for(index= 0; index < h->ref_count[list]; index++){
3149 if(!h->ref_list[list][index].data[0])
3150 h->ref_list[list][index]= s->current_picture;
3154 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3155 direct_dist_scale_factor(h);
3156 direct_ref_list_init(h);
/**
 * Builds the per-field reference entries used by MBAFF decoding: for each
 * frame reference i, entries [16+2*i] (top field) and [16+2*i+1] (bottom
 * field) are derived with doubled linesizes, and the prediction weights are
 * duplicated accordingly.
 */
3160 static void fill_mbaff_ref_list(H264Context *h){
3162 for(list=0; list<2; list++){ //FIXME try list_count
3163 for(i=0; i<h->ref_count[list]; i++){
3164 Picture *frame = &h->ref_list[list][i];
3165 Picture *field = &h->ref_list[list][16+2*i];
/* double the stride so each field view skips the other field's rows */
3168 field[0].linesize[j] <<= 1;
3169 field[1] = field[0];
/* bottom field starts one (original) row below the top field */
3171 field[1].data[j] += frame->linesize[j];
/* both field entries inherit the frame's explicit weights/offsets */
3173 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3174 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3176 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3177 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are duplicated across both axes of the table */
3181 for(j=0; j<h->ref_count[1]; j++){
3182 for(i=0; i<h->ref_count[0]; i++)
3183 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3184 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3185 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the pred_weight_table() slice-header syntax (explicit weighted
 * prediction): log2 weight denominators, then per-reference luma/chroma
 * weights and offsets for list 0 (and list 1 for B slices).
 * Sets h->use_weight / h->use_weight_chroma when any non-default value
 * is seen. Consumes bits from s->gb.
 */
3189 static int pred_weight_table(H264Context *h){
3190 MpegEncContext * const s = &h->s;
3192 int luma_def, chroma_def;
3195 h->use_weight_chroma= 0;
3196 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3197 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* the "identity" weight: weight==1<<denom, offset==0 means no scaling */
3198 luma_def = 1<<h->luma_log2_weight_denom;
3199 chroma_def = 1<<h->chroma_log2_weight_denom;
3201 for(list=0; list<2; list++){
3202 for(i=0; i<h->ref_count[list]; i++){
3203 int luma_weight_flag, chroma_weight_flag;
3205 luma_weight_flag= get_bits1(&s->gb);
3206 if(luma_weight_flag){
3207 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3208 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3209 if( h->luma_weight[list][i] != luma_def
3210 || h->luma_offset[list][i] != 0)
/* flag absent: fall back to the identity weight for this reference */
3213 h->luma_weight[list][i]= luma_def;
3214 h->luma_offset[list][i]= 0;
3217 chroma_weight_flag= get_bits1(&s->gb);
3218 if(chroma_weight_flag){
3221 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3222 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3223 if( h->chroma_weight[list][i][j] != chroma_def
3224 || h->chroma_offset[list][i][j] != 0)
3225 h->use_weight_chroma= 1;
3230 h->chroma_weight[list][i][j]= chroma_def;
3231 h->chroma_offset[list][i][j]= 0;
/* only B slices carry a second (list 1) weight table */
3235 if(h->slice_type != B_TYPE) break;
3237 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit weighted-prediction table for B slices
 * (weighted_bipred_idc == 2): per (ref0, ref1) pair a weight derived from
 * the POC distances, falling back to 32/32 (equal weighting) when the
 * distance scale factor is out of range.
 */
3241 static void implicit_weight_table(H264Context *h){
3242 MpegEncContext * const s = &h->s;
3244 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric reference pair: implicit weighting degenerates to
 * plain (unweighted) bi-prediction, so disable it entirely */
3246 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3247 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3249 h->use_weight_chroma= 0;
3254 h->use_weight_chroma= 2;
3255 h->luma_log2_weight_denom= 5;
3256 h->chroma_log2_weight_denom= 5;
3258 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3259 int poc0 = h->ref_list[0][ref0].poc;
3260 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3261 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the spec's implicit weight derivation */
3262 int td = av_clip(poc1 - poc0, -128, 127);
3264 int tb = av_clip(cur_poc - poc0, -128, 127);
3265 int tx = (16384 + (FFABS(td) >> 1)) / td;
3266 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3267 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3268 h->implicit_weight[ref0][ref1] = 32;
3270 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3272 h->implicit_weight[ref0][ref1] = 32;
3278 * Mark a picture as no longer needed for reference. The refmask
3279 * argument allows unreferencing of individual fields or the whole frame.
3280 * If the picture becomes entirely unreferenced, but is being held for
3281 * display purposes, it is marked as such.
3282 * @param refmask mask of fields to unreference; the mask is bitwise
3283 * anded with the reference marking of pic
3284 * @return non-zero if pic becomes entirely unreferenced (except possibly
3285 * for display purposes) zero if one of the fields remains in
3288 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* if any field bit survives the mask, the picture is still a reference
 * (the early return for this case lies outside this truncated listing) */
3290 if (pic->reference &= refmask) {
3293 if(pic == h->delayed_output_pic)
/* keep the frame alive purely for delayed output */
3294 pic->reference=DELAYED_PIC_REF;
3296 for(i = 0; h->delayed_pic[i]; i++)
3297 if(pic == h->delayed_pic[i]){
3298 pic->reference=DELAYED_PIC_REF;
3307 * instantaneous decoder refresh. Drops every short-term and long-term
3307b * reference picture, as required at an IDR slice.
3309 static void idr(H264Context *h){
3312 for(i=0; i<16; i++){
3313 if (h->long_ref[i] != NULL) {
/* refmask 0 fully unreferences the picture (may keep it for display) */
3314 unreference_pic(h, h->long_ref[i], 0);
3315 h->long_ref[i]= NULL;
3318 h->long_ref_count=0;
3320 for(i=0; i<h->short_ref_count; i++){
3321 unreference_pic(h, h->short_ref[i], 0);
3322 h->short_ref[i]= NULL;
3324 h->short_ref_count=0;
3327 /* forget old pics after a seek: drop delayed-output pictures and the
 * current picture's reference marking, so stale state cannot leak into
 * the post-seek stream. Installed as AVCodec.flush. */
3328 static void flush_dpb(AVCodecContext *avctx){
3329 H264Context *h= avctx->priv_data;
3331 for(i=0; i<16; i++) {
3332 if(h->delayed_pic[i])
3333 h->delayed_pic[i]->reference= 0;
3334 h->delayed_pic[i]= NULL;
3336 if(h->delayed_output_pic)
3337 h->delayed_output_pic->reference= 0;
3338 h->delayed_output_pic= NULL;
3340 if(h->s.current_picture_ptr)
3341 h->s.current_picture_ptr->reference= 0;
3345 * Find a Picture in the short term reference list by frame number.
3346 * @param frame_num frame number to search for
3347 * @param idx the index into h->short_ref where returned picture is found
3348 * undefined if no picture found.
3349 * @return pointer to the found picture, or NULL if no pic with the provided
3350 * frame number is found
3352 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3353 MpegEncContext * const s = &h->s;
/* linear scan; the short-term list is at most 16 entries */
3356 for(i=0; i<h->short_ref_count; i++){
3357 Picture *pic= h->short_ref[i];
3358 if(s->avctx->debug&FF_DEBUG_MMCO)
3359 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3360 if(pic->frame_num == frame_num) {
3369 * Remove a picture from the short term reference list by its index in
3370 * that list. This does no checking on the provided index; it is assumed
3371 * to be valid. Other list entries are shifted down.
3372 * @param i index into h->short_ref of picture to remove.
3374 static void remove_short_at_index(H264Context *h, int i){
3375 assert(i > 0 && i < h->short_ref_count);
3376 h->short_ref[i]= NULL;
3377 if (--h->short_ref_count)
3378 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3383 * Remove the short-term reference picture with the given frame number.
3383b * Delegates the search to find_short() and the list compaction to
3383c * remove_short_at_index().
3383d * @param frame_num frame number of the picture to remove
3383 * @return the removed picture or NULL if an error occurs
3385 static Picture * remove_short(H264Context *h, int frame_num){
3386 MpegEncContext * const s = &h->s;
3390 if(s->avctx->debug&FF_DEBUG_MMCO)
3391 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3393 pic = find_short(h, frame_num, &i);
/* only compact the list if the frame number was actually found */
3395 remove_short_at_index(h, i);
3401 * Remove a picture from the long term reference list by its index in
3402 * that list. This does no checking on the provided index; it is assumed
3403 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3404 * @param i index into h->long_ref of picture to remove.
3406 static void remove_long_at_index(H264Context *h, int i){
/* unlike the short-term list, long_ref is indexed by long-term index,
 * so entries are NULLed in place rather than shifted */
3407 h->long_ref[i]= NULL;
3408 h->long_ref_count--;
3413 * Remove and return the long-term reference picture at long-term index i.
3413 * @return the removed picture or NULL if an error occurs
3415 static Picture * remove_long(H264Context *h, int i){
3418 pic= h->long_ref[i];
/* clears the slot and decrements long_ref_count (guard on pic != NULL
 * lies outside this truncated listing) */
3420 remove_long_at_index(h, i);
3426 * print short term list
3426b * Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
3426c * is enabled; a no-op otherwise.
3428 static void print_short_term(H264Context *h) {
3430 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3431 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3432 for(i=0; i<h->short_ref_count; i++){
3433 Picture *pic= h->short_ref[i];
3434 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3440 * print long term list
3440b * Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
3440c * is enabled; a no-op otherwise.
3442 static void print_long_term(H264Context *h) {
3444 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3445 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3446 for(i = 0; i < 16; i++){
3447 Picture *pic= h->long_ref[i];
/* empty slots are NULL (guard appears truncated in this listing) */
3449 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3456 * Executes the reference picture marking (memory management control operations).
3456b * Applies the parsed MMCO commands to the short-term/long-term lists and,
3456c * when the current picture was not marked long-term, pushes it onto the
3456d * front of the short-term list (sliding window).
3458 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3459 MpegEncContext * const s = &h->s;
3461 int current_is_long=0;
3464 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3465 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3467 for(i=0; i<mmco_count; i++){
3468 if(s->avctx->debug&FF_DEBUG_MMCO)
3469 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3471 switch(mmco[i].opcode){
3472 case MMCO_SHORT2UNUSED:
3473 pic= remove_short(h, mmco[i].short_pic_num);
3475 unreference_pic(h, pic, 0);
3476 else if(s->avctx->debug&FF_DEBUG_MMCO)
3477 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
3479 case MMCO_SHORT2LONG:
/* free any picture already occupying the target long-term slot */
3480 pic= remove_long(h, mmco[i].long_arg);
3481 if(pic) unreference_pic(h, pic, 0);
3483 h->long_ref[ mmco[i].long_arg ]= remove_short(h, mmco[i].short_pic_num);
3484 if (h->long_ref[ mmco[i].long_arg ]){
3485 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3486 h->long_ref_count++;
3489 case MMCO_LONG2UNUSED:
3490 pic= remove_long(h, mmco[i].long_arg);
3492 unreference_pic(h, pic, 0);
3493 else if(s->avctx->debug&FF_DEBUG_MMCO)
3494 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* MMCO_LONG: mark the *current* picture as long-term */
3497 pic= remove_long(h, mmco[i].long_arg);
3498 if(pic) unreference_pic(h, pic, 0);
3500 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3501 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3502 h->long_ref_count++;
3506 case MMCO_SET_MAX_LONG:
3507 assert(mmco[i].long_arg <= 16);
3508 // just remove the long term which index is greater than new max
3509 for(j = mmco[i].long_arg; j<16; j++){
3510 pic = remove_long(h, j);
3511 if (pic) unreference_pic(h, pic, 0);
/* MMCO_RESET: drop everything (IDR-like full reset) */
3515 while(h->short_ref_count){
3516 pic= remove_short(h, h->short_ref[0]->frame_num);
3517 if(pic) unreference_pic(h, pic, 0);
3519 for(j = 0; j < 16; j++) {
3520 pic= remove_long(h, j);
3521 if(pic) unreference_pic(h, pic, 0);
3528 if(!current_is_long){
/* defensive: the current frame_num must not already be in the list */
3529 pic= remove_short(h, s->current_picture_ptr->frame_num);
3531 unreference_pic(h, pic, 0);
3532 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3535 if(h->short_ref_count)
3536 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
/* newest short-term reference always sits at index 0 */
3538 h->short_ref[0]= s->current_picture_ptr;
3539 h->short_ref[0]->long_ref=0;
3540 h->short_ref_count++;
3543 print_short_term(h);
/**
 * Parses the dec_ref_pic_marking() slice-header syntax into h->mmco[].
 * For IDR slices it synthesizes the implicit marking; otherwise it reads
 * the explicit MMCO list, or generates a sliding-window MMCO_SHORT2UNUSED
 * when the DPB is full. Consumes bits from gb.
 */
3548 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3549 MpegEncContext * const s = &h->s;
3552 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, stored as 0/-1 */
3553 s->broken_link= get_bits1(gb) -1;
3554 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3555 if(h->mmco[0].long_arg == -1)
3558 h->mmco[0].opcode= MMCO_LONG;
3562 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3563 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3564 MMCOOpcode opcode= get_ue_golomb(gb);
3566 h->mmco[i].opcode= opcode;
3567 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute frame_num, wrapped */
3568 h->mmco[i].short_pic_num= (h->frame_num - get_ue_golomb(gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
3569 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3570 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3574 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3575 unsigned int long_arg= get_ue_golomb(gb);
3576 if(/*h->mmco[i].long_arg >= h->long_ref_count || h->long_ref[ h->mmco[i].long_arg ] == NULL*/ long_arg >= 16){
3577 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3580 h->mmco[i].long_arg= long_arg;
3583 if(opcode > (unsigned)MMCO_LONG){
3584 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3587 if(opcode == MMCO_END)
/* implicit (sliding window) marking: evict the oldest short-term ref
 * once the DPB holds ref_frame_count pictures */
3592 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3594 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
3595 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3596 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types of the spec, then stores the per-field and frame POC
 * into the current Picture.
 */
3606 static int init_poc(H264Context *h){
3607 MpegEncContext * const s = &h->s;
3608 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3611 if(h->nal_unit_type == NAL_IDR_SLICE){
3612 h->frame_num_offset= 0;
/* frame_num wrapped -> advance the offset by one wrap period */
3614 if(h->frame_num < h->prev_frame_num)
3615 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3617 h->frame_num_offset= h->prev_frame_num_offset;
/* --- POC type 0: poc_lsb transmitted, msb inferred from wrap --- */
3620 if(h->sps.poc_type==0){
3621 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3623 if(h->nal_unit_type == NAL_IDR_SLICE){
3628 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3629 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3630 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3631 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3633 h->poc_msb = h->prev_poc_msb;
3634 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3636 field_poc[1] = h->poc_msb + h->poc_lsb;
3637 if(s->picture_structure == PICT_FRAME)
3638 field_poc[1] += h->delta_poc_bottom;
/* --- POC type 1: POC derived from frame_num via SPS offset cycle --- */
3639 }else if(h->sps.poc_type==1){
3640 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3643 if(h->sps.poc_cycle_length != 0)
3644 abs_frame_num = h->frame_num_offset + h->frame_num;
3648 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3651 expected_delta_per_poc_cycle = 0;
3652 for(i=0; i < h->sps.poc_cycle_length; i++)
3653 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3655 if(abs_frame_num > 0){
3656 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3657 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3659 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3660 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3661 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3665 if(h->nal_ref_idc == 0)
3666 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3668 field_poc[0] = expectedpoc + h->delta_poc[0];
3669 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3671 if(s->picture_structure == PICT_FRAME)
3672 field_poc[1] += h->delta_poc[1];
/* --- POC type 2: POC == 2*frame_num (minus 1 for non-references) --- */
3675 if(h->nal_unit_type == NAL_IDR_SLICE){
3678 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3679 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
3685 if(s->picture_structure != PICT_BOTTOM_FIELD)
3686 s->current_picture_ptr->field_poc[0]= field_poc[0];
3687 if(s->picture_structure != PICT_TOP_FIELD)
3688 s->current_picture_ptr->field_poc[1]= field_poc[1];
3689 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
3690 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
3697 * initialize scan tables
3697b * Copies (or permutes) the 4x4 and 8x8 zigzag/field scan tables into the
3697c * context. When the platform IDCT uses a transposed coefficient layout
3697d * (i.e. is not the plain C implementation), the scans are permuted with T()
3697e * to match. The *_q0 pointers select the untransposed tables for lossless
3697f * (transform-bypass) blocks.
3699 static void init_scan_tables(H264Context *h){
3700 MpegEncContext * const s = &h->s;
3702 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3703 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3704 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3706 for(i=0; i<16; i++){
/* swap row/column nibbles: transpose a 4x4 scan position */
3707 #define T(x) (x>>2) | ((x<<2) & 0xF)
3708 h->zigzag_scan[i] = T(zigzag_scan[i]);
3709 h-> field_scan[i] = T( field_scan[i]);
3713 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3714 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3715 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3716 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3717 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3719 for(i=0; i<64; i++){
/* swap the 3-bit row/column fields: transpose an 8x8 scan position */
3720 #define T(x) (x>>3) | ((x&7)<<3)
3721 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3722 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3723 h->field_scan8x8[i] = T(field_scan8x8[i]);
3724 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3728 if(h->sps.transform_bypass){ //FIXME same ugly
3729 h->zigzag_scan_q0 = zigzag_scan;
3730 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3731 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3732 h->field_scan_q0 = field_scan;
3733 h->field_scan8x8_q0 = field_scan8x8;
3734 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3736 h->zigzag_scan_q0 = h->zigzag_scan;
3737 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3738 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3739 h->field_scan_q0 = h->field_scan;
3740 h->field_scan8x8_q0 = h->field_scan8x8;
3741 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3746 * Replicates H264 "master" context to thread contexts.
3748 static void clone_slice(H264Context *dst, H264Context *src)
3750 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3751 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3752 dst->s.current_picture = src->s.current_picture;
3753 dst->s.linesize = src->s.linesize;
3754 dst->s.uvlinesize = src->s.uvlinesize;
3756 dst->prev_poc_msb = src->prev_poc_msb;
3757 dst->prev_poc_lsb = src->prev_poc_lsb;
3758 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3759 dst->prev_frame_num = src->prev_frame_num;
3760 dst->short_ref_count = src->short_ref_count;
3762 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3763 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3764 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3765 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3767 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3768 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3772 * decodes a slice header.
3773 * this will also call MPV_common_init() and frame_start() as needed
3775 * @param h h264context
3776 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3778 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3780 static int decode_slice_header(H264Context *h, H264Context *h0){
3781 MpegEncContext * const s = &h->s;
3782 unsigned int first_mb_in_slice;
3783 unsigned int pps_id;
3784 int num_ref_idx_active_override_flag;
3785 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3786 unsigned int slice_type, tmp, i;
3787 int default_ref_list_done = 0;
3789 s->dropable= h->nal_ref_idc == 0;
3791 first_mb_in_slice= get_ue_golomb(&s->gb);
/* in chunked mode, first_mb 0 signals the start of a new picture */
3793 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3794 h0->current_slice = 0;
3795 s->current_picture_ptr= NULL;
3798 slice_type= get_ue_golomb(&s->gb);
/* NOTE(review): this log prints h->slice_type, which at this point still
 * holds the *previous* slice's type — the freshly parsed value is in the
 * local 'slice_type'. Confirm against upstream before changing. */
3800 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3805 h->slice_type_fixed=1;
3807 h->slice_type_fixed=0;
3809 slice_type= slice_type_map[ slice_type ];
/* ref list unchanged from last slice -> default list can be reused */
3810 if (slice_type == I_TYPE
3811 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3812 default_ref_list_done = 1;
3814 h->slice_type= slice_type;
3816 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
/* --- activate PPS/SPS referenced by this slice --- */
3818 pps_id= get_ue_golomb(&s->gb);
3819 if(pps_id>=MAX_PPS_COUNT){
3820 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3823 if(!h0->pps_buffers[pps_id]) {
3824 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3827 h->pps= *h0->pps_buffers[pps_id];
3829 if(!h0->sps_buffers[h->pps.sps_id]) {
3830 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3833 h->sps = *h0->sps_buffers[h->pps.sps_id];
3835 if(h == h0 && h->dequant_coeff_pps != pps_id){
3836 h->dequant_coeff_pps = pps_id;
3837 init_dequant_tables(h);
/* --- derive picture dimensions from the SPS --- */
3840 s->mb_width= h->sps.mb_width;
3841 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3843 h->b_stride= s->mb_width*4;
3844 h->b8_stride= s->mb_width*2;
3846 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3847 if(h->sps.frame_mbs_only_flag)
3848 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3850 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3852 if (s->context_initialized
3853 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3855 return -1; // width / height changed during parallelized decoding
/* --- (re)initialize MpegEncContext and per-thread contexts --- */
3859 if (!s->context_initialized) {
3861 return -1; // we cant (re-)initialize context during parallel decoding
3862 if (MPV_common_init(s) < 0)
3865 init_scan_tables(h);
3868 for(i = 1; i < s->avctx->thread_count; i++) {
3870 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
/* clone the MpegEncContext part, zero the H264-specific remainder */
3871 memcpy(c, h, sizeof(MpegEncContext));
3872 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3875 init_scan_tables(c);
3879 for(i = 0; i < s->avctx->thread_count; i++)
3880 if(context_init(h->thread_context[i]) < 0)
3883 s->avctx->width = s->width;
3884 s->avctx->height = s->height;
3885 s->avctx->sample_aspect_ratio= h->sps.sar;
3886 if(!s->avctx->sample_aspect_ratio.den)
3887 s->avctx->sample_aspect_ratio.den = 1;
3889 if(h->sps.timing_info_present_flag){
3890 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around a timebase bug in old x264 encodes */
3891 if(h->x264_build > 0 && h->x264_build < 44)
3892 s->avctx->time_base.den *= 2;
3893 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3894 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3898 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* --- picture structure (frame / field / MBAFF) --- */
3901 h->mb_aff_frame = 0;
3902 if(h->sps.frame_mbs_only_flag){
3903 s->picture_structure= PICT_FRAME;
3905 if(get_bits1(&s->gb)) { //field_pic_flag
3906 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3907 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
3909 s->picture_structure= PICT_FRAME;
3910 h->mb_aff_frame = h->sps.mb_aff;
3914 if(h0->current_slice == 0){
3915 if(frame_start(h) < 0)
3921 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3923 assert(s->mb_num == s->mb_width * s->mb_height);
3924 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3925 first_mb_in_slice >= s->mb_num){
3926 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3929 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3930 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3931 if (s->picture_structure == PICT_BOTTOM_FIELD)
3932 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3933 assert(s->mb_y < s->mb_height);
/* field pictures use doubled pic numbers (spec 8.2.4.1) */
3935 if(s->picture_structure==PICT_FRAME){
3936 h->curr_pic_num= h->frame_num;
3937 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3939 h->curr_pic_num= 2*h->frame_num + 1;
3940 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3943 if(h->nal_unit_type == NAL_IDR_SLICE){
3944 get_ue_golomb(&s->gb); /* idr_pic_id */
3947 if(h->sps.poc_type==0){
3948 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3950 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3951 h->delta_poc_bottom= get_se_golomb(&s->gb);
3955 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3956 h->delta_poc[0]= get_se_golomb(&s->gb);
3958 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3959 h->delta_poc[1]= get_se_golomb(&s->gb);
3964 if(h->pps.redundant_pic_cnt_present){
3965 h->redundant_pic_count= get_ue_golomb(&s->gb);
3968 //set defaults, might be overridden a few lines later
3969 h->ref_count[0]= h->pps.ref_count[0];
3970 h->ref_count[1]= h->pps.ref_count[1];
3972 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
3973 if(h->slice_type == B_TYPE){
3974 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3975 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
3976 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
3978 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3980 if(num_ref_idx_active_override_flag){
3981 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3982 if(h->slice_type==B_TYPE)
3983 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned compare also catches ref_count == 0 (wraps below -1) */
3985 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3986 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3987 h->ref_count[0]= h->ref_count[1]= 1;
3991 if(h->slice_type == B_TYPE)
/* --- build / reorder reference lists, parse weight tables --- */
3998 if(!default_ref_list_done){
3999 fill_default_ref_list(h);
4002 if(decode_ref_pic_list_reordering(h) < 0)
4005 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4006 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4007 pred_weight_table(h);
4008 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4009 implicit_weight_table(h);
4014 decode_ref_pic_marking(h0, &s->gb);
4017 fill_mbaff_ref_list(h);
4019 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4020 tmp = get_ue_golomb(&s->gb);
4022 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4025 h->cabac_init_idc= tmp;
4028 h->last_qscale_diff = 0;
4029 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4031 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4035 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4036 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4037 //FIXME qscale / qp ... stuff
4038 if(h->slice_type == SP_TYPE){
4039 get_bits1(&s->gb); /* sp_for_switch_flag */
4041 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4042 get_se_golomb(&s->gb); /* slice_qs_delta */
/* --- deblocking filter parameters --- */
4045 h->deblocking_filter = 1;
4046 h->slice_alpha_c0_offset = 0;
4047 h->slice_beta_offset = 0;
4048 if( h->pps.deblocking_filter_parameters_present ) {
4049 tmp= get_ue_golomb(&s->gb);
4051 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4054 h->deblocking_filter= tmp;
/* bitstream: 1 = disable; internal: 1 = enable, hence the flip */
4055 if(h->deblocking_filter < 2)
4056 h->deblocking_filter^= 1; // 1<->0
4058 if( h->deblocking_filter ) {
4059 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4060 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4064 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4065 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4066 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4067 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4068 h->deblocking_filter= 0;
4070 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4071 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4072 /* Cheat slightly for speed:
4073 Dont bother to deblock across slices */
4074 h->deblocking_filter = 2;
4076 h0->max_contexts = 1;
4077 if(!h0->single_decode_warning) {
4078 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4079 h0->single_decode_warning = 1;
4082 return 1; // deblocking switched inside frame
4087 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
/* NOTE(review): literal '?' placeholder — slice groups (FMO) are not
 * implemented; upstream keeps this statement disabled. Do not enable
 * without supplying the spec-defined bit width. */
4088 slice_group_change_cycle= get_bits(&s->gb, ?);
4091 h0->last_slice_type = slice_type;
4092 h->slice_num = ++h0->current_slice;
4094 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4095 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4097 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4098 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4100 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4102 av_get_pict_type_char(h->slice_type),
4103 pps_id, h->frame_num,
4104 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4105 h->ref_count[0], h->ref_count[1],
4107 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4109 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* non-reference frames may use the cheaper 2-tap qpel in fast mode */
4113 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
4114 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4115 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4117 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4118 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads a CAVLC level_prefix: the number of leading zero bits before the
 * first 1 bit, using the cached bitstream reader macros.
 * @return the decoded level_prefix value
 */
4127 static inline int get_level_prefix(GetBitContext *gb){
4131 OPEN_READER(re, gb);
4132 UPDATE_CACHE(re, gb);
4133 buf=GET_CACHE(re, gb);
/* position of the first set bit from the MSB == prefix length + 1 */
4135 log= 32 - av_log2(buf);
4137 print_bin(buf>>(32-log), log);
4138 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zeros plus the terminating 1 bit */
4141 LAST_SKIP_BITS(re, gb, log);
4142 CLOSE_READER(re, gb);
4147 static inline int get_dct8x8_allowed(H264Context *h){
4150 if(!IS_SUB_8X8(h->sub_mb_type[i])
4151 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4158 * decodes a residual block.
4159 * @param n block index
4160 * @param scantable scantable
4161 * @param max_coeff number of coefficients in the block
4162 * @return <0 if an error occurred
4164 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4165 MpegEncContext * const s = &h->s;
4166 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4168 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4170 //FIXME put trailing_onex into the context
4172 if(n == CHROMA_DC_BLOCK_INDEX){
4173 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4174 total_coeff= coeff_token>>2;
4176 if(n == LUMA_DC_BLOCK_INDEX){
4177 total_coeff= pred_non_zero_count(h, 0);
4178 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4179 total_coeff= coeff_token>>2;
4181 total_coeff= pred_non_zero_count(h, n);
4182 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4183 total_coeff= coeff_token>>2;
4184 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4188 //FIXME set last_non_zero?
4192 if(total_coeff > (unsigned)max_coeff) {
4193 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4197 trailing_ones= coeff_token&3;
4198 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4199 assert(total_coeff<=16);
4201 for(i=0; i<trailing_ones; i++){
4202 level[i]= 1 - 2*get_bits1(gb);
4206 int level_code, mask;
4207 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4208 int prefix= get_level_prefix(gb);
4210 //first coefficient has suffix_length equal to 0 or 1
4211 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4213 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4215 level_code= (prefix<<suffix_length); //part
4216 }else if(prefix==14){
4218 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4220 level_code= prefix + get_bits(gb, 4); //part
4221 }else if(prefix==15){
4222 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4223 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4225 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4229 if(trailing_ones < 3) level_code += 2;
4234 mask= -(level_code&1);
4235 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4238 //remaining coefficients have suffix_length > 0
4239 for(;i<total_coeff;i++) {
4240 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4241 prefix = get_level_prefix(gb);
4243 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4244 }else if(prefix==15){
4245 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4247 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4250 mask= -(level_code&1);
4251 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4252 if(level_code > suffix_limit[suffix_length])
4257 if(total_coeff == max_coeff)
4260 if(n == CHROMA_DC_BLOCK_INDEX)
4261 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4263 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4266 coeff_num = zeros_left + total_coeff - 1;
4267 j = scantable[coeff_num];
4269 block[j] = level[0];
4270 for(i=1;i<total_coeff;i++) {
4273 else if(zeros_left < 7){
4274 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4276 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4278 zeros_left -= run_before;
4279 coeff_num -= 1 + run_before;
4280 j= scantable[ coeff_num ];
4285 block[j] = (level[0] * qmul[j] + 32)>>6;
4286 for(i=1;i<total_coeff;i++) {
4289 else if(zeros_left < 7){
4290 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4292 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4294 zeros_left -= run_before;
4295 coeff_num -= 1 + run_before;
4296 j= scantable[ coeff_num ];
4298 block[j]= (level[i] * qmul[j] + 32)>>6;
4303 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4310 static void predict_field_decoding_flag(H264Context *h){
4311 MpegEncContext * const s = &h->s;
4312 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4313 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4314 ? s->current_picture.mb_type[mb_xy-1]
4315 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4316 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4318 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4322 * decodes a P_SKIP or B_SKIP macroblock
4324 static void decode_mb_skip(H264Context *h){
4325 MpegEncContext * const s = &h->s;
4326 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4329 memset(h->non_zero_count[mb_xy], 0, 16);
4330 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4333 mb_type|= MB_TYPE_INTERLACED;
4335 if( h->slice_type == B_TYPE )
4337 // just for fill_caches. pred_direct_motion will set the real mb_type
4338 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4340 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4341 pred_direct_motion(h, &mb_type);
4342 mb_type|= MB_TYPE_SKIP;
4347 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4349 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4350 pred_pskip_motion(h, &mx, &my);
4351 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4352 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4355 write_back_motion(h, mb_type);
4356 s->current_picture.mb_type[mb_xy]= mb_type;
4357 s->current_picture.qscale_table[mb_xy]= s->qscale;
4358 h->slice_table[ mb_xy ]= h->slice_num;
4359 h->prev_mb_skipped= 1;
4363 * decodes a macroblock
4364 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4366 static int decode_mb_cavlc(H264Context *h){
4367 MpegEncContext * const s = &h->s;
4368 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4369 int partition_count;
4370 unsigned int mb_type, cbp;
4371 int dct8x8_allowed= h->pps.transform_8x8_mode;
4373 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4375 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4376 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4378 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4379 if(s->mb_skip_run==-1)
4380 s->mb_skip_run= get_ue_golomb(&s->gb);
4382 if (s->mb_skip_run--) {
4383 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4384 if(s->mb_skip_run==0)
4385 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4387 predict_field_decoding_flag(h);
4394 if( (s->mb_y&1) == 0 )
4395 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4397 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4399 h->prev_mb_skipped= 0;
4401 mb_type= get_ue_golomb(&s->gb);
4402 if(h->slice_type == B_TYPE){
4404 partition_count= b_mb_type_info[mb_type].partition_count;
4405 mb_type= b_mb_type_info[mb_type].type;
4408 goto decode_intra_mb;
4410 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4412 partition_count= p_mb_type_info[mb_type].partition_count;
4413 mb_type= p_mb_type_info[mb_type].type;
4416 goto decode_intra_mb;
4419 assert(h->slice_type == I_TYPE);
4422 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4426 cbp= i_mb_type_info[mb_type].cbp;
4427 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4428 mb_type= i_mb_type_info[mb_type].type;
4432 mb_type |= MB_TYPE_INTERLACED;
4434 h->slice_table[ mb_xy ]= h->slice_num;
4436 if(IS_INTRA_PCM(mb_type)){
4439 // We assume these blocks are very rare so we do not optimize it.
4440 align_get_bits(&s->gb);
4442 // The pixels are stored in the same order as levels in h->mb array.
4443 for(y=0; y<16; y++){
4444 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4445 for(x=0; x<16; x++){
4446 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4447 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4451 const int index= 256 + 4*(y&3) + 32*(y>>2);
4453 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4454 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4458 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4460 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4461 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4465 // In deblocking, the quantizer is 0
4466 s->current_picture.qscale_table[mb_xy]= 0;
4467 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4468 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4469 // All coeffs are present
4470 memset(h->non_zero_count[mb_xy], 16, 16);
4472 s->current_picture.mb_type[mb_xy]= mb_type;
4477 h->ref_count[0] <<= 1;
4478 h->ref_count[1] <<= 1;
4481 fill_caches(h, mb_type, 0);
4484 if(IS_INTRA(mb_type)){
4486 // init_top_left_availability(h);
4487 if(IS_INTRA4x4(mb_type)){
4490 if(dct8x8_allowed && get_bits1(&s->gb)){
4491 mb_type |= MB_TYPE_8x8DCT;
4495 // fill_intra4x4_pred_table(h);
4496 for(i=0; i<16; i+=di){
4497 int mode= pred_intra_mode(h, i);
4499 if(!get_bits1(&s->gb)){
4500 const int rem_mode= get_bits(&s->gb, 3);
4501 mode = rem_mode + (rem_mode >= mode);
4505 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4507 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4509 write_back_intra_pred_mode(h);
4510 if( check_intra4x4_pred_mode(h) < 0)
4513 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4514 if(h->intra16x16_pred_mode < 0)
4518 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4521 h->chroma_pred_mode= pred_mode;
4522 }else if(partition_count==4){
4523 int i, j, sub_partition_count[4], list, ref[2][4];
4525 if(h->slice_type == B_TYPE){
4527 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4528 if(h->sub_mb_type[i] >=13){
4529 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4532 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4533 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4535 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4536 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4537 pred_direct_motion(h, &mb_type);
4538 h->ref_cache[0][scan8[4]] =
4539 h->ref_cache[1][scan8[4]] =
4540 h->ref_cache[0][scan8[12]] =
4541 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4544 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4546 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4547 if(h->sub_mb_type[i] >=4){
4548 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4551 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4552 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4556 for(list=0; list<h->list_count; list++){
4557 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4559 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4560 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4561 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4563 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4575 dct8x8_allowed = get_dct8x8_allowed(h);
4577 for(list=0; list<h->list_count; list++){
4579 if(IS_DIRECT(h->sub_mb_type[i])) {
4580 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4583 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4584 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4586 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4587 const int sub_mb_type= h->sub_mb_type[i];
4588 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4589 for(j=0; j<sub_partition_count[i]; j++){
4591 const int index= 4*i + block_width*j;
4592 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4593 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4594 mx += get_se_golomb(&s->gb);
4595 my += get_se_golomb(&s->gb);
4596 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4598 if(IS_SUB_8X8(sub_mb_type)){
4600 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4602 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4603 }else if(IS_SUB_8X4(sub_mb_type)){
4604 mv_cache[ 1 ][0]= mx;
4605 mv_cache[ 1 ][1]= my;
4606 }else if(IS_SUB_4X8(sub_mb_type)){
4607 mv_cache[ 8 ][0]= mx;
4608 mv_cache[ 8 ][1]= my;
4610 mv_cache[ 0 ][0]= mx;
4611 mv_cache[ 0 ][1]= my;
4614 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4620 }else if(IS_DIRECT(mb_type)){
4621 pred_direct_motion(h, &mb_type);
4622 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4624 int list, mx, my, i;
4625 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4626 if(IS_16X16(mb_type)){
4627 for(list=0; list<h->list_count; list++){
4629 if(IS_DIR(mb_type, 0, list)){
4630 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4631 if(val >= h->ref_count[list]){
4632 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4636 val= LIST_NOT_USED&0xFF;
4637 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4639 for(list=0; list<h->list_count; list++){
4641 if(IS_DIR(mb_type, 0, list)){
4642 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4643 mx += get_se_golomb(&s->gb);
4644 my += get_se_golomb(&s->gb);
4645 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4647 val= pack16to32(mx,my);
4650 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4653 else if(IS_16X8(mb_type)){
4654 for(list=0; list<h->list_count; list++){
4657 if(IS_DIR(mb_type, i, list)){
4658 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4659 if(val >= h->ref_count[list]){
4660 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4664 val= LIST_NOT_USED&0xFF;
4665 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4668 for(list=0; list<h->list_count; list++){
4671 if(IS_DIR(mb_type, i, list)){
4672 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4673 mx += get_se_golomb(&s->gb);
4674 my += get_se_golomb(&s->gb);
4675 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4677 val= pack16to32(mx,my);
4680 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4684 assert(IS_8X16(mb_type));
4685 for(list=0; list<h->list_count; list++){
4688 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4689 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4690 if(val >= h->ref_count[list]){
4691 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4695 val= LIST_NOT_USED&0xFF;
4696 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4699 for(list=0; list<h->list_count; list++){
4702 if(IS_DIR(mb_type, i, list)){
4703 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4704 mx += get_se_golomb(&s->gb);
4705 my += get_se_golomb(&s->gb);
4706 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4708 val= pack16to32(mx,my);
4711 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4717 if(IS_INTER(mb_type))
4718 write_back_motion(h, mb_type);
4720 if(!IS_INTRA16x16(mb_type)){
4721 cbp= get_ue_golomb(&s->gb);
4723 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4727 if(IS_INTRA4x4(mb_type))
4728 cbp= golomb_to_intra4x4_cbp[cbp];
4730 cbp= golomb_to_inter_cbp[cbp];
4734 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4735 if(get_bits1(&s->gb))
4736 mb_type |= MB_TYPE_8x8DCT;
4738 s->current_picture.mb_type[mb_xy]= mb_type;
4740 if(cbp || IS_INTRA16x16(mb_type)){
4741 int i8x8, i4x4, chroma_idx;
4743 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4744 const uint8_t *scan, *scan8x8, *dc_scan;
4746 // fill_non_zero_count_cache(h);
4748 if(IS_INTERLACED(mb_type)){
4749 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4750 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4751 dc_scan= luma_dc_field_scan;
4753 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4754 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4755 dc_scan= luma_dc_zigzag_scan;
4758 dquant= get_se_golomb(&s->gb);
4760 if( dquant > 25 || dquant < -26 ){
4761 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4765 s->qscale += dquant;
4766 if(((unsigned)s->qscale) > 51){
4767 if(s->qscale<0) s->qscale+= 52;
4768 else s->qscale-= 52;
4771 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4772 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4773 if(IS_INTRA16x16(mb_type)){
4774 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4775 return -1; //FIXME continue if partitioned and other return -1 too
4778 assert((cbp&15) == 0 || (cbp&15) == 15);
4781 for(i8x8=0; i8x8<4; i8x8++){
4782 for(i4x4=0; i4x4<4; i4x4++){
4783 const int index= i4x4 + 4*i8x8;
4784 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4790 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4793 for(i8x8=0; i8x8<4; i8x8++){
4794 if(cbp & (1<<i8x8)){
4795 if(IS_8x8DCT(mb_type)){
4796 DCTELEM *buf = &h->mb[64*i8x8];
4798 for(i4x4=0; i4x4<4; i4x4++){
4799 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4800 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4803 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4804 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4806 for(i4x4=0; i4x4<4; i4x4++){
4807 const int index= i4x4 + 4*i8x8;
4809 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4815 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4816 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4822 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4823 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4829 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4830 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4831 for(i4x4=0; i4x4<4; i4x4++){
4832 const int index= 16 + 4*chroma_idx + i4x4;
4833 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4839 uint8_t * const nnz= &h->non_zero_count_cache[0];
4840 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4841 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4844 uint8_t * const nnz= &h->non_zero_count_cache[0];
4845 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4846 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4847 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4849 s->current_picture.qscale_table[mb_xy]= s->qscale;
4850 write_back_non_zero_count(h);
4853 h->ref_count[0] >>= 1;
4854 h->ref_count[1] >>= 1;
4860 static int decode_cabac_field_decoding_flag(H264Context *h) {
4861 MpegEncContext * const s = &h->s;
4862 const int mb_x = s->mb_x;
4863 const int mb_y = s->mb_y & ~1;
4864 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4865 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4867 unsigned int ctx = 0;
4869 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4872 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4876 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4879 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4880 uint8_t *state= &h->cabac_state[ctx_base];
4884 MpegEncContext * const s = &h->s;
4885 const int mba_xy = h->left_mb_xy[0];
4886 const int mbb_xy = h->top_mb_xy;
4888 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4890 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4892 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4893 return 0; /* I4x4 */
4896 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4897 return 0; /* I4x4 */
4900 if( get_cabac_terminate( &h->cabac ) )
4901 return 25; /* PCM */
4903 mb_type = 1; /* I16x16 */
4904 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4905 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4906 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4907 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4908 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4912 static int decode_cabac_mb_type( H264Context *h ) {
4913 MpegEncContext * const s = &h->s;
4915 if( h->slice_type == I_TYPE ) {
4916 return decode_cabac_intra_mb_type(h, 3, 1);
4917 } else if( h->slice_type == P_TYPE ) {
4918 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4920 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4921 /* P_L0_D16x16, P_8x8 */
4922 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4924 /* P_L0_D8x16, P_L0_D16x8 */
4925 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4928 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4930 } else if( h->slice_type == B_TYPE ) {
4931 const int mba_xy = h->left_mb_xy[0];
4932 const int mbb_xy = h->top_mb_xy;
4936 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4938 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4941 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4942 return 0; /* B_Direct_16x16 */
4944 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4945 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4948 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4949 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4950 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4951 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4953 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4954 else if( bits == 13 ) {
4955 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4956 } else if( bits == 14 )
4957 return 11; /* B_L1_L0_8x16 */
4958 else if( bits == 15 )
4959 return 22; /* B_8x8 */
4961 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4962 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4964 /* TODO SI/SP frames? */
4969 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4970 MpegEncContext * const s = &h->s;
4974 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4975 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4978 && h->slice_table[mba_xy] == h->slice_num
4979 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4980 mba_xy += s->mb_stride;
4982 mbb_xy = mb_xy - s->mb_stride;
4984 && h->slice_table[mbb_xy] == h->slice_num
4985 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4986 mbb_xy -= s->mb_stride;
4988 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4990 int mb_xy = mb_x + mb_y*s->mb_stride;
4992 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4995 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4997 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5000 if( h->slice_type == B_TYPE )
5002 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5005 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5008 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5011 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5012 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5013 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5015 if( mode >= pred_mode )
5021 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5022 const int mba_xy = h->left_mb_xy[0];
5023 const int mbb_xy = h->top_mb_xy;
5027 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5028 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5031 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5034 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5037 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5039 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5045 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5046 int cbp_b, cbp_a, ctx, cbp = 0;
5048 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5049 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5051 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5052 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5053 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5054 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5055 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5056 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5057 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5058 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5061 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5065 cbp_a = (h->left_cbp>>4)&0x03;
5066 cbp_b = (h-> top_cbp>>4)&0x03;
5069 if( cbp_a > 0 ) ctx++;
5070 if( cbp_b > 0 ) ctx += 2;
5071 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5075 if( cbp_a == 2 ) ctx++;
5076 if( cbp_b == 2 ) ctx += 2;
5077 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5079 static int decode_cabac_mb_dqp( H264Context *h) {
5083 if( h->last_qscale_diff != 0 )
5086 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5092 if(val > 102) //prevent infinite loop
5099 return -(val + 1)/2;
5101 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5102 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5104 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5106 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5110 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5112 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5113 return 0; /* B_Direct_8x8 */
5114 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5115 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5117 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5118 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5119 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5122 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5123 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5127 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5128 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5131 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5132 int refa = h->ref_cache[list][scan8[n] - 1];
5133 int refb = h->ref_cache[list][scan8[n] - 8];
5137 if( h->slice_type == B_TYPE) {
5138 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5140 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5149 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5155 if(ref >= 32 /*h->ref_list[list]*/){
5156 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5157 return 0; //FIXME we should return -1 and check the return everywhere
5163 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5164 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5165 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5166 int ctxbase = (l == 0) ? 40 : 47;
5171 else if( amvd > 32 )
5176 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5181 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5189 while( get_cabac_bypass( &h->cabac ) ) {
5193 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5198 if( get_cabac_bypass( &h->cabac ) )
5202 return get_cabac_bypass_sign( &h->cabac, -mvd );
5205 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5210 nza = h->left_cbp&0x100;
5211 nzb = h-> top_cbp&0x100;
5212 } else if( cat == 1 || cat == 2 ) {
5213 nza = h->non_zero_count_cache[scan8[idx] - 1];
5214 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5215 } else if( cat == 3 ) {
5216 nza = (h->left_cbp>>(6+idx))&0x01;
5217 nzb = (h-> top_cbp>>(6+idx))&0x01;
5220 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5221 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5230 return ctx + 4 * cat;
5233 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5234 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5235 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5236 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5237 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5240 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5241 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
5242 static const int significant_coeff_flag_offset[2][6] = {
5243 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5244 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5246 static const int last_coeff_flag_offset[2][6] = {
5247 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5248 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5250 static const int coeff_abs_level_m1_offset[6] = {
5251 227+0, 227+10, 227+20, 227+30, 227+39, 426
5253 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5254 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5255 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5256 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5257 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5258 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5259 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5260 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5261 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5267 int coeff_count = 0;
5270 int abslevelgt1 = 0;
5272 uint8_t *significant_coeff_ctx_base;
5273 uint8_t *last_coeff_ctx_base;
5274 uint8_t *abs_level_m1_ctx_base;
5277 #define CABAC_ON_STACK
5279 #ifdef CABAC_ON_STACK
5282 cc.range = h->cabac.range;
5283 cc.low = h->cabac.low;
5284 cc.bytestream= h->cabac.bytestream;
5286 #define CC &h->cabac
5290 /* cat: 0-> DC 16x16 n = 0
5291 * 1-> AC 16x16 n = luma4x4idx
5292 * 2-> Luma4x4 n = luma4x4idx
5293 * 3-> DC Chroma n = iCbCr
5294 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5295 * 5-> Luma8x8 n = 4 * luma8x8idx
5298 /* read coded block flag */
5300 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5301 if( cat == 1 || cat == 2 )
5302 h->non_zero_count_cache[scan8[n]] = 0;
5304 h->non_zero_count_cache[scan8[16+n]] = 0;
5305 #ifdef CABAC_ON_STACK
5306 h->cabac.range = cc.range ;
5307 h->cabac.low = cc.low ;
5308 h->cabac.bytestream= cc.bytestream;
5314 significant_coeff_ctx_base = h->cabac_state
5315 + significant_coeff_flag_offset[MB_FIELD][cat];
5316 last_coeff_ctx_base = h->cabac_state
5317 + last_coeff_flag_offset[MB_FIELD][cat];
5318 abs_level_m1_ctx_base = h->cabac_state
5319 + coeff_abs_level_m1_offset[cat];
5322 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5323 for(last= 0; last < coefs; last++) { \
5324 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5325 if( get_cabac( CC, sig_ctx )) { \
5326 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5327 index[coeff_count++] = last; \
5328 if( get_cabac( CC, last_ctx ) ) { \
5334 if( last == max_coeff -1 ) {\
5335 index[coeff_count++] = last;\
5337 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5338 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5339 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5341 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5343 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5345 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5348 assert(coeff_count > 0);
5351 h->cbp_table[mb_xy] |= 0x100;
5352 else if( cat == 1 || cat == 2 )
5353 h->non_zero_count_cache[scan8[n]] = coeff_count;
5355 h->cbp_table[mb_xy] |= 0x40 << n;
5357 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5360 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5363 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5364 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5365 int j= scantable[index[coeff_count]];
5367 if( get_cabac( CC, ctx ) == 0 ) {
5369 block[j] = get_cabac_bypass_sign( CC, -1);
5371 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
5377 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5378 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5382 if( coeff_abs >= 15 ) {
5384 while( get_cabac_bypass( CC ) ) {
5390 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5396 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5397 else block[j] = coeff_abs;
5399 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5400 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5406 #ifdef CABAC_ON_STACK
5407 h->cabac.range = cc.range ;
5408 h->cabac.low = cc.low ;
5409 h->cabac.bytestream= cc.bytestream;
/* Compute the mb_xy indices of the top and left neighbour macroblocks and
 * store them in h->top_mb_xy / h->left_mb_xy[0].  For MBAFF frames the
 * neighbours depend on the field/frame flag of the current MB pair versus
 * its neighbours; for field pictures the top neighbour is one extra
 * mb_stride up.  NOTE(review): this excerpt elides intervening source
 * lines (braces/conditions); code is kept verbatim. */
5414 static inline void compute_mb_neighbors(H264Context *h)
5416 MpegEncContext * const s = &h->s;
5417 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
/* Default (progressive frame) neighbours: directly above and to the left. */
5418 h->top_mb_xy = mb_xy - s->mb_stride;
5419 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF geometry: pair_xy is the top MB of the current vertical MB pair. */
5421 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5422 const int top_pair_xy = pair_xy - s->mb_stride;
/* frame_flag == 1 means the corresponding MB pair is coded frame (not field). */
5423 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5424 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5425 const int curr_mb_frame_flag = !MB_FIELD;
5426 const int bottom = (s->mb_y & 1);
5428 ? !curr_mb_frame_flag // bottom macroblock
5429 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
/* Move the top neighbour one more row up when pair geometry requires it. */
5431 h->top_mb_xy -= s->mb_stride;
/* Mismatched field/frame coding on the left: left neighbour is the pair top. */
5433 if (left_mb_frame_flag != curr_mb_frame_flag) {
5434 h->left_mb_xy[0] = pair_xy - 1;
5436 } else if (FIELD_PICTURE) {
/* Field picture: vertically adjacent MB is two frame rows away. */
5437 h->top_mb_xy -= s->mb_stride;
5443 * decodes a macroblock
5444 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): this excerpt elides many intervening source lines
 * (braces, declarations, else-branches); all visible code is kept
 * verbatim and only comments have been added. */
5446 static int decode_mb_cabac(H264Context *h) {
5447 MpegEncContext * const s = &h->s;
5448 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5449 int mb_type, partition_count, cbp = 0;
5450 int dct8x8_allowed= h->pps.transform_8x8_mode;
5452 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5454 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (P/B slices only; I/SI have no mb_skip_flag) --- */
5455 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5457 /* a skipped mb needs the aff flag from the following mb */
5458 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5459 predict_field_decoding_flag(h);
5460 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5461 skip = h->next_mb_skipped;
5463 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5464 /* read skip flags */
5466 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5467 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* Pre-read the bottom MB's skip flag: the pair's field flag depends on it. */
5468 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5469 if(h->next_mb_skipped)
5470 predict_field_decoding_flag(h);
5472 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB state used by deblocking/prediction. */
5477 h->cbp_table[mb_xy] = 0;
5478 h->chroma_pred_mode_table[mb_xy] = 0;
5479 h->last_qscale_diff = 0;
5486 if( (s->mb_y&1) == 0 )
5488 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5490 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5492 h->prev_mb_skipped = 0;
/* --- macroblock type --- */
5494 compute_mb_neighbors(h);
5495 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5496 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Map the raw CABAC mb_type index through the per-slice-type info tables. */
5500 if( h->slice_type == B_TYPE ) {
5502 partition_count= b_mb_type_info[mb_type].partition_count;
5503 mb_type= b_mb_type_info[mb_type].type;
5506 goto decode_intra_mb;
5508 } else if( h->slice_type == P_TYPE ) {
5510 partition_count= p_mb_type_info[mb_type].partition_count;
5511 mb_type= p_mb_type_info[mb_type].type;
5514 goto decode_intra_mb;
5517 assert(h->slice_type == I_TYPE);
5519 partition_count = 0;
5520 cbp= i_mb_type_info[mb_type].cbp;
5521 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5522 mb_type= i_mb_type_info[mb_type].type;
5525 mb_type |= MB_TYPE_INTERLACED;
5527 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw pixel data follows in the bytestream, bypassing CABAC --- */
5529 if(IS_INTRA_PCM(mb_type)) {
5533 // We assume these blocks are very rare so we do not optimize it.
5534 // FIXME The two following lines get the bitstream position in the cabac
5535 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5536 ptr= h->cabac.bytestream;
5537 if(h->cabac.low&0x1) ptr--;
5539 if(h->cabac.low&0x1FF) ptr--;
5542 // The pixels are stored in the same order as levels in h->mb array.
5543 for(y=0; y<16; y++){
5544 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5545 for(x=0; x<16; x++){
5546 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5547 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5551 const int index= 256 + 4*(y&3) + 32*(y>>2);
5553 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5554 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5558 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5560 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5561 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* Restart the CABAC engine after the raw PCM bytes. */
5565 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5567 // All blocks are present
5568 h->cbp_table[mb_xy] = 0x1ef;
5569 h->chroma_pred_mode_table[mb_xy] = 0;
5570 // In deblocking, the quantizer is 0
5571 s->current_picture.qscale_table[mb_xy]= 0;
5572 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5573 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5574 // All coeffs are present
5575 memset(h->non_zero_count[mb_xy], 16, 16);
5576 s->current_picture.mb_type[mb_xy]= mb_type;
/* Doubled ref counts — presumably the MBAFF field-reference trick; restored
 * by the >>= 1 at the end of this function. TODO confirm against context. */
5581 h->ref_count[0] <<= 1;
5582 h->ref_count[1] <<= 1;
5585 fill_caches(h, mb_type, 0);
/* --- intra prediction mode decoding --- */
5587 if( IS_INTRA( mb_type ) ) {
5589 if( IS_INTRA4x4( mb_type ) ) {
5590 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5591 mb_type |= MB_TYPE_8x8DCT;
/* 8x8 transform: one pred mode per 8x8 block, replicated into the 4x4 cache. */
5592 for( i = 0; i < 16; i+=4 ) {
5593 int pred = pred_intra_mode( h, i );
5594 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5595 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5598 for( i = 0; i < 16; i++ ) {
5599 int pred = pred_intra_mode( h, i );
5600 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5602 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5605 write_back_intra_pred_mode(h);
5606 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5608 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5609 if( h->intra16x16_pred_mode < 0 ) return -1;
5611 h->chroma_pred_mode_table[mb_xy] =
5612 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5614 pred_mode= check_intra_pred_mode( h, pred_mode );
5615 if( pred_mode < 0 ) return -1;
5616 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-macroblock partitions (P_8x8 / B_8x8) --- */
5617 } else if( partition_count == 4 ) {
5618 int i, j, sub_partition_count[4], list, ref[2][4];
5620 if( h->slice_type == B_TYPE ) {
5621 for( i = 0; i < 4; i++ ) {
5622 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5623 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5624 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Any direct 8x8 sub-block triggers direct-motion prediction for the MB. */
5626 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5627 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5628 pred_direct_motion(h, &mb_type);
5629 h->ref_cache[0][scan8[4]] =
5630 h->ref_cache[1][scan8[4]] =
5631 h->ref_cache[0][scan8[12]] =
5632 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5633 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5634 for( i = 0; i < 4; i++ )
5635 if( IS_DIRECT(h->sub_mb_type[i]) )
5636 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5640 for( i = 0; i < 4; i++ ) {
5641 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5642 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5643 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per 8x8 block, then motion vectors per sub-partition. */
5647 for( list = 0; list < h->list_count; list++ ) {
5648 for( i = 0; i < 4; i++ ) {
5649 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5650 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5651 if( h->ref_count[list] > 1 )
5652 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5658 h->ref_cache[list][ scan8[4*i]+1 ]=
5659 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5664 dct8x8_allowed = get_dct8x8_allowed(h);
5666 for(list=0; list<h->list_count; list++){
5668 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5669 if(IS_DIRECT(h->sub_mb_type[i])){
5670 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5674 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5675 const int sub_mb_type= h->sub_mb_type[i];
5676 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5677 for(j=0; j<sub_partition_count[i]; j++){
5680 const int index= 4*i + block_width*j;
5681 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5682 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
/* mvd is coded relative to the median predictor (mpx,mpy). */
5683 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5685 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5686 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5687 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the mv/mvd into every 4x4 cell the sub-partition covers. */
5689 if(IS_SUB_8X8(sub_mb_type)){
5691 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5693 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5696 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5698 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5699 }else if(IS_SUB_8X4(sub_mb_type)){
5700 mv_cache[ 1 ][0]= mx;
5701 mv_cache[ 1 ][1]= my;
5703 mvd_cache[ 1 ][0]= mx - mpx;
5704 mvd_cache[ 1 ][1]= my - mpy;
5705 }else if(IS_SUB_4X8(sub_mb_type)){
5706 mv_cache[ 8 ][0]= mx;
5707 mv_cache[ 8 ][1]= my;
5709 mvd_cache[ 8 ][0]= mx - mpx;
5710 mvd_cache[ 8 ][1]= my - mpy;
5712 mv_cache[ 0 ][0]= mx;
5713 mv_cache[ 0 ][1]= my;
5715 mvd_cache[ 0 ][0]= mx - mpx;
5716 mvd_cache[ 0 ][1]= my - mpy;
/* Unused direction: zero the 2x2 block of mv/mvd via 32-bit stores. */
5719 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5720 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5721 p[0] = p[1] = p[8] = p[9] = 0;
5722 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B-direct 16x16 --- */
5726 } else if( IS_DIRECT(mb_type) ) {
5727 pred_direct_motion(h, &mb_type);
5728 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5729 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5730 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- whole-MB inter partitions: 16x16 / 16x8 / 8x16 --- */
5732 int list, mx, my, i, mpx, mpy;
5733 if(IS_16X16(mb_type)){
5734 for(list=0; list<h->list_count; list++){
5735 if(IS_DIR(mb_type, 0, list)){
5736 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5737 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5739 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5741 for(list=0; list<h->list_count; list++){
5742 if(IS_DIR(mb_type, 0, list)){
5743 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5745 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5746 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5747 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5749 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5750 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5752 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5755 else if(IS_16X8(mb_type)){
5756 for(list=0; list<h->list_count; list++){
5758 if(IS_DIR(mb_type, i, list)){
5759 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5760 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5762 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5765 for(list=0; list<h->list_count; list++){
5767 if(IS_DIR(mb_type, i, list)){
5768 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5769 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5770 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5771 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5773 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5774 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5776 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5777 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5782 assert(IS_8X16(mb_type));
5783 for(list=0; list<h->list_count; list++){
5785 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5786 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5787 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5789 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5792 for(list=0; list<h->list_count; list++){
5794 if(IS_DIR(mb_type, i, list)){
5795 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5796 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5797 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5799 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5800 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5801 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5803 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5804 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5811 if( IS_INTER( mb_type ) ) {
5812 h->chroma_pred_mode_table[mb_xy] = 0;
5813 write_back_motion( h, mb_type );
/* --- coded block pattern --- */
5816 if( !IS_INTRA16x16( mb_type ) ) {
5817 cbp = decode_cabac_mb_cbp_luma( h );
5818 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5821 h->cbp_table[mb_xy] = h->cbp = cbp;
/* transform_size_8x8_flag is coded only when some luma block is coded. */
5823 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5824 if( decode_cabac_mb_transform_size( h ) )
5825 mb_type |= MB_TYPE_8x8DCT;
5827 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
5829 if( cbp || IS_INTRA16x16( mb_type ) ) {
5830 const uint8_t *scan, *scan8x8, *dc_scan;
5831 const uint32_t *qmul;
/* Field MBs use the field scan orders; *_q0 variants for qscale == 0. */
5834 if(IS_INTERLACED(mb_type)){
5835 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5836 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5837 dc_scan= luma_dc_field_scan;
5839 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5840 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5841 dc_scan= luma_dc_zigzag_scan;
5844 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5845 if( dqp == INT_MIN ){
5846 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into the legal 0..51 range (mod-52 arithmetic). */
5850 if(((unsigned)s->qscale) > 51){
5851 if(s->qscale<0) s->qscale+= 52;
5852 else s->qscale-= 52;
5854 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5855 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5857 if( IS_INTRA16x16( mb_type ) ) {
5859 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
/* cat 0: luma DC (16 coeffs, no dequant here — qmul is NULL). */
5860 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5863 qmul = h->dequant4_coeff[0][s->qscale];
5864 for( i = 0; i < 16; i++ ) {
5865 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
/* cat 1: luma AC, 15 coeffs, scan skips the DC position. */
5866 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5869 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5873 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5874 if( cbp & (1<<i8x8) ) {
5875 if( IS_8x8DCT(mb_type) ) {
/* cat 5: one 8x8 luma block, 64 coeffs. */
5876 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5877 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5879 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5880 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5881 const int index = 4*i8x8 + i4x4;
5882 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5884 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5885 //STOP_TIMER("decode_residual")
/* Uncoded 8x8 block: clear its 2x2 nnz cache cells. */
5889 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5890 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* cat 3: chroma DC (4 coeffs each for Cb and Cr). */
5897 for( c = 0; c < 2; c++ ) {
5898 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5899 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* cat 4: chroma AC, per-plane chroma QP. */
5905 for( c = 0; c < 2; c++ ) {
5906 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5907 for( i = 0; i < 4; i++ ) {
5908 const int index = 16 + 4 * c + i;
5909 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5910 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5914 uint8_t * const nnz= &h->non_zero_count_cache[0];
5915 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5916 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residual at all: clear the whole nnz cache for this MB. */
5919 uint8_t * const nnz= &h->non_zero_count_cache[0];
5920 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5921 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5922 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5923 h->last_qscale_diff = 0;
5926 s->current_picture.qscale_table[mb_xy]= s->qscale;
5927 write_back_non_zero_count(h);
/* Undo the ref_count doubling performed near the top of the function. */
5930 h->ref_count[0] >>= 1;
5931 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels tall).
 * bS < 4: delegate to the dsp normal filter with per-4-pixel tc0 clipping.
 * bS == 4 (intra edge): apply the strong filter inline; p/q taps are read
 * left (negative offsets) and right of the edge.  NOTE(review): some lines
 * (declarations, braces) are elided in this excerpt; code kept verbatim. */
5938 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5940 const int index_a = qp + h->slice_alpha_c0_offset;
5941 const int alpha = (alpha_table+52)[index_a];
5942 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 marks "no filtering" for that 4-pixel group. */
5947 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5948 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5950 /* 16px edge length, because bS=4 is triggered by being at
5951 * the edge of an intra MB, so all 4 bS are the same */
5952 for( d = 0; d < 16; d++ ) {
5953 const int p0 = pix[-1];
5954 const int p1 = pix[-2];
5955 const int p2 = pix[-3];
5957 const int q0 = pix[0];
5958 const int q1 = pix[1];
5959 const int q2 = pix[2];
/* Edge-activity thresholds from the spec: filter only across true
 * blocking artifacts, not real image edges. */
5961 if( FFABS( p0 - q0 ) < alpha &&
5962 FFABS( p1 - p0 ) < beta &&
5963 FFABS( q1 - q0 ) < beta ) {
5965 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5966 if( FFABS( p2 - p0 ) < beta)
5968 const int p3 = pix[-4];
/* Strong 3-tap smoothing of p0..p2. */
5970 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5971 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5972 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5975 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5977 if( FFABS( q2 - q0 ) < beta)
5979 const int q3 = pix[3];
/* Strong 3-tap smoothing of q0..q2 (mirror of the p side). */
5981 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5982 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5983 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5986 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weaker filtering when the edge gradient is too large. */
5990 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5991 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5993 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge.  Both branches are delegated to the
 * dsp layer: normal filter with tc clipping when bS < 4, intra (strong)
 * filter otherwise.  NOTE(review): surrounding braces/declarations are
 * elided in this excerpt; code kept verbatim. */
5999 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6001 const int index_a = qp + h->slice_alpha_c0_offset;
6002 const int alpha = (alpha_table+52)[index_a];
6003 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma convention: tc is tc0+1, and 0 (not -1) marks "no filtering". */
6008 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6009 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6011 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the first vertical luma edge of an MBAFF macroblock pair.
 * The edge has 8 bS values and two QPs (one per neighbouring field MB),
 * so each of the 16 rows picks its own bS/qp and is filtered scalar.
 * NOTE(review): several lines are elided in this excerpt; code verbatim. */
6015 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6017 for( i = 0; i < 16; i++, pix += stride) {
6023 int bS_index = (i >> 1);
6026 bS_index |= (i & 1);
6029 if( bS[bS_index] == 0 ) {
/* Row -> QP selection differs for field vs frame coded pairs. */
6033 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6034 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6035 alpha = (alpha_table+52)[index_a];
6036 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter: clip the delta to +/-tc derived from tc0. */
6038 if( bS[bS_index] < 4 ) {
6039 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6040 const int p0 = pix[-1];
6041 const int p1 = pix[-2];
6042 const int p2 = pix[-3];
6043 const int q0 = pix[0];
6044 const int q1 = pix[1];
6045 const int q2 = pix[2];
6047 if( FFABS( p0 - q0 ) < alpha &&
6048 FFABS( p1 - p0 ) < beta &&
6049 FFABS( q1 - q0 ) < beta ) {
6053 if( FFABS( p2 - p0 ) < beta ) {
6054 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6057 if( FFABS( q2 - q0 ) < beta ) {
6058 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6062 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6063 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6064 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6065 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong (bS == 4) filter, same formulas as filter_mb_edgev. */
6068 const int p0 = pix[-1];
6069 const int p1 = pix[-2];
6070 const int p2 = pix[-3];
6072 const int q0 = pix[0];
6073 const int q1 = pix[1];
6074 const int q2 = pix[2];
6076 if( FFABS( p0 - q0 ) < alpha &&
6077 FFABS( p1 - p0 ) < beta &&
6078 FFABS( q1 - q0 ) < beta ) {
6080 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6081 if( FFABS( p2 - p0 ) < beta)
6083 const int p3 = pix[-4];
6085 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6086 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6087 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6090 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6092 if( FFABS( q2 - q0 ) < beta)
6094 const int q3 = pix[3];
6096 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6097 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6098 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6101 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6105 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6106 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6108 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock the first vertical chroma edge of an MBAFF macroblock pair:
 * 8 rows, per-row bS/qp selection, scalar filtering (chroma only touches
 * p0/q0).  NOTE(review): some lines are elided in this excerpt; code
 * kept verbatim. */
6113 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6115 for( i = 0; i < 8; i++, pix += stride) {
6123 if( bS[bS_index] == 0 ) {
6127 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6128 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6129 alpha = (alpha_table+52)[index_a];
6130 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal chroma filter: tc = tc0 + 1, delta clipped to +/-tc. */
6132 if( bS[bS_index] < 4 ) {
6133 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6134 const int p0 = pix[-1];
6135 const int p1 = pix[-2];
6136 const int q0 = pix[0];
6137 const int q1 = pix[1];
6139 if( FFABS( p0 - q0 ) < alpha &&
6140 FFABS( p1 - p0 ) < beta &&
6141 FFABS( q1 - q0 ) < beta ) {
6142 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6144 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6145 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6146 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong (bS == 4) chroma filter: simple 2-tap averaging of p0/q0. */
6149 const int p0 = pix[-1];
6150 const int p1 = pix[-2];
6151 const int q0 = pix[0];
6152 const int q1 = pix[1];
6154 if( FFABS( p0 - q0 ) < alpha &&
6155 FFABS( p1 - p0 ) < beta &&
6156 FFABS( q1 - q0 ) < beta ) {
6158 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6159 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6160 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge: same logic as filter_mb_edgev but
 * taps step by pix_next (== stride) instead of +/-1.  bS < 4 uses the
 * dsp vertical-filter kernel; bS == 4 applies the strong filter inline.
 * NOTE(review): some lines are elided in this excerpt; code verbatim. */
6166 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6168 const int index_a = qp + h->slice_alpha_c0_offset;
6169 const int alpha = (alpha_table+52)[index_a];
6170 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6171 const int pix_next = stride;
/* tc[i] = -1 disables filtering for that 4-pixel group. */
6176 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6177 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6179 /* 16px edge length, see filter_mb_edgev */
6180 for( d = 0; d < 16; d++ ) {
6181 const int p0 = pix[-1*pix_next];
6182 const int p1 = pix[-2*pix_next];
6183 const int p2 = pix[-3*pix_next];
6184 const int q0 = pix[0];
6185 const int q1 = pix[1*pix_next];
6186 const int q2 = pix[2*pix_next];
6188 if( FFABS( p0 - q0 ) < alpha &&
6189 FFABS( p1 - p0 ) < beta &&
6190 FFABS( q1 - q0 ) < beta ) {
6192 const int p3 = pix[-4*pix_next];
6193 const int q3 = pix[ 3*pix_next];
6195 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6196 if( FFABS( p2 - p0 ) < beta) {
/* Strong smoothing of the p side (rows above the edge). */
6198 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6199 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6200 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6203 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6205 if( FFABS( q2 - q0 ) < beta) {
/* Strong smoothing of the q side (rows below the edge). */
6207 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6208 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6209 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6212 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weaker fallback when the gradient across the edge is large. */
6216 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6217 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6219 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge; fully delegated to the dsp layer
 * (normal filter with tc for bS < 4, intra/strong filter otherwise).
 * NOTE(review): surrounding braces/declarations elided in this excerpt. */
6226 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6228 const int index_a = qp + h->slice_alpha_c0_offset;
6229 const int alpha = (alpha_table+52)[index_a];
6230 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma convention: tc = tc0 + 1; 0 disables filtering for the group. */
6235 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6236 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6238 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock.  Falls back to the full
 * filter_mb() for picture-border MBs, when no dsp strength helper is
 * available, when Cb/Cr use different QP offsets, or at slice borders
 * with deblocking_filter == 2.  Otherwise computes bS per edge (all-4
 * for intra MBs) and applies the edge filters.  NOTE(review): the tail
 * of this function is elided in this excerpt; visible code is verbatim. */
6242 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6243 MpegEncContext * const s = &h->s;
6245 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6247 mb_xy = mb_x + mb_y*s->mb_stride;
/* Cases the fast path cannot handle -> full filter. */
6249 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6250 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6251 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6252 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6255 assert(!FRAME_MBAFF);
/* Average QP with each neighbour (spec: edge QP is the mean of the two MBs). */
6257 mb_type = s->current_picture.mb_type[mb_xy];
6258 qp = s->current_picture.qscale_table[mb_xy];
6259 qp0 = s->current_picture.qscale_table[mb_xy-1];
6260 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6261 qpc = get_chroma_qp( h, 0, qp );
6262 qpc0 = get_chroma_qp( h, 0, qp0 );
6263 qpc1 = get_chroma_qp( h, 0, qp1 );
6264 qp0 = (qp + qp0 + 1) >> 1;
6265 qp1 = (qp + qp1 + 1) >> 1;
6266 qpc0 = (qpc + qpc0 + 1) >> 1;
6267 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this QP the filter is a no-op everywhere -> skip the whole MB. */
6268 qp_thresh = 15 - h->slice_alpha_c0_offset;
6269 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6270 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: bS is 4 on MB borders, 3 on internal edges; with the 8x8
 * transform only every other internal edge exists. */
6273 if( IS_INTRA(mb_type) ) {
6274 int16_t bS4[4] = {4,4,4,4};
6275 int16_t bS3[4] = {3,3,3,3};
6276 if( IS_8x8DCT(mb_type) ) {
6277 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6278 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6279 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6280 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6282 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6283 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6284 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6285 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6286 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6287 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6288 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6289 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: only the MB border and the middle edge exist (8x8 plane). */
6291 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6292 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6293 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6294 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6295 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6296 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6297 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6298 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: bS computed per edge by the dsp strength helper, stored as
 * 4 x 16-bit values per edge so edges can be tested as one uint64_t. */
6301 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6302 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6304 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6306 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6308 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6309 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6310 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6311 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6313 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6314 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6315 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6316 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Intra neighbours force bS = 4 on the shared border edge. */
6318 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6319 bSv[0][0] = 0x0004000400040004ULL;
6320 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6321 bSv[1][0] = 0x0004000400040004ULL;
/* dir 0 = vertical edges, dir 1 = horizontal; edge 0 uses the averaged
 * neighbour QP (qp0/qp1/qpc0/qpc1), inner edges use the MB's own QP. */
6323 #define FILTER(hv,dir,edge)\
6324 if(bSv[dir][edge]) {\
6325 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6327 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6328 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6334 } else if( IS_8x8DCT(mb_type) ) {
/* Deblocking (loop) filter for one macroblock -- full/slow path.
 * Derives boundary strengths (bS) for the vertical and horizontal edges of
 * macroblock (mb_x, mb_y) and applies the edge filters to the luma plane
 * img_y and both chroma planes img_cb/img_cr.  linesize/uvlinesize are the
 * luma/chroma row strides.
 * NOTE(review): the embedded numbering in this listing jumps, so several
 * statements are elided here; comments below describe only visible code. */
6353 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6354 MpegEncContext * const s = &h->s;
6355 const int mb_xy= mb_x + mb_y*s->mb_stride;
6356 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a tighter vertical-MV threshold for the bS decision
 * (used below in the FFABS(mv_y diff) >= mvy_limit comparisons). */
6357 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6358 int first_vertical_edge_done = 0;
6360 /* FIXME: A given frame may occupy more than one position in
6361 * the reference list. So ref2frm should be populated with
6362 * frame numbers, not indices. */
6363 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6364 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6366 //for sufficiently low qp, filtering wouldn't do anything
6367 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6369 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6370 int qp = s->current_picture.qscale_table[mb_xy];
/* Early-out continuation: skip filtering when the averaged QP with each
 * available neighbour stays at/below qp_thresh (head of the condition is
 * elided in this listing). */
6372 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6373 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case for the first vertical edge: conditions require the
 * left MB pair to exist and have a different frame/field type. */
6379 // left mb is in picture
6380 && h->slice_table[mb_xy-1] != 255
6381 // and current and left pair do not have the same interlaced type
6382 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6383 // and left mb is in the same slice if deblocking_filter == 2
6384 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6385 /* First vertical edge is different in MBAFF frames
6386 * There are 8 different bS to compute and 2 different Qp
6388 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6389 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6394 int mb_qp, mbn0_qp, mbn1_qp;
6396 first_vertical_edge_done = 1;
/* Intra MB: maximum strength 4 on all 8 sub-edges. */
6398 if( IS_INTRA(mb_type) )
6399 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6401 for( i = 0; i < 8; i++ ) {
6402 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6404 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6406 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6407 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6408 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Two QP pairs: current MB averaged with each MB of the left pair,
 * for luma (qp[]), Cb (bqp[]) and Cr (rqp[]). */
6415 mb_qp = s->current_picture.qscale_table[mb_xy];
6416 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6417 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6418 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6419 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6420 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6421 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6422 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6423 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6424 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6425 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6426 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6427 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6430 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6431 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6432 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6433 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6434 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6436 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6437 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the MB on the other side of edge 0 (left or top neighbour). */
6440 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6441 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* Skip edge 0 if the neighbour belongs to no slice (slice_table == 255). */
6442 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* A skipped 16x16 MB has identical data at all internal edges: only the
 * outer edge needs filtering. */
6444 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6445 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6446 // how often to recheck mv-based bS when iterating between edges
6447 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6448 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6449 // how often to recheck mv-based bS when iterating along each edge
6450 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* Edge 0 of dir 0 was already handled by the MBAFF special case above. */
6452 if (first_vertical_edge_done) {
6454 first_vertical_edge_done = 0;
/* deblocking_filter == 2: do not filter across slice boundaries. */
6457 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6460 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6461 && !IS_INTERLACED(mb_type)
6462 && IS_INTERLACED(mbm_type)
6464 // This is a special case in the norm where the filtering must
6465 // be done twice (one each of the field) even if we are in a
6466 // frame macroblock.
6468 static const int nnz_idx[4] = {4,5,6,3};
6469 unsigned int tmp_linesize = 2 * linesize;
6470 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6471 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* j iterates over the two fields of the interlaced top neighbour pair. */
6476 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6477 if( IS_INTRA(mb_type) ||
6478 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6479 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6481 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6482 for( i = 0; i < 4; i++ ) {
6483 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6484 mbn_nnz[nnz_idx[i]] != 0 )
6490 // Do not use s->qscale as luma quantizer because it has not the same
6491 // value in IPCM macroblocks.
6492 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6493 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6494 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6495 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6496 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6497 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6498 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6499 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Generic per-edge loop: compute bS for each of the (up to 4) edges in
 * this direction, then filter. */
6506 for( edge = start; edge < edges; edge++ ) {
6507 /* mbn_xy: neighbor macroblock */
6508 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6509 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: odd internal edges do not exist, skip them. */
6513 if( (edge&1) && IS_8x8DCT(mb_type) )
6516 if( IS_INTRA(mb_type) ||
6517 IS_INTRA(mbn_type) ) {
6520 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6521 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6530 bS[0] = bS[1] = bS[2] = bS[3] = value;
6535 if( edge & mask_edge ) {
6536 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Frame/field mix in MBAFF always yields bS 1 (no mv comparison). */
6539 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6540 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole edge shares one partition pair: one ref/mv comparison suffices. */
6543 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6544 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6545 int bn_idx= b_idx - (dir ? 8:1);
6547 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6548 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6549 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6550 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6552 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Otherwise compute bS per 4x4 block along the edge: 2 if either side
 * has non-zero coefficients, else 1 on ref/mv mismatch. */
6558 for( i = 0; i < 4; i++ ) {
6559 int x = dir == 0 ? edge : i;
6560 int y = dir == 0 ? i : edge;
6561 int b_idx= 8 + 4 + x + 8*y;
6562 int bn_idx= b_idx - (dir ? 8:1);
6564 if( h->non_zero_count_cache[b_idx] != 0 ||
6565 h->non_zero_count_cache[bn_idx] != 0 ) {
6571 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6572 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6573 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6574 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* All four strengths zero: nothing to filter on this edge. */
6582 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6587 // Do not use s->qscale as luma quantizer because it has not the same
6588 // value in IPCM macroblocks.
6589 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6590 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6591 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6592 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* dir == 0: vertical edges; chroma only on even edges (half resolution). */
6594 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6595 if( (edge&1) == 0 ) {
6596 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6597 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6598 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6599 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* dir == 1: horizontal edges. */
6602 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6603 if( (edge&1) == 0 ) {
6604 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6605 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6606 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6607 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of one slice.
 * Three decode loops are visible here: CABAC (pps.cabac), CAVLC, and a
 * data-partitioned path using decode_mb().  Each loop decodes MBs, advances
 * (mb_x, mb_y), reports decoded regions to the error concealment via
 * ff_er_add_slice(), and draws completed rows with ff_draw_horiz_band().
 * Returns 0 on a clean slice end, -1 on error (per visible paths).
 * NOTE(review): the embedded numbering jumps, so some statements are elided
 * in this listing; comments describe only visible code. */
6614 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6615 MpegEncContext * const s = &h->s;
/* In partitioned frames only AC errors/ends are reported to the ER code. */
6616 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6620 if( h->pps.cabac ) {
/* CABAC payload starts byte-aligned after the slice header. */
6624 align_get_bits( &s->gb );
6627 ff_init_cabac_states( &h->cabac);
6628 ff_init_cabac_decoder( &h->cabac,
6629 s->gb.buffer + get_bits_count(&s->gb)/8,
6630 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6631 /* calculate pre-state */
/* Initialize the 460 CABAC context states from the init tables and the
 * slice QP (H.264 section 9.3.1.1). */
6632 for( i= 0; i < 460; i++ ) {
6634 if( h->slice_type == I_TYPE )
6635 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6637 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6640 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6642 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop. */
6647 int ret = decode_mb_cabac(h);
6649 //STOP_TIMER("decode_mb_cabac")
6651 if(ret>=0) hl_decode_mb(h);
/* MBAFF: the MB pair is decoded together, bottom MB follows immediately. */
6653 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6656 if(ret>=0) ret = decode_mb_cabac(h);
6658 if(ret>=0) hl_decode_mb(h);
6661 eos = get_cabac_terminate( &h->cabac );
/* Abort on MB decode error or CABAC bytestream overread (> 2 bytes). */
6663 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6664 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6665 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6669 if( ++s->mb_x >= s->mb_width ) {
6671 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6673 if(FIELD_OR_MBAFF_PICTURE) {
6678 if( eos || s->mb_y >= s->mb_height ) {
6679 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6680 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop. */
6687 int ret = decode_mb_cavlc(h);
6689 if(ret>=0) hl_decode_mb(h);
6691 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6693 ret = decode_mb_cavlc(h);
6695 if(ret>=0) hl_decode_mb(h);
6700 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6701 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6706 if(++s->mb_x >= s->mb_width){
6708 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6710 if(FIELD_OR_MBAFF_PICTURE) {
6713 if(s->mb_y >= s->mb_height){
6714 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly consumed bitstream => clean end; otherwise report an error end. */
6716 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6717 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6721 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Pending skip_run MBs may remain even at the end of the bit data. */
6728 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6729 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6730 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6731 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6735 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Data-partitioned macroblock loop.
 * NOTE(review): lines 6760/6772/6773 pass s->gb to get_bits_count() without
 * '&', unlike the '&s->gb' calls above -- confirm the intended GetBitContext
 * calling convention for this (apparently stale) path. */
6744 for(;s->mb_y < s->mb_height; s->mb_y++){
6745 for(;s->mb_x < s->mb_width; s->mb_x++){
6746 int ret= decode_mb(h);
6751 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6752 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6757 if(++s->mb_x >= s->mb_width){
6759 if(++s->mb_y >= s->mb_height){
6760 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6761 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6765 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Fixed garbled tokens: was "s->?gb" / "s->gb?.size_in_bits" (syntax
 * error); restored to match the sibling tests on lines 6760/6773. */
6772 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6773 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6774 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6778 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6785 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6788 return -1; //not reached
/* Parse an "unregistered user data" SEI payload of 'size' bytes.
 * Used to detect the x264 encoder and record its build number in
 * h->x264_build (enables decoder bug workarounds elsewhere).
 * The first 16 bytes read are the payload UUID; the version string is
 * scanned starting at user_data+16. */
6791 static int decode_unregistered_user_data(H264Context *h, int size){
6792 MpegEncContext * const s = &h->s;
6793 uint8_t user_data[16+256];
/* Read at most sizeof(user_data)-1 bytes, leaving room for a terminator. */
6799 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6800 user_data[i]= get_bits(&s->gb, 8);
6804 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6805 if(e==1 && build>=0)
6806 h->x264_build= build;
6808 if(s->avctx->debug & FF_DEBUG_BUGS)
6809 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond the local buffer capacity. */
6812 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: iterate over SEI messages until fewer than 16 bits
 * remain.  Payload type and size each use the 255-escaped coding of
 * H.264 section 7.3.2.3.1 (accumulate 255 per 0xFF byte, then add the
 * final byte). */
6817 static int decode_sei(H264Context *h){
6818 MpegEncContext * const s = &h->s;
6820 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* Accumulate payloadType. */
6825 type+= show_bits(&s->gb, 8);
6826 }while(get_bits(&s->gb, 8) == 255);
/* Accumulate payloadSize (in bytes). */
6830 size+= show_bits(&s->gb, 8);
6831 }while(get_bits(&s->gb, 8) == 255);
/* Only the unregistered-user-data payload is handled; the rest is skipped. */
6835 if(decode_unregistered_user_data(h, size) < 0)
6839 skip_bits(&s->gb, 8*size);
6842 //FIXME check bits here
6843 align_get_bits(&s->gb);
/* Parse hypothetical reference decoder (HRD) parameters (H.264 Annex E).
 * All fields are read and discarded except cpb_count, which only bounds
 * the per-CPB loop; nothing is stored in the SPS here. */
6849 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6850 MpegEncContext * const s = &h->s;
6852 cpb_count = get_ue_golomb(&s->gb) + 1;
6853 get_bits(&s->gb, 4); /* bit_rate_scale */
6854 get_bits(&s->gb, 4); /* cpb_size_scale */
6855 for(i=0; i<cpb_count; i++){
6856 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6857 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6858 get_bits1(&s->gb); /* cbr_flag */
6860 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6861 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6862 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6863 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the VUI (Video Usability Information) appendix of an SPS
 * (H.264 Annex E).  Stores sample aspect ratio, timing info and the
 * bitstream-restriction num_reorder_frames in *sps; most other fields
 * are read and discarded. */
6866 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6867 MpegEncContext * const s = &h->s;
6868 int aspect_ratio_info_present_flag;
6869 unsigned int aspect_ratio_idc;
6870 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6872 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6874 if( aspect_ratio_info_present_flag ) {
6875 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den; idc < 14 indexes a preset table. */
6876 if( aspect_ratio_idc == EXTENDED_SAR ) {
6877 sps->sar.num= get_bits(&s->gb, 16);
6878 sps->sar.den= get_bits(&s->gb, 16);
6879 }else if(aspect_ratio_idc < 14){
6880 sps->sar= pixel_aspect[aspect_ratio_idc];
6882 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6889 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6891 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6892 get_bits1(&s->gb); /* overscan_appropriate_flag */
6895 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6896 get_bits(&s->gb, 3); /* video_format */
6897 get_bits1(&s->gb); /* video_full_range_flag */
6898 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6899 get_bits(&s->gb, 8); /* colour_primaries */
6900 get_bits(&s->gb, 8); /* transfer_characteristics */
6901 get_bits(&s->gb, 8); /* matrix_coefficients */
6905 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6906 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6907 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6910 sps->timing_info_present_flag = get_bits1(&s->gb);
6911 if(sps->timing_info_present_flag){
6912 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6913 sps->time_scale = get_bits_long(&s->gb, 32);
6914 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters (NAL and/or VCL variant) are parsed but not stored. */
6917 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6918 if(nal_hrd_parameters_present_flag)
6919 decode_hrd_parameters(h, sps);
6920 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6921 if(vcl_hrd_parameters_present_flag)
6922 decode_hrd_parameters(h, sps);
6923 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6924 get_bits1(&s->gb); /* low_delay_hrd_flag */
6925 get_bits1(&s->gb); /* pic_struct_present_flag */
6927 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6928 if(sps->bitstream_restriction_flag){
6929 unsigned int num_reorder_frames;
6930 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6931 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6932 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6933 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6934 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6935 num_reorder_frames= get_ue_golomb(&s->gb);
6936 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Sanity bound: the DPB cannot hold more than 16 reorder frames. */
6938 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6939 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6943 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list of 'size' entries (16 for 4x4, 64 for 8x8) into
 * 'factors', in zigzag order.  Entries are delta-coded (se_golomb) from the
 * previous value; a leading delta yielding 0 selects the JVT default list,
 * absence of the list selects the predicted fallback, and a mid-list 0
 * repeats the previous value (per H.264 section 7.3.2.1.1.1). */
6949 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6950 const uint8_t *jvt_list, const uint8_t *fallback_list){
6951 MpegEncContext * const s = &h->s;
6952 int i, last = 8, next = 8;
6953 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6954 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6955 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6957 for(i=0;i<size;i++){
6959 next = (last + get_se_golomb(&s->gb)) & 0xff;
6960 if(!i && !next){ /* matrix not written, we use the preset one */
6961 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next == 0 repeats the last value for the remaining entries. */
6964 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS:
 * six 4x4 lists (Intra/Inter x Y/Cr/Cb) and, when 8x8 transform is
 * possible, two 8x8 lists.  Fallback rules: a PPS falls back to the SPS
 * matrices when the SPS carried any (fallback_sps); otherwise the flat
 * JVT defaults are used.  Within a set, each chroma list falls back to
 * the previously decoded list of the same prediction class. */
6968 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6969 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6970 MpegEncContext * const s = &h->s;
6971 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6972 const uint8_t *fallback[4] = {
6973 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6974 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6975 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6976 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
6978 if(get_bits1(&s->gb)){
6979 sps->scaling_matrix_present |= is_sps;
6980 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6981 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6982 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6983 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6984 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6985 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6986 if(is_sps || pps->transform_8x8_mode){
6987 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6988 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* No matrices in the bitstream: copy the SPS matrices wholesale. */
6990 } else if(fallback_sps) {
6991 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
6992 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Bounds-checks 'id' against 'max' and lazily zero-allocates slot vec[id]
 * of 'size' bytes; 'name' ("sps"/"pps") is used only for error messages.
 * NOTE(review): the return type and return statements are elided in this
 * listing; presumably it returns vec[id] or NULL on failure -- confirm. */
6997 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7000 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7001 const size_t size, const char *name)
7004 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7009 vec[id] = av_mallocz(size);
7011 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a Sequence Parameter Set NAL (H.264 section 7.3.2.1) into
 * h->sps_buffers[sps_id], validating ranges as it goes (poc_cycle_length,
 * ref_frame_count, picture dimensions).
 * NOTE(review): the embedded numbering jumps, so some statements (error
 * returns, else-branches) are elided in this listing. */
7016 static inline int decode_seq_parameter_set(H264Context *h){
7017 MpegEncContext * const s = &h->s;
7018 int profile_idc, level_idc;
7019 unsigned int sps_id, tmp, mb_width, mb_height;
7023 profile_idc= get_bits(&s->gb, 8);
7024 get_bits1(&s->gb); //constraint_set0_flag
7025 get_bits1(&s->gb); //constraint_set1_flag
7026 get_bits1(&s->gb); //constraint_set2_flag
7027 get_bits1(&s->gb); //constraint_set3_flag
7028 get_bits(&s->gb, 4); // reserved
7029 level_idc= get_bits(&s->gb, 8);
7030 sps_id= get_ue_golomb(&s->gb);
7032 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7036 sps->profile_idc= profile_idc;
7037 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth, transform bypass and
 * optional scaling matrices. */
7039 if(sps->profile_idc >= 100){ //high profile
7040 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7041 get_bits1(&s->gb); //residual_color_transform_flag
7042 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7043 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7044 sps->transform_bypass = get_bits1(&s->gb);
7045 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7047 sps->scaling_matrix_present = 0;
7049 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7050 sps->poc_type= get_ue_golomb(&s->gb);
7052 if(sps->poc_type == 0){ //FIXME #define
7053 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7054 } else if(sps->poc_type == 1){//FIXME #define
7055 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7056 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7057 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7058 tmp= get_ue_golomb(&s->gb);
/* Reject cycle lengths that would overrun offset_for_ref_frame[]. */
7060 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7061 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7064 sps->poc_cycle_length= tmp;
7066 for(i=0; i<sps->poc_cycle_length; i++)
7067 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7068 }else if(sps->poc_type != 2){
7069 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7073 tmp= get_ue_golomb(&s->gb);
7074 if(tmp > MAX_PICTURE_COUNT-2){
7075 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7077 sps->ref_frame_count= tmp;
7078 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7079 mb_width= get_ue_golomb(&s->gb) + 1;
7080 mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb dimensions against int overflow before validating them. */
7081 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7082 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7083 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7086 sps->mb_width = mb_width;
7087 sps->mb_height= mb_height;
7089 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7090 if(!sps->frame_mbs_only_flag)
7091 sps->mb_aff= get_bits1(&s->gb);
7095 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7097 #ifndef ALLOW_INTERLACE
7099 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7101 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7102 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7104 sps->crop= get_bits1(&s->gb);
7106 sps->crop_left = get_ue_golomb(&s->gb);
7107 sps->crop_right = get_ue_golomb(&s->gb);
7108 sps->crop_top = get_ue_golomb(&s->gb);
7109 sps->crop_bottom= get_ue_golomb(&s->gb);
7110 if(sps->crop_left || sps->crop_top){
7111 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7117 sps->crop_bottom= 0;
7120 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7121 if( sps->vui_parameters_present_flag )
7122 decode_vui_parameters(h, sps);
7124 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7125 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7126 sps_id, sps->profile_idc, sps->level_idc,
7128 sps->ref_frame_count,
7129 sps->mb_width, sps->mb_height,
7130 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7131 sps->direct_8x8_inference_flag ? "8B8" : "",
7132 sps->crop_left, sps->crop_right,
7133 sps->crop_top, sps->crop_bottom,
7134 sps->vui_parameters_present_flag ? "VUI" : ""
/* Precompute the luma-QP -> chroma-QP mapping for chroma list t (0=Cb,
 * 1=Cr) with chroma_qp_index_offset 'index': each table entry is
 * chroma_qp[clip(qp + index, 0, 51)].
 * NOTE(review): the loop writes entries 0..254 only; entry 255 (a qp of -1
 * after the &0xff wrap) is never initialized -- confirm the intended table
 * size and negative-qp handling before relying on that entry. */
7141 build_qp_table(PPS *pps, int t, int index)
7144 for(i = 0; i < 255; i++)
7145 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a Picture Parameter Set NAL (H.264 section 7.3.2.2) of
 * 'bit_length' bits into h->pps_buffers[pps_id].  FMO (slice groups > 1)
 * is not supported: its syntax is only shown as commented-out spec tables
 * below.  Also precomputes the chroma QP tables for both offsets. */
7148 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7149 MpegEncContext * const s = &h->s;
7150 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7153 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* Referenced SPS must exist. */
7157 tmp= get_ue_golomb(&s->gb);
7158 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7159 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7164 pps->cabac= get_bits1(&s->gb);
7165 pps->pic_order_present= get_bits1(&s->gb);
7166 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7167 if(pps->slice_group_count > 1 ){
7168 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7169 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7170 switch(pps->mb_slice_group_map_type){
7173 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7174 | run_length[ i ] |1 |ue(v) |
7179 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7181 | top_left_mb[ i ] |1 |ue(v) |
7182 | bottom_right_mb[ i ] |1 |ue(v) |
7190 | slice_group_change_direction_flag |1 |u(1) |
7191 | slice_group_change_rate_minus1 |1 |ue(v) |
7196 | slice_group_id_cnt_minus1 |1 |ue(v) |
7197 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7199 | slice_group_id[ i ] |1 |u(v) |
7204 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7205 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Reject reference counts above 32; fall back to 1/1 so later code is safe. */
7206 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7207 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7208 pps->ref_count[0]= pps->ref_count[1]= 1;
7212 pps->weighted_pred= get_bits1(&s->gb);
7213 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7214 pps->init_qp= get_se_golomb(&s->gb) + 26;
7215 pps->init_qs= get_se_golomb(&s->gb) + 26;
7216 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7217 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7218 pps->constrained_intra_pred= get_bits1(&s->gb);
7219 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7221 pps->transform_8x8_mode= 0;
7222 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Default scaling matrices are flat 16s until overridden below. */
7223 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7224 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Optional trailing extension (High profile): 8x8 mode, matrices and a
 * separate second chroma QP offset. */
7226 if(get_bits_count(&s->gb) < bit_length){
7227 pps->transform_8x8_mode= get_bits1(&s->gb);
7228 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7229 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7231 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7234 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7235 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7236 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7237 h->pps.chroma_qp_diff= 1;
7239 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7241 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7242 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7243 pps_id, pps->sps_id,
7244 pps->cabac ? "CABAC" : "CAVLC",
7245 pps->slice_group_count,
7246 pps->ref_count[0], pps->ref_count[1],
7247 pps->weighted_pred ? "weighted" : "",
7248 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7249 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7250 pps->constrained_intra_pred ? "CONSTR" : "",
7251 pps->redundant_pic_cnt_present ? "REDU" : "",
7252 pps->transform_8x8_mode ? "8x8DCT" : ""
7260 * Call decode_slice() for each context.
7262 * @param h h264 master context
7263 * @param context_count number of contexts to execute
7265 static void execute_decode_slices(H264Context *h, int context_count){
7266 MpegEncContext * const s = &h->s;
7267 AVCodecContext * const avctx= s->avctx;
/* Single context: decode directly on the calling thread, no setup needed. */
7271 if(context_count == 1) {
7272 decode_slice(avctx, h);
/* Multiple contexts: seed each slice context's error-resilience state,
 * run them through avctx->execute (possibly in parallel) ... */
7274 for(i = 1; i < context_count; i++) {
7275 hx = h->thread_context[i];
7276 hx->s.error_resilience = avctx->error_resilience;
7277 hx->s.error_count = 0;
7280 avctx->execute(avctx, (void *)decode_slice,
7281 (void **)h->thread_context, NULL, context_count);
7283 /* pull back stuff from slices to master context */
/* ... then copy the final MB position from the last context and sum the
 * per-context error counts into the master. */
7284 hx = h->thread_context[context_count - 1];
7285 s->mb_x = hx->s.mb_x;
7286 s->mb_y = hx->s.mb_y;
7287 for(i = 1; i < context_count; i++)
7288 h->s.error_count += h->thread_context[i]->s.error_count;
7293 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7294 MpegEncContext * const s = &h->s;
7295 AVCodecContext * const avctx= s->avctx;
7297 H264Context *hx; ///< thread context
7298 int context_count = 0;
7300 h->max_contexts = avctx->thread_count;
7303 for(i=0; i<50; i++){
7304 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7307 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7308 h->current_slice = 0;
7309 s->current_picture_ptr= NULL;
7321 if(buf_index >= buf_size) break;
7323 for(i = 0; i < h->nal_length_size; i++)
7324 nalsize = (nalsize << 8) | buf[buf_index++];
7325 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7330 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7335 // start code prefix search
7336 for(; buf_index + 3 < buf_size; buf_index++){
7337 // This should always succeed in the first iteration.
7338 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7342 if(buf_index+3 >= buf_size) break;
7347 hx = h->thread_context[context_count];
7349 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7350 if (ptr==NULL || dst_length < 0){
7353 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7355 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7357 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7358 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7361 if (h->is_avc && (nalsize != consumed))
7362 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7364 buf_index += consumed;
7366 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7367 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7372 switch(hx->nal_unit_type){
7374 if (h->nal_unit_type != NAL_IDR_SLICE) {
7375 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7378 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7380 init_get_bits(&hx->s.gb, ptr, bit_length);
7382 hx->inter_gb_ptr= &hx->s.gb;
7383 hx->s.data_partitioning = 0;
7385 if((err = decode_slice_header(hx, h)))
7388 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7389 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7390 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7391 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7392 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7393 && avctx->skip_frame < AVDISCARD_ALL)
7397 init_get_bits(&hx->s.gb, ptr, bit_length);
7399 hx->inter_gb_ptr= NULL;
7400 hx->s.data_partitioning = 1;
7402 err = decode_slice_header(hx, h);
7405 init_get_bits(&hx->intra_gb, ptr, bit_length);
7406 hx->intra_gb_ptr= &hx->intra_gb;
7409 init_get_bits(&hx->inter_gb, ptr, bit_length);
7410 hx->inter_gb_ptr= &hx->inter_gb;
7412 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7413 && s->context_initialized
7415 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7416 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7417 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7418 && avctx->skip_frame < AVDISCARD_ALL)
7422 init_get_bits(&s->gb, ptr, bit_length);
7426 init_get_bits(&s->gb, ptr, bit_length);
7427 decode_seq_parameter_set(h);
7429 if(s->flags& CODEC_FLAG_LOW_DELAY)
7432 if(avctx->has_b_frames < 2)
7433 avctx->has_b_frames= !s->low_delay;
7436 init_get_bits(&s->gb, ptr, bit_length);
7438 decode_picture_parameter_set(h, bit_length);
7442 case NAL_END_SEQUENCE:
7443 case NAL_END_STREAM:
7444 case NAL_FILLER_DATA:
7446 case NAL_AUXILIARY_SLICE:
7449 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7452 if(context_count == h->max_contexts) {
7453 execute_decode_slices(h, context_count);
7458 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7460 /* Slice could not be decoded in parallel mode, copy down
7461 * NAL unit stuff to context 0 and restart. Note that
             * rbsp_buffer is not transferred, but since we no longer
7463 * run in parallel mode this should not be an issue. */
7464 h->nal_unit_type = hx->nal_unit_type;
7465 h->nal_ref_idc = hx->nal_ref_idc;
7471 execute_decode_slices(h, context_count);
 * returns the number of bytes consumed for building the current frame
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
    if(s->flags&CODEC_FLAG_TRUNCATED){
        /* In truncated mode a parse context accumulated part of the input
         * earlier; drop those already-counted bytes from pos. */
        pos -= s->parse_context.last_index;
        if(pos<0) pos=0; // FIXME remove (unneeded?)
        if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
        if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decode one access unit from the input buffer and, when a picture is
 * ready for display, copy it into *pict.
 *
 * Handles: flushing of the delayed-picture queue (buf_size == 0),
 * one-time parsing of AVC "avcC" extradata, NAL unit decoding,
 * reference picture marking, and reordering of decoded pictures into
 * display (poc) order.
 *
 * @param data      output AVFrame (cast from void* below)
 * @param data_size set to sizeof(AVFrame) when a picture is output
 * @return number of input bytes consumed (via get_consumed_bytes())
 */
static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        uint8_t *buf, int buf_size)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

   /* no supplementary picture */
    if (buf_size == 0) {
        /* Empty packet == flush request: output remaining delayed
         * pictures in poc order, stopping at the next keyframe. */
        //FIXME factorize this with the output code below
        out = h->delayed_pic[0];
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];

        /* Remove the chosen picture from the delayed queue. */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;

    if(s->flags&CODEC_FLAG_TRUNCATED){
        /* Reassemble a complete frame from truncated input chunks. */
        int next= ff_h264_find_frame_end(h, buf, buf_size);

        if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
    //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);

    if(h->is_avc && !h->got_avcC) {
        /* First call with AVC ("avcC") extradata: parse the out-of-band
         * SPS/PPS sets stored in avctx->extradata. */
        int i, cnt, nalsize;
        unsigned char *p = avctx->extradata;
        if(avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
        /* sps and pps in the avcC always have length coded with 2 bytes,
           so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p+5) & 0x1f; // Number of sps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
        // Decode pps from avcC
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize) != nalsize) {
                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
        // Now store the right NAL length size, that will be used to parse all other NALs
        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
        // Do not reparse avcC

    if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
        /* Annex-B style extradata: run it through the normal NAL parser once. */
        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)

    buf_index=decode_nal_units(h, buf, buf_size);

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        /* No picture was started; only an error if we were not told to skip. */
        if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
        /* A complete picture has been decoded: update POC predictors,
         * run reference marking, then pick the next picture to output
         * in display order. */
        Picture *out = s->current_picture_ptr;
        Picture *cur = s->current_picture_ptr;
        Picture *prev = h->delayed_output_pic;
        int i, pics, cross_idr, out_of_order, out_idx;

        s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
        s->current_picture_ptr->pict_type= s->pict_type;

        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        if(s->current_picture_ptr->reference & s->picture_structure){
            /* Only reference pictures update the POC predictors and run
             * memory management control operations (mmco). */
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        execute_ref_pic_marking(h, h->mmco, h->mmco_index);

        //FIXME do something with unavailable reference frames

#if 0 //decode order
        *data_size = sizeof(AVFrame);
        /* Sort B-frames into display order */

        if(h->sps.bitstream_restriction_flag
           && s->avctx->has_b_frames < h->sps.num_reorder_frames){
            s->avctx->has_b_frames = h->sps.num_reorder_frames;

        while(h->delayed_pic[pics]) pics++;

        assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));

        h->delayed_pic[pics++] = cur;
        /* Keep the picture alive while it sits in the delayed queue. */
        if(cur->reference == 0)
            cur->reference = DELAYED_PIC_REF;

        for(i=0; h->delayed_pic[i]; i++)
            if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)

        /* Choose the delayed picture with the smallest poc before the next keyframe. */
        out = h->delayed_pic[0];
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];

        out_of_order = !cross_idr && prev && out->poc < prev->poc;
        if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        else if(prev && pics <= s->avctx->has_b_frames)
        else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
           ((!cross_idr && prev && out->poc > prev->poc + 2)
             || cur->pict_type == B_TYPE)))
            /* Grow the reorder delay when pictures keep arriving out of order. */
            s->avctx->has_b_frames++;
        else if(out_of_order)

        if(out_of_order || pics > s->avctx->has_b_frames){
            /* Emit 'out': drop it from the delayed queue and release the
             * previously output picture's DELAYED_PIC_REF hold. */
            for(i=out_idx; h->delayed_pic[i]; i++)
                h->delayed_pic[i] = h->delayed_pic[i+1];
            *data_size = sizeof(AVFrame);
            if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
                prev->reference = 0;
            h->delayed_output_pic = out;
            *pict= *(AVFrame*)out;
            av_log(avctx, AV_LOG_DEBUG, "no picture\n");

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    /* Return the Picture timestamp as the frame number */
    /* we subtract 1 because it is added on utils.c */
    avctx->frame_number = s->picture_number - 1;
    return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: a neighbour is available only when it lies inside the
 * picture and belongs to the same slice (slice_table match).
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

        /* top-left, top and top-right neighbours */
        h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    /* left neighbour */
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
/* Scratch bitstream buffer size (bytes) for the self tests below. */
#define SIZE (COUNT*40)
//    int int_temp[10000];
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    /* --- Exp-Golomb write/read round trip, unsigned codes --- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        s= show_bits(&gb, 24);
        j= get_ue_golomb(&gb);
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        STOP_TIMER("get_ue_golomb");

    /* --- Exp-Golomb round trip, signed codes --- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        s= show_bits(&gb, 24);
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        STOP_TIMER("get_se_golomb");

    /* --- DCT -> (de)quantize -> IDCT accuracy check on random blocks --- */
    printf("testing 4x4 (I)DCT\n");
        uint8_t src[16], ref[16];
        uint64_t error= 0, max_error=0;

        for(i=0; i<COUNT; i++){
//            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
            for(j=0; j<16; j++){
                ref[j]= random()%255;
                src[j]= random()%255;

            h264_diff_dct_c(block, src, ref, 4);

            for(j=0; j<16; j++){
//                printf("%d ", block[j]);
                block[j]= block[j]*4;
                if(j&1) block[j]= (block[j]*4 + 2)/5;
                if(j&4) block[j]= (block[j]*4 + 2)/5;

            s->dsp.h264_idct_add(ref, block, 4);
/*            for(j=0; j<16; j++){
                printf("%d ", ref[j]);

            for(j=0; j<16; j++){
                int diff= FFABS(src[j] - ref[j]);

                max_error= FFMAX(max_error, diff);
        printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );

    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
            src1_block[i]= src2_block[i]= random()%255;

    /* --- NAL escaping round trip: encode_nal() then decode_nal() --- */
    printf("Testing NAL layer\n");
        uint8_t bitstream[COUNT];
        uint8_t nal[COUNT*2];
        memset(&h, 0, sizeof(H264Context));

        for(i=0; i<COUNT; i++){
            /* Fill with nonzero bytes, then sprinkle in zeros that must be escaped. */
            for(j=0; j<COUNT; j++){
                bitstream[j]= (random() % 255) + 1;
            for(j=0; j<zeros; j++){
                int pos= random() % COUNT;
                while(bitstream[pos] == 0){
            nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
                printf("encoding failed\n");

            out= decode_nal(&h, nal, &out_length, &consumed, nal_length);

            /* The decoded payload must match the original byte-for-byte. */
            if(out_length != COUNT){
                printf("incorrect length %d %d\n", out_length, COUNT);

            if(consumed != nal_length){
                printf("incorrect consumed length %d %d\n", nal_length, consumed);

            if(memcmp(bitstream, out, COUNT)){
                printf("mismatch\n");

    printf("Testing RBSP\n");
/**
 * Codec close callback: release per-context buffers and lookup tables.
 * @return 0 (the elided tail presumably returns 0 — TODO confirm)
 */
static int decode_end(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;

    /* NOTE(review): rbsp_buffer[] looks like the unescape buffers used by
     * decode_nal() — verify against that function before relying on it. */
    av_freep(&h->rbsp_buffer[0]);
    av_freep(&h->rbsp_buffer[1]);
    free_tables(h); //FIXME cleanup init stuff perhaps

//    memset(h, 0, sizeof(H264Context));
7910 AVCodec h264_decoder = {
7914 sizeof(H264Context),
7919 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,