2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
/* VLC lookup tables used for residual coefficient decoding (names suggest
 * CAVLC coeff_token / total_zeros / run_before tables); presumably filled
 * in by an init function outside this view. */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;
static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];
static VLC run_vlc[6];
/* forward declarations: SVQ3 variants of the dequant/idct helpers and the
 * per-macroblock deblocking filters (full and fast paths) — presumably
 * defined later in this file. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
61 static av_always_inline uint32_t pack16to32(int a, int b){
62 #ifdef WORDS_BIGENDIAN
63 return (b&0xFFFF) + (a<<16);
65 return (a&0xFFFF) + (b<<16);
/**
 * Lookup table for qp % 6, valid for QP values 0..51 — avoids a runtime
 * modulo in the (de)quantization scale derivation.
 * Fix: the initializer list was missing its terminating "};".
 */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/**
 * Lookup table for qp / 6, valid for QP values 0..51 — avoids a runtime
 * division in the (de)quantization scale derivation.
 * Fix: the initializer list was missing its terminating "};".
 */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * Fills a w x h rectangle in a cache/plane with a replicated value,
 * using the widest store the width allows.
 * NOTE(review): this view is missing the per-width branch headers
 * (the asserts below imply w in {2,4,8,16}), the non-64-bit fallback
 * markers and the closing braces; the store groups below belong to
 * those branches in the full source.
 * @param h height of the rectangle, should be a constant
 * @param w width of the rectangle, should be a constant
 * @param size the size of val (1 or 4), should be a constant
 */
static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);
    /* destination must be aligned to min(w, STRIDE_ALIGN) and the stride
     * must be a multiple of w, so each row can be written in one piece */
    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);
    /* 2-byte rows: replicate the byte into 16 bits when size==1 */
    const uint16_t v= size==4 ? val : val*0x0101;
    *(uint16_t*)(p + 0*stride)= v;
    *(uint16_t*)(p + 1*stride)= v;
    *(uint16_t*)(p + 2*stride)= v;
    *(uint16_t*)(p + 3*stride)= v;
    /* 4-byte rows: replicate the byte into 32 bits when size==1 */
    const uint32_t v= size==4 ? val : val*0x01010101;
    *(uint32_t*)(p + 0*stride)= v;
    *(uint32_t*)(p + 1*stride)= v;
    *(uint32_t*)(p + 2*stride)= v;
    *(uint32_t*)(p + 3*stride)= v;
    //gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
    /* 8-byte rows as one 64-bit store each, when 64-bit math is cheap */
    const uint64_t v= val*0x0100000001ULL;
    *(uint64_t*)(p + 0*stride)= v;
    *(uint64_t*)(p + 1*stride)= v;
    *(uint64_t*)(p + 2*stride)= v;
    *(uint64_t*)(p + 3*stride)= v;
    /* 16-byte rows as two 64-bit stores each */
    const uint64_t v= val*0x0100000001ULL;
    *(uint64_t*)(p + 0+0*stride)= v;
    *(uint64_t*)(p + 8+0*stride)= v;
    *(uint64_t*)(p + 0+1*stride)= v;
    *(uint64_t*)(p + 8+1*stride)= v;
    *(uint64_t*)(p + 0+2*stride)= v;
    *(uint64_t*)(p + 8+2*stride)= v;
    *(uint64_t*)(p + 0+3*stride)= v;
    *(uint64_t*)(p + 8+3*stride)= v;
    /* 32-bit fallback: two 32-bit stores per row ... */
    *(uint32_t*)(p + 0+0*stride)= val;
    *(uint32_t*)(p + 4+0*stride)= val;
    *(uint32_t*)(p + 0+1*stride)= val;
    *(uint32_t*)(p + 4+1*stride)= val;
    *(uint32_t*)(p + 0+2*stride)= val;
    *(uint32_t*)(p + 4+2*stride)= val;
    *(uint32_t*)(p + 0+3*stride)= val;
    *(uint32_t*)(p + 4+3*stride)= val;
    /* ... and four 32-bit stores per row for the widest case */
    *(uint32_t*)(p + 0+0*stride)= val;
    *(uint32_t*)(p + 4+0*stride)= val;
    *(uint32_t*)(p + 8+0*stride)= val;
    *(uint32_t*)(p +12+0*stride)= val;
    *(uint32_t*)(p + 0+1*stride)= val;
    *(uint32_t*)(p + 4+1*stride)= val;
    *(uint32_t*)(p + 8+1*stride)= val;
    *(uint32_t*)(p +12+1*stride)= val;
    *(uint32_t*)(p + 0+2*stride)= val;
    *(uint32_t*)(p + 4+2*stride)= val;
    *(uint32_t*)(p + 8+2*stride)= val;
    *(uint32_t*)(p +12+2*stride)= val;
    *(uint32_t*)(p + 0+3*stride)= val;
    *(uint32_t*)(p + 4+3*stride)= val;
    *(uint32_t*)(p + 8+3*stride)= val;
    *(uint32_t*)(p +12+3*stride)= val;
/**
 * Loads neighbourhood state for the current macroblock into the per-MB
 * caches (intra4x4 prediction modes, non-zero counts, cbp, motion
 * vector/reference/mvd caches, direct flags) so that later decode and
 * deblock code can index them uniformly via scan8[].
 * NOTE(review): many control-flow lines (branch headers, closing braces,
 * loop headers) of this function are missing from this chunk; the
 * comments below only state what the visible lines establish.
 * @param mb_type type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the deblocking filter
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
    /* default (non-MBAFF) neighbour addresses, in macroblock units */
    top_xy = mb_xy - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    /* MBAFF: neighbour addresses depend on frame/field coding of each
     * vertical MB pair and on whether this is the top or bottom MB */
        const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy = pair_xy - s->mb_stride;
        const int topleft_pair_xy = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        /* step each top-side neighbour one more MB row up when the
         * frame/field pairing requires it */
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
            top_xy -= s->mb_stride;
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
            topleft_xy -= s->mb_stride;
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
            topright_xy -= s->mb_stride;
        /* mixed frame/field pair on the left: address the pair instead */
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                left_xy[1] += s->mb_stride;
    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
        /* deblock path: any already-decoded neighbour (slice_table < 255)
         * counts, even across slice boundaries */
        top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
        if(FRAME_MBAFF && !IS_INTRA(mb_type)){
            /* re-expand the packed per-4x4 nnz bits stored at [14] */
            int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
                h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
            for(list=0; list<h->list_count; list++){
                if(USES_LIST(mb_type,list)){
                    /* reload this MB's own MVs/refs into the cache */
                    uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
                    uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
                    fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
                    fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
        /* decode path: neighbours count only if they belong to this slice */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
    /* intra-prediction sample availability bitmasks, pruned per missing
     * (or, with constrained_intra_pred, inter-coded) neighbour */
    if(IS_INTRA(mb_type)){
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;
        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available&= 0xDF5F;
            h->left_samples_available&= 0x5F5F;
        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;
        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;
    /* cache the neighbours' intra4x4 prediction modes (bottom row of the
     * top MB, right column of the left MBs) */
    if(IS_INTRA4x4(mb_type)){
        if(IS_INTRA4x4(top_type)){
            h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
            h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
            h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
            h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
            h->intra4x4_pred_mode_cache[4+8*0]=
            h->intra4x4_pred_mode_cache[5+8*0]=
            h->intra4x4_pred_mode_cache[6+8*0]=
            h->intra4x4_pred_mode_cache[7+8*0]= pred;
            if(IS_INTRA4x4(left_type[i])){
                h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    /* non-zero-count cache: copy the neighbouring rows/columns, or seed
     * with 0 (cabac inter) / 64 (otherwise) when no neighbour is usable */
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=
        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=
        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 + 8*i]=
            h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
    /* neighbouring coded-block-pattern bits (for CABAC context / deblock) */
        h->top_cbp = h->cbp_table[top_xy];
    } else if(IS_INTRA(mb_type)) {
        h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
    } else if(IS_INTRA(mb_type)) {
        h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
    /* motion vector / reference index caches for inter prediction */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                h->mv_cache_clean[list]= 1;
            h->mv_cache_clean[list]= 0;
            /* top row of the cache from the bottom row of the top MB */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            /* left column of the cache from the left MB(s) */
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
            /* corner neighbours (topleft / topright) */
            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
            /* clear the extra cache slots used as padding by prediction */
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
            /* XXX beurk, Load mvd */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
            if(USES_LIST(left_type[0], list)){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
            if(USES_LIST(left_type[1], list)){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
            *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
            /* B slices: cache the neighbours' direct-prediction flags */
            if(h->slice_type == B_TYPE){
                fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
                if(IS_DIRECT(top_type)){
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                }else if(IS_8X8(top_type)){
                    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                if(IS_DIRECT(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                else if(IS_8X8(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    h->direct_cache[scan8[0] - 1 + 0*8]= 0;
                if(IS_DIRECT(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                else if(IS_8X8(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    h->direct_cache[scan8[0] - 1 + 2*8]= 0;
            /* MBAFF frame<->field remapping of cached refs/MVs: applies
             * MAP_F2F to every neighbour cache slot */
            MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
            MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
            MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
            MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
            /* frame MB reading a field neighbour: double ref, halve mv.y */
            #define MAP_F2F(idx, mb_type)\
                if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] <<= 1;\
                    h->mv_cache[list][idx][1] /= 2;\
                    h->mvd_cache[list][idx][1] /= 2;\
            /* field MB reading a frame neighbour: halve ref, double mv.y */
            #define MAP_F2F(idx, mb_type)\
                if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] >>= 1;\
                    h->mv_cache[list][idx][1] <<= 1;\
                    h->mvd_cache[list][idx][1] <<= 1;\
    /* number of neighbouring MBs using the 8x8 transform (CABAC context) */
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the current MB's cached intra4x4 prediction modes back into the
 * per-macroblock intra4x4_pred_mode array: entries 0..3 come from the
 * cache's right column, entries 4..6 from its bottom row — the slots
 * that MBs to the right and below will read as their neighbours.
 * NOTE(review): the closing line(s) of this function are not visible here.
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * Modes that cannot be repaired (table entry -1) trigger the error log;
 * repairable DC modes are rewritten in place in the cache.
 * NOTE(review): loop headers and the return statements are missing from
 * this view; presumably returns 0 on success and a negative value when a
 * mode is unusable — confirm against the full source.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* per-mode replacement when only the top / only the left is missing */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    if(!(h->top_samples_available&0x8000)){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
    if(!(h->left_samples_available&0x8000)){
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
} //FIXME cleanup like next
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * Used for the whole-MB (16x16 luma / 8x8 chroma) prediction mode.
 * NOTE(review): the range check, the mode-remapping statements and the
 * return statements are missing from this view; presumably returns the
 * (possibly remapped) mode or a negative error — confirm in full source.
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* replacement mode when only the top / only the left is missing */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
    if(!(h->top_samples_available&0x8000)){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
    if(!(h->left_samples_available&0x8000)){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/**
 * gets the predicted intra4x4 prediction mode.
 * The prediction is the minimum of the left and top neighbours' modes;
 * DC_PRED when either neighbour is unavailable (mode < 0).
 * @param n 4x4 block index
 * NOTE(review): the non-DC return path is not visible in this view.
 */
static inline int pred_intra_mode(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
    const int min= FFMIN(left, top);
    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
    if(min<0) return DC_PRED;
/**
 * Writes the current MB's cached non-zero coefficient counts back to the
 * per-macroblock non_zero_count array (the right column and bottom row
 * of luma plus the chroma edge entries — what neighbours will read),
 * and additionally packs one bit per luma 4x4 block into the 16-bit
 * field at index [14] for the deblocking filter.
 */
static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
    // store all luma nnzs, for deblocking
        v += (!!h->non_zero_count_cache[scan8[i]]) << i;
    *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/**
 * gets the predicted number of non zero coefficients.
 * @param n block index
 * NOTE(review): the lines deriving i from the left/top cached counts are
 * missing from this view; only the rounding/averaging step is visible.
 */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->non_zero_count_cache[index8 - 1];
    const int top = h->non_zero_count_cache[index8 - 8];
    /* round the combined prediction up to the nearest half */
    if(i<64) i= (i+1)>>1;
    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Selects the "C" motion vector used for median MV prediction: the
 * top-right neighbour when available, otherwise the top-left; contains
 * special-case remapping for MBAFF frame/field neighbour mismatches
 * (SET_DIAG_MV fetches and rescales an MV directly from the picture).
 * @param C   receives a pointer to the chosen MV
 * @param i   scan8 cache index of the current block
 * @param part_width partition width in 4x4 units
 * @return the reference index associated with *C
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;
    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        /* cache slot scan8[0]-2 is used as scratch for the rescaled MV */
            *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
            *C = h->mv_cache[list][scan8[0]-2];
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
                // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
                SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
    /* normal (non-MBAFF) case: top-right if available, else top-left */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        tprintf(s->avctx, "topright MV not available\n");
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV.
 * Median of the left (A), top (B) and diagonal (C) neighbour MVs; when
 * exactly one neighbour shares the target reference, that neighbour's MV
 * is used instead (per-branch bodies not all visible in this view).
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;
    assert(part_width==1 || part_width==2 || part_width==4);
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        }else if(top_ref==ref){
    /* only the left neighbour exists: take A outright (spec special case) */
    if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * gets the directionally predicted 16x8 MV.
 * Top partition prefers the above neighbour (B), bottom partition the
 * left neighbour (A), when that neighbour uses the same reference;
 * otherwise falls through to the generic median prediction.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* top 16x8 half: candidate B from above */
        const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
    /* bottom 16x8 half: candidate A from the left */
        const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
    /* fallback: standard median MV prediction */
    pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * Left partition prefers the left neighbour (A), right partition the
 * diagonal neighbour (C), when that neighbour uses the same reference;
 * otherwise falls through to the generic median prediction.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* left 8x16 half: candidate A from the left */
        const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
    /* right 8x16 half: candidate C from the diagonal neighbour */
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
        if(diagonal_ref == ref){
    /* fallback: standard median MV prediction */
    pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV for a P-skip macroblock: when the top or left
 * neighbour is unavailable, or either has reference 0 with a zero MV,
 * the skip MV is forced (branch body — presumably *mx = *my = 0 — is not
 * visible in this view); otherwise the standard median prediction is used.
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
    pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factor for each list-0
 * reference: clip((tb*tx + 32) >> 6) with tx = (16384 + |td|/2) / td,
 * where td/tb are the clipped POC distances; 256 when td == 0.
 * Also replicates each frame factor into the two field entries.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    const int poc = h->s.current_picture_ptr->poc;
    const int poc1 = h->ref_list[1][0].poc;
    for(i=0; i<h->ref_count[0]; i++){
        int poc0 = h->ref_list[0][i].poc;
        int td = av_clip(poc1 - poc0, -128, 127);
        if(td == 0 /* FIXME || pic0 is a long-term ref */){
            h->dist_scale_factor[i] = 256;
            int tb = av_clip(poc - poc0, -128, 127);
            int tx = (16384 + (FFABS(td) >> 1)) / td;
            h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
    /* field variant: same factor for both fields of each reference */
    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor_field[2*i] =
        h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Records the current picture's reference counts/POCs and, for temporal
 * direct mode, builds map_col_to_list0: for each reference of the
 * co-located picture (ref_list[1][0]), the index of the list-0 entry
 * with the same POC (0 when no match — bogus fill for missing frames).
 * Also derives the doubled per-field mapping.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    if(cur->pict_type == I_TYPE)
        cur->ref_count[0] = 0;
    if(cur->pict_type != B_TYPE)
        cur->ref_count[1] = 0;
    /* remember this picture's reference POCs for later direct prediction */
    for(list=0; list<2; list++){
        cur->ref_count[list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
    /* only temporal direct mode in B slices needs the mapping below */
    if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            const int poc = ref1->ref_poc[list][i];
            h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
            for(j=0; j<h->ref_count[list]; j++)
                if(h->ref_list[list][j].poc == poc){
                    h->map_col_to_list0[list][i] = j;
    /* field mapping: each frame index j expands to fields 2j and 2j+1 */
    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            j = h->map_col_to_list0[list][i];
            h->map_col_to_list0_field[list][2*i] = 2*j;
            h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives motion vectors and reference indices for B_Direct macroblocks
 * (and direct 8x8 sub-blocks), in either spatial or temporal direct mode.
 * Fills h->mv_cache / h->ref_cache and may refine *mb_type / sub_mb_type
 * based on the co-located macroblock of ref_list[1][0].
 * NOTE(review): this excerpt is heavily sampled — many original lines
 * are missing throughout, so comments track only the visible structure.
 */
1035 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1036 MpegEncContext * const s = &h->s;
1037 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1038 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1039 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* co-located macroblock data from the first list-1 reference */
1040 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1041 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1042 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1043 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1044 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1045 const int is_b8x8 = IS_8X8(*mb_type);
1046 unsigned int sub_mb_type;
1049 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose block sizes from the co-located mb type and
 * direct_8x8_inference_flag */
1050 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1051 /* FIXME save sub mb types from previous frames (or derive from MVs)
1052 * so we know exactly what block size to use */
1053 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1055 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1056 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1057 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1059 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1060 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1063 *mb_type |= MB_TYPE_DIRECT2;
1065 *mb_type |= MB_TYPE_INTERLACED;
1067 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct mode ---- */
1069 if(h->direct_spatial_mv_pred){
1074 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1076 /* ref = min(neighbors) */
1077 for(list=0; list<2; list++){
1078 int refa = h->ref_cache[list][scan8[0] - 1];
1079 int refb = h->ref_cache[list][scan8[0] - 8];
1080 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* C unavailable: substitute the top-left neighbour D */
1082 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1084 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1086 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour in either list: zero refs and zero MVs */
1092 if(ref[0] < 0 && ref[1] < 0){
1093 ref[0] = ref[1] = 0;
1094 mv[0][0] = mv[0][1] =
1095 mv[1][0] = mv[1][1] = 0;
1097 for(list=0; list<2; list++){
1099 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1101 mv[list][0] = mv[list][1] = 0;
/* drop the prediction flag for any list with no usable reference */
1106 *mb_type &= ~MB_TYPE_P0L1;
1107 sub_mb_type &= ~MB_TYPE_P0L1;
1108 }else if(ref[0] < 0){
1109 *mb_type &= ~MB_TYPE_P0L0;
1110 sub_mb_type &= ~MB_TYPE_P0L0;
1113 if(IS_16X16(*mb_type)){
1116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1117 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* if the co-located block is (nearly) stationary, force zero MVs;
 * the x264_build check works around a known old-x264 deviation */
1118 if(!IS_INTRA(mb_type_col)
1119 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1120 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1121 && (h->x264_build>33 || !h->x264_build)))){
1123 a= pack16to32(mv[0][0],mv[0][1]);
1125 b= pack16to32(mv[1][0],mv[1][1]);
1127 a= pack16to32(mv[0][0],mv[0][1]);
1128 b= pack16to32(mv[1][0],mv[1][1]);
1130 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1131 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* per-8x8 variant of the same spatial derivation */
1133 for(i8=0; i8<4; i8++){
1134 const int x8 = i8&1;
1135 const int y8 = i8>>1;
1137 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1139 h->sub_mb_type[i8] = sub_mb_type;
1141 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1142 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1143 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1144 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1147 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1148 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1149 && (h->x264_build>33 || !h->x264_build)))){
1150 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1151 if(IS_SUB_8X8(sub_mb_type)){
1152 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1153 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1155 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1160 for(i4=0; i4<4; i4++){
1161 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1162 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1164 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1166 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1172 }else{ /* direct temporal mv pred */
1173 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1174 const int *dist_scale_factor = h->dist_scale_factor;
/* interlaced MB: switch to the field variants of the lookup tables */
1177 if(IS_INTERLACED(*mb_type)){
1178 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1179 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1180 dist_scale_factor = h->dist_scale_factor_field;
/* current MB and co-located MB differ in frame/field coding:
 * rescale co-located positions and MVs accordingly */
1182 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1183 /* FIXME assumes direct_8x8_inference == 1 */
1184 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1185 int mb_types_col[2];
1188 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1189 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1190 | (*mb_type & MB_TYPE_INTERLACED);
1191 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1193 if(IS_INTERLACED(*mb_type)){
1194 /* frame to field scaling */
1195 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* rewind the co-located pointers to the top of the MB pair */
1198 l1ref0 -= 2*h->b8_stride;
1199 l1ref1 -= 2*h->b8_stride;
1200 l1mv0 -= 4*h->b_stride;
1201 l1mv1 -= 4*h->b_stride;
1205 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1206 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1208 *mb_type |= MB_TYPE_16x8;
1210 *mb_type |= MB_TYPE_8x8;
1212 /* field to frame scaling */
1213 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1214 * but in MBAFF, top and bottom POC are equal */
1215 int dy = (s->mb_y&1) ? 1 : 2;
1217 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1218 l1ref0 += dy*h->b8_stride;
1219 l1ref1 += dy*h->b8_stride;
1220 l1mv0 += 2*dy*h->b_stride;
1221 l1mv1 += 2*dy*h->b_stride;
1224 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1226 *mb_type |= MB_TYPE_16x16;
1228 *mb_type |= MB_TYPE_8x8;
/* per-8x8 temporal derivation with vertical rescaling (y_shift) */
1231 for(i8=0; i8<4; i8++){
1232 const int x8 = i8&1;
1233 const int y8 = i8>>1;
1235 const int16_t (*l1mv)[2]= l1mv0;
1237 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1239 h->sub_mb_type[i8] = sub_mb_type;
1241 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1242 if(IS_INTRA(mb_types_col[y8])){
/* intra co-located block: zero refs and MVs in both lists */
1243 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1244 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1245 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1249 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1251 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1253 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1256 scale = dist_scale_factor[ref0];
1257 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1260 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1261 int my_col = (mv_col[1]<<y_shift)/2;
/* list0 MV = scaled co-located MV; list1 MV = list0 MV - co-located MV */
1262 int mx = (scale * mv_col[0] + 128) >> 8;
1263 int my = (scale * my_col + 128) >> 8;
1264 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1265 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1272 /* one-to-one mv scaling */
1274 if(IS_16X16(*mb_type)){
1277 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col)){
1281 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1282 : map_col_to_list0[1][l1ref1[0]];
1283 const int scale = dist_scale_factor[ref0];
1284 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1286 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1287 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1289 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1290 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1292 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1293 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1294 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1296 for(i8=0; i8<4; i8++){
1297 const int x8 = i8&1;
1298 const int y8 = i8>>1;
1300 const int16_t (*l1mv)[2]= l1mv0;
1302 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1304 h->sub_mb_type[i8] = sub_mb_type;
1305 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1306 if(IS_INTRA(mb_type_col)){
1307 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1308 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1309 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1313 ref0 = l1ref0[x8 + y8*h->b8_stride];
1315 ref0 = map_col_to_list0[0][ref0];
1317 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1320 scale = dist_scale_factor[ref0];
1322 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1323 if(IS_SUB_8X8(sub_mb_type)){
1324 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1325 int mx = (scale * mv_col[0] + 128) >> 8;
1326 int my = (scale * mv_col[1] + 128) >> 8;
1327 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1328 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1330 for(i4=0; i4<4; i4++){
1331 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1332 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1333 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1334 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1335 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1336 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data from the decoder caches
 * (mv_cache / ref_cache / mvd_cache) back into the picture-wide arrays
 * (motion_val, ref_index, mvd_table, direct_table).
 * NOTE(review): the y-loop headers around lines 1357/1365 and several
 * other lines are missing from this excerpt.
 */
1343 static inline void write_back_motion(H264Context *h, int mb_type){
1344 MpegEncContext * const s = &h->s;
1345 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1346 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* mark list 0 unused so neighbouring MBs see a consistent ref_index */
1349 if(!USES_LIST(mb_type, 0))
1350 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1352 for(list=0; list<h->list_count; list++){
1354 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row of the 4x4 MV grid */
1358 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1359 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC also needs the MV differences for context modelling */
1361 if( h->pps.cabac ) {
1362 if(IS_SKIP(mb_type))
1363 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1366 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1367 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1372 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1373 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1374 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1375 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1376 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B slices: record which 8x8 sub-blocks used direct prediction */
1380 if(h->slice_type == B_TYPE && h->pps.cabac){
1381 if(IS_8X8(mb_type)){
1382 uint8_t *direct_table = &h->direct_table[b8_xy];
1383 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1384 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1385 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1391 * Decodes a network abstraction layer unit.
1392 * @param consumed is the number of bytes used as input
1393 * @param length is the length of the array
1394 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1395 * @returns decoded bytes, might be src+1 if no escapes
/* NOTE(review): several lines of this function (variable declarations,
 * loop headers around 1433-1453) are missing from this excerpt. */
1397 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
/* parse the one-byte NAL header */
1402 // src[0]&0x80; //forbidden bit
1403 h->nal_ref_idc= src[0]>>5;
1404 h->nal_unit_type= src[0]&0x1F;
1408 for(i=0; i<length; i++)
1409 printf("%2X ", src[i]);
/* scan for 00 00 0x (x<=3): emulation-prevention escape or start code */
1411 for(i=0; i+1<length; i+=2){
1412 if(src[i]) continue;
1413 if(i>0 && src[i-1]==0) i--;
1414 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1416 /* startcode, so we must be past the end */
1423 if(i>=length-1){ //no escaped 0
/* fast path: the payload can be returned without copying */
1424 *dst_length= length;
1425 *consumed= length+1; //+1 for the header
1429 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1430 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1431 dst= h->rbsp_buffer[bufidx];
1437 //printf("decoding esc\n");
1440 //remove escapes (very rare 1:2^22)
1441 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1442 if(src[si+2]==3){ //escape
1447 }else //next start code
1451 dst[di++]= src[si++];
1455 *consumed= si + 1;//+1 for the header
1456 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1461 * identifies the exact end of the bitstream
1462 * @return the length of the trailing, or 0 if damaged
/* NOTE(review): the body of this function (original lines 1465-1477)
 * is missing from this excerpt; only the trace call is visible. */
1464 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1468 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1478 * idct tranforms the 16 dc values and dequantize them.
1479 * @param qp quantization parameter
/* Two passes of a 4-point butterfly (rows via temp[], then columns),
 * with dequantization by qmul folded into the second pass.
 * NOTE(review): loop headers and the z0..z3 recombination lines are
 * missing from this excerpt. */
1481 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1484 int temp[16]; //FIXME check if this is a good idea
/* offsets map the 16 luma DC values to their scattered positions
 * inside the macroblock's coefficient array */
1485 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1486 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1488 //memset(block, 64, 2*256);
1491 const int offset= y_offset[i];
1492 const int z0= block[offset+stride*0] + block[offset+stride*4];
1493 const int z1= block[offset+stride*0] - block[offset+stride*4];
1494 const int z2= block[offset+stride*1] - block[offset+stride*5];
1495 const int z3= block[offset+stride*1] + block[offset+stride*5];
1504 const int offset= x_offset[i];
1505 const int z0= temp[4*0+i] + temp[4*2+i];
1506 const int z1= temp[4*0+i] - temp[4*2+i];
1507 const int z2= temp[4*1+i] - temp[4*3+i];
1508 const int z3= temp[4*1+i] + temp[4*3+i];
/* dequantize with rounding: (x*qmul + 128) >> 8 */
1510 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1511 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1512 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1513 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1519 * dct tranforms the 16 dc values.
1520 * @param qp quantization parameter ??? FIXME
/* Forward counterpart of h264_luma_dc_dequant_idct_c: same two-pass
 * butterfly over the 16 DC values, with a final >>1 instead of
 * dequantization. NOTE(review): loop headers are missing here. */
1522 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1523 // const int qmul= dequant_coeff[qp][0];
1525 int temp[16]; //FIXME check if this is a good idea
1526 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1527 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1530 const int offset= y_offset[i];
1531 const int z0= block[offset+stride*0] + block[offset+stride*4];
1532 const int z1= block[offset+stride*0] - block[offset+stride*4];
1533 const int z2= block[offset+stride*1] - block[offset+stride*5];
1534 const int z3= block[offset+stride*1] + block[offset+stride*5];
1543 const int offset= x_offset[i];
1544 const int z0= temp[4*0+i] + temp[4*2+i];
1545 const int z1= temp[4*0+i] - temp[4*2+i];
1546 const int z2= temp[4*1+i] - temp[4*3+i];
1547 const int z3= temp[4*1+i] + temp[4*3+i];
1549 block[stride*0 +offset]= (z0 + z3)>>1;
1550 block[stride*2 +offset]= (z1 + z2)>>1;
1551 block[stride*8 +offset]= (z1 - z2)>>1;
1552 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 Hadamard inverse transform + dequantization of the chroma DC
 * coefficients. NOTE(review): the line computing e (presumably a-d,
 * original ~1570-1574) is missing from this excerpt.
 */
1560 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1561 const int stride= 16*2;
1562 const int xStride= 16;
/* gather the four DC values from the 2x2 grid */
1565 a= block[stride*0 + xStride*0];
1566 b= block[stride*0 + xStride*1];
1567 c= block[stride*1 + xStride*0];
1568 d= block[stride*1 + xStride*1];
/* butterfly + dequantize; >>7 is the chroma DC normalization */
1575 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1576 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1577 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1578 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients
 * (encoder-side counterpart of chroma_dc_dequant_idct_c).
 * NOTE(review): the line computing e is missing from this excerpt.
 */
1582 static void chroma_dc_dct_c(DCTELEM *block){
1583 const int stride= 16*2;
1584 const int xStride= 16;
1587 a= block[stride*0 + xStride*0];
1588 b= block[stride*0 + xStride*1];
1589 c= block[stride*1 + xStride*0];
1590 d= block[stride*1 + xStride*1];
1597 block[stride*0 + xStride*0]= (a+c);
1598 block[stride*0 + xStride*1]= (e+b);
1599 block[stride*1 + xStride*0]= (a-c);
1600 block[stride*1 + xStride*1]= (e-b);
1605 * gets the chroma qp.
/* Table lookup: maps a luma qscale to the chroma QP for chroma plane t
 * (Cb/Cr can have separate offsets via the PPS table). */
1607 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1608 return h->pps.chroma_qp_table[t][qscale & 0xff];
1611 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1612 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Encoder-side quantizer: quantizes block[] in scan order with an
 * intra/inter dead-zone bias, with special DC scaling when separate_dc
 * is set. Returns the index of the last nonzero coefficient.
 * NOTE(review): branch structure and some assignments (e.g. where
 * last_non_zero is set) are missing from this excerpt. */
1613 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1615 const int * const quant_table= quant_coeff[qscale];
/* dead-zone bias: 1/3 of a step for intra, 1/6 for inter */
1616 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1617 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1618 const unsigned int threshold2= (threshold1<<1);
/* separate DC handling with a shifted quantization step */
1624 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1625 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1626 const unsigned int dc_threshold2= (dc_threshold1<<1);
1628 int level= block[0]*quant_coeff[qscale+18][0];
/* unsigned-range trick: true iff |level| exceeds the dead zone */
1629 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1631 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1634 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1637 // last_non_zero = i;
1642 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1643 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1644 const unsigned int dc_threshold2= (dc_threshold1<<1);
1646 int level= block[0]*quant_table[0];
1647 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1649 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1652 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1655 // last_non_zero = i;
/* AC coefficients, visited in scan order */
1668 const int j= scantable[i];
1669 int level= block[j]*quant_table[j];
1671 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1672 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1673 if(((unsigned)(level+threshold1))>threshold2){
1675 level= (bias + level)>>QUANT_SHIFT;
1678 level= (bias - level)>>QUANT_SHIFT;
1687 return last_non_zero;
/**
 * Motion compensation for one partition in one direction (one list):
 * computes the quarter-pel luma / eighth-pel chroma source positions,
 * falls back to ff_emulated_edge_mc() when the reference area crosses
 * the picture edge, then applies the qpel/chroma MC functions.
 * NOTE(review): the emu flag computation and some branch lines are
 * missing from this excerpt.
 */
1690 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1691 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1692 int src_x_offset, int src_y_offset,
1693 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1694 MpegEncContext * const s = &h->s;
/* mv in quarter-pel units; luma_xy selects the qpel interpolation filter */
1695 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1696 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1697 const int luma_xy= (mx&3) + ((my&3)<<2);
1698 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1699 uint8_t * src_cb, * src_cr;
1700 int extra_width= h->emu_edge_width;
1701 int extra_height= h->emu_edge_height;
1703 const int full_mx= mx>>2;
1704 const int full_my= my>>2;
1705 const int pic_width = 16*s->mb_width;
1706 const int pic_height = 16*s->mb_height >> (MB_MBAFF || FIELD_PICTURE);
1708 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
1711 if(mx&7) extra_width -= 3;
1712 if(my&7) extra_height -= 3;
/* reference block would read outside the padded picture: build a
 * padded copy in the edge emulation buffer first */
1714 if( full_mx < 0-extra_width
1715 || full_my < 0-extra_height
1716 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1717 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1719 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1723 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* non-square partition: second half at byte offset delta */
1725 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1728 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1730 if(MB_MBAFF || FIELD_PICTURE){
1731 // chroma offset when predicting from a field of opposite parity
1732 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
1733 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1735 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1736 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1739 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1740 src_cb= s->edge_emu_buffer;
1742 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1745 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1746 src_cr= s->edge_emu_buffer;
1748 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Standard (unweighted) prediction for one partition: put from list 0
 * and/or list 1, switching to the averaging functions for the second
 * direction so bi-prediction averages the two references.
 * NOTE(review): the list0 condition and qpix_op reassignment lines are
 * missing from this excerpt.
 */
1751 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1752 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753 int x_offset, int y_offset,
1754 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756 int list0, int list1){
1757 MpegEncContext * const s = &h->s;
1758 qpel_mc_func *qpix_op= qpix_put;
1759 h264_chroma_mc_func chroma_op= chroma_put;
/* translate partition offsets into absolute picture positions */
1761 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1762 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1763 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1764 x_offset += 8*s->mb_x;
1765 y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
1768 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1769 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1770 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1771 qpix_op, chroma_op);
/* second direction averages with the first (bi-prediction) */
1774 chroma_op= chroma_avg;
1778 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1779 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1780 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1781 qpix_op, chroma_op);
/**
 * Weighted prediction for one partition. Bi-directional: both references
 * are motion-compensated (the second into a scratch buffer), then
 * blended with implicit (use_weight==2) or explicit weights.
 * Uni-directional: a single MC followed by an explicit weight/offset.
 * NOTE(review): the bidirectional condition line and some closing
 * braces are missing from this excerpt.
 */
1785 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1786 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 int x_offset, int y_offset,
1788 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1789 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1790 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1791 int list0, int list1){
1792 MpegEncContext * const s = &h->s;
1794 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1795 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1796 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1797 x_offset += 8*s->mb_x;
1798 y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
1801 /* don't optimize for luma-only case, since B-frames usually
1802 * use implicit weights => chroma too. */
/* list-1 prediction goes into the scratchpad, then gets blended in */
1803 uint8_t *tmp_cb = s->obmc_scratchpad;
1804 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1805 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1806 int refn0 = h->ref_cache[0][ scan8[n] ];
1807 int refn1 = h->ref_cache[1][ scan8[n] ];
1809 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1810 dest_y, dest_cb, dest_cr,
1811 x_offset, y_offset, qpix_put, chroma_put);
1812 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1813 tmp_y, tmp_cb, tmp_cr,
1814 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights sum to 64 and come from a ref-pair table */
1816 if(h->use_weight == 2){
1817 int weight0 = h->implicit_weight[refn0][refn1];
1818 int weight1 = 64 - weight0;
1819 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1820 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1821 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-list weights and offsets from the slice header */
1823 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1824 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1825 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1826 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1828 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1829 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1831 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional weighted prediction */
1834 int list = list1 ? 1 : 0;
1835 int refn = h->ref_cache[list][ scan8[n] ];
1836 Picture *ref= &h->ref_list[list][refn];
1837 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1838 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put, chroma_put);
1841 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1842 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1843 if(h->use_weight_chroma){
1844 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1845 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1846 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1847 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatcher: picks weighted prediction when explicit weighting is on,
 * or when implicit weighting is on and the ref pair's weight is not the
 * trivial 32/32 split; otherwise does standard (averaging) prediction.
 */
1852 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1853 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1854 int x_offset, int y_offset,
1855 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1856 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1857 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1858 int list0, int list1){
1859 if((h->use_weight==2 && list0 && list1
1860 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1861 || h->use_weight==1)
/* weight_op/weight_avg index 0 = 16-wide luma, 3 = 8-wide chroma */
1862 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1863 x_offset, y_offset, qpix_put, chroma_put,
1864 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1866 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1867 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * current MV suggests, a few macroblocks ahead of the decode position.
 * NOTE(review): the guard around refn (original ~1875) is missing from
 * this excerpt.
 */
1870 static inline void prefetch_motion(H264Context *h, int list){
1871 /* fetch pixels for estimated mv 4 macroblocks ahead
1872 * optimized for 64byte cache lines */
1873 MpegEncContext * const s = &h->s;
1874 const int refn = h->ref_cache[list][scan8[0]];
1876 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1877 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1878 uint8_t **src= h->ref_list[list][refn].data;
1879 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1880 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are interleaved in memory: one prefetch covers both */
1881 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1882 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Top-level motion compensation for one inter macroblock: decomposes the
 * MB by partition type (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions)
 * and calls mc_part() for each piece with the matching qpel/chroma
 * function sizes and weight-table slots.
 * NOTE(review): the 8x8 loop header and the i->n mapping lines are
 * missing from this excerpt.
 */
1886 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1887 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1888 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1889 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1890 MpegEncContext * const s = &h->s;
1891 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1892 const int mb_type= s->current_picture.mb_type[mb_xy];
1894 assert(IS_INTER(mb_type));
/* start fetching list-0 reference data early */
1896 prefetch_motion(h, 0);
1898 if(IS_16X16(mb_type)){
1899 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1900 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1901 &weight_op[0], &weight_avg[0],
1902 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1903 }else if(IS_16X8(mb_type)){
/* two 16x8 halves; the second at chroma y offset 4 */
1904 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1905 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1906 &weight_op[1], &weight_avg[1],
1907 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1908 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1909 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1910 &weight_op[1], &weight_avg[1],
1911 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1912 }else if(IS_8X16(mb_type)){
1913 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1914 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1915 &weight_op[2], &weight_avg[2],
1916 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1917 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1918 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1919 &weight_op[2], &weight_avg[2],
1920 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1924 assert(IS_8X8(mb_type));
/* 8x8 mode: each quadrant has its own sub-partition type */
1927 const int sub_mb_type= h->sub_mb_type[i];
1929 int x_offset= (i&1)<<2;
1930 int y_offset= (i&2)<<1;
1932 if(IS_SUB_8X8(sub_mb_type)){
1933 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1934 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1935 &weight_op[3], &weight_avg[3],
1936 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1937 }else if(IS_SUB_8X4(sub_mb_type)){
1938 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1939 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1940 &weight_op[4], &weight_avg[4],
1941 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1942 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1943 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1944 &weight_op[4], &weight_avg[4],
1945 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1946 }else if(IS_SUB_4X8(sub_mb_type)){
1947 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1948 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1949 &weight_op[5], &weight_avg[5],
1950 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1951 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1952 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1953 &weight_op[5], &weight_avg[5],
1954 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1957 assert(IS_SUB_4X4(sub_mb_type));
1959 int sub_x_offset= x_offset + 2*(j&1);
1960 int sub_y_offset= y_offset + (j&2);
1961 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1962 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1963 &weight_op[6], &weight_avg[6],
1964 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1970 prefetch_motion(h, 1);
/**
 * One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants), guarded by a static flag.
 * NOTE(review): the done-flag check and loop headers are missing from
 * this excerpt.
 */
1973 static void decode_init_vlc(void){
1974 static int done = 0;
1980 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1981 &chroma_dc_coeff_token_len [0], 1, 1,
1982 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* 4 coeff_token tables selected by the nC context */
1985 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1986 &coeff_token_len [i][0], 1, 1,
1987 &coeff_token_bits[i][0], 1, 1, 1);
1991 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1992 &chroma_dc_total_zeros_len [i][0], 1, 1,
1993 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1995 for(i=0; i<15; i++){
1996 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1997 &total_zeros_len [i][0], 1, 1,
1998 &total_zeros_bits[i][0], 1, 1, 1);
2002 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2003 &run_len [i][0], 1, 1,
2004 &run_bits[i][0], 1, 1, 1);
/* runs longer than 6 use the dedicated run7 table */
2006 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2007 &run_len [6][0], 1, 1,
2008 &run_bits[6][0], 1, 1, 1);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread buffers of every slice-thread context.
 * av_freep() NULLs each pointer, so a later alloc/free cycle is safe.
 */
2012 static void free_tables(H264Context *h){
2015 av_freep(&h->intra4x4_pred_mode);
2016 av_freep(&h->chroma_pred_mode_table);
2017 av_freep(&h->cbp_table);
2018 av_freep(&h->mvd_table[0]);
2019 av_freep(&h->mvd_table[1]);
2020 av_freep(&h->direct_table);
2021 av_freep(&h->non_zero_count);
2022 av_freep(&h->slice_table_base);
// slice_table is an offset view into slice_table_base, not its own allocation
2023 h->slice_table= NULL;
2025 av_freep(&h->mb2b_xy);
2026 av_freep(&h->mb2b8_xy);
2028 for(i = 0; i < MAX_SPS_COUNT; i++)
2029 av_freep(h->sps_buffers + i);
2031 for(i = 0; i < MAX_PPS_COUNT; i++)
2032 av_freep(h->pps_buffers + i);
// per-thread buffers live in the thread contexts, not in h itself
2034 for(i = 0; i < h->s.avctx->thread_count; i++) {
2035 hx = h->thread_context[i];
2037 av_freep(&hx->top_borders[1]);
2038 av_freep(&hx->top_borders[0]);
2039 av_freep(&hx->s.obmc_scratchpad);
2040 av_freep(&hx->s.allocated_edge_emu_buffer);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If both 8x8 scaling matrices are identical, table 1
 * aliases table 0 to save work. "transpose" reorders coefficients when a
 * non-C IDCT with a different scan layout is in use.
 */
2044 static void init_dequant8_coeff_table(H264Context *h){
2046 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2047 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2048 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2050 for(i=0; i<2; i++ ){
// identical matrices -> share one buffer instead of recomputing
2051 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2052 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2056 for(q=0; q<52; q++){
// qp = 6*div6[q] + rem6[q]; shift applies the div6 part as a power of two
2057 int shift = ff_div6[q];
2058 int idx = ff_rem6[q];
2060 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2061 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2062 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables (6 matrices: intra/inter Y/Cb/Cr)
 * for all 52 QP values from the PPS scaling matrices. Matrices that compare
 * equal to an earlier one alias that earlier buffer.
 */
2067 static void init_dequant4_coeff_table(H264Context *h){
2069 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2070 for(i=0; i<6; i++ ){
2071 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier buffer when the scaling matrix is identical
2073 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2074 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2081 for(q=0; q<52; q++){
// +2 extra shift relative to the 8x8 case (4x4 norm factor)
2082 int shift = ff_div6[q] + 2;
2083 int idx = ff_rem6[q];
2085 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2086 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2087 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS/SPS: always the 4x4
 * tables, the 8x8 tables only when 8x8 transform mode is enabled, and a
 * flat 1<<6 identity for QP 0 when lossless transform bypass is active.
 */
2092 static void init_dequant_tables(H264Context *h){
2094 init_dequant4_coeff_table(h);
2095 if(h->pps.transform_8x8_mode)
2096 init_dequant8_coeff_table(h);
2097 if(h->sps.transform_bypass){
// bypass: QP 0 must dequantize to the identity (unity scale = 1<<6)
2100 h->dequant4_coeff[i][0][x] = 1<<6;
2101 if(h->pps.transform_8x8_mode)
2104 h->dequant8_coeff[i][0][x] = 1<<6;
2111 * needs width/height
// Allocates the per-stream macroblock tables (prediction modes, nnz, slice
// table, cbp, CABAC mvd/direct tables) plus the mb->b/b8 index maps, then
// triggers dequant table init on first use. Returns nonzero on alloc failure
// (via CHECKED_ALLOCZ's error path).
2113 static int alloc_tables(H264Context *h){
2114 MpegEncContext * const s = &h->s;
// one extra row of MBs as a guard band for edge accesses
2115 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2118 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2120 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2121 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// these tables are only read by the CABAC decoder
2124 if( h->pps.cabac ) {
2125 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2126 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2127 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table points past the guard rows
2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// map each MB index to its 4x4-block and 8x8-block raster indices
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
2138 const int mb_xy= x + y*s->mb_stride;
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
// scratchpad is sized per-linesize, so it is allocated later in frame_start()
2147 s->obmc_scratchpad = NULL;
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
2159 * Mimic alloc_tables(), but for every context thread.
// Shares the read-mostly tables of the master context with a slice-thread
// context by pointer copy (no per-thread allocation); only obmc_scratchpad
// stays per-thread and is allocated lazily in frame_start().
2161 static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
// per-thread; allocated on demand in frame_start()
2173 dst->s.obmc_scratchpad = NULL;
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2179 * Allocate buffers which are not shared amongst multiple threads.
// Returns 0 on success, -1 if any allocation failed (free_tables() does the
// cleanup for partially-allocated state).
2181 static int context_init(H264Context *h){
2182 MpegEncContext * const s = &h->s;
// one row of luma(16) + cb(8) + cr(8) border pixels per MB column
2184 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2185 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2187 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2188 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2189 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
2190 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
2193 return -1; // free_tables will clean up for us
/**
 * Initialization shared by all entry paths: copy dimensions/codec id from
 * the AVCodecContext, set up intra prediction, and default the scaling
 * matrices to flat 16 (no scaling) until a PPS overrides them.
 */
2196 static void common_init(H264Context *h){
2197 MpegEncContext * const s = &h->s;
2199 s->width = s->avctx->width;
2200 s->height = s->avctx->height;
2201 s->codec_id= s->avctx->codec->id;
2203 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 = "no PPS seen yet", forces dequant re-init on first slice
2205 h->dequant_coeff_pps= -1;
2206 s->unrestricted_mv=1;
2207 s->decode=1; //FIXME
// flat default scaling lists (value 16 == unity) per the spec defaults
2209 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2210 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set MPV defaults, configure H.264-specific fields,
 * detect AVC ("avcC", extradata starting with byte 1) vs Annex-B extradata,
 * and register this context as thread context 0.
 */
2213 static int decode_init(AVCodecContext *avctx){
2214 H264Context *h= avctx->priv_data;
2215 MpegEncContext * const s = &h->s;
2217 MPV_decode_defaults(s);
2222 s->out_format = FMT_H264;
2223 s->workaround_bugs= avctx->workaround_bugs;
2226 // s->decode_mb= ff_h263_decode_mb;
2227 s->quarter_sample = 1;
2229 avctx->pix_fmt= PIX_FMT_YUV420P;
// leading byte 1 identifies avcC-style (length-prefixed) extradata
2233 if(avctx->extradata_size > 0 && avctx->extradata &&
2234 *(char *)avctx->extradata == 1){
2241 h->thread_context[0] = h;
/**
 * Per-frame setup: start the MPV frame/error resilience, reset key_frame
 * (IDR markings are OR'd in later), compute the block_offset tables for
 * frame and field geometry, and lazily allocate the per-thread bipred
 * scratchpad now that linesize is known.
 */
2245 static int frame_start(H264Context *h){
2246 MpegEncContext * const s = &h->s;
2249 if(MPV_frame_start(s, s->avctx) < 0)
2251 ff_er_frame_start(s);
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
2255 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2256 * See decode_nal_units().
2258 s->current_picture_ptr->key_frame= 0;
2260 assert(s->linesize && s->uvlinesize);
// [0..15]: frame-mode luma offsets; [24..39]: field-mode (doubled stride)
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma offsets, shared for cb and cr
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279 /* some macroblocks will be accessed before they're available */
2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2283 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Save the right column (left_border for the next MB) and bottom row
 * (top_borders for the MB row below) of the just-decoded macroblock, so the
 * deblocking filter can later see unfiltered neighbor pixels.
 */
2287 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2288 MpegEncContext * const s = &h->s;
2292 src_cb -= uvlinesize;
2293 src_cr -= uvlinesize;
2295 // There are two lines saved, the line above the top macroblock of a pair,
2296 // and the line above the bottom macroblock
2297 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2298 for(i=1; i<17; i++){
2299 h->left_border[i]= src_y[15+i* linesize];
// save the MB's bottom luma row as the next row's top border (16 bytes)
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// chroma borders are skipped in gray-only decoding
2305 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2306 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2307 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2309 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2310 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2312 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2313 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved unfiltered border pixels with
 * the current picture around an intra MB, so intra prediction sees the
 * unfiltered neighbors while deblocking is enabled. With
 * deblocking_filter==2 the swap is limited to same-slice neighbors.
 */
2317 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2318 MpegEncContext * const s = &h->s;
2325 if(h->deblocking_filter == 2) {
// filter mode 2: only deblock within the same slice
2326 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2327 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2328 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2330 deblock_left = (s->mb_x > 0);
2331 deblock_top = (s->mb_y > 0);
// step back to the top-left neighbor pixel
2334 src_y -= linesize + 1;
2335 src_cb -= uvlinesize + 1;
2336 src_cr -= uvlinesize + 1;
2338 #define XCHG(a,b,t,xchg)\
2345 for(i = !deblock_top; i<17; i++){
2346 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2351 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2352 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2353 if(s->mb_x+1 < s->mb_width){
2354 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2358 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2360 for(i = !deblock_top; i<9; i++){
2361 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2362 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2366 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2367 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): save borders for a whole macroblock
 * pair (two MBs stacked vertically), using both top_borders[0] and
 * top_borders[1] and a 34-entry left_border column.
 */
2372 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2373 MpegEncContext * const s = &h->s;
2376 src_y -= 2 * linesize;
2377 src_cb -= 2 * uvlinesize;
2378 src_cr -= 2 * uvlinesize;
2380 // There are two lines saved, the line above the top macroblock of a pair,
2381 // and the line above the bottom macroblock
2382 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2383 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2384 for(i=2; i<34; i++){
2385 h->left_border[i]= src_y[15+i* linesize];
// save the pair's two bottom luma rows
2388 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2389 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2390 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2391 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2393 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2394 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2395 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2396 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2397 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2398 for(i=2; i<18; i++){
2399 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2400 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2402 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2403 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2404 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2405 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap/restore the saved unfiltered
 * borders for a full macroblock pair (two rows of top borders, 34-deep
 * left border).
 */
2409 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2410 MpegEncContext * const s = &h->s;
2413 int deblock_left = (s->mb_x > 0);
// top neighbor is two MB rows up in MBAFF, hence mb_y > 1
2414 int deblock_top = (s->mb_y > 1);
2416 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2418 src_y -= 2 * linesize + 1;
2419 src_cb -= 2 * uvlinesize + 1;
2420 src_cr -= 2 * uvlinesize + 1;
2422 #define XCHG(a,b,t,xchg)\
2429 for(i = (!deblock_top)<<1; i<34; i++){
2430 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2435 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2437 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2439 if(s->mb_x+1 < s->mb_width){
2440 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2441 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2445 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2447 for(i = (!deblock_top) << 1; i<18; i++){
2448 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2449 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2453 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2454 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2455 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2456 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock into the current picture: intra prediction or
 * motion compensation, IDCT+add of the residual, then deblocking. "simple"
 * is a compile-time flag (always_inline) that strips the uncommon paths
 * (MBAFF, PCM, SVQ3, gray) for the fast common case.
 */
2461 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2462 MpegEncContext * const s = &h->s;
2463 const int mb_x= s->mb_x;
2464 const int mb_y= s->mb_y;
2465 const int mb_xy= mb_x + mb_y*s->mb_stride;
2466 const int mb_type= s->current_picture.mb_type[mb_xy];
2467 uint8_t *dest_y, *dest_cb, *dest_cr;
2468 int linesize, uvlinesize /*dct_offset*/;
2470 int *block_offset = &h->block_offset[0];
2471 const unsigned int bottom = mb_y & 1;
2472 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2473 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2474 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination pointers for this MB in the current picture planes
2476 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2477 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2478 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2480 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2481 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field MB: double the strides and use the field block_offset table
2483 if (!simple && MB_FIELD) {
2484 linesize = h->mb_linesize = s->linesize * 2;
2485 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2486 block_offset = &h->block_offset[24];
2487 if(mb_y&1){ //FIXME move out of this func?
2488 dest_y -= s->linesize*15;
2489 dest_cb-= s->uvlinesize*7;
2490 dest_cr-= s->uvlinesize*7;
// remap ref indices to distinguish top/bottom field references
2494 for(list=0; list<h->list_count; list++){
2495 if(!USES_LIST(mb_type, list))
2497 if(IS_16X16(mb_type)){
2498 int8_t *ref = &h->ref_cache[list][scan8[0]];
2499 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2501 for(i=0; i<16; i+=4){
2502 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2503 int ref = h->ref_cache[list][scan8[i]];
2505 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
2511 linesize = h->mb_linesize = s->linesize;
2512 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2513 // dct_offset = s->linesize * 16;
// pick the luma IDCT pair: bypass (plain pixel add), 8x8, or 4x4
2516 if(transform_bypass){
2518 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2519 }else if(IS_8x8DCT(mb_type)){
2520 idct_dc_add = s->dsp.h264_idct8_dc_add;
2521 idct_add = s->dsp.h264_idct8_add;
2523 idct_dc_add = s->dsp.h264_idct_dc_add;
2524 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblock + intra: expose unfiltered pair borders for prediction
2527 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2528 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2529 int mbt_y = mb_y&~1;
2530 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2531 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2532 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2533 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM: raw samples were parsed into h->mb; copy them straight out
2536 if (!simple && IS_INTRA_PCM(mb_type)) {
2539 // The pixels are stored in h->mb array in the same order as levels,
2540 // copy them in output in the correct order.
2541 for(i=0; i<16; i++) {
2542 for (y=0; y<4; y++) {
2543 for (x=0; x<4; x++) {
2544 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2548 for(i=16; i<16+4; i++) {
2549 for (y=0; y<4; y++) {
2550 for (x=0; x<4; x++) {
2551 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2555 for(i=20; i<20+4; i++) {
2556 for (y=0; y<4; y++) {
2557 for (x=0; x<4; x++) {
2558 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// intra prediction (with border swap so predictors are unfiltered)
2563 if(IS_INTRA(mb_type)){
2564 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2565 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2567 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2568 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2569 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2572 if(IS_INTRA4x4(mb_type)){
2573 if(simple || !s->encoding){
2574 if(IS_8x8DCT(mb_type)){
2575 for(i=0; i<16; i+=4){
2576 uint8_t * const ptr= dest_y + block_offset[i];
2577 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2578 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2579 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2580 (h->topright_samples_available<<i)&0x4000, linesize);
2582 if(nnz == 1 && h->mb[i*16])
2583 idct_dc_add(ptr, h->mb + i*16, linesize);
2585 idct_add(ptr, h->mb + i*16, linesize);
2589 for(i=0; i<16; i++){
2590 uint8_t * const ptr= dest_y + block_offset[i];
2592 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these modes need top-right samples; fake them when unavailable
2595 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2596 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2597 assert(mb_y || linesize <= block_offset[i]);
2598 if(!topright_avail){
2599 tr= ptr[3 - linesize]*0x01010101;
2600 topright= (uint8_t*) &tr;
2602 topright= ptr + 4 - linesize;
2606 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2607 nnz = h->non_zero_count_cache[ scan8[i] ];
2610 if(nnz == 1 && h->mb[i*16])
2611 idct_dc_add(ptr, h->mb + i*16, linesize);
2613 idct_add(ptr, h->mb + i*16, linesize);
2615 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2620 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2622 if(!transform_bypass)
2623 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2625 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2627 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2628 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter MB: run motion compensation into the destination planes
2630 hl_motion(h, dest_y, dest_cb, dest_cr,
2631 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2632 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2633 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add the luma residual (intra4x4 already added it above)
2637 if(!IS_INTRA4x4(mb_type)){
2639 if(IS_INTRA16x16(mb_type)){
2640 for(i=0; i<16; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ])
2642 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2643 else if(h->mb[i*16])
2644 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2647 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2648 for(i=0; i<16; i+=di){
2649 int nnz = h->non_zero_count_cache[ scan8[i] ];
2651 if(nnz==1 && h->mb[i*16])
2652 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2654 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2659 for(i=0; i<16; i++){
2660 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2661 uint8_t * const ptr= dest_y + block_offset[i];
2662 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual
2668 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2669 uint8_t *dest[2] = {dest_cb, dest_cr};
2670 if(transform_bypass){
2671 idct_add = idct_dc_add = s->dsp.add_pixels4;
2673 idct_add = s->dsp.h264_idct_add;
2674 idct_dc_add = s->dsp.h264_idct_dc_add;
2675 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2676 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2679 for(i=16; i<16+8; i++){
2680 if(h->non_zero_count_cache[ scan8[i] ])
2681 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2682 else if(h->mb[i*16])
2683 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2686 for(i=16; i<16+8; i++){
2687 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2688 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2689 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking: MBAFF filters a whole pair at once, once the bottom MB is done
2695 if(h->deblocking_filter) {
2696 if (!simple && FRAME_MBAFF) {
2697 //FIXME try deblocking one mb at a time?
2698 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2699 const int mb_y = s->mb_y - 1;
2700 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2701 const int mb_xy= mb_x + mb_y*s->mb_stride;
2702 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2703 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2704 if (!bottom) return;
2705 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2706 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2707 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2709 if(IS_INTRA(mb_type_top | mb_type_bottom))
2710 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2712 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2716 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2717 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2718 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2719 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2720 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2723 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2724 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2725 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2726 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2727 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2729 tprintf(h->s.avctx, "call filter_mb\n");
2730 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2731 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2732 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2738 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2740 static void hl_decode_mb_simple(H264Context *h){
2741 hl_decode_mb_internal(h, 1);
2745 * Process a macroblock; this handles edge cases, such as interlacing.
2747 static void av_noinline hl_decode_mb_complex(H264Context *h){
2748 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the fast (simple) or slow (complex)
 * specialization of hl_decode_mb_internal(), based on the features this MB
 * actually needs.
 */
2751 static void hl_decode_mb(H264Context *h){
2752 MpegEncContext * const s = &h->s;
2753 const int mb_x= s->mb_x;
2754 const int mb_y= s->mb_y;
2755 const int mb_xy= mb_x + mb_y*s->mb_stride;
2756 const int mb_type= s->current_picture.mb_type[mb_xy];
// any of MBAFF/field/PCM/SVQ3/gray/encoding forces the complex path
2757 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2763 hl_decode_mb_complex(h);
2764 else hl_decode_mb_simple(h);
2768 * fills the default_ref_list.
// Builds the default L0/L1 reference lists per the spec: B slices sort
// short-term refs by POC around the current picture (L0 descending before,
// L1 ascending after), then append long-term refs; P slices use plain
// short-term order. Unfilled tail entries are zeroed.
2770 static int fill_default_ref_list(H264Context *h){
2771 MpegEncContext * const s = &h->s;
2773 int smallest_poc_greater_than_current = -1;
2774 Picture sorted_short_ref[32];
2776 if(h->slice_type==B_TYPE){
2780 /* sort frame according to poc in B slice */
// selection sort: repeatedly pick the smallest POC above the last limit
2781 for(out_i=0; out_i<h->short_ref_count; out_i++){
2783 int best_poc=INT_MAX;
2785 for(i=0; i<h->short_ref_count; i++){
2786 const int poc= h->short_ref[i]->poc;
2787 if(poc > limit && poc < best_poc){
2793 assert(best_i != INT_MIN);
2796 sorted_short_ref[out_i]= *h->short_ref[best_i];
2797 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where the "future" (poc >= current) part of the sort starts
2798 if (-1 == smallest_poc_greater_than_current) {
2799 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2800 smallest_poc_greater_than_current = out_i;
2806 if(s->picture_structure == PICT_FRAME){
2807 if(h->slice_type==B_TYPE){
2809 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2811 // find the largest poc
// L0 walks backwards (past refs first), L1 forwards (future refs first)
2812 for(list=0; list<2; list++){
2815 int step= list ? -1 : 1;
2817 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2818 while(j<0 || j>= h->short_ref_count){
2819 if(j != -99 && step == (list ? -1 : 1))
2822 j= smallest_poc_greater_than_current + (step>>1);
2824 if(sorted_short_ref[j].reference != 3) continue;
2825 h->default_ref_list[list][index ]= sorted_short_ref[j];
2826 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// long-term refs come after all short-term refs, indexed by LT index
2829 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2830 if(h->long_ref[i] == NULL) continue;
2831 if(h->long_ref[i]->reference != 3) continue;
2833 h->default_ref_list[ list ][index ]= *h->long_ref[i];
2834 h->default_ref_list[ list ][index++].pic_id= i;;
2837 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2838 // swap the two first elements of L1 when
2839 // L0 and L1 are identical
2840 Picture temp= h->default_ref_list[1][0];
2841 h->default_ref_list[1][0] = h->default_ref_list[1][1];
2842 h->default_ref_list[1][1] = temp;
2845 if(index < h->ref_count[ list ])
2846 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P slice: short-term refs in their stored (frame_num) order
2850 for(i=0; i<h->short_ref_count; i++){
2851 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
2852 h->default_ref_list[0][index ]= *h->short_ref[i];
2853 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2855 for(i = 0; i < 16; i++){
2856 if(h->long_ref[i] == NULL) continue;
2857 if(h->long_ref[i]->reference != 3) continue;
2858 h->default_ref_list[0][index ]= *h->long_ref[i];
2859 h->default_ref_list[0][index++].pic_id= i;;
2861 if(index < h->ref_count[0])
2862 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2865 if(h->slice_type==B_TYPE){
2867 //FIXME second field blah
2871 for (i=0; i<h->ref_count[0]; i++) {
2872 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2874 if(h->slice_type==B_TYPE){
2875 for (i=0; i<h->ref_count[1]; i++) {
// NOTE(review): prints default_ref_list[0] data while labeled List1 — looks
// like a copy/paste slip in the trace output; verify intent before changing.
2876 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
2883 static void print_short_term(H264Context *h);
2884 static void print_long_term(H264Context *h);
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it: start
 * from the default lists, then for each reordering command locate the
 * referenced short-term (by frame_num delta) or long-term (by index)
 * picture and move it to the current list position. Returns 0 on success,
 * -1 on a malformed bitstream.
 */
2886 static int decode_ref_pic_list_reordering(H264Context *h){
2887 MpegEncContext * const s = &h->s;
2890 print_short_term(h);
2892 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
2894 for(list=0; list<h->list_count; list++){
2895 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_l0/l1
2897 if(get_bits1(&s->gb)){
2898 int pred= h->curr_pic_num;
2900 for(index=0; ; index++){
2901 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2902 unsigned int pic_id;
2904 Picture *ref = NULL;
// idc 3 terminates the reordering command list
2906 if(reordering_of_pic_nums_idc==3)
2909 if(index >= h->ref_count[list]){
2910 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2914 if(reordering_of_pic_nums_idc<3){
2915 if(reordering_of_pic_nums_idc<2){
// idc 0/1: short-term ref addressed by a +-delta from the running predictor
2916 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2918 if(abs_diff_pic_num >= h->max_pic_num){
2919 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2923 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2924 else pred+= abs_diff_pic_num;
// wrap modulo max_pic_num (power of two)
2925 pred &= h->max_pic_num - 1;
2927 for(i= h->short_ref_count-1; i>=0; i--){
2928 ref = h->short_ref[i];
2929 assert(ref->reference == 3);
2930 assert(!ref->long_ref);
2931 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
2935 ref->pic_id= ref->frame_num;
// idc 2: long-term ref addressed directly by its index
2937 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2939 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2942 ref = h->long_ref[pic_id];
2944 ref->pic_id= pic_id;
2945 assert(ref->reference == 3);
2946 assert(ref->long_ref);
2954 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2955 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// shift entries down and insert the picked ref at the current index
2957 for(i=index; i+1<h->ref_count[list]; i++){
2958 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2961 for(; i > index; i--){
2962 h->ref_list[list][i]= h->ref_list[list][i-1];
2964 h->ref_list[list][index]= *ref;
2967 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// replace any still-empty slots with the current picture (error concealment)
2973 for(list=0; list<h->list_count; list++){
2974 for(index= 0; index < h->ref_count[list]; index++){
2975 if(!h->ref_list[list][index].data[0])
2976 h->ref_list[list][index]= s->current_picture;
2980 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
2981 direct_dist_scale_factor(h);
2982 direct_ref_list_init(h);
/**
 * Derive per-field reference entries for MBAFF: for each frame reference i,
 * build two field pictures at list slots 16+2*i (top) and 16+2*i+1 (bottom)
 * with doubled linesize, and duplicate the weighted-prediction tables for
 * those field entries.
 */
2986 static void fill_mbaff_ref_list(H264Context *h){
2988 for(list=0; list<2; list++){ //FIXME try list_count
2989 for(i=0; i<h->ref_count[list]; i++){
2990 Picture *frame = &h->ref_list[list][i];
2991 Picture *field = &h->ref_list[list][16+2*i];
// top field: double the stride; bottom field: same, offset by one line
2994 field[0].linesize[j] <<= 1;
2995 field[1] = field[0];
2997 field[1].data[j] += frame->linesize[j];
// both fields inherit the frame's explicit weights/offsets
2999 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3000 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3002 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3003 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3007 for(j=0; j<h->ref_count[1]; j++){
3008 for(i=0; i<h->ref_count[0]; i++)
3009 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3010 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3011 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the explicit weighted-prediction table (pred_weight_table() syntax)
 * from the slice header: log2 weight denominators, then per-reference
 * luma/chroma weights and offsets for list 0 (and list 1 for B slices).
 * Sets h->use_weight / h->use_weight_chroma when non-default values occur.
 * NOTE(review): this dump is missing several lines (declarations, else
 * branches, closing braces) — do not assume the visible flow is complete.
 */
3015 static int pred_weight_table(H264Context *h){
3016 MpegEncContext * const s = &h->s;
3018 int luma_def, chroma_def;
3021 h->use_weight_chroma= 0;
3022 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3023 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
 /* default weight is 1.0 in fixed point, i.e. 1 << denom */
3024 luma_def = 1<<h->luma_log2_weight_denom;
3025 chroma_def = 1<<h->chroma_log2_weight_denom;
3027 for(list=0; list<2; list++){
3028 for(i=0; i<h->ref_count[list]; i++){
3029 int luma_weight_flag, chroma_weight_flag;
3031 luma_weight_flag= get_bits1(&s->gb);
3032 if(luma_weight_flag){
3033 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3034 h->luma_offset[list][i]= get_se_golomb(&s->gb);
 /* any non-default weight/offset enables explicit weighting */
3035 if( h->luma_weight[list][i] != luma_def
3036 || h->luma_offset[list][i] != 0)
3039 h->luma_weight[list][i]= luma_def;
3040 h->luma_offset[list][i]= 0;
3043 chroma_weight_flag= get_bits1(&s->gb);
3044 if(chroma_weight_flag){
3047 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3048 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3049 if( h->chroma_weight[list][i][j] != chroma_def
3050 || h->chroma_offset[list][i][j] != 0)
3051 h->use_weight_chroma= 1;
3056 h->chroma_weight[list][i][j]= chroma_def;
3057 h->chroma_offset[list][i][j]= 0;
 /* a list-1 table is only present for B slices */
3061 if(h->slice_type != B_TYPE) break;
3063 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit bi-prediction weight table from POC distances
 * (H.264 weighted_bipred_idc == 2). Distances outside the usable range
 * fall back to equal 32/32 weighting.
 * NOTE(review): lines are missing from this dump (the early-out body,
 * else branches, closing braces) — flow below is incomplete.
 */
3067 static void implicit_weight_table(H264Context *h){
3068 MpegEncContext * const s = &h->s;
3070 int cur_poc = s->current_picture_ptr->poc;
 /* special case: single ref each way, symmetric around current POC */
3072 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3073 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3075 h->use_weight_chroma= 0;
3080 h->use_weight_chroma= 2;
3081 h->luma_log2_weight_denom= 5;
3082 h->chroma_log2_weight_denom= 5;
3084 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3085 int poc0 = h->ref_list[0][ref0].poc;
3086 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3087 int poc1 = h->ref_list[1][ref1].poc;
 /* td/tb/tx/dist_scale_factor per the spec's implicit weight derivation */
3088 int td = av_clip(poc1 - poc0, -128, 127);
3090 int tb = av_clip(cur_poc - poc0, -128, 127);
3091 int tx = (16384 + (FFABS(td) >> 1)) / td;
3092 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3093 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3094 h->implicit_weight[ref0][ref1] = 32;
3096 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3098 h->implicit_weight[ref0][ref1] = 32;
3104 * Mark a picture as no longer needed for reference. The refmask
3105 * argument allows unreferencing of individual fields or the whole frame.
3106 * If the picture becomes entirely unreferenced, but is being held for
3107 * display purposes, it is marked as such.
3108 * @param refmask mask of fields to unreference; the mask is bitwise
3109 * anded with the reference marking of pic
3110 * @return non-zero if pic becomes entirely unreferenced (except possibly
3111 * for display purposes) zero if one of the fields remains in reference
/* See the documentation comment above: drop reference marking bits per
 * refmask; pictures still queued for output keep a DELAYED_PIC_REF tag.
 * NOTE(review): return statements and braces are missing from this dump. */
3114 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
 /* some masked field reference remains -> picture stays referenced */
3116 if (pic->reference &= refmask) {
 /* fully unreferenced: keep alive as display-only if still pending output */
3119 if(pic == h->delayed_output_pic)
3120 pic->reference=DELAYED_PIC_REF;
3122 for(i = 0; h->delayed_pic[i]; i++)
3123 if(pic == h->delayed_pic[i]){
3124 pic->reference=DELAYED_PIC_REF;
3133 * instantaneous decoder refresh.
/**
 * Empties both reference lists, as required after an IDR
 * (instantaneous decoder refresh) slice.
 */
3135 static void idr(H264Context *h){
 /* release all long-term references */
3138 for(i=0; i<16; i++){
3139 if (h->long_ref[i] != NULL) {
3140 unreference_pic(h, h->long_ref[i], 0);
3141 h->long_ref[i]= NULL;
3144 h->long_ref_count=0;
 /* release all short-term references */
3146 for(i=0; i<h->short_ref_count; i++){
3147 unreference_pic(h, h->short_ref[i], 0);
3148 h->short_ref[i]= NULL;
3150 h->short_ref_count=0;
3153 /* forget old pics after a seek */
/* Drops all delayed-output pictures and the current picture after a seek.
 * NOTE(review): at least one line is missing between the delayed-pic reset
 * and the current-picture reset in this dump. */
3154 static void flush_dpb(AVCodecContext *avctx){
3155 H264Context *h= avctx->priv_data;
 /* clear the reorder/delay buffer unconditionally */
3157 for(i=0; i<16; i++) {
3158 if(h->delayed_pic[i])
3159 h->delayed_pic[i]->reference= 0;
3160 h->delayed_pic[i]= NULL;
3162 if(h->delayed_output_pic)
3163 h->delayed_output_pic->reference= 0;
3164 h->delayed_output_pic= NULL;
3166 if(h->s.current_picture_ptr)
3167 h->s.current_picture_ptr->reference= 0;
3171 * Find a Picture in the short term reference list by frame number.
3172 * @param frame_num frame number to search for
3173 * @param idx the index into h->short_ref where returned picture is found
3174 * undefined if no picture found.
3175 * @return pointer to the found picture, or NULL if no pic with the provided
3176 * frame number is found
/* Linear search of the short-term list by frame_num; see doc comment above.
 * NOTE(review): the successful-return body and final return are missing
 * from this dump. */
3178 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3179 MpegEncContext * const s = &h->s;
3182 for(i=0; i<h->short_ref_count; i++){
3183 Picture *pic= h->short_ref[i];
3184 if(s->avctx->debug&FF_DEBUG_MMCO)
3185 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3186 if(pic->frame_num == frame_num) {
3195 * Remove a picture from the short term reference list by its index in
3196 * that list. This does no checking on the provided index; it is assumed
3197 * to be valid. Other list entries are shifted down.
3198 * @param i index into h->short_ref of picture to remove.
3200 static void remove_short_at_index(H264Context *h, int i){
3201 assert(i > 0 && i < h->short_ref_count);
3202 h->short_ref[i]= NULL;
3203 if (--h->short_ref_count)
3204 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3209 * @return the removed picture or NULL if an error occurs
/* Removes (and returns) the short-term reference with the given frame_num,
 * or NULL if none is found. NOTE(review): local declarations and the
 * return statement are missing from this dump. */
3211 static Picture * remove_short(H264Context *h, int frame_num){
3212 MpegEncContext * const s = &h->s;
3216 if(s->avctx->debug&FF_DEBUG_MMCO)
3217 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3219 pic = find_short(h, frame_num, &i);
3221 remove_short_at_index(h, i);
3228 * @return the removed picture or NULL if an error occurs
/* Detaches and returns the long-term reference at index i (NULL if the
 * slot was empty), decrementing the long-term count when occupied.
 * NOTE(review): the declaration of pic and the return are missing here. */
3230 static Picture * remove_long(H264Context *h, int i){
3233 pic= h->long_ref[i];
3234 h->long_ref[i]= NULL;
3235 if(pic) h->long_ref_count--;
3241 * print short term list
/* Debug helper: dumps the short-term list when FF_DEBUG_MMCO is enabled. */
3243 static void print_short_term(H264Context *h) {
3245 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3246 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3247 for(i=0; i<h->short_ref_count; i++){
3248 Picture *pic= h->short_ref[i];
3249 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3255 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is enabled.
 * NOTE(review): the NULL-slot guard between the loop header and the log
 * call is missing from this dump. */
3257 static void print_long_term(H264Context *h) {
3259 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3260 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3261 for(i = 0; i < 16; i++){
3262 Picture *pic= h->long_ref[i];
3264 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3271 * Executes the reference picture marking (memory management control operations).
/**
 * Executes the decoded memory management control operations (MMCOs) on the
 * short-/long-term reference lists, then (unless the current picture
 * became a long-term reference) inserts it at the head of the short-term
 * list. See H.264 clause 8.2.5.
 * NOTE(review): several case labels, braces and the return are missing
 * from this dump — notably the MMCO_RESET label before line 3330.
 */
3273 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3274 MpegEncContext * const s = &h->s;
3276 int current_is_long=0;
3279 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3280 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3282 for(i=0; i<mmco_count; i++){
3283 if(s->avctx->debug&FF_DEBUG_MMCO)
3284 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3286 switch(mmco[i].opcode){
 /* mark a short-term picture as unused for reference */
3287 case MMCO_SHORT2UNUSED:
3288 pic= remove_short(h, mmco[i].short_pic_num);
3290 unreference_pic(h, pic, 0);
3291 else if(s->avctx->debug&FF_DEBUG_MMCO)
3292 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
 /* move a short-term picture to a long-term slot, evicting any occupant */
3294 case MMCO_SHORT2LONG:
3295 pic= remove_long(h, mmco[i].long_arg);
3296 if(pic) unreference_pic(h, pic, 0);
3298 h->long_ref[ mmco[i].long_arg ]= remove_short(h, mmco[i].short_pic_num);
3299 if (h->long_ref[ mmco[i].long_arg ]){
3300 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3301 h->long_ref_count++;
 /* mark a long-term picture as unused for reference */
3304 case MMCO_LONG2UNUSED:
3305 pic= remove_long(h, mmco[i].long_arg);
3307 unreference_pic(h, pic, 0);
3308 else if(s->avctx->debug&FF_DEBUG_MMCO)
3309 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
 /* presumably the MMCO_LONG case (label line missing from this dump):
  * store the current picture as a long-term reference */
3312 pic= remove_long(h, mmco[i].long_arg);
3313 if(pic) unreference_pic(h, pic, 0);
3315 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3316 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3317 h->long_ref_count++;
 /* shrink the long-term list to the new maximum index */
3321 case MMCO_SET_MAX_LONG:
3322 assert(mmco[i].long_arg <= 16);
3323 // just remove the long term which index is greater than new max
3324 for(j = mmco[i].long_arg; j<16; j++){
3325 pic = remove_long(h, j);
3326 if (pic) unreference_pic(h, pic, 0);
 /* presumably MMCO_RESET (label line missing): drop all references */
3330 while(h->short_ref_count){
3331 pic= remove_short(h, h->short_ref[0]->frame_num);
3332 if(pic) unreference_pic(h, pic, 0);
3334 for(j = 0; j < 16; j++) {
3335 pic= remove_long(h, j);
3336 if(pic) unreference_pic(h, pic, 0);
 /* finally: unless made long-term above, the current picture becomes the
  * newest short-term reference */
3343 if(!current_is_long){
3344 pic= remove_short(h, s->current_picture_ptr->frame_num);
3346 unreference_pic(h, pic, 0);
3347 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3350 if(h->short_ref_count)
3351 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3353 h->short_ref[0]= s->current_picture_ptr;
3354 h->short_ref[0]->long_ref=0;
3355 h->short_ref_count++;
3358 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices only no_output_of_prior_pics / long_term_reference flags
 * are read; otherwise an adaptive MMCO list or the implicit sliding-window
 * operation is produced.
 * NOTE(review): else branches, mmco-count bookkeeping and the return are
 * missing from this dump.
 */
3363 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3364 MpegEncContext * const s = &h->s;
3367 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3368 s->broken_link= get_bits1(gb) -1;
3369 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3370 if(h->mmco[0].long_arg == -1)
3373 h->mmco[0].opcode= MMCO_LONG;
3377 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3378 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3379 MMCOOpcode opcode= get_ue_golomb(gb);
3381 h->mmco[i].opcode= opcode;
 /* difference_of_pic_nums_minus1 -> absolute short-term pic num */
3382 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3383 h->mmco[i].short_pic_num= (h->frame_num - get_ue_golomb(gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
3384 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3385 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
 /* opcodes carrying a long-term index argument */
3389 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3390 unsigned int long_arg= get_ue_golomb(gb);
3391 if(/*h->mmco[i].long_arg >= h->long_ref_count || h->long_ref[ h->mmco[i].long_arg ] == NULL*/ long_arg >= 16){
3392 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3395 h->mmco[i].long_arg= long_arg;
3398 if(opcode > (unsigned)MMCO_LONG){
3399 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3402 if(opcode == MMCO_END)
 /* sliding window: implicitly drop the oldest short-term ref when full */
3407 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3409 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
3410 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3411 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count (POC) of the current picture from the
 * slice-header syntax, for all three POC types (H.264 clause 8.2.1), and
 * stores the per-field and frame POCs in the current picture.
 * NOTE(review): declarations (field_poc, poc), some assignments and the
 * return are missing from this dump.
 */
3421 static int init_poc(H264Context *h){
3422 MpegEncContext * const s = &h->s;
3423 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
 /* frame_num_offset accumulates each frame_num wraparound */
3426 if(h->nal_unit_type == NAL_IDR_SLICE){
3427 h->frame_num_offset= 0;
3429 if(h->frame_num < h->prev_frame_num)
3430 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3432 h->frame_num_offset= h->prev_frame_num_offset;
 /* POC type 0: explicit poc_lsb with msb carried across wraparounds */
3435 if(h->sps.poc_type==0){
3436 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3438 if(h->nal_unit_type == NAL_IDR_SLICE){
3443 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3444 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3445 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3446 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3448 h->poc_msb = h->prev_poc_msb;
3449 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3451 field_poc[1] = h->poc_msb + h->poc_lsb;
3452 if(s->picture_structure == PICT_FRAME)
3453 field_poc[1] += h->delta_poc_bottom;
 /* POC type 1: derived from frame_num and the SPS offset cycle */
3454 }else if(h->sps.poc_type==1){
3455 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3458 if(h->sps.poc_cycle_length != 0)
3459 abs_frame_num = h->frame_num_offset + h->frame_num;
3463 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3466 expected_delta_per_poc_cycle = 0;
3467 for(i=0; i < h->sps.poc_cycle_length; i++)
3468 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3470 if(abs_frame_num > 0){
3471 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3472 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3474 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3475 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3476 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3480 if(h->nal_ref_idc == 0)
3481 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3483 field_poc[0] = expectedpoc + h->delta_poc[0];
3484 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3486 if(s->picture_structure == PICT_FRAME)
3487 field_poc[1] += h->delta_poc[1];
 /* POC type 2 (else branch; label line missing): POC follows decode order */
3490 if(h->nal_unit_type == NAL_IDR_SLICE){
3493 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3494 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
 /* publish the computed POCs on the current picture */
3500 if(s->picture_structure != PICT_BOTTOM_FIELD)
3501 s->current_picture_ptr->field_poc[0]= field_poc[0];
3502 if(s->picture_structure != PICT_TOP_FIELD)
3503 s->current_picture_ptr->field_poc[1]= field_poc[1];
3504 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
3505 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
3512 * initialize scan tables
/* Fills the per-context zigzag/field scan tables. When the DSP context
 * uses the reference C IDCT the tables are copied verbatim; otherwise the
 * T() permutations remap the scan order to match the optimized IDCT's
 * coefficient layout. The *_q0 pointers select unpermuted tables for
 * lossless (transform-bypass) blocks. */
3514 static void init_scan_tables(H264Context *h){
3515 MpegEncContext * const s = &h->s;
3517 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3518 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3519 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3521 for(i=0; i<16; i++){
3522 #define T(x) (x>>2) | ((x<<2) & 0xF)
3523 h->zigzag_scan[i] = T(zigzag_scan[i]);
3524 h-> field_scan[i] = T( field_scan[i]);
 /* same scheme for the 8x8 tables */
3528 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3529 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3530 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3531 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3532 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3534 for(i=0; i<64; i++){
3535 #define T(x) (x>>3) | ((x&7)<<3)
3536 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3537 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3538 h->field_scan8x8[i] = T(field_scan8x8[i]);
3539 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
 /* transform-bypass blocks always use the unpermuted scan order */
3543 if(h->sps.transform_bypass){ //FIXME same ugly
3544 h->zigzag_scan_q0 = zigzag_scan;
3545 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3546 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3547 h->field_scan_q0 = field_scan;
3548 h->field_scan8x8_q0 = field_scan8x8;
3549 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3551 h->zigzag_scan_q0 = h->zigzag_scan;
3552 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3553 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3554 h->field_scan_q0 = h->field_scan;
3555 h->field_scan8x8_q0 = h->field_scan8x8;
3556 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3561 * Replicates H264 "master" context to thread contexts.
/* Copies the per-frame decoding state a slice thread needs from the
 * master context: current picture, strides, POC/frame_num history,
 * reference lists and dequant tables. Shallow copies only — both
 * contexts end up pointing at the same Pictures. */
3563 static void clone_slice(H264Context *dst, H264Context *src)
3565 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3566 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3567 dst->s.current_picture = src->s.current_picture;
3568 dst->s.linesize = src->s.linesize;
3569 dst->s.uvlinesize = src->s.uvlinesize;
3571 dst->prev_poc_msb = src->prev_poc_msb;
3572 dst->prev_poc_lsb = src->prev_poc_lsb;
3573 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3574 dst->prev_frame_num = src->prev_frame_num;
3575 dst->short_ref_count = src->short_ref_count;
3577 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3578 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3579 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3580 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3582 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3583 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3587 * decodes a slice header.
3588 * this will also call MPV_common_init() and frame_start() as needed
3590 * @param h h264context
3591 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3593 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* See the documentation comment above. Parses the slice header, lazily
 * (re)initializes the MpegEncContext and per-thread contexts, starts the
 * frame on the first slice, builds/reorders reference lists, reads
 * weighting tables, ref-pic marking, QP and deblocking parameters.
 * NOTE(review): this dump is missing many lines (else branches, error
 * returns, closing braces) — treat the visible flow as incomplete. */
3595 static int decode_slice_header(H264Context *h, H264Context *h0){
3596 MpegEncContext * const s = &h->s;
3597 unsigned int first_mb_in_slice;
3598 unsigned int pps_id;
3599 int num_ref_idx_active_override_flag;
3600 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3601 unsigned int slice_type, tmp, i;
3602 int default_ref_list_done = 0;
3604 s->dropable= h->nal_ref_idc == 0;
3606 first_mb_in_slice= get_ue_golomb(&s->gb);
 /* in chunked mode a slice starting at MB 0 begins a new picture */
3608 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3609 h0->current_slice = 0;
3610 s->current_picture_ptr= NULL;
 /* slice_type: values 5..9 mean "fixed for the whole picture" */
3613 slice_type= get_ue_golomb(&s->gb);
3615 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3620 h->slice_type_fixed=1;
3622 h->slice_type_fixed=0;
3624 slice_type= slice_type_map[ slice_type ];
3625 if (slice_type == I_TYPE
3626 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3627 default_ref_list_done = 1;
3629 h->slice_type= slice_type;
3631 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
 /* look up the referenced PPS and its SPS */
3633 pps_id= get_ue_golomb(&s->gb);
3634 if(pps_id>=MAX_PPS_COUNT){
3635 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3638 if(!h0->pps_buffers[pps_id]) {
3639 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3642 h->pps= *h0->pps_buffers[pps_id];
3644 if(!h0->sps_buffers[h->pps.sps_id]) {
3645 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3648 h->sps = *h0->sps_buffers[h->pps.sps_id];
3650 if(h == h0 && h->dequant_coeff_pps != pps_id){
3651 h->dequant_coeff_pps = pps_id;
3652 init_dequant_tables(h);
 /* derive picture geometry from the SPS */
3655 s->mb_width= h->sps.mb_width;
3656 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3658 h->b_stride= s->mb_width*4;
3659 h->b8_stride= s->mb_width*2;
3661 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3662 if(h->sps.frame_mbs_only_flag)
3663 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3665 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3667 if (s->context_initialized
3668 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3670 return -1; // width / height changed during parallelized decoding
 /* lazy context init, including one H264Context per slice thread */
3674 if (!s->context_initialized) {
3676 return -1; // we cant (re-)initialize context during parallel decoding
3677 if (MPV_common_init(s) < 0)
3680 init_scan_tables(h);
3683 for(i = 1; i < s->avctx->thread_count; i++) {
3685 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
 /* copy the MpegEncContext part, zero the H264-specific remainder */
3686 memcpy(c, h, sizeof(MpegEncContext));
3687 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3690 init_scan_tables(c);
3694 for(i = 0; i < s->avctx->thread_count; i++)
3695 if(context_init(h->thread_context[i]) < 0)
3698 s->avctx->width = s->width;
3699 s->avctx->height = s->height;
3700 s->avctx->sample_aspect_ratio= h->sps.sar;
3701 if(!s->avctx->sample_aspect_ratio.den)
3702 s->avctx->sample_aspect_ratio.den = 1;
3704 if(h->sps.timing_info_present_flag){
3705 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
 /* old x264 builds wrote a time_scale that is off by a factor of 2 */
3706 if(h->x264_build > 0 && h->x264_build < 44)
3707 s->avctx->time_base.den *= 2;
3708 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3709 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3713 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
 /* picture structure; PAFF field pictures are not supported here */
3716 h->mb_aff_frame = 0;
3717 if(h->sps.frame_mbs_only_flag){
3718 s->picture_structure= PICT_FRAME;
3720 if(get_bits1(&s->gb)) { //field_pic_flag
3721 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3722 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
3724 s->picture_structure= PICT_FRAME;
3725 h->mb_aff_frame = h->sps.mb_aff;
 /* first slice of a picture: allocate/start the frame */
3729 if(h0->current_slice == 0){
3730 if(frame_start(h) < 0)
3736 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3738 assert(s->mb_num == s->mb_width * s->mb_height);
3739 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
3740 first_mb_in_slice >= s->mb_num){
3741 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3744 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3745 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
3746 assert(s->mb_y < s->mb_height);
3748 if(s->picture_structure==PICT_FRAME){
3749 h->curr_pic_num= h->frame_num;
3750 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3752 h->curr_pic_num= 2*h->frame_num + 1;
3753 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3756 if(h->nal_unit_type == NAL_IDR_SLICE){
3757 get_ue_golomb(&s->gb); /* idr_pic_id */
 /* POC-related slice-header syntax */
3760 if(h->sps.poc_type==0){
3761 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3763 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3764 h->delta_poc_bottom= get_se_golomb(&s->gb);
3768 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3769 h->delta_poc[0]= get_se_golomb(&s->gb);
3771 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3772 h->delta_poc[1]= get_se_golomb(&s->gb);
3777 if(h->pps.redundant_pic_cnt_present){
3778 h->redundant_pic_count= get_ue_golomb(&s->gb);
3781 //set defaults, might be overridden a few lines later
3782 h->ref_count[0]= h->pps.ref_count[0];
3783 h->ref_count[1]= h->pps.ref_count[1];
3785 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
3786 if(h->slice_type == B_TYPE){
3787 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3788 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
3789 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
3791 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3793 if(num_ref_idx_active_override_flag){
3794 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3795 if(h->slice_type==B_TYPE)
3796 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
 /* unsigned trick also catches ref_count == 0 */
3798 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3799 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3800 h->ref_count[0]= h->ref_count[1]= 1;
3804 if(h->slice_type == B_TYPE)
 /* build/reorder reference lists, then weighting tables */
3811 if(!default_ref_list_done){
3812 fill_default_ref_list(h);
3815 if(decode_ref_pic_list_reordering(h) < 0)
3818 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
3819 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
3820 pred_weight_table(h);
3821 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
3822 implicit_weight_table(h);
3827 decode_ref_pic_marking(h0, &s->gb);
3830 fill_mbaff_ref_list(h);
3832 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
3833 tmp = get_ue_golomb(&s->gb);
3835 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3838 h->cabac_init_idc= tmp;
 /* slice QP and chroma QP derivation */
3841 h->last_qscale_diff = 0;
3842 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3844 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3848 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3849 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3850 //FIXME qscale / qp ... stuff
3851 if(h->slice_type == SP_TYPE){
3852 get_bits1(&s->gb); /* sp_for_switch_flag */
3854 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
3855 get_se_golomb(&s->gb); /* slice_qs_delta */
 /* deblocking filter parameters */
3858 h->deblocking_filter = 1;
3859 h->slice_alpha_c0_offset = 0;
3860 h->slice_beta_offset = 0;
3861 if( h->pps.deblocking_filter_parameters_present ) {
3862 tmp= get_ue_golomb(&s->gb);
3864 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3867 h->deblocking_filter= tmp;
 /* bitstream 0/1 is inverted relative to the internal on/off convention */
3868 if(h->deblocking_filter < 2)
3869 h->deblocking_filter^= 1; // 1<->0
3871 if( h->deblocking_filter ) {
3872 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3873 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3877 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3878 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
3879 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
3880 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3881 h->deblocking_filter= 0;
 /* cross-slice deblocking conflicts with parallel slice decoding */
3883 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3884 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3885 /* Cheat slightly for speed:
3886 Don't bother to deblock across slices */
3887 h->deblocking_filter = 2;
3889 h0->max_contexts = 1;
3890 if(!h0->single_decode_warning) {
3891 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3892 h0->single_decode_warning = 1;
3895 return 1; // deblocking switched inside frame
 /* NOTE(review): the literal '?' below cannot compile; upstream this
  * statement is disabled/unfinished — confirm against the original tree */
3900 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3901 slice_group_change_cycle= get_bits(&s->gb, ?);
3904 h0->last_slice_type = slice_type;
3905 h->slice_num = ++h0->current_slice;
3907 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3908 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
3910 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3911 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
3913 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3915 av_get_pict_type_char(h->slice_type),
3916 pps_id, h->frame_num,
3917 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3918 h->ref_count[0], h->ref_count[1],
3920 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
3922 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
 /* non-reference frames may use the cheaper 2-tap qpel filters */
3926 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3927 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3928 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3930 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3931 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/* Reads a CAVLC level_prefix: counts leading zero bits before the first
 * set bit in the bitstream cache and consumes them (plus the set bit).
 * NOTE(review): local declarations and the return are missing here. */
3940 static inline int get_level_prefix(GetBitContext *gb){
3944 OPEN_READER(re, gb);
3945 UPDATE_CACHE(re, gb);
3946 buf=GET_CACHE(re, gb);
 /* position of the leading 1-bit determines the prefix length */
3948 log= 32 - av_log2(buf);
3950 print_bin(buf>>(32-log), log);
3951 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
3954 LAST_SKIP_BITS(re, gb, log);
3955 CLOSE_READER(re, gb);
/* Returns whether an 8x8 transform may be used for the current MB: every
 * sub-partition must be 8x8 (and direct parts need direct_8x8_inference).
 * NOTE(review): the loop header and return statements are missing here. */
3960 static inline int get_dct8x8_allowed(H264Context *h){
3963 if(!IS_SUB_8X8(h->sub_mb_type[i])
3964 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
3971 * decodes a residual block.
3972 * @param n block index
3973 * @param scantable scantable
3974 * @param max_coeff number of coefficients in the block
3975 * @return <0 if an error occurred
/* See the documentation comment above. CAVLC residual decoding:
 * coeff_token (trailing ones + total coefficients), sign bits for the
 * trailing ones, level prefix/suffix pairs, total_zeros and run_before,
 * then placement into block[] via the scantable (optionally dequantized
 * through qmul). NOTE(review): many lines (else branches, early returns,
 * closing braces) are missing from this dump. */
3977 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
3978 MpegEncContext * const s = &h->s;
3979 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
3981 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
3983 //FIXME put trailing_onex into the context
 /* coeff_token: VLC table selection depends on the block class and the
  * predicted non-zero count of neighbouring blocks */
3985 if(n == CHROMA_DC_BLOCK_INDEX){
3986 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
3987 total_coeff= coeff_token>>2;
3989 if(n == LUMA_DC_BLOCK_INDEX){
3990 total_coeff= pred_non_zero_count(h, 0);
3991 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3992 total_coeff= coeff_token>>2;
3994 total_coeff= pred_non_zero_count(h, n);
3995 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3996 total_coeff= coeff_token>>2;
3997 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4001 //FIXME set last_non_zero?
4005 if(total_coeff > (unsigned)max_coeff) {
4006 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
 /* trailing ones carry only a sign bit each */
4010 trailing_ones= coeff_token&3;
4011 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4012 assert(total_coeff<=16);
4014 for(i=0; i<trailing_ones; i++){
4015 level[i]= 1 - 2*get_bits1(gb);
 /* remaining levels: prefix + optional suffix, zigzag sign mapping */
4019 int level_code, mask;
4020 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4021 int prefix= get_level_prefix(gb);
4023 //first coefficient has suffix_length equal to 0 or 1
4024 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4026 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4028 level_code= (prefix<<suffix_length); //part
4029 }else if(prefix==14){
4031 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4033 level_code= prefix + get_bits(gb, 4); //part
4034 }else if(prefix==15){
4035 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4036 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4038 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4042 if(trailing_ones < 3) level_code += 2;
 /* map even codes to positive, odd to negative levels */
4047 mask= -(level_code&1);
4048 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4051 //remaining coefficients have suffix_length > 0
4052 for(;i<total_coeff;i++) {
4053 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4054 prefix = get_level_prefix(gb);
4056 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4057 }else if(prefix==15){
4058 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4060 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4063 mask= -(level_code&1);
4064 level[i]= (((2+level_code)>>1) ^ mask) - mask;
 /* adaptively grow the suffix length with the level magnitude */
4065 if(level_code > suffix_limit[suffix_length])
 /* total_zeros: number of zeros interleaved among the coefficients */
4070 if(total_coeff == max_coeff)
4073 if(n == CHROMA_DC_BLOCK_INDEX)
4074 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4076 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
 /* scatter levels into the block, highest scan position first */
4079 coeff_num = zeros_left + total_coeff - 1;
4080 j = scantable[coeff_num];
4082 block[j] = level[0];
4083 for(i=1;i<total_coeff;i++) {
4086 else if(zeros_left < 7){
4087 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4089 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4091 zeros_left -= run_before;
4092 coeff_num -= 1 + run_before;
4093 j= scantable[ coeff_num ];
 /* same placement loop, but with dequantization through qmul */
4098 block[j] = (level[0] * qmul[j] + 32)>>6;
4099 for(i=1;i<total_coeff;i++) {
4102 else if(zeros_left < 7){
4103 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4105 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4107 zeros_left -= run_before;
4108 coeff_num -= 1 + run_before;
4109 j= scantable[ coeff_num ];
4111 block[j]= (level[i] * qmul[j] + 32)>>6;
4116 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* For skipped MBAFF macroblock pairs: predicts the field decoding flag
 * from the left neighbour if it belongs to this slice, else from the
 * above neighbour, else defaults (the fallback expression's tail is
 * missing from this dump). */
4123 static void predict_field_decoding_flag(H264Context *h){
4124 MpegEncContext * const s = &h->s;
4125 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4126 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4127 ? s->current_picture.mb_type[mb_xy-1]
4128 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4129 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4131 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4135 * decodes a P_SKIP or B_SKIP macroblock
// Decodes a P_SKIP or B_SKIP macroblock: no residual, no coded motion — motion
// is predicted (pred_pskip_motion for P, pred_direct_motion for B) and the MB
// state tables are updated as if a normal MB had been decoded.
// NOTE(review): listing is sampled; several original lines (4140-4141,
// 4144-4145, 4147, 4149, 4152, 4156-4159, 4161, 4166-4167, 4173) are missing.
4137 static void decode_mb_skip(H264Context *h){
4138 MpegEncContext * const s = &h->s;
4139 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Skipped MBs carry no coefficients: clear the non-zero-count bookkeeping.
4142 memset(h->non_zero_count[mb_xy], 0, 16);
4143 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4146 mb_type|= MB_TYPE_INTERLACED;
4148 if( h->slice_type == B_TYPE )
4150 // just for fill_caches. pred_direct_motion will set the real mb_type
4151 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
// B-skip: derive motion from direct prediction.
4153 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4154 pred_direct_motion(h, &mb_type);
4155 mb_type|= MB_TYPE_SKIP;
// P-skip path: 16x16 partition, reference index 0, predicted (mx,my).
4160 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4162 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4163 pred_pskip_motion(h, &mx, &my);
4164 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4165 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit the predicted motion and per-MB state to the picture-level tables.
4168 write_back_motion(h, mb_type);
4169 s->current_picture.mb_type[mb_xy]= mb_type;
4170 s->current_picture.qscale_table[mb_xy]= s->qscale;
4171 h->slice_table[ mb_xy ]= h->slice_num;
// Remember the skip so MBAFF flag prediction of the next MB can use it.
4172 h->prev_mb_skipped= 1;
4176 * decodes a macroblock
4177 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Decodes one macroblock from a CAVLC-coded slice: skip-run handling,
// mb_type, intra prediction modes or inter motion data, CBP, delta-QP and
// the residual coefficients. Returns 0 on success, -1 on bitstream errors.
// NOTE(review): listing is sampled — many original lines are missing between
// the numbered lines below (error returns, loop headers, closing braces);
// comments describe only what the visible lines establish.
4179 static int decode_mb_cavlc(H264Context *h){
4180 MpegEncContext * const s = &h->s;
4181 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4182 int partition_count;
4183 unsigned int mb_type, cbp;
4184 int dct8x8_allowed= h->pps.transform_8x8_mode;
4186 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4188 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4189 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- skip-run: in P/B/SP slices, mb_skip_run counts consecutive skipped MBs.
4191 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4192 if(s->mb_skip_run==-1)
4193 s->mb_skip_run= get_ue_golomb(&s->gb);
4195 if (s->mb_skip_run--) {
// MBAFF: a skipped top MB still needs the field-decoding flag, either read
// from the stream (pair of the next coded MB) or predicted from neighbors.
4196 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4197 if(s->mb_skip_run==0)
4198 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4200 predict_field_decoding_flag(h);
4202 predict_field_decoding_flag(h);
4207 if( (s->mb_y&1) == 0 )
4208 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4210 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4212 h->prev_mb_skipped= 0;
// --- mb_type: ue(v) index remapped through the per-slice-type info tables.
4214 mb_type= get_ue_golomb(&s->gb);
4215 if(h->slice_type == B_TYPE){
4217 partition_count= b_mb_type_info[mb_type].partition_count;
4218 mb_type= b_mb_type_info[mb_type].type;
// B/P indices beyond the inter range fall through to the intra table.
4221 goto decode_intra_mb;
4223 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4225 partition_count= p_mb_type_info[mb_type].partition_count;
4226 mb_type= p_mb_type_info[mb_type].type;
4229 goto decode_intra_mb;
4232 assert(h->slice_type == I_TYPE);
4235 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4239 cbp= i_mb_type_info[mb_type].cbp;
4240 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4241 mb_type= i_mb_type_info[mb_type].type;
4245 mb_type |= MB_TYPE_INTERLACED;
4247 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw 8-bit samples, byte-aligned, copied straight into h->mb.
4249 if(IS_INTRA_PCM(mb_type)){
4252 // We assume these blocks are very rare so we do not optimize it.
4253 align_get_bits(&s->gb);
4255 // The pixels are stored in the same order as levels in h->mb array.
4256 for(y=0; y<16; y++){
4257 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4258 for(x=0; x<16; x++){
4259 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4260 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4264 const int index= 256 + 4*(y&3) + 32*(y>>2);
4266 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4267 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4271 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4273 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4274 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4278 // In deblocking, the quantizer is 0
4279 s->current_picture.qscale_table[mb_xy]= 0;
4280 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4281 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4282 // All coeffs are present
4283 memset(h->non_zero_count[mb_xy], 16, 16);
4285 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF pairs reference in field units: temporarily double ref_count
// (undone at the end of the function, lines 4666-4667).
4290 h->ref_count[0] <<= 1;
4291 h->ref_count[1] <<= 1;
4294 fill_caches(h, mb_type, 0);
// --- intra prediction modes -------------------------------------------------
4297 if(IS_INTRA(mb_type)){
4299 // init_top_left_availability(h);
4300 if(IS_INTRA4x4(mb_type)){
4303 if(dct8x8_allowed && get_bits1(&s->gb)){
4304 mb_type |= MB_TYPE_8x8DCT;
4308 // fill_intra4x4_pred_table(h);
4309 for(i=0; i<16; i+=di){
4310 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag == 0: read rem_mode and skip the predicted one.
4312 if(!get_bits1(&s->gb)){
4313 const int rem_mode= get_bits(&s->gb, 3);
4314 mode = rem_mode + (rem_mode >= mode);
4318 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4320 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4322 write_back_intra_pred_mode(h);
4323 if( check_intra4x4_pred_mode(h) < 0)
4326 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4327 if(h->intra16x16_pred_mode < 0)
4331 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4334 h->chroma_pred_mode= pred_mode;
// --- 8x8 sub-macroblock partitions ------------------------------------------
4335 }else if(partition_count==4){
4336 int i, j, sub_partition_count[4], list, ref[2][4];
4338 if(h->slice_type == B_TYPE){
4340 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4341 if(h->sub_mb_type[i] >=13){
4342 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4345 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4346 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4348 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4349 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4350 pred_direct_motion(h, &mb_type);
// Mark the direct-predicted partitions so they are not used as MV predictors.
4351 h->ref_cache[0][scan8[4]] =
4352 h->ref_cache[1][scan8[4]] =
4353 h->ref_cache[0][scan8[12]] =
4354 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4357 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4359 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4360 if(h->sub_mb_type[i] >=4){
4361 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4364 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4365 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 partition (te(v), bounded by ref_count).
4369 for(list=0; list<h->list_count; list++){
4370 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4372 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4373 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4374 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4376 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4388 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition; mv_cache layout is the 8x8
// scan8 grid, hence the +1/+8/+9 offsets for the 4x4 blocks of a partition.
4390 for(list=0; list<h->list_count; list++){
4392 if(IS_DIRECT(h->sub_mb_type[i])) {
4393 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4396 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4397 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4399 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4400 const int sub_mb_type= h->sub_mb_type[i];
4401 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4402 for(j=0; j<sub_partition_count[i]; j++){
4404 const int index= 4*i + block_width*j;
4405 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4406 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4407 mx += get_se_golomb(&s->gb);
4408 my += get_se_golomb(&s->gb);
4409 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4411 if(IS_SUB_8X8(sub_mb_type)){
4413 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4415 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4416 }else if(IS_SUB_8X4(sub_mb_type)){
4417 mv_cache[ 1 ][0]= mx;
4418 mv_cache[ 1 ][1]= my;
4419 }else if(IS_SUB_4X8(sub_mb_type)){
4420 mv_cache[ 8 ][0]= mx;
4421 mv_cache[ 8 ][1]= my;
4423 mv_cache[ 0 ][0]= mx;
4424 mv_cache[ 0 ][1]= my;
4427 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- whole-MB direct mode ---------------------------------------------------
4433 }else if(IS_DIRECT(mb_type)){
4434 pred_direct_motion(h, &mb_type);
4435 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 inter partitions -----------------------------------
4437 int list, mx, my, i;
4438 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4439 if(IS_16X16(mb_type)){
4440 for(list=0; list<h->list_count; list++){
4442 if(IS_DIR(mb_type, 0, list)){
4443 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4444 if(val >= h->ref_count[list]){
4445 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4449 val= LIST_NOT_USED&0xFF;
4450 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4452 for(list=0; list<h->list_count; list++){
4454 if(IS_DIR(mb_type, 0, list)){
4455 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4456 mx += get_se_golomb(&s->gb);
4457 my += get_se_golomb(&s->gb);
4458 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4460 val= pack16to32(mx,my);
4463 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4466 else if(IS_16X8(mb_type)){
4467 for(list=0; list<h->list_count; list++){
4470 if(IS_DIR(mb_type, i, list)){
4471 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4472 if(val >= h->ref_count[list]){
4473 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4477 val= LIST_NOT_USED&0xFF;
4478 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4481 for(list=0; list<h->list_count; list++){
4484 if(IS_DIR(mb_type, i, list)){
4485 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4486 mx += get_se_golomb(&s->gb);
4487 my += get_se_golomb(&s->gb);
4488 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4490 val= pack16to32(mx,my);
4493 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4497 assert(IS_8X16(mb_type));
4498 for(list=0; list<h->list_count; list++){
4501 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4502 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4503 if(val >= h->ref_count[list]){
4504 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4508 val= LIST_NOT_USED&0xFF;
4509 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4512 for(list=0; list<h->list_count; list++){
4515 if(IS_DIR(mb_type, i, list)){
4516 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4517 mx += get_se_golomb(&s->gb);
4518 my += get_se_golomb(&s->gb);
4519 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4521 val= pack16to32(mx,my);
4524 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4530 if(IS_INTER(mb_type))
4531 write_back_motion(h, mb_type);
// --- coded block pattern (not coded for Intra16x16, where it comes from
// the mb_type table above) ---------------------------------------------------
4533 if(!IS_INTRA16x16(mb_type)){
4534 cbp= get_ue_golomb(&s->gb);
4536 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4540 if(IS_INTRA4x4(mb_type))
4541 cbp= golomb_to_intra4x4_cbp[cbp];
4543 cbp= golomb_to_inter_cbp[cbp];
4547 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4548 if(get_bits1(&s->gb))
4549 mb_type |= MB_TYPE_8x8DCT;
4551 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: delta-QP, then luma (DC+AC or 4x4/8x8) and chroma blocks ----
4553 if(cbp || IS_INTRA16x16(mb_type)){
4554 int i8x8, i4x4, chroma_idx;
4556 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4557 const uint8_t *scan, *scan8x8, *dc_scan;
4559 // fill_non_zero_count_cache(h);
4561 if(IS_INTERLACED(mb_type)){
4562 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4563 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4564 dc_scan= luma_dc_field_scan;
4566 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4567 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4568 dc_scan= luma_dc_zigzag_scan;
4571 dquant= get_se_golomb(&s->gb);
4573 if( dquant > 25 || dquant < -26 ){
4574 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec's mb_qp_delta arithmetic.
4578 s->qscale += dquant;
4579 if(((unsigned)s->qscale) > 51){
4580 if(s->qscale<0) s->qscale+= 52;
4581 else s->qscale-= 52;
4584 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4585 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4586 if(IS_INTRA16x16(mb_type)){
4587 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4588 return -1; //FIXME continue if partitioned and other return -1 too
4591 assert((cbp&15) == 0 || (cbp&15) == 15);
4594 for(i8x8=0; i8x8<4; i8x8++){
4595 for(i4x4=0; i4x4<4; i4x4++){
4596 const int index= i4x4 + 4*i8x8;
// scan+1 / 15 coeffs: Intra16x16 AC blocks skip the DC coefficient.
4597 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4603 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4606 for(i8x8=0; i8x8<4; i8x8++){
4607 if(cbp & (1<<i8x8)){
4608 if(IS_8x8DCT(mb_type)){
4609 DCTELEM *buf = &h->mb[64*i8x8];
4611 for(i4x4=0; i4x4<4; i4x4++){
4612 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4613 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4616 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4617 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4619 for(i4x4=0; i4x4<4; i4x4++){
4620 const int index= i4x4 + 4*i8x8;
4622 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4628 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4629 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (2x2, qmul==NULL: dequant done later), then chroma AC.
4635 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4636 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4642 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4643 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4644 for(i4x4=0; i4x4<4; i4x4++){
4645 const int index= 16 + 4*chroma_idx + i4x4;
4646 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4652 uint8_t * const nnz= &h->non_zero_count_cache[0];
4653 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4654 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No coded residual at all: clear the whole non-zero-count cache.
4657 uint8_t * const nnz= &h->non_zero_count_cache[0];
4658 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4659 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4660 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4662 s->current_picture.qscale_table[mb_xy]= s->qscale;
4663 write_back_non_zero_count(h);
// Undo the MBAFF ref_count doubling performed at lines 4290-4291.
4666 h->ref_count[0] >>= 1;
4667 h->ref_count[1] >>= 1;
// CABAC-decodes mb_field_decoding_flag for an MBAFF pair. The context (0-2)
// counts how many of the left / above MB pairs in the same slice are
// field-coded. NOTE(review): the ctx increments (original lines 4683-4687
// interior) are partly missing from this sampled listing.
4673 static int decode_cabac_field_decoding_flag(H264Context *h) {
4674 MpegEncContext * const s = &h->s;
4675 const int mb_x = s->mb_x;
// &~1: address the top MB of the current pair.
4676 const int mb_y = s->mb_y & ~1;
4677 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4678 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4680 unsigned int ctx = 0;
4682 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4685 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
// States 70-72 are the mb_field_decoding_flag contexts.
4689 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// CABAC-decodes an intra mb_type (shared by I slices and the intra escape of
// P/B slices). Returns 0 for I4x4, 25 for I_PCM, otherwise 1..24 encoding the
// I16x16 variant (cbp_luma, cbp_chroma and prediction mode packed into the
// index). ctx_base selects the state bank; intra_slice adjusts context
// offsets for I slices (which also use a neighbor-derived first context).
4692 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4693 uint8_t *state= &h->cabac_state[ctx_base];
4697 MpegEncContext * const s = &h->s;
4698 const int mba_xy = h->left_mb_xy[0];
4699 const int mbb_xy = h->top_mb_xy;
// ctx counts non-I4x4 neighbors in the same slice (increments on the
// missing lines 4702/4704 of this sampled listing).
4701 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4703 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4705 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4706 return 0; /* I4x4 */
4709 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4710 return 0; /* I4x4 */
// Terminate symbol distinguishes I_PCM from I16x16.
4713 if( get_cabac_terminate( &h->cabac ) )
4714 return 25; /* PCM */
4716 mb_type = 1; /* I16x16 */
4717 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4718 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4719 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4720 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4721 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// CABAC-decodes mb_type for the current slice type. I slices delegate to
// decode_cabac_intra_mb_type; P slices decode a small binarization tree
// (states 14-17) with intra escape; B slices decode the spec's B-type
// binarization (states 27+) with a neighbor-derived first context.
4725 static int decode_cabac_mb_type( H264Context *h ) {
4726 MpegEncContext * const s = &h->s;
4728 if( h->slice_type == I_TYPE ) {
4729 return decode_cabac_intra_mb_type(h, 3, 1);
4730 } else if( h->slice_type == P_TYPE ) {
4731 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4733 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4734 /* P_L0_D16x16, P_8x8 */
4735 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4737 /* P_L0_D8x16, P_L0_D16x8 */
4738 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// First bin was 1: intra MB inside a P slice (offset 5 in the type table).
4741 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4743 } else if( h->slice_type == B_TYPE ) {
4744 const int mba_xy = h->left_mb_xy[0];
4745 const int mbb_xy = h->top_mb_xy;
// ctx counts non-direct neighbors in the same slice (increments are on
// lines missing from this sampled listing).
4749 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4751 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4754 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4755 return 0; /* B_Direct_16x16 */
4757 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4758 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four more bins select among the remaining B types / escapes.
4761 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4762 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4763 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4764 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4766 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4767 else if( bits == 13 ) {
// 13: intra escape in a B slice (offset 23 in the type table).
4768 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4769 } else if( bits == 14 )
4770 return 11; /* B_L1_L0_8x16 */
4771 else if( bits == 15 )
4772 return 22; /* B_8x8 */
4774 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4775 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4777 /* TODO SI/SP frames? */
// CABAC-decodes mb_skip_flag for the MB at (mb_x, mb_y). The context counts
// non-skipped left/top neighbors in the same slice; B slices use a separate
// context bank (states 24+ vs 11+ — the B offset is on a line missing from
// this sampled listing). MBAFF needs special neighbor addressing because
// left/top may be the other field of a pair.
4782 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4783 MpegEncContext * const s = &h->s;
4787 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4788 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// If the left pair has the opposite field/frame coding, use its bottom MB.
4791 && h->slice_table[mba_xy] == h->slice_num
4792 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
4793 mba_xy += s->mb_stride;
4795 mbb_xy = mb_xy - s->mb_stride;
4797 && h->slice_table[mbb_xy] == h->slice_num
4798 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4799 mbb_xy -= s->mb_stride;
4801 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// Non-MBAFF: plain left / top neighbors.
4803 int mb_xy = mb_x + mb_y*s->mb_stride;
4805 mbb_xy = mb_xy - s->mb_stride;
4808 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4810 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4813 if( h->slice_type == B_TYPE )
4815 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// CABAC-decodes one intra4x4 prediction mode given the predicted mode.
// A first bin (state 68) signals "use predicted mode"; otherwise a 3-bit
// rem_intra4x4_pred_mode is read (state 69) and values >= pred_mode are
// shifted up by one, exactly as in the CAVLC path.
// NOTE(review): the two return statements are on lines missing from this
// sampled listing.
4818 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4821 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// Three bins, LSB first, all sharing state 69.
4824 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4825 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4826 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4828 if( mode >= pred_mode )
// CABAC-decodes intra_chroma_pred_mode (0-3). Context 0-2 counts neighbors
// with a non-zero chroma mode; the value is a truncated unary code over
// states 64-67. NOTE(review): the return statements for values 0/1/2/3 are
// on lines missing from this sampled listing.
4834 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4835 const int mba_xy = h->left_mb_xy[0];
4836 const int mbb_xy = h->top_mb_xy;
4840 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4841 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4844 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4847 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4850 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4852 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// CABAC-decodes the 4-bit luma coded_block_pattern. Each 8x8 block's bin uses
// a context built from the corresponding bits of the left/top neighbors'
// CBPs (or of the bits already decoded for this MB). States 73-76.
// NOTE(review): the final "return cbp;" is on a line missing from this
// sampled listing.
4858 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4859 int cbp_b, cbp_a, ctx, cbp = 0;
// -1 (all bits set) when the neighbor is outside the slice, which makes the
// !(… & mask) tests read as "neighbor block coded".
4861 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4862 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4864 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4865 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4866 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4867 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4868 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4869 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4870 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4871 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// CABAC-decodes the chroma part of the CBP: 0 (none), 1 (DC only) or
// 2 (DC+AC). Contexts derive from the neighbors' chroma CBP nibbles
// (bits 4-5 of left_cbp/top_cbp); states 77+.
4874 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4878 cbp_a = (h->left_cbp>>4)&0x03;
4879 cbp_b = (h-> top_cbp>>4)&0x03;
// First bin: any chroma coefficients at all?
4882 if( cbp_a > 0 ) ctx++;
4883 if( cbp_b > 0 ) ctx += 2;
4884 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: AC coefficients present (neighbors with cbp==2 raise the ctx).
4888 if( cbp_a == 2 ) ctx++;
4889 if( cbp_b == 2 ) ctx += 2;
4890 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// CABAC-decodes mb_qp_delta as a unary code (states 60+), then maps the
// unsigned magnitude to the signed delta (+/-(val+1)/2). The context depends
// on whether the previous MB had a non-zero delta.
// NOTE(review): sampled listing — the positive-return branch and loop-body
// context updates are on missing lines.
4892 static int decode_cabac_mb_dqp( H264Context *h) {
4896 if( h->last_qscale_diff != 0 )
4899 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4905 if(val > 102) //prevent infinite loop
// Odd val -> positive delta (missing line); even val -> negative delta below.
4912 return -(val + 1)/2;
// CABAC-decodes a P-slice sub_mb_type (0-3) via a three-bin tree over
// states 21-23. NOTE(review): the return statements themselves are on lines
// missing from this sampled listing.
4914 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4915 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4917 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4919 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// CABAC-decodes a B-slice sub_mb_type (0-12) using the spec's binarization
// over states 36-39: 0 = B_Direct_8x8, 1-2 = B_L0/L1_8x8, higher values built
// from additional bins.
4923 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4925 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4926 return 0; /* B_Direct_8x8 */
4927 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4928 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4930 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4931 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4932 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining types: two more bins over state 39 (base value set on a line
// missing from this sampled listing).
4935 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4936 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// CABAC-decodes transform_size_8x8_flag; context 399-401 indexed by how many
// neighbors use the 8x8 transform (h->neighbor_transform_size).
4940 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4941 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// CABAC-decodes ref_idx for block n of the given list as a unary code over
// states 54+. The initial context is derived from the left/top ref_cache
// entries; in B slices, direct-predicted neighbors do not count.
// Caps the value at 32 to guard against corrupt streams.
4944 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4945 int refa = h->ref_cache[list][scan8[n] - 1];
4946 int refb = h->ref_cache[list][scan8[n] - 8];
4950 if( h->slice_type == B_TYPE) {
4951 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
4953 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
// Unary: count 1-bins; ctx updates are on lines missing from this listing.
4962 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
4968 if(ref >= 32 /*h->ref_list[list]*/){
4969 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
4970 return 0; //FIXME we should return -1 and check the return everywhere
// CABAC-decodes one motion-vector-difference component (l: 0=x, 1=y) for
// block n. Context base 40 (x) / 47 (y); the first-bin context depends on the
// sum of neighbor MVD magnitudes (thresholds visible at 32). Values >= 9 are
// extended with an exp-Golomb-style bypass suffix; the sign is a bypass bin.
4976 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
4977 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
4978 abs( h->mvd_cache[list][scan8[n] - 8][l] );
4979 int ctxbase = (l == 0) ? 40 : 47;
4984 else if( amvd > 32 )
4989 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Truncated unary prefix, at most 9 context-coded bins.
4994 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Bypass-coded exponential suffix for large magnitudes, with overflow guard.
5002 while( get_cabac_bypass( &h->cabac ) ) {
5006 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5011 if( get_cabac_bypass( &h->cabac ) )
// Sign bin: returns +mvd or -mvd.
5015 return get_cabac_bypass_sign( &h->cabac, -mvd );
// Computes the coded_block_flag context for a residual block: looks up
// whether the corresponding left (nza) and top (nzb) neighbor blocks have
// coefficients, per block category (cat 0: luma DC via cbp bit 0x100,
// cat 1/2: luma 4x4 via non_zero_count_cache, cat 3: chroma DC via cbp bits,
// cat 4: chroma AC). Final context is ctx + 4*cat (combination of nza/nzb is
// on lines missing from this sampled listing).
5018 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5023 nza = h->left_cbp&0x100;
5024 nzb = h-> top_cbp&0x100;
5025 } else if( cat == 1 || cat == 2 ) {
5026 nza = h->non_zero_count_cache[scan8[idx] - 1];
5027 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5028 } else if( cat == 3 ) {
5029 nza = (h->left_cbp>>(6+idx))&0x01;
5030 nzb = (h-> top_cbp>>(6+idx))&0x01;
5033 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5034 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5043 return ctx + 4 * cat;
// Maps an 8x8 scan position (0-62) to the context offset used for the
// last_significant_coeff_flag in 8x8 residual blocks.
5046 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5047 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5048 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5049 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5050 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
// CABAC residual decoding for one block: coded_block_flag, significance map,
// then coefficient levels and signs, written dequantized (via qmul) or raw
// (qmul==NULL per caller, e.g. DC blocks) into block[] at scantable order.
// NOTE(review): listing is sampled — several lines (loop closings, some
// branches) are missing; comments describe only the visible code.
5053 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5054 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
// Context-offset tables, indexed [MB_FIELD][cat] where applicable.
5055 static const int significant_coeff_flag_offset[2][6] = {
5056 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5057 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5059 static const int last_coeff_flag_offset[2][6] = {
5060 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5061 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5063 static const int coeff_abs_level_m1_offset[6] = {
5064 227+0, 227+10, 227+20, 227+30, 227+39, 426
5066 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5067 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5068 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5069 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5070 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5071 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5072 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5073 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5074 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5080 int coeff_count = 0;
5083 int abslevelgt1 = 0;
5085 uint8_t *significant_coeff_ctx_base;
5086 uint8_t *last_coeff_ctx_base;
5087 uint8_t *abs_level_m1_ctx_base;
// Copy the CABAC state to a stack-local context so the compiler can keep it
// in registers in this hot loop; written back before every return.
5090 #define CABAC_ON_STACK
5092 #ifdef CABAC_ON_STACK
5095 cc.range = h->cabac.range;
5096 cc.low = h->cabac.low;
5097 cc.bytestream= h->cabac.bytestream;
5099 #define CC &h->cabac
5103 /* cat: 0-> DC 16x16 n = 0
5104 * 1-> AC 16x16 n = luma4x4idx
5105 * 2-> Luma4x4 n = luma4x4idx
5106 * 3-> DC Chroma n = iCbCr
5107 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5108 * 5-> Luma8x8 n = 4 * luma8x8idx
5111 /* read coded block flag */
5113 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
// No coefficients: record zero nnz and bail out early.
5114 if( cat == 1 || cat == 2 )
5115 h->non_zero_count_cache[scan8[n]] = 0;
5117 h->non_zero_count_cache[scan8[16+n]] = 0;
5118 #ifdef CABAC_ON_STACK
5119 h->cabac.range = cc.range ;
5120 h->cabac.low = cc.low ;
5121 h->cabac.bytestream= cc.bytestream;
5127 significant_coeff_ctx_base = h->cabac_state
5128 + significant_coeff_flag_offset[MB_FIELD][cat];
5129 last_coeff_ctx_base = h->cabac_state
5130 + last_coeff_flag_offset[MB_FIELD][cat];
5131 abs_level_m1_ctx_base = h->cabac_state
5132 + coeff_abs_level_m1_offset[cat];
// Significance map: one sig flag per position, last flag terminates early;
// position max_coeff-1 is implicitly significant if reached.
5135 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5136 for(last= 0; last < coefs; last++) { \
5137 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5138 if( get_cabac( CC, sig_ctx )) { \
5139 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5140 index[coeff_count++] = last; \
5141 if( get_cabac( CC, last_ctx ) ) { \
5147 if( last == max_coeff -1 ) {\
5148 index[coeff_count++] = last;\
// 8x8 blocks use a per-position context table; x86 has asm fast paths.
5150 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5151 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5152 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5154 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5156 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5158 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5161 assert(coeff_count > 0);
// Record where coefficients exist (cbp_table for DC cats, nnz cache for AC).
5164 h->cbp_table[mb_xy] |= 0x100;
5165 else if( cat == 1 || cat == 2 )
5166 h->non_zero_count_cache[scan8[n]] = coeff_count;
5168 h->cbp_table[mb_xy] |= 0x40 << n;
5170 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5173 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
// Levels, decoded from the highest-frequency coefficient downwards.
5176 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5177 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5178 int j= scantable[index[coeff_count]];
// abs level == 1: just the sign, dequantized unless qmul is NULL.
5180 if( get_cabac( CC, ctx ) == 0 ) {
5182 block[j] = get_cabac_bypass_sign( CC, -1);
5184 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
// abs level > 1: unary prefix up to 14, then bypass exp suffix >= 15.
5190 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5191 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5195 if( coeff_abs >= 15 ) {
5197 while( get_cabac_bypass( CC ) ) {
5203 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5209 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5210 else block[j] = coeff_abs;
5212 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5213 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
// Write the local CABAC state back into the shared context.
5219 #ifdef CABAC_ON_STACK
5220 h->cabac.range = cc.range ;
5221 h->cabac.low = cc.low ;
5222 h->cabac.bytestream= cc.bytestream;
// Computes h->top_mb_xy and h->left_mb_xy[0] for the current MB. The default
// is the plain raster neighbors; under MBAFF (the enclosing condition is on a
// missing line of this sampled listing) the neighbor indices are corrected
// for frame/field coding mismatches between the current pair and its
// left/top pairs.
5227 static inline void compute_mb_neighbors(H264Context *h)
5229 MpegEncContext * const s = &h->s;
5230 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5231 h->top_mb_xy = mb_xy - s->mb_stride;
5232 h->left_mb_xy[0] = mb_xy - 1;
5234 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5235 const int top_pair_xy = pair_xy - s->mb_stride;
5236 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5237 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5238 const int curr_mb_frame_flag = !MB_FIELD;
5239 const int bottom = (s->mb_y & 1);
// Top neighbor moves one row up when the frame/field flags require it.
5241 ? !curr_mb_frame_flag // bottom macroblock
5242 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5244 h->top_mb_xy -= s->mb_stride;
5246 if (left_mb_frame_flag != curr_mb_frame_flag) {
5247 h->left_mb_xy[0] = pair_xy - 1;
5254 * decodes a macroblock
5255 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5257 static int decode_mb_cabac(H264Context *h) {
/* Decode one macroblock from the CABAC bitstream: skip flag, MBAFF field
 * flag, mb_type, prediction data (intra pred modes or reference indices +
 * motion vectors), CBP, dquant and residual coefficients.
 * Returns 0 on success, -1 on a decode error.
 * NOTE(review): this chunk of the file has elided lines (gaps in the
 * original numbering); comments below describe only what is visible. */
5258 MpegEncContext * const s = &h->s;
5259 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5260 int mb_type, partition_count, cbp = 0;
5261 int dct8x8_allowed= h->pps.transform_8x8_mode;
5263 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5265 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* Skip flags exist only in P/B (and SP) slices, never in I/SI. */
5266 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5268 /* a skipped mb needs the aff flag from the following mb */
5269 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5270 predict_field_decoding_flag(h);
/* Bottom MB of an MBAFF pair whose top was skipped: its skip flag was
 * already read ahead while decoding the top MB. */
5271 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5272 skip = h->next_mb_skipped;
5274 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5275 /* read skip flags */
5277 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5278 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* Read the bottom MB's skip flag ahead of time (see above). */
5279 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5280 if(h->next_mb_skipped)
5281 predict_field_decoding_flag(h);
5283 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: no residual, no chroma pred mode, no qscale change. */
5288 h->cbp_table[mb_xy] = 0;
5289 h->chroma_pred_mode_table[mb_xy] = 0;
5290 h->last_qscale_diff = 0;
5297 if( (s->mb_y&1) == 0 )
5299 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5301 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5303 h->prev_mb_skipped = 0;
5305 compute_mb_neighbors(h);
5306 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5307 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Map the raw CABAC mb_type index through the per-slice-type info tables
 * (B and P tables also contain the intra types at the high indices). */
5311 if( h->slice_type == B_TYPE ) {
5313 partition_count= b_mb_type_info[mb_type].partition_count;
5314 mb_type= b_mb_type_info[mb_type].type;
5317 goto decode_intra_mb;
5319 } else if( h->slice_type == P_TYPE ) {
5321 partition_count= p_mb_type_info[mb_type].partition_count;
5322 mb_type= p_mb_type_info[mb_type].type;
5325 goto decode_intra_mb;
5328 assert(h->slice_type == I_TYPE);
5330 partition_count = 0;
5331 cbp= i_mb_type_info[mb_type].cbp;
5332 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5333 mb_type= i_mb_type_info[mb_type].type;
5336 mb_type |= MB_TYPE_INTERLACED;
5338 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM macroblock: raw pixels follow, read straight from the bytestream
 * (bypassing the arithmetic coder), then the CABAC decoder is re-initialized
 * after the raw data. */
5340 if(IS_INTRA_PCM(mb_type)) {
5344 // We assume these blocks are very rare so we do not optimize it.
5345 // FIXME The two following lines get the bitstream position in the cabac
5346 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5347 ptr= h->cabac.bytestream;
5348 if(h->cabac.low&0x1) ptr--;
5350 if(h->cabac.low&0x1FF) ptr--;
5353 // The pixels are stored in the same order as levels in h->mb array.
5354 for(y=0; y<16; y++){
5355 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5356 for(x=0; x<16; x++){
5357 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5358 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
/* Chroma U plane (h->mb offset 256). */
5362 const int index= 256 + 4*(y&3) + 32*(y>>2);
5364 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5365 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* Chroma V plane (h->mb offset 256 + 64). */
5369 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5371 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5372 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* Restart the arithmetic decoder just past the raw PCM bytes. */
5376 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5378 // All blocks are present
5379 h->cbp_table[mb_xy] = 0x1ef;
5380 h->chroma_pred_mode_table[mb_xy] = 0;
5381 // In deblocking, the quantizer is 0
5382 s->current_picture.qscale_table[mb_xy]= 0;
5383 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5384 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5385 // All coeffs are present
5386 memset(h->non_zero_count[mb_xy], 16, 16);
5387 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field pair: temporarily double the ref counts so per-field
 * reference indices are in range; undone at the end of this function. */
5392 h->ref_count[0] <<= 1;
5393 h->ref_count[1] <<= 1;
5396 fill_caches(h, mb_type, 0);
5398 if( IS_INTRA( mb_type ) ) {
/* Intra 4x4: one prediction mode per 4x4 block (or per 8x8 block when
 * the 8x8 transform is selected, replicated into the 4x4 cache). */
5400 if( IS_INTRA4x4( mb_type ) ) {
5401 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5402 mb_type |= MB_TYPE_8x8DCT;
5403 for( i = 0; i < 16; i+=4 ) {
5404 int pred = pred_intra_mode( h, i );
5405 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5406 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5409 for( i = 0; i < 16; i++ ) {
5410 int pred = pred_intra_mode( h, i );
5411 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5413 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5416 write_back_intra_pred_mode(h);
5417 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5419 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5420 if( h->intra16x16_pred_mode < 0 ) return -1;
5422 h->chroma_pred_mode_table[mb_xy] =
5423 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5425 pred_mode= check_intra_pred_mode( h, pred_mode );
5426 if( pred_mode < 0 ) return -1;
5427 h->chroma_pred_mode= pred_mode;
/* Inter 8x8 partitions: decode four sub-macroblock types, then refs,
 * then motion vectors per sub-partition. */
5428 } else if( partition_count == 4 ) {
5429 int i, j, sub_partition_count[4], list, ref[2][4];
5431 if( h->slice_type == B_TYPE ) {
5432 for( i = 0; i < 4; i++ ) {
5433 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5434 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5435 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5437 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5438 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5439 pred_direct_motion(h, &mb_type);
5440 h->ref_cache[0][scan8[4]] =
5441 h->ref_cache[1][scan8[4]] =
5442 h->ref_cache[0][scan8[12]] =
5443 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5444 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5445 for( i = 0; i < 4; i++ )
5446 if( IS_DIRECT(h->sub_mb_type[i]) )
5447 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5451 for( i = 0; i < 4; i++ ) {
5452 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5453 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5454 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices for each 8x8 block, per list. A ref is only coded
 * when the list has more than one active reference. */
5458 for( list = 0; list < h->list_count; list++ ) {
5459 for( i = 0; i < 4; i++ ) {
5460 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5461 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5462 if( h->ref_count[list] > 1 )
5463 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5469 h->ref_cache[list][ scan8[4*i]+1 ]=
5470 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5475 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors: predict, add the decoded mvd, then replicate mv and
 * mvd into the cache cells of the current sub-partition shape. */
5477 for(list=0; list<h->list_count; list++){
5479 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5480 if(IS_DIRECT(h->sub_mb_type[i])){
5481 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5485 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5486 const int sub_mb_type= h->sub_mb_type[i];
5487 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5488 for(j=0; j<sub_partition_count[i]; j++){
5491 const int index= 4*i + block_width*j;
5492 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5493 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5494 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5496 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5497 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5498 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5500 if(IS_SUB_8X8(sub_mb_type)){
5502 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5504 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5507 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5509 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5510 }else if(IS_SUB_8X4(sub_mb_type)){
5511 mv_cache[ 1 ][0]= mx;
5512 mv_cache[ 1 ][1]= my;
5514 mvd_cache[ 1 ][0]= mx - mpx;
5515 mvd_cache[ 1 ][1]= my - mpy;
5516 }else if(IS_SUB_4X8(sub_mb_type)){
5517 mv_cache[ 8 ][0]= mx;
5518 mv_cache[ 8 ][1]= my;
5520 mvd_cache[ 8 ][0]= mx - mpx;
5521 mvd_cache[ 8 ][1]= my - mpy;
5523 mv_cache[ 0 ][0]= mx;
5524 mv_cache[ 0 ][1]= my;
5526 mvd_cache[ 0 ][0]= mx - mpx;
5527 mvd_cache[ 0 ][1]= my - mpy;
/* Sub-block not predicted from this list: clear mv/mvd via 32-bit
 * stores (each cache entry is a packed int16 pair). */
5530 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5531 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5532 p[0] = p[1] = p[8] = p[9] = 0;
5533 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* B_DIRECT_16x16: motion comes entirely from the direct predictor. */
5537 } else if( IS_DIRECT(mb_type) ) {
5538 pred_direct_motion(h, &mb_type);
5539 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5540 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5541 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* Whole-MB inter partitions: 16x16, 16x8 or 8x16. */
5543 int list, mx, my, i, mpx, mpy;
5544 if(IS_16X16(mb_type)){
5545 for(list=0; list<h->list_count; list++){
5546 if(IS_DIR(mb_type, 0, list)){
5547 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5548 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5550 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5552 for(list=0; list<h->list_count; list++){
5553 if(IS_DIR(mb_type, 0, list)){
5554 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5556 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5557 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5558 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5560 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5561 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5563 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5566 else if(IS_16X8(mb_type)){
5567 for(list=0; list<h->list_count; list++){
5569 if(IS_DIR(mb_type, i, list)){
5570 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5571 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5573 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5576 for(list=0; list<h->list_count; list++){
5578 if(IS_DIR(mb_type, i, list)){
5579 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5580 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5581 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5582 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5584 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5585 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5587 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5588 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5593 assert(IS_8X16(mb_type));
5594 for(list=0; list<h->list_count; list++){
5596 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5597 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5598 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5600 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5603 for(list=0; list<h->list_count; list++){
5605 if(IS_DIR(mb_type, i, list)){
5606 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5607 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5608 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5610 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5611 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5612 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5614 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5615 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5622 if( IS_INTER( mb_type ) ) {
5623 h->chroma_pred_mode_table[mb_xy] = 0;
5624 write_back_motion( h, mb_type );
/* CBP: for intra 16x16 it came packed in the mb_type table above. */
5627 if( !IS_INTRA16x16( mb_type ) ) {
5628 cbp = decode_cabac_mb_cbp_luma( h );
5629 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5632 h->cbp_table[mb_xy] = h->cbp = cbp;
/* transform_size_8x8_flag is coded only when some luma block is coded. */
5634 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5635 if( decode_cabac_mb_transform_size( h ) )
5636 mb_type |= MB_TYPE_8x8DCT;
5638 s->current_picture.mb_type[mb_xy]= mb_type;
/* Residual decoding: pick scan tables, decode dquant, then luma and
 * chroma coefficient levels. */
5640 if( cbp || IS_INTRA16x16( mb_type ) ) {
5641 const uint8_t *scan, *scan8x8, *dc_scan;
5642 const uint32_t *qmul;
/* The *_q0 scan variants fold the qp==0 dequant into the scan table. */
5645 if(IS_INTERLACED(mb_type)){
5646 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5647 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5648 dc_scan= luma_dc_field_scan;
5650 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5651 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5652 dc_scan= luma_dc_zigzag_scan;
5655 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5656 if( dqp == INT_MIN ){
5657 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's mb_qp_delta semantics. */
5661 if(((unsigned)s->qscale) > 51){
5662 if(s->qscale<0) s->qscale+= 52;
5663 else s->qscale-= 52;
5665 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5666 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Intra 16x16: separate DC (hadamard) block plus 16 AC blocks. */
5668 if( IS_INTRA16x16( mb_type ) ) {
5670 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5671 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5674 qmul = h->dequant4_coeff[0][s->qscale];
5675 for( i = 0; i < 16; i++ ) {
5676 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5677 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5680 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Other mb types: iterate the four 8x8 luma blocks per the CBP bits. */
5684 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5685 if( cbp & (1<<i8x8) ) {
5686 if( IS_8x8DCT(mb_type) ) {
5687 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5688 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5690 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5691 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5692 const int index = 4*i8x8 + i4x4;
5693 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5695 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5696 //STOP_TIMER("decode_residual")
5700 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5701 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (2x2 blocks, no dequant table — done in the idct). */
5708 for( c = 0; c < 2; c++ ) {
5709 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5710 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* Chroma AC, per plane and per 4x4 block. */
5716 for( c = 0; c < 2; c++ ) {
5717 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5718 for( i = 0; i < 4; i++ ) {
5719 const int index = 16 + 4 * c + i;
5720 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5721 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5725 uint8_t * const nnz= &h->non_zero_count_cache[0];
5726 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5727 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No coded residual at all: clear the whole nnz cache. */
5730 uint8_t * const nnz= &h->non_zero_count_cache[0];
5731 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5732 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5733 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5734 h->last_qscale_diff = 0;
5737 s->current_picture.qscale_table[mb_xy]= s->qscale;
5738 write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling from above. */
5741 h->ref_count[0] >>= 1;
5742 h->ref_count[1] >>= 1;
5749 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblock one vertical luma edge (16 pixels tall) at pix, using boundary
 * strengths bS[0..3] and quantizer qp. bS<4 uses the DSP luma filter with
 * per-4-pixel tc0 clipping; bS==4 (intra MB edge) applies the strong
 * filter inline below. Tables are biased by +52 so negative qp+offset
 * indices stay in range. */
5751 const int index_a = qp + h->slice_alpha_c0_offset;
5752 const int alpha = (alpha_table+52)[index_a];
5753 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 marks "do not filter" for the DSP routine. */
5758 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5759 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5761 /* 16px edge length, because bS=4 is triggered by being at
5762 * the edge of an intra MB, so all 4 bS are the same */
5763 for( d = 0; d < 16; d++ ) {
5764 const int p0 = pix[-1];
5765 const int p1 = pix[-2];
5766 const int p2 = pix[-3];
5768 const int q0 = pix[0];
5769 const int q1 = pix[1];
5770 const int q2 = pix[2];
/* Edge-activity test: only filter across real blocking artifacts. */
5772 if( FFABS( p0 - q0 ) < alpha &&
5773 FFABS( p1 - p0 ) < beta &&
5774 FFABS( q1 - q0 ) < beta ) {
/* Strong (3-tap deep) filtering when the step is small enough. */
5776 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5777 if( FFABS( p2 - p0 ) < beta)
5779 const int p3 = pix[-4];
5781 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5782 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5783 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5786 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5788 if( FFABS( q2 - q0 ) < beta)
5790 const int q3 = pix[3];
5792 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5793 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5794 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5797 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: only p0/q0 are modified. */
5801 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5802 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5804 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5810 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblock one vertical chroma edge: bS<4 uses the normal chroma filter
 * (tc biased by +1 per the chroma rule), bS==4 the intra strong filter. */
5812 const int index_a = qp + h->slice_alpha_c0_offset;
5813 const int alpha = (alpha_table+52)[index_a];
5814 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* For chroma, tc = tc0 + 1; tc[i]==0 disables filtering of that segment. */
5819 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5820 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5822 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5826 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
/* Deblock the first vertical luma edge of an MBAFF pair: 8 boundary
 * strengths and 2 quantizers (one per neighboring field/frame MB), so
 * the filter is applied row by row instead of via the DSP routines. */
5828 for( i = 0; i < 16; i++, pix += stride) {
5834 int bS_index = (i >> 1);
5837 bS_index |= (i & 1);
5840 if( bS[bS_index] == 0 ) {
/* Row-to-qp mapping differs between field and frame coding of the
 * current MB pair. */
5844 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5845 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5846 alpha = (alpha_table+52)[index_a];
5847 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filtering (bS 1..3): clip the delta to +-tc. */
5849 if( bS[bS_index] < 4 ) {
5850 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5851 const int p0 = pix[-1];
5852 const int p1 = pix[-2];
5853 const int p2 = pix[-3];
5854 const int q0 = pix[0];
5855 const int q1 = pix[1];
5856 const int q2 = pix[2];
5858 if( FFABS( p0 - q0 ) < alpha &&
5859 FFABS( p1 - p0 ) < beta &&
5860 FFABS( q1 - q0 ) < beta ) {
/* p1/q1 are additionally corrected when their side is smooth. */
5864 if( FFABS( p2 - p0 ) < beta ) {
5865 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5868 if( FFABS( q2 - q0 ) < beta ) {
5869 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5873 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5874 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5875 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5876 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra) filtering, same math as filter_mb_edgev. */
5879 const int p0 = pix[-1];
5880 const int p1 = pix[-2];
5881 const int p2 = pix[-3];
5883 const int q0 = pix[0];
5884 const int q1 = pix[1];
5885 const int q2 = pix[2];
5887 if( FFABS( p0 - q0 ) < alpha &&
5888 FFABS( p1 - p0 ) < beta &&
5889 FFABS( q1 - q0 ) < beta ) {
5891 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5892 if( FFABS( p2 - p0 ) < beta)
5894 const int p3 = pix[-4];
5896 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5897 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5898 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5901 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5903 if( FFABS( q2 - q0 ) < beta)
5905 const int q3 = pix[3];
5907 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5908 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5909 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5912 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5916 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5917 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5919 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
5924 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
/* Chroma counterpart of filter_mb_mbaff_edgev: per-row filtering of the
 * first vertical chroma edge of an MBAFF pair (8 rows, 8 bS values,
 * 2 quantizers). Chroma only ever modifies p0/q0. */
5926 for( i = 0; i < 8; i++, pix += stride) {
5934 if( bS[bS_index] == 0 ) {
5938 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5939 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5940 alpha = (alpha_table+52)[index_a];
5941 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal chroma filtering: tc = tc0 + 1, delta clipped to +-tc. */
5943 if( bS[bS_index] < 4 ) {
5944 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
5945 const int p0 = pix[-1];
5946 const int p1 = pix[-2];
5947 const int q0 = pix[0];
5948 const int q1 = pix[1];
5950 if( FFABS( p0 - q0 ) < alpha &&
5951 FFABS( p1 - p0 ) < beta &&
5952 FFABS( q1 - q0 ) < beta ) {
5953 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5955 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5956 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5957 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (intra edge). */
5960 const int p0 = pix[-1];
5961 const int p1 = pix[-2];
5962 const int q0 = pix[0];
5963 const int q1 = pix[1];
5965 if( FFABS( p0 - q0 ) < alpha &&
5966 FFABS( p1 - p0 ) < beta &&
5967 FFABS( q1 - q0 ) < beta ) {
5969 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
5970 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
5971 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
5977 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Horizontal-edge counterpart of filter_mb_edgev: deblock one horizontal
 * luma edge (16 pixels wide). p/q samples are addressed via multiples of
 * the stride instead of +-1 offsets; otherwise the math is identical. */
5979 const int index_a = qp + h->slice_alpha_c0_offset;
5980 const int alpha = (alpha_table+52)[index_a];
5981 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5982 const int pix_next = stride;
/* bS < 4 path: hand work to the DSP vertical-filter routine. */
5987 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5988 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
5990 /* 16px edge length, see filter_mb_edgev */
5991 for( d = 0; d < 16; d++ ) {
5992 const int p0 = pix[-1*pix_next];
5993 const int p1 = pix[-2*pix_next];
5994 const int p2 = pix[-3*pix_next];
5995 const int q0 = pix[0];
5996 const int q1 = pix[1*pix_next];
5997 const int q2 = pix[2*pix_next];
5999 if( FFABS( p0 - q0 ) < alpha &&
6000 FFABS( p1 - p0 ) < beta &&
6001 FFABS( q1 - q0 ) < beta ) {
6003 const int p3 = pix[-4*pix_next];
6004 const int q3 = pix[ 3*pix_next];
/* Strong (deep) filtering when the step across the edge is small. */
6006 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6007 if( FFABS( p2 - p0 ) < beta) {
6009 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6010 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6011 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6014 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6016 if( FFABS( q2 - q0 ) < beta) {
6018 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6019 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6020 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6023 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: only the two samples adjacent to the edge change. */
6027 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6028 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6030 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6037 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblock one horizontal chroma edge: normal chroma filter for bS<4
 * (chroma tc = tc0 + 1), strong intra chroma filter for bS==4. */
6039 const int index_a = qp + h->slice_alpha_c0_offset;
6040 const int alpha = (alpha_table+52)[index_a];
6041 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i]==0 disables filtering of that 2-pixel segment. */
6046 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6047 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6049 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6053 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
/* Fast-path deblocking for one macroblock. Handles only the common case
 * (non-border MB, DSP strength helper available, equal chroma qp offsets,
 * no MBAFF); everything else falls back to the full filter_mb(). */
6054 MpegEncContext * const s = &h->s;
6056 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6058 mb_xy = mb_x + mb_y*s->mb_stride;
/* Fallback conditions: picture border, missing DSP helper, differing
 * chroma qp offsets, or a slice boundary with deblocking_filter==2. */
6060 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6061 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6062 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6063 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6066 assert(!FRAME_MBAFF);
/* Average qp with the left (qp0) and top (qp1) neighbors for the
 * respective MB-boundary edges, per the spec's qp_av rule. */
6068 mb_type = s->current_picture.mb_type[mb_xy];
6069 qp = s->current_picture.qscale_table[mb_xy];
6070 qp0 = s->current_picture.qscale_table[mb_xy-1];
6071 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6072 qpc = get_chroma_qp( h, 0, qp );
6073 qpc0 = get_chroma_qp( h, 0, qp0 );
6074 qpc1 = get_chroma_qp( h, 0, qp1 );
6075 qp0 = (qp + qp0 + 1) >> 1;
6076 qp1 = (qp + qp1 + 1) >> 1;
6077 qpc0 = (qpc + qpc0 + 1) >> 1;
6078 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this threshold alpha/beta are zero and filtering is a no-op. */
6079 qp_thresh = 15 - h->slice_alpha_c0_offset;
6080 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6081 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: boundary strengths are fixed (4 on MB edges, 3 inside),
 * so skip the strength computation entirely. */
6084 if( IS_INTRA(mb_type) ) {
6085 int16_t bS4[4] = {4,4,4,4};
6086 int16_t bS3[4] = {3,3,3,3};
/* With the 8x8 transform only every other internal edge is filtered. */
6087 if( IS_8x8DCT(mb_type) ) {
6088 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6089 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6090 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6091 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6093 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6094 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6095 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6096 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6097 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6098 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6099 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6100 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: only MB edges and the middle edge exist at 8x8 chroma size. */
6102 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6103 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6104 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6105 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6106 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6107 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6108 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6109 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per edge. bSv aliases bS so a whole edge's four
 * int16 strengths can be set/tested with one 64-bit access. */
6112 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6113 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6115 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6117 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: how often mv-based bS must be recomputed, derived from
 * the partition shapes of this MB and its left neighbor. */
6119 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6120 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6121 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6122 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6124 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6125 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6126 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6127 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Intra neighbors force bS=4 on the shared MB edge. */
6129 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6130 bSv[0][0] = 0x0004000400040004ULL;
6131 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6132 bSv[1][0] = 0x0004000400040004ULL;
/* Apply one edge: dir 0 = vertical, 1 = horizontal; edge 0 uses the
 * neighbor-averaged qp (qp0/qp1), inner edges use this MB's qp. */
6134 #define FILTER(hv,dir,edge)\
6135 if(bSv[dir][edge]) {\
6136 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6138 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6139 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6145 } else if( IS_8x8DCT(mb_type) ) {
6164 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6165 MpegEncContext * const s = &h->s;
6166 const int mb_xy= mb_x + mb_y*s->mb_stride;
6167 const int mb_type = s->current_picture.mb_type[mb_xy];
6168 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6169 int first_vertical_edge_done = 0;
6171 /* FIXME: A given frame may occupy more than one position in
6172 * the reference list. So ref2frm should be populated with
6173 * frame numbers, not indices. */
6174 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6175 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6177 //for sufficiently low qp, filtering wouldn't do anything
6178 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6180 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6181 int qp = s->current_picture.qscale_table[mb_xy];
6183 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6184 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6190 // left mb is in picture
6191 && h->slice_table[mb_xy-1] != 255
6192 // and current and left pair do not have the same interlaced type
6193 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6194 // and left mb is in the same slice if deblocking_filter == 2
6195 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6196 /* First vertical edge is different in MBAFF frames
6197 * There are 8 different bS to compute and 2 different Qp
6199 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6200 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6205 int mb_qp, mbn0_qp, mbn1_qp;
6207 first_vertical_edge_done = 1;
6209 if( IS_INTRA(mb_type) )
6210 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6212 for( i = 0; i < 8; i++ ) {
6213 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6215 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6217 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6218 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6219 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6226 mb_qp = s->current_picture.qscale_table[mb_xy];
6227 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6228 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6229 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6230 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6231 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6232 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6233 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6234 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6235 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6236 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6237 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6238 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6241 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6242 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6243 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6244 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6245 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6247 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6248 for( dir = 0; dir < 2; dir++ )
6251 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6252 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6253 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6255 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6256 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6257 // how often to recheck mv-based bS when iterating between edges
6258 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6259 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6260 // how often to recheck mv-based bS when iterating along each edge
6261 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6263 if (first_vertical_edge_done) {
6265 first_vertical_edge_done = 0;
6268 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6271 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6272 && !IS_INTERLACED(mb_type)
6273 && IS_INTERLACED(mbm_type)
6275 // This is a special case in the norm where the filtering must
6276 // be done twice (one each of the field) even if we are in a
6277 // frame macroblock.
6279 static const int nnz_idx[4] = {4,5,6,3};
6280 unsigned int tmp_linesize = 2 * linesize;
6281 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6282 int mbn_xy = mb_xy - 2 * s->mb_stride;
6287 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6288 if( IS_INTRA(mb_type) ||
6289 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6290 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6292 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6293 for( i = 0; i < 4; i++ ) {
6294 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6295 mbn_nnz[nnz_idx[i]] != 0 )
6301 // Do not use s->qscale as luma quantizer because it has not the same
6302 // value in IPCM macroblocks.
6303 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6304 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6305 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6306 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6307 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6308 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6309 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6310 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6317 for( edge = start; edge < edges; edge++ ) {
6318 /* mbn_xy: neighbor macroblock */
6319 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6320 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6324 if( (edge&1) && IS_8x8DCT(mb_type) )
6327 if( IS_INTRA(mb_type) ||
6328 IS_INTRA(mbn_type) ) {
6331 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6332 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6341 bS[0] = bS[1] = bS[2] = bS[3] = value;
6346 if( edge & mask_edge ) {
6347 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6350 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6351 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6354 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6355 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6356 int bn_idx= b_idx - (dir ? 8:1);
6358 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6359 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6360 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6361 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6363 bS[0] = bS[1] = bS[2] = bS[3] = v;
6369 for( i = 0; i < 4; i++ ) {
6370 int x = dir == 0 ? edge : i;
6371 int y = dir == 0 ? i : edge;
6372 int b_idx= 8 + 4 + x + 8*y;
6373 int bn_idx= b_idx - (dir ? 8:1);
6375 if( h->non_zero_count_cache[b_idx] != 0 ||
6376 h->non_zero_count_cache[bn_idx] != 0 ) {
6382 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6383 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6384 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6385 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6393 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6398 // Do not use s->qscale as luma quantizer because it has not the same
6399 // value in IPCM macroblocks.
6400 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6401 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6402 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6403 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6405 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6406 if( (edge&1) == 0 ) {
6407 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6408 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6409 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6410 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6413 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6414 if( (edge&1) == 0 ) {
6415 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6416 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6417 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6418 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decodes the macroblocks of one slice and reports the decoded region
 * to the error-resilience code via ff_er_add_slice().
 * Three paths are visible here: CABAC (h->pps.cabac), plain CAVLC, and
 * partitioned CAVLC (decode_mb() loop at the bottom).
 * NOTE(review): the embedded original line numbers in this listing are
 * not contiguous — lines have been dropped and braces do not balance.
 * Fixed in this revision: garbled tokens "s->?gb" / "s->gb?." and three
 * get_bits_count(s->gb) calls missing the '&' (get_bits_count() takes a
 * GetBitContext pointer, as the surrounding calls demonstrate).
 */
6425 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6426 MpegEncContext * const s = &h->s;
6427 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6431 if( h->pps.cabac ) {
6435 align_get_bits( &s->gb );
6438 ff_init_cabac_states( &h->cabac);
6439 ff_init_cabac_decoder( &h->cabac,
6440 s->gb.buffer + get_bits_count(&s->gb)/8,
6441 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6442 /* calculate pre-state */
6443 for( i= 0; i < 460; i++ ) {
6445 if( h->slice_type == I_TYPE )
6446 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6448 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6451 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6453 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6458 int ret = decode_mb_cabac(h);
6460 //STOP_TIMER("decode_mb_cabac")
6462 if(ret>=0) hl_decode_mb(h);
6464 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6467 if(ret>=0) ret = decode_mb_cabac(h);
6469 if(ret>=0) hl_decode_mb(h);
6472 eos = get_cabac_terminate( &h->cabac );
6474 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6475 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6476 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6480 if( ++s->mb_x >= s->mb_width ) {
6482 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6489 if( eos || s->mb_y >= s->mb_height ) {
6490 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6491 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6498 int ret = decode_mb_cavlc(h);
6500 if(ret>=0) hl_decode_mb(h);
6502 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6504 ret = decode_mb_cavlc(h);
6506 if(ret>=0) hl_decode_mb(h);
6511 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6512 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6517 if(++s->mb_x >= s->mb_width){
6519 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6524 if(s->mb_y >= s->mb_height){
6525 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6527 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6528 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6532 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6539 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6540 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6541 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6542 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6546 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6555 for(;s->mb_y < s->mb_height; s->mb_y++){
6556 for(;s->mb_x < s->mb_width; s->mb_x++){
6557 int ret= decode_mb(h);
6562 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6563 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6568 if(++s->mb_x >= s->mb_width){
6570 if(++s->mb_y >= s->mb_height){
6571 if(get_bits_count(&s->gb) == s->gb.size_in_bits){
6572 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6576 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6583 if(get_bits_count(&s->gb) >= s->gb.size_in_bits){
6584 if(get_bits_count(&s->gb) == s->gb.size_in_bits){
6585 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6589 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6596 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6599 return -1; //not reached
/**
 * Parses an "unregistered user data" SEI payload. If the payload after
 * the 16-byte UUID matches the x264 version string, the encoder build
 * number is stored in h->x264_build.
 * NOTE(review): embedded original line numbers are not contiguous —
 * lines are missing from this listing.
 */
6602 static int decode_unregistered_user_data(H264Context *h, int size){
6603 MpegEncContext * const s = &h->s;
6604 uint8_t user_data[16+256];
/* copy at most sizeof(user_data)-1 bytes (leaves room for a terminator) */
6610 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6611 user_data[i]= get_bits(&s->gb, 8);
/* user_data+16 skips the UUID; sscanf extracts the x264 core build number */
6615 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6616 if(e==1 && build>=0)
6617 h->x264_build= build;
6619 if(s->avctx->debug & FF_DEBUG_BUGS)
6620 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond what was buffered */
6623 skip_bits(&s->gb, 8);
/**
 * Parses an SEI NAL unit: reads the (type, size) pairs — each coded as
 * a run of 0xFF bytes plus a final byte — then dispatches known payload
 * types and skips the rest.
 * NOTE(review): embedded original line numbers are not contiguous —
 * lines are missing from this listing.
 */
6628 static int decode_sei(H264Context *h){
6629 MpegEncContext * const s = &h->s;
/* +16: need at least a type byte and a size byte remaining */
6631 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum 0xFF bytes until a byte < 255 terminates the run */
6636 type+= show_bits(&s->gb, 8);
6637 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same ff-run coding as the type */
6641 size+= show_bits(&s->gb, 8);
6642 }while(get_bits(&s->gb, 8) == 255);
6646 if(decode_unregistered_user_data(h, size) < 0)
/* unknown payload type: skip it wholesale */
6650 skip_bits(&s->gb, 8*size);
6653 //FIXME check bits here
6654 align_get_bits(&s->gb);
/**
 * Parses hrd_parameters() (H.264 Annex E). All fields are read to keep
 * the bitstream position correct; none of the values are stored.
 */
6660 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6661 MpegEncContext * const s = &h->s;
6663 cpb_count = get_ue_golomb(&s->gb) + 1;
6664 get_bits(&s->gb, 4); /* bit_rate_scale */
6665 get_bits(&s->gb, 4); /* cpb_size_scale */
6666 for(i=0; i<cpb_count; i++){
6667 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6668 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6669 get_bits1(&s->gb); /* cbr_flag */
6671 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6672 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6673 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6674 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses vui_parameters() (H.264 Annex E). Stores the sample aspect
 * ratio, timing info, HRD presence and the bitstream-restriction
 * num_reorder_frames into *sps; the remaining fields are read and
 * discarded to keep the bitstream position correct.
 * NOTE(review): embedded original line numbers are not contiguous —
 * lines are missing from this listing.
 */
6677 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6678 MpegEncContext * const s = &h->s;
6679 int aspect_ratio_info_present_flag;
6680 unsigned int aspect_ratio_idc;
6681 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6683 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6685 if( aspect_ratio_info_present_flag ) {
6686 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
6687 if( aspect_ratio_idc == EXTENDED_SAR ) {
6688 sps->sar.num= get_bits(&s->gb, 16);
6689 sps->sar.den= get_bits(&s->gb, 16);
/* idc 1..13 index the predefined pixel_aspect[] table */
6690 }else if(aspect_ratio_idc < 14){
6691 sps->sar= pixel_aspect[aspect_ratio_idc];
6693 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6700 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6702 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6703 get_bits1(&s->gb); /* overscan_appropriate_flag */
6706 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6707 get_bits(&s->gb, 3); /* video_format */
6708 get_bits1(&s->gb); /* video_full_range_flag */
6709 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6710 get_bits(&s->gb, 8); /* colour_primaries */
6711 get_bits(&s->gb, 8); /* transfer_characteristics */
6712 get_bits(&s->gb, 8); /* matrix_coefficients */
6716 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6717 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6718 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6721 sps->timing_info_present_flag = get_bits1(&s->gb);
6722 if(sps->timing_info_present_flag){
6723 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6724 sps->time_scale = get_bits_long(&s->gb, 32);
6725 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6728 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6729 if(nal_hrd_parameters_present_flag)
6730 decode_hrd_parameters(h, sps);
6731 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6732 if(vcl_hrd_parameters_present_flag)
6733 decode_hrd_parameters(h, sps);
6734 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6735 get_bits1(&s->gb); /* low_delay_hrd_flag */
6736 get_bits1(&s->gb); /* pic_struct_present_flag */
6738 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6739 if(sps->bitstream_restriction_flag){
6740 unsigned int num_reorder_frames;
6741 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6742 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6743 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6744 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6745 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6746 num_reorder_frames= get_ue_golomb(&s->gb);
6747 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity bound: delayed_pic reordering buffer cannot exceed 16 frames */
6749 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6750 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6754 sps->num_reorder_frames= num_reorder_frames;
/**
 * Parses one scaling list of 16 or 64 entries into 'factors'.
 * If the list is absent, 'fallback_list' is copied; if the first delta
 * yields 0, the JVT default 'jvt_list' is used. Entries are stored in
 * zigzag order and delta-coded (each value relative to the previous).
 * NOTE(review): embedded original line numbers are not contiguous —
 * closing lines are missing from this listing.
 */
6760 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6761 const uint8_t *jvt_list, const uint8_t *fallback_list){
6762 MpegEncContext * const s = &h->s;
6763 int i, last = 8, next = 8;
6764 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6765 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6766 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6768 for(i=0;i<size;i++){
/* & 0xff wraps the delta into the valid 0..255 coefficient range */
6770 next = (last + get_se_golomb(&s->gb)) & 0xff;
6771 if(!i && !next){ /* matrix not written, we use the preset one */
6772 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat previous value" for the rest of the list */
6775 last = factors[scan[i]] = next ? next : last;
/**
 * Parses the full set of scaling matrices (six 4x4 lists, plus two 8x8
 * lists when 8x8 DCT is possible) for an SPS or PPS. Fallback rules:
 * a PPS falls back to the SPS matrices if the SPS carried any,
 * otherwise to the JVT defaults; within the set, each chroma list
 * falls back to the previously decoded list.
 */
6779 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6780 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6781 MpegEncContext * const s = &h->s;
6782 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6783 const uint8_t *fallback[4] = {
6784 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6785 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6786 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6787 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
6789 if(get_bits1(&s->gb)){
6790 sps->scaling_matrix_present |= is_sps;
6791 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6792 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6793 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6794 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6795 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6796 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6797 if(is_sps || pps->transform_8x8_mode){
6798 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6799 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* matrices absent in a PPS: inherit the SPS matrices wholesale */
6801 } else if(fallback_sps) {
6802 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
6803 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
6808 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* NOTE(review): the return-type line (original line 6810) is missing
 * from this listing; the id-range check and return statements are also
 * absent. Only the range-check log, the av_mallocz() of 'size' bytes,
 * and the allocation-failure log are visible. */
6811 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
6812 const size_t size, const char *name)
6815 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
6820 vec[id] = av_mallocz(size);
6822 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parses a sequence parameter set (H.264 section 7.3.2.1) into a
 * (possibly newly allocated) SPS in h->sps_buffers[sps_id]:
 * profile/level, POC parameters, reference frame count, picture size
 * in macroblocks, MBAFF/cropping flags and optional VUI parameters.
 * NOTE(review): embedded original line numbers are not contiguous —
 * lines (including error-return paths) are missing from this listing.
 */
6827 static inline int decode_seq_parameter_set(H264Context *h){
6828 MpegEncContext * const s = &h->s;
6829 int profile_idc, level_idc;
6830 unsigned int sps_id, tmp, mb_width, mb_height;
6834 profile_idc= get_bits(&s->gb, 8);
6835 get_bits1(&s->gb); //constraint_set0_flag
6836 get_bits1(&s->gb); //constraint_set1_flag
6837 get_bits1(&s->gb); //constraint_set2_flag
6838 get_bits1(&s->gb); //constraint_set3_flag
6839 get_bits(&s->gb, 4); // reserved
6840 level_idc= get_bits(&s->gb, 8);
6841 sps_id= get_ue_golomb(&s->gb);
6843 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
6847 sps->profile_idc= profile_idc;
6848 sps->level_idc= level_idc;
6850 if(sps->profile_idc >= 100){ //high profile
6851 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
6852 get_bits1(&s->gb); //residual_color_transform_flag
6853 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6854 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6855 sps->transform_bypass = get_bits1(&s->gb);
6856 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
6858 sps->scaling_matrix_present = 0;
6860 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6861 sps->poc_type= get_ue_golomb(&s->gb);
6863 if(sps->poc_type == 0){ //FIXME #define
6864 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6865 } else if(sps->poc_type == 1){//FIXME #define
6866 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6867 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6868 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6869 tmp= get_ue_golomb(&s->gb);
/* bound num_ref_frames_in_pic_order_cnt_cycle by the array capacity */
6871 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
6872 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
6875 sps->poc_cycle_length= tmp;
6877 for(i=0; i<sps->poc_cycle_length; i++)
6878 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6879 }else if(sps->poc_type != 2){
6880 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6884 tmp= get_ue_golomb(&s->gb);
6885 if(tmp > MAX_PICTURE_COUNT-2){
6886 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
6888 sps->ref_frame_count= tmp;
6889 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
6890 mb_width= get_ue_golomb(&s->gb) + 1;
6891 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_width/height multiplications against integer overflow */
6892 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
6893 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
6894 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
6897 sps->mb_width = mb_width;
6898 sps->mb_height= mb_height;
6900 sps->frame_mbs_only_flag= get_bits1(&s->gb);
6901 if(!sps->frame_mbs_only_flag)
6902 sps->mb_aff= get_bits1(&s->gb);
6906 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
6908 #ifndef ALLOW_INTERLACE
6910 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
6912 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
6913 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
6915 sps->crop= get_bits1(&s->gb);
6917 sps->crop_left = get_ue_golomb(&s->gb);
6918 sps->crop_right = get_ue_golomb(&s->gb);
6919 sps->crop_top = get_ue_golomb(&s->gb);
6920 sps->crop_bottom= get_ue_golomb(&s->gb);
6921 if(sps->crop_left || sps->crop_top){
6922 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
6928 sps->crop_bottom= 0;
6931 sps->vui_parameters_present_flag= get_bits1(&s->gb);
6932 if( sps->vui_parameters_present_flag )
6933 decode_vui_parameters(h, sps);
6935 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
6936 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
6937 sps_id, sps->profile_idc, sps->level_idc,
6939 sps->ref_frame_count,
6940 sps->mb_width, sps->mb_height,
6941 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
6942 sps->direct_8x8_inference_flag ? "8B8" : "",
6943 sps->crop_left, sps->crop_right,
6944 sps->crop_top, sps->crop_bottom,
6945 sps->vui_parameters_present_flag ? "VUI" : ""
/* Fills pps->chroma_qp_table[t]: maps a luma QP (plus the PPS chroma QP
 * index offset, clipped to 0..51) to the chroma QP via chroma_qp[].
 * NOTE(review): the return-type line and closing brace are missing from
 * this listing; also the loop runs i < 255, so entry 255 of the table
 * appears never to be written, and the '& 0xff' mask is then a no-op —
 * confirm against the upstream source. */
6952 build_qp_table(PPS *pps, int t, int index)
6955 for(i = 0; i < 255; i++)
6956 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parses a picture parameter set (H.264 section 7.3.2.2) into a
 * (possibly newly allocated) PPS in h->pps_buffers[pps_id]: entropy
 * coding mode, slice groups (FMO unsupported), reference counts,
 * weighting, QP offsets, deblocking/transform flags and the optional
 * trailing scaling matrices / second chroma QP offset.
 * NOTE(review): embedded original line numbers are not contiguous —
 * lines are missing from this listing. The "| ... |" table lines below
 * are syntax-table text from the spec that upstream keeps inside a
 * block comment; the comment delimiters were dropped by this listing.
 */
6959 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
6960 MpegEncContext * const s = &h->s;
6961 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
6964 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
6968 tmp= get_ue_golomb(&s->gb);
6969 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
6970 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
6975 pps->cabac= get_bits1(&s->gb);
6976 pps->pic_order_present= get_bits1(&s->gb);
6977 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
6978 if(pps->slice_group_count > 1 ){
6979 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
6980 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
6981 switch(pps->mb_slice_group_map_type){
6984 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
6985 | run_length[ i ] |1 |ue(v) |
6990 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
6992 | top_left_mb[ i ] |1 |ue(v) |
6993 | bottom_right_mb[ i ] |1 |ue(v) |
7001 | slice_group_change_direction_flag |1 |u(1) |
7002 | slice_group_change_rate_minus1 |1 |ue(v) |
7007 | slice_group_id_cnt_minus1 |1 |ue(v) |
7008 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7010 | slice_group_id[ i ] |1 |u(v) |
7015 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7016 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7017 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7018 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7019 pps->ref_count[0]= pps->ref_count[1]= 1;
7023 pps->weighted_pred= get_bits1(&s->gb);
7024 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7025 pps->init_qp= get_se_golomb(&s->gb) + 26;
7026 pps->init_qs= get_se_golomb(&s->gb) + 26;
7027 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7028 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7029 pps->constrained_intra_pred= get_bits1(&s->gb);
7030 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7032 pps->transform_8x8_mode= 0;
7033 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7034 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7035 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing RBSP: 8x8 mode, scaling matrices, 2nd chroma offset */
7037 if(get_bits_count(&s->gb) < bit_length){
7038 pps->transform_8x8_mode= get_bits1(&s->gb);
7039 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7040 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7042 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7045 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
/* only build a second table when the Cb/Cr offsets actually differ */
7046 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7047 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7048 h->pps.chroma_qp_diff= 1;
7050 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7052 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7053 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7054 pps_id, pps->sps_id,
7055 pps->cabac ? "CABAC" : "CAVLC",
7056 pps->slice_group_count,
7057 pps->ref_count[0], pps->ref_count[1],
7058 pps->weighted_pred ? "weighted" : "",
7059 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7060 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7061 pps->constrained_intra_pred ? "CONSTR" : "",
7062 pps->redundant_pic_cnt_present ? "REDU" : "",
7063 pps->transform_8x8_mode ? "8x8DCT" : ""
7071 * Call decode_slice() for each context.
7073 * @param h h264 master context
7074 * @param context_count number of contexts to execute
7076 static void execute_decode_slices(H264Context *h, int context_count){
7077 MpegEncContext * const s = &h->s;
7078 AVCodecContext * const avctx= s->avctx;
/* single context: decode directly, no thread dispatch needed */
7082 if(context_count == 1) {
7083 decode_slice(avctx, h);
7085 for(i = 1; i < context_count; i++) {
7086 hx = h->thread_context[i];
7087 hx->s.error_resilience = avctx->error_resilience;
7088 hx->s.error_count = 0;
/* run decode_slice() over all thread contexts in parallel */
7091 avctx->execute(avctx, (void *)decode_slice,
7092 (void **)h->thread_context, NULL, context_count);
7094 /* pull back stuff from slices to master context */
7095 hx = h->thread_context[context_count - 1];
7096 s->mb_x = hx->s.mb_x;
7097 s->mb_y = hx->s.mb_y;
7098 for(i = 1; i < context_count; i++)
7099 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Splits the input buffer into NAL units (length-prefixed for AVC/mp4,
 * start-code-delimited for Annex B), unescapes each via decode_nal()
 * and dispatches it by nal_unit_type (slice, DPA/DPB/DPC partitions,
 * SEI, SPS, PPS, ...). Slices are batched into up to h->max_contexts
 * thread contexts and run through execute_decode_slices().
 * NOTE(review): embedded original line numbers are not contiguous —
 * lines (loop headers, error paths, case labels) are missing from this
 * listing.
 */
7104 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7105 MpegEncContext * const s = &h->s;
7106 AVCodecContext * const avctx= s->avctx;
7108 H264Context *hx; ///< thread context
7109 int context_count = 0;
7111 h->max_contexts = avctx->thread_count;
7114 for(i=0; i<50; i++){
7115 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* without CHUNKS the caller sends whole frames: reset per-frame state */
7118 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7119 h->current_slice = 0;
7120 s->current_picture_ptr= NULL;
7132 if(buf_index >= buf_size) break;
/* AVC/mp4 framing: big-endian length prefix of nal_length_size bytes */
7134 for(i = 0; i < h->nal_length_size; i++)
7135 nalsize = (nalsize << 8) | buf[buf_index++];
7136 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7141 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7146 // start code prefix search
7147 for(; buf_index + 3 < buf_size; buf_index++){
7148 // This should always succeed in the first iteration.
7149 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7153 if(buf_index+3 >= buf_size) break;
7158 hx = h->thread_context[context_count];
7160 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7161 if (ptr==NULL || dst_length < 0){
/* strip trailing zero bytes before measuring the RBSP bit length */
7164 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7166 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7168 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7169 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7172 if (h->is_avc && (nalsize != consumed))
7173 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7175 buf_index += consumed;
7177 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7178 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7183 switch(hx->nal_unit_type){
7185 if (h->nal_unit_type != NAL_IDR_SLICE) {
7186 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7189 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7191 init_get_bits(&hx->s.gb, ptr, bit_length);
7193 hx->inter_gb_ptr= &hx->s.gb;
7194 hx->s.data_partitioning = 0;
7196 if((err = decode_slice_header(hx, h)))
7199 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* frame-skipping policy: only count this slice when it passes all filters */
7200 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7201 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7202 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7203 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7204 && avctx->skip_frame < AVDISCARD_ALL)
/* data partitioning: DPA carries the slice header, DPB/DPC the residual */
7208 init_get_bits(&hx->s.gb, ptr, bit_length);
7210 hx->inter_gb_ptr= NULL;
7211 hx->s.data_partitioning = 1;
7213 err = decode_slice_header(hx, h);
7216 init_get_bits(&hx->intra_gb, ptr, bit_length);
7217 hx->intra_gb_ptr= &hx->intra_gb;
7220 init_get_bits(&hx->inter_gb, ptr, bit_length);
7221 hx->inter_gb_ptr= &hx->inter_gb;
7223 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7224 && s->context_initialized
7226 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7227 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7228 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7229 && avctx->skip_frame < AVDISCARD_ALL)
7233 init_get_bits(&s->gb, ptr, bit_length);
7237 init_get_bits(&s->gb, ptr, bit_length);
7238 decode_seq_parameter_set(h);
7240 if(s->flags& CODEC_FLAG_LOW_DELAY)
7243 if(avctx->has_b_frames < 2)
7244 avctx->has_b_frames= !s->low_delay;
7247 init_get_bits(&s->gb, ptr, bit_length);
7249 decode_picture_parameter_set(h, bit_length);
7253 case NAL_END_SEQUENCE:
7254 case NAL_END_STREAM:
7255 case NAL_FILLER_DATA:
7257 case NAL_AUXILIARY_SLICE:
7260 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* batch full: run all queued slice contexts now */
7263 if(context_count == h->max_contexts) {
7264 execute_decode_slices(h, context_count);
7269 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7271 /* Slice could not be decoded in parallel mode, copy down
7272 * NAL unit stuff to context 0 and restart. Note that
7273 * rbsp_buffer is not transfered, but since we no longer
7274 * run in parallel mode this should not be an issue. */
7275 h->nal_unit_type = hx->nal_unit_type;
7276 h->nal_ref_idc = hx->nal_ref_idc;
/* flush any slices still queued when the buffer is exhausted */
7282 execute_decode_slices(h, context_count);
7287 * returns the number of bytes consumed for building the current frame
7289 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* in truncated mode, bytes buffered by the parser were not consumed yet */
7290 if(s->flags&CODEC_FLAG_TRUNCATED){
7291 pos -= s->parse_context.last_index;
7292 if(pos<0) pos=0; // FIXME remove (unneeded?)
7296 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7297 if(pos+10>buf_size) pos=buf_size; // oops ;)
7303 static int decode_frame(AVCodecContext *avctx,
7304 void *data, int *data_size,
7305 uint8_t *buf, int buf_size)
7307 H264Context *h = avctx->priv_data;
7308 MpegEncContext *s = &h->s;
7309 AVFrame *pict = data;
7312 s->flags= avctx->flags;
7313 s->flags2= avctx->flags2;
7315 /* no supplementary picture */
7316 if (buf_size == 0) {
7320 //FIXME factorize this with the output code below
7321 out = h->delayed_pic[0];
7323 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7324 if(h->delayed_pic[i]->poc < out->poc){
7325 out = h->delayed_pic[i];
7329 for(i=out_idx; h->delayed_pic[i]; i++)
7330 h->delayed_pic[i] = h->delayed_pic[i+1];
7333 *data_size = sizeof(AVFrame);
7334 *pict= *(AVFrame*)out;
7340 if(s->flags&CODEC_FLAG_TRUNCATED){
7341 int next= ff_h264_find_frame_end(h, buf, buf_size);
7343 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7345 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7348 if(h->is_avc && !h->got_avcC) {
7349 int i, cnt, nalsize;
7350 unsigned char *p = avctx->extradata;
7351 if(avctx->extradata_size < 7) {
7352 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7356 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7359 /* sps and pps in the avcC always have length coded with 2 bytes,
7360 so put a fake nal_length_size = 2 while parsing them */
7361 h->nal_length_size = 2;
7362 // Decode sps from avcC
7363 cnt = *(p+5) & 0x1f; // Number of sps
7365 for (i = 0; i < cnt; i++) {
7366 nalsize = AV_RB16(p) + 2;
7367 if(decode_nal_units(h, p, nalsize) < 0) {
7368 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7373 // Decode pps from avcC
7374 cnt = *(p++); // Number of pps
7375 for (i = 0; i < cnt; i++) {
7376 nalsize = AV_RB16(p) + 2;
7377 if(decode_nal_units(h, p, nalsize) != nalsize) {
7378 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7383 // Now store right nal length size, that will be use to parse all other nals
7384 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7385 // Do not reparse avcC
7389 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7390 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7394 buf_index=decode_nal_units(h, buf, buf_size);
7398 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7399 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7400 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7404 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7405 Picture *out = s->current_picture_ptr;
7406 Picture *cur = s->current_picture_ptr;
7407 Picture *prev = h->delayed_output_pic;
7408 int i, pics, cross_idr, out_of_order, out_idx;
7412 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7413 s->current_picture_ptr->pict_type= s->pict_type;
7415 h->prev_frame_num_offset= h->frame_num_offset;
7416 h->prev_frame_num= h->frame_num;
7417 if(s->current_picture_ptr->reference & s->picture_structure){
7418 h->prev_poc_msb= h->poc_msb;
7419 h->prev_poc_lsb= h->poc_lsb;
7420 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7427 //FIXME do something with unavailable reference frames
7429 #if 0 //decode order
7430 *data_size = sizeof(AVFrame);
7432 /* Sort B-frames into display order */
7434 if(h->sps.bitstream_restriction_flag
7435 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7436 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7441 while(h->delayed_pic[pics]) pics++;
7443 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7445 h->delayed_pic[pics++] = cur;
7446 if(cur->reference == 0)
7447 cur->reference = DELAYED_PIC_REF;
7450 for(i=0; h->delayed_pic[i]; i++)
7451 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
7454 out = h->delayed_pic[0];
7456 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7457 if(h->delayed_pic[i]->poc < out->poc){
7458 out = h->delayed_pic[i];
7462 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7463 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7465 else if(prev && pics <= s->avctx->has_b_frames)
7467 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7469 ((!cross_idr && prev && out->poc > prev->poc + 2)
7470 || cur->pict_type == B_TYPE)))
7473 s->avctx->has_b_frames++;
7476 else if(out_of_order)
7479 if(out_of_order || pics > s->avctx->has_b_frames){
7480 for(i=out_idx; h->delayed_pic[i]; i++)
7481 h->delayed_pic[i] = h->delayed_pic[i+1];
7487 *data_size = sizeof(AVFrame);
7488 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7489 prev->reference = 0;
7490 h->delayed_output_pic = out;
7494 *pict= *(AVFrame*)out;
7496 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7499 assert(pict->data[0] || !*data_size);
7500 ff_print_debug_info(s, pict);
7501 //printf("out %d\n", (int)pict->data[0]);
7504 /* Return the Picture timestamp as the frame number */
7505 /* we substract 1 because it is added on utils.c */
7506 avctx->frame_number = s->picture_number - 1;
7508 return get_consumed_bytes(s, buf_index, buf_size);
7511 static inline void fill_mb_avail(H264Context *h){
7512 MpegEncContext * const s = &h->s;
/* Raster-order index of the current macroblock in the slice table. */
7513 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/*
 * Fill h->mb_avail[] with neighbor-availability flags for the current MB.
 * A neighbor counts as available only if it lies inside the picture AND
 * belongs to the same slice (same h->slice_num in h->slice_table).
 * Index layout: [0]=top-left, [1]=top, [2]=top-right, [3]=left.
 *
 * NOTE(review): lines appear to be missing from this residue here — the
 * usual `if(s->mb_y)` guard for the top row is not visible, yet the
 * expressions below index slice_table[mb_xy - s->mb_stride ...], which
 * would read out of bounds on row 0 without that guard. Verify against
 * the full file before relying on this fragment.
 */
7516 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7517 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7518 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* Left neighbor exists only when not in the first MB column. */
7524 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
/* [4] (current MB) is hardcoded available, [5] unused placeholder. */
7525 h->mb_avail[4]= 1; //FIXME move out
7526 h->mb_avail[5]= 0; //FIXME move out
7533 #define SIZE (COUNT*40)
7539 // int int_temp[10000];
7541 AVCodecContext avctx;
7543 dsputil_init(&dsp, &avctx);
7545 init_put_bits(&pb, temp, SIZE);
7546 printf("testing unsigned exp golomb\n");
7547 for(i=0; i<COUNT; i++){
7549 set_ue_golomb(&pb, i);
7550 STOP_TIMER("set_ue_golomb");
7552 flush_put_bits(&pb);
7554 init_get_bits(&gb, temp, 8*SIZE);
7555 for(i=0; i<COUNT; i++){
7558 s= show_bits(&gb, 24);
7561 j= get_ue_golomb(&gb);
7563 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7566 STOP_TIMER("get_ue_golomb");
7570 init_put_bits(&pb, temp, SIZE);
7571 printf("testing signed exp golomb\n");
7572 for(i=0; i<COUNT; i++){
7574 set_se_golomb(&pb, i - COUNT/2);
7575 STOP_TIMER("set_se_golomb");
7577 flush_put_bits(&pb);
7579 init_get_bits(&gb, temp, 8*SIZE);
7580 for(i=0; i<COUNT; i++){
7583 s= show_bits(&gb, 24);
7586 j= get_se_golomb(&gb);
7587 if(j != i - COUNT/2){
7588 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7591 STOP_TIMER("get_se_golomb");
7594 printf("testing 4x4 (I)DCT\n");
7597 uint8_t src[16], ref[16];
7598 uint64_t error= 0, max_error=0;
7600 for(i=0; i<COUNT; i++){
7602 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7603 for(j=0; j<16; j++){
7604 ref[j]= random()%255;
7605 src[j]= random()%255;
7608 h264_diff_dct_c(block, src, ref, 4);
7611 for(j=0; j<16; j++){
7612 // printf("%d ", block[j]);
7613 block[j]= block[j]*4;
7614 if(j&1) block[j]= (block[j]*4 + 2)/5;
7615 if(j&4) block[j]= (block[j]*4 + 2)/5;
7619 s->dsp.h264_idct_add(ref, block, 4);
7620 /* for(j=0; j<16; j++){
7621 printf("%d ", ref[j]);
7625 for(j=0; j<16; j++){
7626 int diff= FFABS(src[j] - ref[j]);
7629 max_error= FFMAX(max_error, diff);
7632 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7634 printf("testing quantizer\n");
7635 for(qp=0; qp<52; qp++){
7637 src1_block[i]= src2_block[i]= random()%255;
7641 printf("Testing NAL layer\n");
7643 uint8_t bitstream[COUNT];
7644 uint8_t nal[COUNT*2];
7646 memset(&h, 0, sizeof(H264Context));
7648 for(i=0; i<COUNT; i++){
7656 for(j=0; j<COUNT; j++){
7657 bitstream[j]= (random() % 255) + 1;
7660 for(j=0; j<zeros; j++){
7661 int pos= random() % COUNT;
7662 while(bitstream[pos] == 0){
7671 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7673 printf("encoding failed\n");
7677 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7681 if(out_length != COUNT){
7682 printf("incorrect length %d %d\n", out_length, COUNT);
7686 if(consumed != nal_length){
7687 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7691 if(memcmp(bitstream, out, COUNT)){
7692 printf("mismatch\n");
7697 printf("Testing RBSP\n");
/*
 * Codec close callback: release per-context H.264 decoder state.
 * Frees both RBSP unescaping buffers and the per-context tables;
 * av_freep() also NULLs the pointers, so a double close is harmless.
 *
 * NOTE(review): this residue is missing the function's braces and tail —
 * `s` is declared but its use (presumably MPV_common_end(s)) and the
 * `return 0;` are not visible here; confirm against the full file.
 */
7705 static int decode_end(AVCodecContext *avctx)
7707 H264Context *h = avctx->priv_data;
7708 MpegEncContext *s = &h->s;
7710 av_freep(&h->rbsp_buffer[0]);
7711 av_freep(&h->rbsp_buffer[1]);
7712 free_tables(h); //FIXME cleanup init stuff perhaps
7715 // memset(h, 0, sizeof(H264Context));
7721 AVCodec h264_decoder = {
7725 sizeof(H264Context),
7730 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,