2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
/* CAVLC lookup tables, built once at init time and shared by all decoder
 * instances (file-scope statics). */
42 static VLC coeff_token_vlc[4];
43 static VLC chroma_dc_coeff_token_vlc;
45 static VLC total_zeros_vlc[15];
46 static VLC chroma_dc_total_zeros_vlc[3];
48 static VLC run_vlc[6];
/* Forward declarations: SVQ3 IDCT variants and the in-loop deblocking filter
 * (full and fast paths), defined later in this file. */
51 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
52 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
53 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
54 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
56 static av_always_inline uint32_t pack16to32(int a, int b){
57 #ifdef WORDS_BIGENDIAN
58 return (b&0xFFFF) + (a<<16);
60 return (a&0xFFFF) + (b<<16);
/* ff_rem6[q] == q % 6 for the full H.264 QP range 0..51; a table lookup
 * avoids a runtime modulo in the dequant code.
 * Note: restores the terminating "};" missing from this listing. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* ff_div6[q] == q / 6 for the full H.264 QP range 0..51; companion table
 * to ff_rem6, avoiding a runtime division.
 * Note: restores the terminating "};" missing from this listing. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/* Fills a w x h rectangle of 'size'-byte elements with 'val' using the
 * widest aligned stores available.  h is always 4 here (all callers pass
 * the 4-row caches).
 * NOTE(review): the branch headers (if(w==2) / else if(w==4) / ...) fall in
 * the gaps of this listing; the grouping comments below are inferred from
 * the store widths and offsets — confirm against the complete source. */
75 * @param h height of the rectangle, should be a constant
76 * @param w width of the rectangle, should be a constant
77 * @param size the size of val (1 or 4), should be a constant
79 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
80 uint8_t *p= (uint8_t*)vp;
81 assert(size==1 || size==4);
/* preconditions: start address aligned to min(w, STRIDE_ALIGN) and stride a
 * multiple of the row width, so the wide stores below are aligned */
87 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
88 assert((stride&(w-1))==0);
/* 2-byte rows: one 16-bit store per row; size==1 duplicates the byte */
90 const uint16_t v= size==4 ? val : val*0x0101;
91 *(uint16_t*)(p + 0*stride)= v;
93 *(uint16_t*)(p + 1*stride)= v;
95 *(uint16_t*)(p + 2*stride)=
96 *(uint16_t*)(p + 3*stride)= v;
/* 4-byte rows: one 32-bit store per row; size==1 duplicates into all bytes */
98 const uint32_t v= size==4 ? val : val*0x01010101;
99 *(uint32_t*)(p + 0*stride)= v;
101 *(uint32_t*)(p + 1*stride)= v;
103 *(uint32_t*)(p + 2*stride)=
104 *(uint32_t*)(p + 3*stride)= v;
/* 8-byte rows on 64-bit capable hosts: duplicate val into a 64-bit word */
106 //gcc can't optimize 64bit math on x86_32
107 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
108 const uint64_t v= val*0x0100000001ULL;
109 *(uint64_t*)(p + 0*stride)= v;
111 *(uint64_t*)(p + 1*stride)= v;
113 *(uint64_t*)(p + 2*stride)=
114 *(uint64_t*)(p + 3*stride)= v;
/* 16-byte rows (64-bit hosts): two 64-bit stores per row */
116 const uint64_t v= val*0x0100000001ULL;
117 *(uint64_t*)(p + 0+0*stride)=
118 *(uint64_t*)(p + 8+0*stride)=
119 *(uint64_t*)(p + 0+1*stride)=
120 *(uint64_t*)(p + 8+1*stride)= v;
122 *(uint64_t*)(p + 0+2*stride)=
123 *(uint64_t*)(p + 8+2*stride)=
124 *(uint64_t*)(p + 0+3*stride)=
125 *(uint64_t*)(p + 8+3*stride)= v;
/* 32-bit fallback: 8-byte rows as two 32-bit stores per row */
127 *(uint32_t*)(p + 0+0*stride)=
128 *(uint32_t*)(p + 4+0*stride)= val;
130 *(uint32_t*)(p + 0+1*stride)=
131 *(uint32_t*)(p + 4+1*stride)= val;
133 *(uint32_t*)(p + 0+2*stride)=
134 *(uint32_t*)(p + 4+2*stride)=
135 *(uint32_t*)(p + 0+3*stride)=
136 *(uint32_t*)(p + 4+3*stride)= val;
/* 32-bit fallback: 16-byte rows as four 32-bit stores per row */
138 *(uint32_t*)(p + 0+0*stride)=
139 *(uint32_t*)(p + 4+0*stride)=
140 *(uint32_t*)(p + 8+0*stride)=
141 *(uint32_t*)(p +12+0*stride)=
142 *(uint32_t*)(p + 0+1*stride)=
143 *(uint32_t*)(p + 4+1*stride)=
144 *(uint32_t*)(p + 8+1*stride)=
145 *(uint32_t*)(p +12+1*stride)= val;
147 *(uint32_t*)(p + 0+2*stride)=
148 *(uint32_t*)(p + 4+2*stride)=
149 *(uint32_t*)(p + 8+2*stride)=
150 *(uint32_t*)(p +12+2*stride)=
151 *(uint32_t*)(p + 0+3*stride)=
152 *(uint32_t*)(p + 4+3*stride)=
153 *(uint32_t*)(p + 8+3*stride)=
154 *(uint32_t*)(p +12+3*stride)= val;
/* Fills the per-macroblock neighbour caches (intra prediction modes,
 * non-zero-count, cbp, motion vectors, reference indices, mvd, direct flags)
 * from the macroblocks above/left of the current one, honouring slice
 * boundaries, constrained intra prediction and MBAFF field/frame mixing.
 * When for_deblock is set, only the subset needed by the deblocking filter
 * is prepared.
 * NOTE(review): this listing has gaps (the embedded original line numbers
 * skip), so some control structure between the visible statements is not
 * shown here; the section comments below describe the visible statements
 * only. */
161 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
162 MpegEncContext * const s = &h->s;
163 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
164 int topleft_xy, top_xy, topright_xy, left_xy[2];
165 int topleft_type, top_type, topright_type, left_type[2];
169 //FIXME deblocking could skip the intra and nnz parts.
170 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
173 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* default (progressive) neighbour macroblock addresses */
175 top_xy = mb_xy - s->mb_stride;
176 topleft_xy = top_xy - 1;
177 topright_xy= top_xy + 1;
178 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbour addresses depend on whether each MB pair (current and
 * each neighbour) is frame- or field-coded */
188 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
189 const int top_pair_xy = pair_xy - s->mb_stride;
190 const int topleft_pair_xy = top_pair_xy - 1;
191 const int topright_pair_xy = top_pair_xy + 1;
192 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
193 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
194 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
195 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
196 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
197 const int bottom = (s->mb_y & 1);
198 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
200 ? !curr_mb_frame_flag // bottom macroblock
201 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
203 top_xy -= s->mb_stride;
206 ? !curr_mb_frame_flag // bottom macroblock
207 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
209 topleft_xy -= s->mb_stride;
212 ? !curr_mb_frame_flag // bottom macroblock
213 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
215 topright_xy -= s->mb_stride;
217 if (left_mb_frame_flag != curr_mb_frame_flag) {
218 left_xy[1] = left_xy[0] = pair_xy - 1;
219 if (curr_mb_frame_flag) {
240 left_xy[1] += s->mb_stride;
/* publish resolved neighbour addresses for later users (e.g. deblock) */
253 h->top_mb_xy = top_xy;
254 h->left_mb_xy[0] = left_xy[0];
255 h->left_mb_xy[1] = left_xy[1];
/* deblock path: a slice_table value < 255 means the MB was decoded */
259 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
260 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
261 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF deblock: reload the current MB's own nnz bits and motion data,
 * since the caches may hold another field's values */
263 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
265 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
267 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
268 for(list=0; list<h->list_count; list++){
269 if(USES_LIST(mb_type,list)){
270 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
271 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
272 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
273 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
279 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
280 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
282 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
283 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
285 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
286 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* decode path: neighbours count only if they belong to the same slice */
291 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
292 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
293 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
294 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
295 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra MB: start with all neighbour samples available, then mask out the
 * unavailable sides (bitmask per 4x4 block position) */
298 if(IS_INTRA(mb_type)){
299 h->topleft_samples_available=
300 h->top_samples_available=
301 h->left_samples_available= 0xFFFF;
302 h->topright_samples_available= 0xEEEA;
304 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
305 h->topleft_samples_available= 0xB3FF;
306 h->top_samples_available= 0x33FF;
307 h->topright_samples_available= 0x26EA;
310 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
311 h->topleft_samples_available&= 0xDF5F;
312 h->left_samples_available&= 0x5F5F;
316 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
317 h->topleft_samples_available&= 0x7FFF;
319 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
320 h->topright_samples_available&= 0xFBFF;
/* intra4x4 prediction-mode cache from the neighbours' stored modes */
322 if(IS_INTRA4x4(mb_type)){
323 if(IS_INTRA4x4(top_type)){
324 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
325 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
326 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
327 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
330 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
335 h->intra4x4_pred_mode_cache[4+8*0]=
336 h->intra4x4_pred_mode_cache[5+8*0]=
337 h->intra4x4_pred_mode_cache[6+8*0]=
338 h->intra4x4_pred_mode_cache[7+8*0]= pred;
341 if(IS_INTRA4x4(left_type[i])){
342 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
343 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
346 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
351 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
352 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non-zero-count cache: top row from the MB above, left column from the
 * MB(s) to the left; 64 marks "unavailable" for CAVLC, 0 for CABAC */
367 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
369 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
370 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
371 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
372 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
374 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
375 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
377 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
378 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
381 h->non_zero_count_cache[4+8*0]=
382 h->non_zero_count_cache[5+8*0]=
383 h->non_zero_count_cache[6+8*0]=
384 h->non_zero_count_cache[7+8*0]=
386 h->non_zero_count_cache[1+8*0]=
387 h->non_zero_count_cache[2+8*0]=
389 h->non_zero_count_cache[1+8*3]=
390 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
394 for (i=0; i<2; i++) {
396 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
397 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
398 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
399 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
401 h->non_zero_count_cache[3+8*1 + 2*8*i]=
402 h->non_zero_count_cache[3+8*2 + 2*8*i]=
403 h->non_zero_count_cache[0+8*1 + 8*i]=
404 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* cbp of the top/left neighbours, needed by CABAC context derivation */
411 h->top_cbp = h->cbp_table[top_xy];
412 } else if(IS_INTRA(mb_type)) {
419 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
420 } else if(IS_INTRA(mb_type)) {
426 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
429 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter/direct MB: fill mv/ref caches from the four neighbours */
434 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
436 for(list=0; list<h->list_count; list++){
437 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
438 /*if(!h->mv_cache_clean[list]){
439 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
440 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
441 h->mv_cache_clean[list]= 1;
445 h->mv_cache_clean[list]= 0;
/* top neighbour row of the mv/ref cache */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
450 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
451 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
452 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
453 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
454 h->ref_cache[list][scan8[0] + 0 - 1*8]=
455 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
456 h->ref_cache[list][scan8[0] + 2 - 1*8]=
457 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
459 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
460 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
461 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
462 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
463 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* left neighbour column (two halves, one per left_xy entry) */
467 int cache_idx = scan8[0] - 1 + i*2*8;
468 if(USES_LIST(left_type[i], list)){
469 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
470 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
471 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
472 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
473 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
474 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
476 *(uint32_t*)h->mv_cache [list][cache_idx ]=
477 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
478 h->ref_cache[list][cache_idx ]=
479 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
483 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* topleft / topright corner entries */
486 if(USES_LIST(topleft_type, list)){
487 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
488 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
489 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
490 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
492 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
493 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
496 if(USES_LIST(topright_type, list)){
497 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
498 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
499 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
500 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
502 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
503 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
506 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* padding entries outside the current MB, cleared defensively */
509 h->ref_cache[list][scan8[5 ]+1] =
510 h->ref_cache[list][scan8[7 ]+1] =
511 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
512 h->ref_cache[list][scan8[4 ]] =
513 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
514 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
515 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
516 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
517 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
518 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* mvd (motion vector difference) cache, used by CABAC contexts */
521 /* XXX beurk, Load mvd */
522 if(USES_LIST(top_type, list)){
523 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
524 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
525 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
526 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
527 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
529 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
530 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
531 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
532 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
534 if(USES_LIST(left_type[0], list)){
535 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
536 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
537 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
539 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
540 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
542 if(USES_LIST(left_type[1], list)){
543 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
544 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
545 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
547 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
548 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
550 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
551 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
552 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
553 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
554 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flags of the neighbours */
556 if(h->slice_type == B_TYPE){
557 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
559 if(IS_DIRECT(top_type)){
560 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
561 }else if(IS_8X8(top_type)){
562 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
563 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
564 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
566 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
569 if(IS_DIRECT(left_type[0]))
570 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
571 else if(IS_8X8(left_type[0]))
572 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
574 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
576 if(IS_DIRECT(left_type[1]))
577 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
578 else if(IS_8X8(left_type[1]))
579 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
581 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: rescale cached refs/mvs when a neighbour's field/frame coding
 * differs from the current MB (MAP_F2F applied to each cache position) */
587 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
588 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
589 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
590 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
591 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
592 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
593 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
594 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
595 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
596 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame->field mapping: double the ref index, halve the vertical mv */
598 #define MAP_F2F(idx, mb_type)\
599 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
600 h->ref_cache[list][idx] <<= 1;\
601 h->mv_cache[list][idx][1] /= 2;\
602 h->mvd_cache[list][idx][1] /= 2;\
/* field->frame mapping: the inverse scaling */
607 #define MAP_F2F(idx, mb_type)\
608 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
609 h->ref_cache[list][idx] >>= 1;\
610 h->mv_cache[list][idx][1] <<= 1;\
611 h->mvd_cache[list][idx][1] <<= 1;\
/* 8x8 DCT flag of the neighbours, used for transform-size prediction */
621 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
624 static inline void write_back_intra_pred_mode(H264Context *h){
625 MpegEncContext * const s = &h->s;
626 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
628 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
629 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
630 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
631 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
632 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
633 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
634 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/* Validates the decoded intra4x4 prediction modes against neighbour
 * availability: where top/left samples are missing, directional modes that
 * need them are remapped via the tables below (-1 entries = invalid mode,
 * error is logged and reported).
 * NOTE(review): the loop headers and error-return lines fall in the gaps of
 * this listing. */
638 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
640 static inline int check_intra4x4_pred_mode(H264Context *h){
641 MpegEncContext * const s = &h->s;
/* mode remap tables: index = decoded mode; -1 marks modes that are illegal
 * without that neighbour, other values substitute an available-DC variant */
642 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
643 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
646 if(!(h->top_samples_available&0x8000)){
648 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
650 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
653 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
658 if(!(h->left_samples_available&0x8000)){
660 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
662 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
665 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
671 } //FIXME cleanup like next
/* Same idea as check_intra4x4_pred_mode but for the 16x16 luma / chroma
 * prediction mode: remaps or rejects the mode when top/left samples are
 * unavailable.
 * NOTE(review): the range check, remap statements and returns fall in the
 * gaps of this listing. */
674 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
676 static inline int check_intra_pred_mode(H264Context *h, int mode){
677 MpegEncContext * const s = &h->s;
678 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
679 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
682 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
686 if(!(h->top_samples_available&0x8000)){
689 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
694 if(!(h->left_samples_available&0x8000)){
697 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
706 * gets the predicted intra4x4 prediction mode.
708 static inline int pred_intra_mode(H264Context *h, int n){
709 const int index8= scan8[n];
710 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
711 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
712 const int min= FFMIN(left, top);
714 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
716 if(min<0) return DC_PRED;
/* Writes the non-zero coefficient counts of the current macroblock back from
 * the cache into h->non_zero_count[mb_xy]: luma right column / bottom row
 * first, then the chroma entries, plus a packed 16-bit "any nonzero" bitmask
 * at offset [14] for the deblocking filter.
 * NOTE(review): the FRAME_MBAFF guard and the loop building 'v' fall in the
 * gaps of this listing. */
720 static inline void write_back_non_zero_count(H264Context *h){
721 MpegEncContext * const s = &h->s;
722 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* luma entries 0..6 (right column and bottom row of the cache) */
724 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
725 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
726 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
727 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
728 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
729 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
730 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma entries 7..12 */
732 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
733 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
734 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
736 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
737 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
738 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
741 // store all luma nnzs, for deblocking
/* bit i of v = "4x4 block i has at least one nonzero coefficient" */
744 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
745 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/* Predicts the number of non-zero coefficients for CAVLC coeff_token
 * decoding from the left and top neighbour counts.
 * NOTE(review): the lines combining left/top into 'i' and the return fall in
 * the gaps of this listing; only the rounding step and trace remain. */
750 * gets the predicted number of non zero coefficients.
751 * @param n block index
753 static inline int pred_non_zero_count(H264Context *h, int n){
754 const int index8= scan8[n];
755 const int left= h->non_zero_count_cache[index8 - 1];
756 const int top = h->non_zero_count_cache[index8 - 8];
/* values >= 64 encode "neighbour unavailable"; average with rounding when
 * both neighbours were available */
759 if(i<64) i= (i+1)>>1;
761 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Returns the reference index of the top-right (diagonal) neighbour of
 * block i and points *C at its motion vector, handling the MBAFF special
 * cases where the neighbour belongs to a differently-coded field/frame pair
 * (SET_DIAG_MV rescales the vertical component and ref index accordingly).
 * Falls back to the top-left neighbour when top-right is unavailable.
 * NOTE(review): several guard lines fall in the gaps of this listing. */
766 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
767 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
768 MpegEncContext *s = &h->s;
770 /* there is no consistent mapping of mvs to neighboring locations that will
771 * make mbaff happy, so we can't move all this logic to fill_caches */
773 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scratch cache slot scan8[0]-2 holds the synthesized diagonal MV */
775 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
776 *C = h->mv_cache[list][scan8[0]-2];
778 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
780 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
781 if(IS_INTERLACED(mb_types[topright_xy])){
782 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
783 const int x4 = X4, y4 = Y4;\
784 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
785 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
786 return LIST_NOT_USED;\
787 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
788 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
789 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
790 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* frame MB reading a field neighbour: halve ref, double vertical mv */
792 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
795 if(topright_ref == PART_NOT_AVAILABLE
796 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
797 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
799 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
800 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
803 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
805 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
806 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF / ordinary case: use top-right, else fall back to top-left */
812 if(topright_ref != PART_NOT_AVAILABLE){
813 *C= h->mv_cache[list][ i - 8 + part_width ];
816 tprintf(s->avctx, "topright MV not available\n");
818 *C= h->mv_cache[list][ i - 8 - 1 ];
819 return h->ref_cache[list][ i - 8 - 1 ];
/* Median motion vector prediction from the left (A), top (B) and diagonal
 * (C) neighbours, per H.264 8.4.1.3: median when more than one neighbour
 * shares the target ref, the single matching neighbour's MV when exactly
 * one does, left MV when only the left neighbour exists.
 * NOTE(review): the single-match branches and availability fallbacks fall
 * partly in the gaps of this listing. */
824 * gets the predicted MV.
825 * @param n the block index
826 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
827 * @param mx the x component of the predicted motion vector
828 * @param my the y component of the predicted motion vector
830 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
831 const int index8= scan8[n];
832 const int top_ref= h->ref_cache[list][ index8 - 8 ];
833 const int left_ref= h->ref_cache[list][ index8 - 1 ];
834 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
835 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
837 int diagonal_ref, match_count;
839 assert(part_width==1 || part_width==2 || part_width==4);
849 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture */
850 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
851 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
852 if(match_count > 1){ //most common
853 *mx= mid_pred(A[0], B[0], C[0]);
854 *my= mid_pred(A[1], B[1], C[1]);
855 }else if(match_count==1){
859 }else if(top_ref==ref){
/* only the left neighbour available: take its MV directly */
867 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
871 *mx= mid_pred(A[0], B[0], C[0]);
872 *my= mid_pred(A[1], B[1], C[1]);
876 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/* MV prediction for 16x8 partitions: the top partition prefers the top
 * neighbour's MV, the bottom partition the left neighbour's, when that
 * neighbour uses the same reference; otherwise falls back to the generic
 * median predictor (pred_motion).
 * NOTE(review): the ref-comparison branches fall in the gaps of this
 * listing. */
880 * gets the directionally predicted 16x8 MV.
881 * @param n the block index
882 * @param mx the x component of the predicted motion vector
883 * @param my the y component of the predicted motion vector
885 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top partition: candidate is the block above */
887 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
888 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
890 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom partition: candidate is the block to the left */
898 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
899 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
901 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* fallback: median prediction */
911 pred_motion(h, n, 4, list, ref, mx, my);
/* MV prediction for 8x16 partitions: the left partition prefers the left
 * neighbour's MV, the right partition the diagonal (top-right) neighbour's,
 * when that neighbour uses the same reference; otherwise falls back to the
 * generic median predictor (pred_motion).
 * NOTE(review): the ref-comparison branches fall in the gaps of this
 * listing. */
915 * gets the directionally predicted 8x16 MV.
916 * @param n the block index
917 * @param mx the x component of the predicted motion vector
918 * @param my the y component of the predicted motion vector
920 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left partition: candidate is the block to the left */
922 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
923 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
925 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* right partition: candidate is the diagonal neighbour of block 4 */
936 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
938 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
940 if(diagonal_ref == ref){
/* fallback: median prediction */
948 pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P-skip macroblocks: the MV is zero when either the top
 * or left neighbour is unavailable, or when either uses ref 0 with a zero
 * MV; otherwise the ordinary median prediction for ref 0 is used.
 * NOTE(review): the zero-MV assignment/return lines fall in the gaps of
 * this listing. */
951 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
952 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
953 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
955 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
957 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
958 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
959 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* otherwise: standard median prediction on list 0, ref 0 */
965 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct-mode distance scale factors for every
 * list-0 reference (H.264 8.4.1.2.3): tb/td POC distances are clipped to
 * [-128,127], tx = (16384 + |td|/2) / td, and the factor is
 * clip((tb*tx+32)>>6, -1024, 1023); td==0 yields the neutral factor 256.
 * NOTE(review): the FRAME_MBAFF guard around the field-table copy falls in
 * the gaps of this listing. */
970 static inline void direct_dist_scale_factor(H264Context * const h){
971 const int poc = h->s.current_picture_ptr->poc;
972 const int poc1 = h->ref_list[1][0].poc;
974 for(i=0; i<h->ref_count[0]; i++){
975 int poc0 = h->ref_list[0][i].poc;
976 int td = av_clip(poc1 - poc0, -128, 127);
977 if(td == 0 /* FIXME || pic0 is a long-term ref */){
978 h->dist_scale_factor[i] = 256;
980 int tb = av_clip(poc - poc0, -128, 127);
981 int tx = (16384 + (FFABS(td) >> 1)) / td;
982 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* duplicate the per-frame factors for both field parities */
986 for(i=0; i<h->ref_count[0]; i++){
987 h->dist_scale_factor_field[2*i] =
988 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the current picture's reference counts/POCs and, for temporal
 * direct mode in B slices, builds map_col_to_list0: for each reference of
 * the co-located picture (ref_list[1][0]) find the list-0 entry with the
 * same POC (0 as a bogus stand-in for missing frames).
 * NOTE(review): the FRAME_MBAFF guard around the field-map copy falls in
 * the gaps of this listing. */
992 static inline void direct_ref_list_init(H264Context * const h){
993 MpegEncContext * const s = &h->s;
994 Picture * const ref1 = &h->ref_list[1][0];
995 Picture * const cur = s->current_picture_ptr;
997 if(cur->pict_type == I_TYPE)
998 cur->ref_count[0] = 0;
999 if(cur->pict_type != B_TYPE)
1000 cur->ref_count[1] = 0;
1001 for(list=0; list<2; list++){
1002 cur->ref_count[list] = h->ref_count[list];
1003 for(j=0; j<h->ref_count[list]; j++)
1004 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* spatial direct mode (and non-B pictures) need no colocated mapping */
1006 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1008 for(list=0; list<2; list++){
1009 for(i=0; i<ref1->ref_count[list]; i++){
1010 const int poc = ref1->ref_poc[list][i];
1011 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1012 for(j=0; j<h->ref_count[list]; j++)
1013 if(h->ref_list[list][j].poc == poc){
1014 h->map_col_to_list0[list][i] = j;
/* field variant: each frame index j maps to field indices 2j / 2j+1 */
1020 for(list=0; list<2; list++){
1021 for(i=0; i<ref1->ref_count[list]; i++){
1022 j = h->map_col_to_list0[list][i];
1023 h->map_col_to_list0_field[list][2*i] = 2*j;
1024 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Derives references and motion vectors for a B-direct macroblock (or
 * its direct 8x8 sub-blocks when is_b8x8), handling both spatial and
 * temporal direct prediction, using the co-located picture's mb_type,
 * motion vectors (l1mv0/l1mv1) and reference indices (l1ref0/l1ref1).
 * Results are written into h->mv_cache / h->ref_cache.
 * NOTE(review): excerpt listing — original line numbers embedded and
 * many interior lines elided; code left byte-identical.  The logic is
 * order-sensitive, so only comments were added. */
1030 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1031 MpegEncContext * const s = &h->s;
1032 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1033 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1034 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1035 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1036 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1037 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1038 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1039 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1040 const int is_b8x8 = IS_8X8(*mb_type);
1041 unsigned int sub_mb_type;
/* Pick the block size to run direct prediction at, depending on the
 * co-located MB's partitioning and direct_8x8_inference_flag. */
1044 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1045 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1046 /* FIXME save sub mb types from previous frames (or derive from MVs)
1047 * so we know exactly what block size to use */
1048 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1049 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1050 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1051 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1052 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1054 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1055 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1058 *mb_type |= MB_TYPE_DIRECT2;
1060 *mb_type |= MB_TYPE_INTERLACED;
1062 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1064 if(h->direct_spatial_mv_pred){
/* Spatial direct: take the minimum non-negative reference of the
 * left/top/top-right neighbours per list, median-predict the MVs,
 * then zero MVs for blocks whose co-located MV is near-zero. */
1069 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1071 /* ref = min(neighbors) */
1072 for(list=0; list<2; list++){
1073 int refa = h->ref_cache[list][scan8[0] - 1];
1074 int refb = h->ref_cache[list][scan8[0] - 8];
1075 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1079 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1081 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1087 if(ref[0] < 0 && ref[1] < 0){
1088 ref[0] = ref[1] = 0;
1089 mv[0][0] = mv[0][1] =
1090 mv[1][0] = mv[1][1] = 0;
1092 for(list=0; list<2; list++){
1094 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1096 mv[list][0] = mv[list][1] = 0;
1101 *mb_type &= ~MB_TYPE_P0L1;
1102 sub_mb_type &= ~MB_TYPE_P0L1;
1103 }else if(ref[0] < 0){
1104 *mb_type &= ~MB_TYPE_P0L0;
1105 sub_mb_type &= ~MB_TYPE_P0L0;
1108 if(IS_16X16(*mb_type)){
1111 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1112 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1113 if(!IS_INTRA(mb_type_col)
1114 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1115 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1116 && (h->x264_build>33 || !h->x264_build)))){
1118 a= pack16to32(mv[0][0],mv[0][1]);
1120 b= pack16to32(mv[1][0],mv[1][1]);
1122 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1126 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1128 for(i8=0; i8<4; i8++){
1129 const int x8 = i8&1;
1130 const int y8 = i8>>1;
1132 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1134 h->sub_mb_type[i8] = sub_mb_type;
1136 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1137 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1138 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Zero out MVs where the co-located block is ref 0 with |mv| <= 1
 * (x264 builds <= 33 had a deviation, hence the version check). */
1142 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1143 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1144 && (h->x264_build>33 || !h->x264_build)))){
1145 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1146 if(IS_SUB_8X8(sub_mb_type)){
1147 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1148 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1150 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1152 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1155 for(i4=0; i4<4; i4++){
1156 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1157 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1159 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1161 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1167 }else{ /* direct temporal mv pred */
1168 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1169 const int *dist_scale_factor = h->dist_scale_factor;
/* Field macroblocks use the field variants of the mapping tables. */
1172 if(IS_INTERLACED(*mb_type)){
1173 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1174 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1175 dist_scale_factor = h->dist_scale_factor_field;
1177 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1178 /* FIXME assumes direct_8x8_inference == 1 */
1179 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1180 int mb_types_col[2];
1183 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1184 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1185 | (*mb_type & MB_TYPE_INTERLACED);
1186 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1188 if(IS_INTERLACED(*mb_type)){
1189 /* frame to field scaling */
1190 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1191 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1193 l1ref0 -= 2*h->b8_stride;
1194 l1ref1 -= 2*h->b8_stride;
1195 l1mv0 -= 4*h->b_stride;
1196 l1mv1 -= 4*h->b_stride;
1200 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1201 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1203 *mb_type |= MB_TYPE_16x8;
1205 *mb_type |= MB_TYPE_8x8;
1207 /* field to frame scaling */
1208 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1209 * but in MBAFF, top and bottom POC are equal */
1210 int dy = (s->mb_y&1) ? 1 : 2;
1212 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1213 l1ref0 += dy*h->b8_stride;
1214 l1ref1 += dy*h->b8_stride;
1215 l1mv0 += 2*dy*h->b_stride;
1216 l1mv1 += 2*dy*h->b_stride;
1219 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1221 *mb_type |= MB_TYPE_16x16;
1223 *mb_type |= MB_TYPE_8x8;
1226 for(i8=0; i8<4; i8++){
1227 const int x8 = i8&1;
1228 const int y8 = i8>>1;
1230 const int16_t (*l1mv)[2]= l1mv0;
1232 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1234 h->sub_mb_type[i8] = sub_mb_type;
1236 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1237 if(IS_INTRA(mb_types_col[y8])){
1238 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1239 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1240 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1244 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1246 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1248 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1251 scale = dist_scale_factor[ref0];
1252 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1255 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1256 int my_col = (mv_col[1]<<y_shift)/2;
1257 int mx = (scale * mv_col[0] + 128) >> 8;
1258 int my = (scale * my_col + 128) >> 8;
1259 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1260 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1267 /* one-to-one mv scaling */
1269 if(IS_16X16(*mb_type)){
1272 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1273 if(IS_INTRA(mb_type_col)){
1276 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1277 : map_col_to_list0[1][l1ref1[0]];
1278 const int scale = dist_scale_factor[ref0];
1279 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1281 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1282 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1284 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1285 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1287 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1288 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1289 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1291 for(i8=0; i8<4; i8++){
1292 const int x8 = i8&1;
1293 const int y8 = i8>>1;
1295 const int16_t (*l1mv)[2]= l1mv0;
1297 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1299 h->sub_mb_type[i8] = sub_mb_type;
1300 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1301 if(IS_INTRA(mb_type_col)){
1302 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1303 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1304 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1308 ref0 = l1ref0[x8 + y8*h->b8_stride];
1310 ref0 = map_col_to_list0[0][ref0];
1312 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1315 scale = dist_scale_factor[ref0];
1317 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1318 if(IS_SUB_8X8(sub_mb_type)){
1319 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1320 int mx = (scale * mv_col[0] + 128) >> 8;
1321 int my = (scale * mv_col[1] + 128) >> 8;
1322 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1323 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1325 for(i4=0; i4<4; i4++){
1326 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1327 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1328 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1329 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1330 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1331 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the per-macroblock mv_cache / ref_cache (and, for CABAC,
 * mvd_cache) into the picture-wide motion_val / ref_index / mvd_table
 * arrays, and records per-8x8 direct flags for B slices with CABAC.
 * NOTE(review): excerpt listing — interior lines elided, code unchanged. */
1338 static inline void write_back_motion(H264Context *h, int mb_type){
1339 MpegEncContext * const s = &h->s;
1340 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1341 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1344 if(!USES_LIST(mb_type, 0))
1345 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1347 for(list=0; list<h->list_count; list++){
1349 if(!USES_LIST(mb_type, list))
1353 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1354 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1356 if( h->pps.cabac ) {
1357 if(IS_SKIP(mb_type))
1358 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1361 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1362 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1367 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1368 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1369 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1370 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1371 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1375 if(h->slice_type == B_TYPE && h->pps.cabac){
1376 if(IS_8X8(mb_type)){
1377 uint8_t *direct_table = &h->direct_table[b8_xy];
1378 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1379 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1380 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1386 * Decodes a network abstraction layer unit.
1387 * @param consumed is the number of bytes used as input
1388 * @param length is the length of the array
1389 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1390 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header (nal_ref_idc, nal_unit_type) and removes the
 * 0x000003 emulation-prevention escape bytes from the RBSP.  When the
 * payload contains no escaped zeros the source buffer (past the header)
 * is returned directly; otherwise the unescaped data is copied into
 * h->rbsp_buffer.
 * NOTE(review): excerpt listing — interior lines elided, code unchanged. */
1392 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1396 // src[0]&0x80; //forbidden bit
1397 h->nal_ref_idc= src[0]>>5;
1398 h->nal_unit_type= src[0]&0x1F;
1402 for(i=0; i<length; i++)
1403 printf("%2X ", src[i]);
/* Scan even positions for a zero byte that could start an escape or a
 * start code; stepping by 2 halves the work in the common case. */
1405 for(i=0; i+1<length; i+=2){
1406 if(src[i]) continue;
1407 if(i>0 && src[i-1]==0) i--;
1408 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1410 /* startcode, so we must be past the end */
1417 if(i>=length-1){ //no escaped 0
1418 *dst_length= length;
1419 *consumed= length+1; //+1 for the header
1423 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1424 dst= h->rbsp_buffer;
1430 //printf("decoding esc\n");
1433 //remove escapes (very rare 1:2^22)
1434 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1435 if(src[si+2]==3){ //escape
1440 }else //next start code
1444 dst[di++]= src[si++];
1448 *consumed= si + 1;//+1 for the header
1449 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1454 * identifies the exact end of the bitstream
1455 * @return the length of the trailing, or 0 if damaged
/* Finds the rbsp_stop_one_bit to locate the exact end of the bitstream;
 * returns the trailing length, or 0 if damaged (see doc comment above).
 * NOTE(review): the body is almost entirely elided in this listing. */
1457 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1461 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1471 * idct transforms the 16 dc values and dequantizes them.
1472 * @param qp quantization parameter
/* Inverse 4x4 Hadamard-style transform of the 16 luma DC coefficients,
 * followed by dequantization: each output is ((x*qmul + 128) >> 8).
 * The DC values sit at 4-pixel intervals within the 16x16 block array,
 * hence the x_offset/y_offset tables and stride multiples.
 * NOTE(review): excerpt listing — loop headers and butterfly stores are
 * elided; code left byte-identical. */
1474 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1477 int temp[16]; //FIXME check if this is a good idea
1478 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1479 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1481 //memset(block, 64, 2*256);
/* Horizontal pass: butterflies into temp[]. */
1484 const int offset= y_offset[i];
1485 const int z0= block[offset+stride*0] + block[offset+stride*4];
1486 const int z1= block[offset+stride*0] - block[offset+stride*4];
1487 const int z2= block[offset+stride*1] - block[offset+stride*5];
1488 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical pass: butterflies plus dequant with rounding. */
1497 const int offset= x_offset[i];
1498 const int z0= temp[4*0+i] + temp[4*2+i];
1499 const int z1= temp[4*0+i] - temp[4*2+i];
1500 const int z2= temp[4*1+i] - temp[4*3+i];
1501 const int z3= temp[4*1+i] + temp[4*3+i];
1503 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1504 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1505 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1506 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1512 * dct transforms the 16 dc values.
1513 * @param qp quantization parameter ??? FIXME
/* Forward counterpart of h264_luma_dc_dequant_idct_c: 4x4 Hadamard-style
 * transform of the 16 luma DC values (encoder side), outputs halved.
 * NOTE(review): excerpt listing — loop headers/stores elided, code
 * unchanged. */
1515 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1516 // const int qmul= dequant_coeff[qp][0];
1518 int temp[16]; //FIXME check if this is a good idea
1519 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1520 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* First pass: butterflies into temp[]. */
1523 const int offset= y_offset[i];
1524 const int z0= block[offset+stride*0] + block[offset+stride*4];
1525 const int z1= block[offset+stride*0] - block[offset+stride*4];
1526 const int z2= block[offset+stride*1] - block[offset+stride*5];
1527 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass: butterflies, result scaled down by 2. */
1536 const int offset= x_offset[i];
1537 const int z0= temp[4*0+i] + temp[4*2+i];
1538 const int z1= temp[4*0+i] - temp[4*2+i];
1539 const int z2= temp[4*1+i] - temp[4*3+i];
1540 const int z3= temp[4*1+i] + temp[4*3+i];
1542 block[stride*0 +offset]= (z0 + z3)>>1;
1543 block[stride*2 +offset]= (z1 + z2)>>1;
1544 block[stride*8 +offset]= (z1 - z2)>>1;
1545 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients; e/f intermediates are computed on lines elided from
 * this listing.  Output scaling is (sum*qmul) >> 7.
 * NOTE(review): excerpt listing — interior lines elided, code unchanged. */
1553 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1554 const int stride= 16*2;
1555 const int xStride= 16;
1558 a= block[stride*0 + xStride*0];
1559 b= block[stride*0 + xStride*1];
1560 c= block[stride*1 + xStride*0];
1561 d= block[stride*1 + xStride*1];
1568 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1569 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1570 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1571 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC coefficients
 * (encoder side); e/f intermediates are on lines elided here.
 * NOTE(review): excerpt listing — interior lines elided, code unchanged. */
1575 static void chroma_dc_dct_c(DCTELEM *block){
1576 const int stride= 16*2;
1577 const int xStride= 16;
1580 a= block[stride*0 + xStride*0];
1581 b= block[stride*0 + xStride*1];
1582 c= block[stride*1 + xStride*0];
1583 d= block[stride*1 + xStride*1];
1590 block[stride*0 + xStride*0]= (a+c);
1591 block[stride*0 + xStride*1]= (e+b);
1592 block[stride*1 + xStride*0]= (a-c);
1593 block[stride*1 + xStride*1]= (e-b);
1598 * gets the chroma qp.
/* Maps a luma qscale (plus the PPS chroma offset) to the chroma QP via
 * the standard lookup table, clipping the index into [0,51]. */
1600 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1602 return chroma_qp[av_clip(qscale + chroma_qp_index_offset, 0, 51)];
1605 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1606 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Quantizes a block of coefficients in the given scan order, using a
 * larger dead-zone for inter blocks (bias 1/6 vs 1/3 of the quant step).
 * The DC coefficient may be handled with a different shift when
 * separate_dc is set (QUANT_SHIFT-2 for intra-style, QUANT_SHIFT+1
 * otherwise).  Returns the index of the last non-zero coefficient.
 * NOTE(review): excerpt listing — loop headers, sign handling and some
 * stores are elided; code left byte-identical. */
1607 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1609 const int * const quant_table= quant_coeff[qscale];
1610 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold1/2 implement a fast "is |level| big enough to survive
 * quantization" test using one unsigned comparison. */
1611 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1612 const unsigned int threshold2= (threshold1<<1);
1618 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1619 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1620 const unsigned int dc_threshold2= (dc_threshold1<<1);
1622 int level= block[0]*quant_coeff[qscale+18][0];
1623 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1625 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1628 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1631 // last_non_zero = i;
1636 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1637 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1638 const unsigned int dc_threshold2= (dc_threshold1<<1);
1640 int level= block[0]*quant_table[0];
1641 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1643 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1646 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1649 // last_non_zero = i;
/* AC coefficients, visited in scan order. */
1662 const int j= scantable[i];
1663 int level= block[j]*quant_table[j];
1665 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1666 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1667 if(((unsigned)(level+threshold1))>threshold2){
1669 level= (bias + level)>>QUANT_SHIFT;
1672 level= (bias - level)>>QUANT_SHIFT;
1681 return last_non_zero;
/* Basic 4x4 intra predictors (H.264 Intra_4x4 modes).  Each copies or
 * averages neighbour pixels into the 4x4 block; rows are written as
 * single 32-bit stores, with DC replicated via *0x01010101.
 * NOTE(review): excerpt listing — closing braces elided, code unchanged. */

/* Mode 0: replicate the row above into all four rows. */
1684 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
1685 const uint32_t a= ((uint32_t*)(src-stride))[0];
1686 ((uint32_t*)(src+0*stride))[0]= a;
1687 ((uint32_t*)(src+1*stride))[0]= a;
1688 ((uint32_t*)(src+2*stride))[0]= a;
1689 ((uint32_t*)(src+3*stride))[0]= a;

/* Mode 1: replicate each left-neighbour pixel across its row. */
1692 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
1693 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
1694 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
1695 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
1696 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;

/* Mode 2: DC = rounded mean of the 4 top and 4 left neighbours. */
1699 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
1700 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
1701 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
1703 ((uint32_t*)(src+0*stride))[0]=
1704 ((uint32_t*)(src+1*stride))[0]=
1705 ((uint32_t*)(src+2*stride))[0]=
1706 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;

/* DC from left neighbours only (top unavailable). */
1709 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
1710 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
1712 ((uint32_t*)(src+0*stride))[0]=
1713 ((uint32_t*)(src+1*stride))[0]=
1714 ((uint32_t*)(src+2*stride))[0]=
1715 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;

/* DC from top neighbours only (left unavailable). */
1718 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
1719 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
1721 ((uint32_t*)(src+0*stride))[0]=
1722 ((uint32_t*)(src+1*stride))[0]=
1723 ((uint32_t*)(src+2*stride))[0]=
1724 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;

/* DC fallback when no neighbours are available: constant 128. */
1727 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
1728 ((uint32_t*)(src+0*stride))[0]=
1729 ((uint32_t*)(src+1*stride))[0]=
1730 ((uint32_t*)(src+2*stride))[0]=
1731 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Neighbour-pixel loader macros for the 4x4 directional predictors.
 * They declare t4..t7 (top-right row), l0..l3 (left column) and
 * t0..t3 (top row) as const ints in the enclosing scope; av_unused
 * suppresses warnings for predictors that don't use every sample.
 * (Comments kept outside the macros — the bodies are backslash-
 * continued and must stay byte-identical.) */
1735 #define LOAD_TOP_RIGHT_EDGE\
1736 const int av_unused t4= topright[0];\
1737 const int av_unused t5= topright[1];\
1738 const int av_unused t6= topright[2];\
1739 const int av_unused t7= topright[3];\
1741 #define LOAD_LEFT_EDGE\
1742 const int av_unused l0= src[-1+0*stride];\
1743 const int av_unused l1= src[-1+1*stride];\
1744 const int av_unused l2= src[-1+2*stride];\
1745 const int av_unused l3= src[-1+3*stride];\
1747 #define LOAD_TOP_EDGE\
1748 const int av_unused t0= src[ 0-1*stride];\
1749 const int av_unused t1= src[ 1-1*stride];\
1750 const int av_unused t2= src[ 2-1*stride];\
1751 const int av_unused t3= src[ 3-1*stride];\
/* Directional 4x4 intra predictors (H.264 Intra_4x4 modes 3..8).
 * Each interpolates along its direction with (a + 2b + c + 2)>>2
 * three-tap or (a + b + 1)>>1 two-tap filters of the neighbour samples
 * loaded by the LOAD_*_EDGE macros.
 * NOTE(review): excerpt listing — the macro-invocation lines and some
 * duplicate-assignment lines are elided; code left byte-identical. */

/* Mode 4: diagonal down-right. */
1753 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1754 const int lt= src[-1-1*stride];
1758 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1760 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1763 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1767 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1770 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1772 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1773 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;

/* Mode 3: diagonal down-left. */
1776 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1781 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1783 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1786 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1790 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1793 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1795 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1796 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;

/* Mode 5: vertical-right. */
1799 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1800 const int lt= src[-1-1*stride];
1805 src[1+2*stride]=(lt + t0 + 1)>>1;
1807 src[2+2*stride]=(t0 + t1 + 1)>>1;
1809 src[3+2*stride]=(t1 + t2 + 1)>>1;
1810 src[3+0*stride]=(t2 + t3 + 1)>>1;
1812 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1814 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1816 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1817 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1818 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1819 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;

/* Mode 7: vertical-left. */
1822 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
1826 src[0+0*stride]=(t0 + t1 + 1)>>1;
1828 src[0+2*stride]=(t1 + t2 + 1)>>1;
1830 src[1+2*stride]=(t2 + t3 + 1)>>1;
1832 src[2+2*stride]=(t3 + t4+ 1)>>1;
1833 src[3+2*stride]=(t4 + t5+ 1)>>1;
1834 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1836 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1838 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
1840 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
1841 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;

/* Mode 8: horizontal-up. */
1844 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
1847 src[0+0*stride]=(l0 + l1 + 1)>>1;
1848 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1850 src[0+1*stride]=(l1 + l2 + 1)>>1;
1852 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
1854 src[0+2*stride]=(l2 + l3 + 1)>>1;
1856 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;

/* Mode 6: horizontal-down. */
1865 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
1866 const int lt= src[-1-1*stride];
1871 src[2+1*stride]=(lt + l0 + 1)>>1;
1873 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
1874 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
1875 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1877 src[2+2*stride]=(l0 + l1 + 1)>>1;
1879 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1881 src[2+3*stride]=(l1 + l2+ 1)>>1;
1883 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1884 src[0+3*stride]=(l2 + l3 + 1)>>1;
1885 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 intra predictors (vertical, horizontal, DC and its fallbacks).
 * Rows are written as four 32-bit stores; DC values are replicated
 * into all bytes with *0x01010101.
 * NOTE(review): excerpt listing — accumulator init lines and closing
 * braces elided; code left byte-identical. */

/* Copy the row above into all 16 rows. */
1888 void ff_pred16x16_vertical_c(uint8_t *src, int stride){
1890 const uint32_t a= ((uint32_t*)(src-stride))[0];
1891 const uint32_t b= ((uint32_t*)(src-stride))[1];
1892 const uint32_t c= ((uint32_t*)(src-stride))[2];
1893 const uint32_t d= ((uint32_t*)(src-stride))[3];
1895 for(i=0; i<16; i++){
1896 ((uint32_t*)(src+i*stride))[0]= a;
1897 ((uint32_t*)(src+i*stride))[1]= b;
1898 ((uint32_t*)(src+i*stride))[2]= c;
1899 ((uint32_t*)(src+i*stride))[3]= d;

/* Replicate each left-neighbour pixel across its row. */
1903 void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
1906 for(i=0; i<16; i++){
1907 ((uint32_t*)(src+i*stride))[0]=
1908 ((uint32_t*)(src+i*stride))[1]=
1909 ((uint32_t*)(src+i*stride))[2]=
1910 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;

/* DC from the 16 left + 16 top neighbours, rounded: (dc+16)>>5. */
1914 void ff_pred16x16_dc_c(uint8_t *src, int stride){
1918 dc+= src[-1+i*stride];
1925 dc= 0x01010101*((dc + 16)>>5);
1927 for(i=0; i<16; i++){
1928 ((uint32_t*)(src+i*stride))[0]=
1929 ((uint32_t*)(src+i*stride))[1]=
1930 ((uint32_t*)(src+i*stride))[2]=
1931 ((uint32_t*)(src+i*stride))[3]= dc;

/* DC from the left column only (top unavailable). */
1935 void ff_pred16x16_left_dc_c(uint8_t *src, int stride){
1939 dc+= src[-1+i*stride];
1942 dc= 0x01010101*((dc + 8)>>4);
1944 for(i=0; i<16; i++){
1945 ((uint32_t*)(src+i*stride))[0]=
1946 ((uint32_t*)(src+i*stride))[1]=
1947 ((uint32_t*)(src+i*stride))[2]=
1948 ((uint32_t*)(src+i*stride))[3]= dc;

/* DC from the top row only (left unavailable). */
1952 void ff_pred16x16_top_dc_c(uint8_t *src, int stride){
1958 dc= 0x01010101*((dc + 8)>>4);
1960 for(i=0; i<16; i++){
1961 ((uint32_t*)(src+i*stride))[0]=
1962 ((uint32_t*)(src+i*stride))[1]=
1963 ((uint32_t*)(src+i*stride))[2]=
1964 ((uint32_t*)(src+i*stride))[3]= dc;

/* DC fallback with no neighbours: constant 128. */
1968 void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
1971 for(i=0; i<16; i++){
1972 ((uint32_t*)(src+i*stride))[0]=
1973 ((uint32_t*)(src+i*stride))[1]=
1974 ((uint32_t*)(src+i*stride))[2]=
1975 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane prediction: fits a linear gradient a + H*x + V*y to the
 * border pixels, then clamps each predicted sample via the crop table.
 * The svq3 flag selects SVQ3's slightly different (and H/V-swapped)
 * gradient scaling for bit-exact compatibility.
 * NOTE(review): excerpt listing — branch/loop framing lines elided,
 * code unchanged. */
1979 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
1982 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1983 const uint8_t * const src0 = src+7-stride;
1984 const uint8_t *src1 = src+8*stride-1;
1985 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
1986 int H = src0[1] - src0[-1];
1987 int V = src1[0] - src2[ 0];
1988 for(k=2; k<=8; ++k) {
1989 src1 += stride; src2 -= stride;
1990 H += k*(src0[k] - src0[-k]);
1991 V += k*(src1[0] - src2[ 0]);
/* SVQ3 variant of the gradient scaling. */
1994 H = ( 5*(H/4) ) / 16;
1995 V = ( 5*(V/4) ) / 16;
1997 /* required for 100% accuracy */
1998 i = H; H = V; V = i;
/* Standard H.264 scaling: (5*H+32)>>6. */
2000 H = ( 5*H+32 ) >> 6;
2001 V = ( 5*V+32 ) >> 6;
2004 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2005 for(j=16; j>0; --j) {
2008 for(i=-16; i<0; i+=4) {
2009 src[16+i] = cm[ (b ) >> 5 ];
2010 src[17+i] = cm[ (b+ H) >> 5 ];
2011 src[18+i] = cm[ (b+2*H) >> 5 ];
2012 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Public H.264 plane predictor: the compat helper with svq3 = 0. */
2019 void ff_pred16x16_plane_c(uint8_t *src, int stride){
2020 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) intra predictors.  Same scheme as the 16x16 versions but
 * with two 32-bit stores per row; the DC modes compute separate DC
 * values per 4x4 quadrant as the standard requires.
 * NOTE(review): excerpt listing — accumulator init lines, loop headers
 * and closing braces elided; code left byte-identical. */

/* Copy the row above into all 8 rows. */
2023 void ff_pred8x8_vertical_c(uint8_t *src, int stride){
2025 const uint32_t a= ((uint32_t*)(src-stride))[0];
2026 const uint32_t b= ((uint32_t*)(src-stride))[1];
2029 ((uint32_t*)(src+i*stride))[0]= a;
2030 ((uint32_t*)(src+i*stride))[1]= b;

/* Replicate each left-neighbour pixel across its row. */
2034 void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
2038 ((uint32_t*)(src+i*stride))[0]=
2039 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;

/* DC fallback with no neighbours: constant 128. */
2043 void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
2047 ((uint32_t*)(src+i*stride))[0]=
2048 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;

/* Left-only DC: dc0 for the top half, dc2 for the bottom half. */
2052 void ff_pred8x8_left_dc_c(uint8_t *src, int stride){
2058 dc0+= src[-1+i*stride];
2059 dc2+= src[-1+(i+4)*stride];
2061 dc0= 0x01010101*((dc0 + 2)>>2);
2062 dc2= 0x01010101*((dc2 + 2)>>2);
2065 ((uint32_t*)(src+i*stride))[0]=
2066 ((uint32_t*)(src+i*stride))[1]= dc0;
2069 ((uint32_t*)(src+i*stride))[0]=
2070 ((uint32_t*)(src+i*stride))[1]= dc2;

/* Top-only DC: dc0 for the left half, dc1 for the right half. */
2074 void ff_pred8x8_top_dc_c(uint8_t *src, int stride){
2080 dc0+= src[i-stride];
2081 dc1+= src[4+i-stride];
2083 dc0= 0x01010101*((dc0 + 2)>>2);
2084 dc1= 0x01010101*((dc1 + 2)>>2);
2087 ((uint32_t*)(src+i*stride))[0]= dc0;
2088 ((uint32_t*)(src+i*stride))[1]= dc1;
2091 ((uint32_t*)(src+i*stride))[0]= dc0;
2092 ((uint32_t*)(src+i*stride))[1]= dc1;

/* Full DC: per-quadrant DCs from the available top/left neighbours. */
2097 void ff_pred8x8_dc_c(uint8_t *src, int stride){
2099 int dc0, dc1, dc2, dc3;
2103 dc0+= src[-1+i*stride] + src[i-stride];
2104 dc1+= src[4+i-stride];
2105 dc2+= src[-1+(i+4)*stride];
2107 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2108 dc0= 0x01010101*((dc0 + 4)>>3);
2109 dc1= 0x01010101*((dc1 + 2)>>2);
2110 dc2= 0x01010101*((dc2 + 2)>>2);
2113 ((uint32_t*)(src+i*stride))[0]= dc0;
2114 ((uint32_t*)(src+i*stride))[1]= dc1;
2117 ((uint32_t*)(src+i*stride))[0]= dc2;
2118 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 plane prediction: fits a + H*x + V*y to the border pixels with
 * the H.264 (17*X+16)>>5 gradient scaling, clamping each sample via
 * the crop table.
 * NOTE(review): excerpt listing — loop framing lines elided, code
 * unchanged. */
2122 void ff_pred8x8_plane_c(uint8_t *src, int stride){
2125 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2126 const uint8_t * const src0 = src+3-stride;
2127 const uint8_t *src1 = src+4*stride-1;
2128 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2129 int H = src0[1] - src0[-1];
2130 int V = src1[0] - src2[ 0];
2131 for(k=2; k<=4; ++k) {
2132 src1 += stride; src2 -= stride;
2133 H += k*(src0[k] - src0[-k]);
2134 V += k*(src1[0] - src2[ 0]);
2136 H = ( 17*H+16 ) >> 5;
2137 V = ( 17*V+16 ) >> 5;
2139 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2140 for(j=8; j>0; --j) {
2143 src[0] = cm[ (b ) >> 5 ];
2144 src[1] = cm[ (b+ H) >> 5 ];
2145 src[2] = cm[ (b+2*H) >> 5 ];
2146 src[3] = cm[ (b+3*H) >> 5 ];
2147 src[4] = cm[ (b+4*H) >> 5 ];
2148 src[5] = cm[ (b+5*H) >> 5 ];
2149 src[6] = cm[ (b+6*H) >> 5 ];
2150 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (High Profile) intra predictors.
 * The LOAD macros declare low-pass-filtered border samples (l0..l7,
 * t0..t15, lt) per H.264 8.3.2.2.1 reference-sample filtering, with
 * has_topleft/has_topright selecting the fallback when a neighbour is
 * unavailable.  PREDICT_8x8_DC fills the 8x8 block with a replicated
 * DC word.  (Comments kept outside the macros — the bodies are
 * backslash-continued and must stay byte-identical.) */
2155 #define SRC(x,y) src[(x)+(y)*stride]
2157 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2158 #define PREDICT_8x8_LOAD_LEFT \
2159 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2160 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2161 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2162 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2165 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2166 #define PREDICT_8x8_LOAD_TOP \
2167 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2168 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2169 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2170 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2171 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2174 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2175 #define PREDICT_8x8_LOAD_TOPRIGHT \
2176 int t8, t9, t10, t11, t12, t13, t14, t15; \
2177 if(has_topright) { \
2178 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2179 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2180 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2182 #define PREDICT_8x8_LOAD_TOPLEFT \
2183 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2185 #define PREDICT_8x8_DC(v) \
2187 for( y = 0; y < 8; y++ ) { \
2188 ((uint32_t*)src)[0] = \
2189 ((uint32_t*)src)[1] = v; \
/* 8x8 luma intra predictors (DC variants, horizontal, vertical), built
 * on the PREDICT_8x8_LOAD_* filtered border samples.
 * NOTE(review): excerpt listing — some PREDICT_8x8_DC invocations and
 * vertical-mode stores are elided; code left byte-identical. */

/* DC fallback with no neighbours: constant 128. */
2193 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2195 PREDICT_8x8_DC(0x80808080);

/* DC from the filtered left column. */
2197 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2199 PREDICT_8x8_LOAD_LEFT;
2200 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;

/* DC from the filtered top row. */
2203 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2205 PREDICT_8x8_LOAD_TOP;
2206 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;

/* DC from both filtered edges. */
2209 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2211 PREDICT_8x8_LOAD_LEFT;
2212 PREDICT_8x8_LOAD_TOP;
2213 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2214 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;

/* Replicate each filtered left sample across its row. */
2217 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2219 PREDICT_8x8_LOAD_LEFT;
2220 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2221 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2222 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);

/* Copy the filtered top row into all eight rows. */
2225 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2228 PREDICT_8x8_LOAD_TOP;
2237 for( y = 1; y < 8; y++ )
2238 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2240 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2242 PREDICT_8x8_LOAD_TOP;
2243 PREDICT_8x8_LOAD_TOPRIGHT;
2244 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2245 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2246 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2247 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2248 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2249 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2250 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2251 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2252 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2253 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2254 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2255 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2256 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2257 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2258 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2260 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2262 PREDICT_8x8_LOAD_TOP;
2263 PREDICT_8x8_LOAD_LEFT;
2264 PREDICT_8x8_LOAD_TOPLEFT;
2265 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2266 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2267 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2268 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2269 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2270 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2271 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2272 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2273 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2274 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2275 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2276 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2277 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2278 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2279 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2282 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2284 PREDICT_8x8_LOAD_TOP;
2285 PREDICT_8x8_LOAD_LEFT;
2286 PREDICT_8x8_LOAD_TOPLEFT;
2287 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2288 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2289 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2290 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2291 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2292 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2293 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2294 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2295 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2296 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2297 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2298 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2299 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2300 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2301 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2302 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2303 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2304 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2305 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2306 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2307 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2308 SRC(7,0)= (t6 + t7 + 1) >> 1;
2310 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2312 PREDICT_8x8_LOAD_TOP;
2313 PREDICT_8x8_LOAD_LEFT;
2314 PREDICT_8x8_LOAD_TOPLEFT;
2315 SRC(0,7)= (l6 + l7 + 1) >> 1;
2316 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2317 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2318 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2319 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2320 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2321 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2322 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2323 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2324 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2325 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2326 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2327 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2328 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2329 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2330 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2331 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2332 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2333 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2334 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2335 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2336 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2338 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2340 PREDICT_8x8_LOAD_TOP;
2341 PREDICT_8x8_LOAD_TOPRIGHT;
2342 SRC(0,0)= (t0 + t1 + 1) >> 1;
2343 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2344 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2345 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2346 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2347 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2348 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2349 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2350 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2351 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2352 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2353 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2354 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2355 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2356 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2357 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2358 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2359 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2360 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2361 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2362 SRC(7,6)= (t10 + t11 + 1) >> 1;
2363 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2365 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2367 PREDICT_8x8_LOAD_LEFT;
2368 SRC(0,0)= (l0 + l1 + 1) >> 1;
2369 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2370 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2371 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2372 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2373 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2374 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2375 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2376 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2377 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2378 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2379 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2380 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2381 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2382 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2383 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2384 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2385 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2387 #undef PREDICT_8x8_LOAD_LEFT
2388 #undef PREDICT_8x8_LOAD_TOP
2389 #undef PREDICT_8x8_LOAD_TOPLEFT
2390 #undef PREDICT_8x8_LOAD_TOPRIGHT
2391 #undef PREDICT_8x8_DC
2397 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2398 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2399 int src_x_offset, int src_y_offset,
2400 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2401 MpegEncContext * const s = &h->s;
2402 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2403 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2404 const int luma_xy= (mx&3) + ((my&3)<<2);
2405 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2406 uint8_t * src_cb, * src_cr;
2407 int extra_width= h->emu_edge_width;
2408 int extra_height= h->emu_edge_height;
2410 const int full_mx= mx>>2;
2411 const int full_my= my>>2;
2412 const int pic_width = 16*s->mb_width;
2413 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2415 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
2418 if(mx&7) extra_width -= 3;
2419 if(my&7) extra_height -= 3;
2421 if( full_mx < 0-extra_width
2422 || full_my < 0-extra_height
2423 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2424 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2425 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2426 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2430 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2432 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2435 if(s->flags&CODEC_FLAG_GRAY) return;
2438 // chroma offset when predicting from a field of opposite parity
2439 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2440 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2442 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2443 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2446 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2447 src_cb= s->edge_emu_buffer;
2449 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2452 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2453 src_cr= s->edge_emu_buffer;
2455 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2458 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2459 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2460 int x_offset, int y_offset,
2461 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2462 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2463 int list0, int list1){
2464 MpegEncContext * const s = &h->s;
2465 qpel_mc_func *qpix_op= qpix_put;
2466 h264_chroma_mc_func chroma_op= chroma_put;
2468 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2469 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2470 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2471 x_offset += 8*s->mb_x;
2472 y_offset += 8*(s->mb_y >> MB_MBAFF);
2475 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2476 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2477 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2478 qpix_op, chroma_op);
2481 chroma_op= chroma_avg;
2485 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2486 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2487 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2488 qpix_op, chroma_op);
2492 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2493 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2494 int x_offset, int y_offset,
2495 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2496 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2497 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2498 int list0, int list1){
2499 MpegEncContext * const s = &h->s;
2501 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2502 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2503 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2504 x_offset += 8*s->mb_x;
2505 y_offset += 8*(s->mb_y >> MB_MBAFF);
2508 /* don't optimize for luma-only case, since B-frames usually
2509 * use implicit weights => chroma too. */
2510 uint8_t *tmp_cb = s->obmc_scratchpad;
2511 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2512 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2513 int refn0 = h->ref_cache[0][ scan8[n] ];
2514 int refn1 = h->ref_cache[1][ scan8[n] ];
2516 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2517 dest_y, dest_cb, dest_cr,
2518 x_offset, y_offset, qpix_put, chroma_put);
2519 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2520 tmp_y, tmp_cb, tmp_cr,
2521 x_offset, y_offset, qpix_put, chroma_put);
2523 if(h->use_weight == 2){
2524 int weight0 = h->implicit_weight[refn0][refn1];
2525 int weight1 = 64 - weight0;
2526 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2527 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2528 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2530 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2531 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2532 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2533 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2534 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2535 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2536 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2537 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2538 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2541 int list = list1 ? 1 : 0;
2542 int refn = h->ref_cache[list][ scan8[n] ];
2543 Picture *ref= &h->ref_list[list][refn];
2544 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2545 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2546 qpix_put, chroma_put);
2548 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2549 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2550 if(h->use_weight_chroma){
2551 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2552 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2553 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2554 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
2559 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2560 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2561 int x_offset, int y_offset,
2562 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2563 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2564 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2565 int list0, int list1){
2566 if((h->use_weight==2 && list0 && list1
2567 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2568 || h->use_weight==1)
2569 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2570 x_offset, y_offset, qpix_put, chroma_put,
2571 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2573 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2574 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2577 static inline void prefetch_motion(H264Context *h, int list){
2578 /* fetch pixels for estimated mv 4 macroblocks ahead
2579 * optimized for 64byte cache lines */
2580 MpegEncContext * const s = &h->s;
2581 const int refn = h->ref_cache[list][scan8[0]];
2583 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2584 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2585 uint8_t **src= h->ref_list[list][refn].data;
2586 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2587 s->dsp.prefetch(src[0]+off, s->linesize, 4);
2588 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2589 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
2593 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2594 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2595 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2596 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2597 MpegEncContext * const s = &h->s;
2598 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2599 const int mb_type= s->current_picture.mb_type[mb_xy];
2601 assert(IS_INTER(mb_type));
2603 prefetch_motion(h, 0);
2605 if(IS_16X16(mb_type)){
2606 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2607 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2608 &weight_op[0], &weight_avg[0],
2609 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2610 }else if(IS_16X8(mb_type)){
2611 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2612 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2613 &weight_op[1], &weight_avg[1],
2614 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2615 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2616 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2617 &weight_op[1], &weight_avg[1],
2618 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2619 }else if(IS_8X16(mb_type)){
2620 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2621 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2622 &weight_op[2], &weight_avg[2],
2623 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2624 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2625 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2626 &weight_op[2], &weight_avg[2],
2627 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2631 assert(IS_8X8(mb_type));
2634 const int sub_mb_type= h->sub_mb_type[i];
2636 int x_offset= (i&1)<<2;
2637 int y_offset= (i&2)<<1;
2639 if(IS_SUB_8X8(sub_mb_type)){
2640 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2641 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2642 &weight_op[3], &weight_avg[3],
2643 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2644 }else if(IS_SUB_8X4(sub_mb_type)){
2645 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2646 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2647 &weight_op[4], &weight_avg[4],
2648 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2649 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2650 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2651 &weight_op[4], &weight_avg[4],
2652 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2653 }else if(IS_SUB_4X8(sub_mb_type)){
2654 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2655 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2656 &weight_op[5], &weight_avg[5],
2657 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2658 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2659 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2660 &weight_op[5], &weight_avg[5],
2661 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2664 assert(IS_SUB_4X4(sub_mb_type));
2666 int sub_x_offset= x_offset + 2*(j&1);
2667 int sub_y_offset= y_offset + (j&2);
2668 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2669 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2670 &weight_op[6], &weight_avg[6],
2671 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2677 prefetch_motion(h, 1);
2680 static void decode_init_vlc(void){
2681 static int done = 0;
2687 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2688 &chroma_dc_coeff_token_len [0], 1, 1,
2689 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2692 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2693 &coeff_token_len [i][0], 1, 1,
2694 &coeff_token_bits[i][0], 1, 1, 1);
2698 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2699 &chroma_dc_total_zeros_len [i][0], 1, 1,
2700 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2702 for(i=0; i<15; i++){
2703 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2704 &total_zeros_len [i][0], 1, 1,
2705 &total_zeros_bits[i][0], 1, 1, 1);
2709 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2710 &run_len [i][0], 1, 1,
2711 &run_bits[i][0], 1, 1, 1);
2713 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2714 &run_len [6][0], 1, 1,
2715 &run_bits[6][0], 1, 1, 1);
2720 * Sets the intra prediction function pointers.
2722 static void init_pred_ptrs(H264Context *h){
2723 // MpegEncContext * const s = &h->s;
2725 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2726 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2727 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2728 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2729 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2730 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2731 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2732 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2733 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2734 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2735 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2736 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2738 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2739 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2740 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2741 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2742 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2743 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2744 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2745 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2746 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2747 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2748 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2749 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2751 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
2752 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
2753 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
2754 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
2755 h->pred8x8[LEFT_DC_PRED8x8]= ff_pred8x8_left_dc_c;
2756 h->pred8x8[TOP_DC_PRED8x8 ]= ff_pred8x8_top_dc_c;
2757 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
2759 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
2760 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
2761 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
2762 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
2763 h->pred16x16[LEFT_DC_PRED8x8]= ff_pred16x16_left_dc_c;
2764 h->pred16x16[TOP_DC_PRED8x8 ]= ff_pred16x16_top_dc_c;
2765 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
2768 static void free_tables(H264Context *h){
2770 av_freep(&h->intra4x4_pred_mode);
2771 av_freep(&h->chroma_pred_mode_table);
2772 av_freep(&h->cbp_table);
2773 av_freep(&h->mvd_table[0]);
2774 av_freep(&h->mvd_table[1]);
2775 av_freep(&h->direct_table);
2776 av_freep(&h->non_zero_count);
2777 av_freep(&h->slice_table_base);
2778 av_freep(&h->top_borders[1]);
2779 av_freep(&h->top_borders[0]);
2780 h->slice_table= NULL;
2782 av_freep(&h->mb2b_xy);
2783 av_freep(&h->mb2b8_xy);
2785 av_freep(&h->s.obmc_scratchpad);
2787 for(i = 0; i < MAX_SPS_COUNT; i++)
2788 av_freep(h->sps_buffers + i);
2790 for(i = 0; i < MAX_PPS_COUNT; i++)
2791 av_freep(h->pps_buffers + i);
2794 static void init_dequant8_coeff_table(H264Context *h){
2796 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2797 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2798 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2800 for(i=0; i<2; i++ ){
2801 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2802 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2806 for(q=0; q<52; q++){
2807 int shift = ff_div6[q];
2808 int idx = ff_rem6[q];
2810 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2811 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2812 h->pps.scaling_matrix8[i][x]) << shift;
2817 static void init_dequant4_coeff_table(H264Context *h){
2819 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2820 for(i=0; i<6; i++ ){
2821 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2823 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2824 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2831 for(q=0; q<52; q++){
2832 int shift = ff_div6[q] + 2;
2833 int idx = ff_rem6[q];
2835 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2836 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2837 h->pps.scaling_matrix4[i][x]) << shift;
2842 static void init_dequant_tables(H264Context *h){
2844 init_dequant4_coeff_table(h);
2845 if(h->pps.transform_8x8_mode)
2846 init_dequant8_coeff_table(h);
2847 if(h->sps.transform_bypass){
2850 h->dequant4_coeff[i][0][x] = 1<<6;
2851 if(h->pps.transform_8x8_mode)
2854 h->dequant8_coeff[i][0][x] = 1<<6;
2861 * needs width/height
2863 static int alloc_tables(H264Context *h){
2864 MpegEncContext * const s = &h->s;
2865 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2868 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2870 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2871 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2872 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2873 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2874 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2876 if( h->pps.cabac ) {
2877 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2878 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2879 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2880 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2883 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2884 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2886 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2887 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2888 for(y=0; y<s->mb_height; y++){
2889 for(x=0; x<s->mb_width; x++){
2890 const int mb_xy= x + y*s->mb_stride;
2891 const int b_xy = 4*x + 4*y*h->b_stride;
2892 const int b8_xy= 2*x + 2*y*h->b8_stride;
2894 h->mb2b_xy [mb_xy]= b_xy;
2895 h->mb2b8_xy[mb_xy]= b8_xy;
2899 s->obmc_scratchpad = NULL;
2901 if(!h->dequant4_coeff[0])
2902 init_dequant_tables(h);
2910 static void common_init(H264Context *h){
2911 MpegEncContext * const s = &h->s;
2913 s->width = s->avctx->width;
2914 s->height = s->avctx->height;
2915 s->codec_id= s->avctx->codec->id;
2919 h->dequant_coeff_pps= -1;
2920 s->unrestricted_mv=1;
2921 s->decode=1; //FIXME
2923 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2924 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2927 static int decode_init(AVCodecContext *avctx){
2928 H264Context *h= avctx->priv_data;
2929 MpegEncContext * const s = &h->s;
2931 MPV_decode_defaults(s);
2936 s->out_format = FMT_H264;
2937 s->workaround_bugs= avctx->workaround_bugs;
2940 // s->decode_mb= ff_h263_decode_mb;
2942 avctx->pix_fmt= PIX_FMT_YUV420P;
2946 if(avctx->extradata_size > 0 && avctx->extradata &&
2947 *(char *)avctx->extradata == 1){
2957 static int frame_start(H264Context *h){
2958 MpegEncContext * const s = &h->s;
2961 if(MPV_frame_start(s, s->avctx) < 0)
2963 ff_er_frame_start(s);
2965 assert(s->linesize && s->uvlinesize);
2967 for(i=0; i<16; i++){
2968 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2969 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2972 h->block_offset[16+i]=
2973 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2974 h->block_offset[24+16+i]=
2975 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2978 /* can't be in alloc_tables because linesize isn't known there.
2979 * FIXME: redo bipred weight to not require extra buffer? */
2980 if(!s->obmc_scratchpad)
2981 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2983 /* some macroblocks will be accessed before they're available */
2985 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2987 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2991 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2992 MpegEncContext * const s = &h->s;
2996 src_cb -= uvlinesize;
2997 src_cr -= uvlinesize;
2999 // There are two lines saved, the line above the the top macroblock of a pair,
3000 // and the line above the bottom macroblock
3001 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3002 for(i=1; i<17; i++){
3003 h->left_border[i]= src_y[15+i* linesize];
3006 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3007 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3009 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3010 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3011 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3013 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3014 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3016 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3017 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Exchange (or restore, depending on the xchg flag wired into the XCHG macro)
// the deblocking border samples of one macroblock between the caches
// h->left_border / h->top_borders[0] and the picture planes src_y/src_cb/src_cr.
// Called around intra prediction so prediction sees undeblocked neighbours.
// NOTE(review): this extract has lines elided (the XCHG macro body, several
// closing braces, and the deblock_left uses) — confirm against the full file.
3021 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
3022 MpegEncContext * const s = &h->s;
// Only exchange where a left/top neighbour macroblock actually exists.
3025 int deblock_left = (s->mb_x > 0);
3026 int deblock_top = (s->mb_y > 0);
// Step back one row and one column so index 0 addresses the top-left corner sample.
3028 src_y -= linesize + 1;
3029 src_cb -= uvlinesize + 1;
3030 src_cr -= uvlinesize + 1;
3032 #define XCHG(a,b,t,xchg)\
// Luma left column: 17 samples (corner + 16 rows); skip row 0 when no top MB.
3039 for(i = !deblock_top; i<17; i++){
3040 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Luma top row, 8 bytes at a time; the second half is always swapped (xchg=1).
3045 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3046 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// Top-right neighbour border, only if a macroblock exists to the right.
3047 if(s->mb_x+1 < s->mb_width){
3048 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
// Chroma borders are skipped in grayscale-only decoding.
3052 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3054 for(i = !deblock_top; i<9; i++){
3055 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3056 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3060 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3061 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// Save the deblocking border samples for an MBAFF macroblock pair:
// two top rows (one per macroblock of the pair, kept in h->top_borders[0]
// and [1]) and a double-height left column (34 luma + 2*18 chroma samples)
// in h->left_border. Counterpart of backup_mb_border for paired macroblocks.
// NOTE(review): extract has elided lines (loop closing braces etc.).
3066 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3067 MpegEncContext * const s = &h->s;
// Point at the two rows above the pair.
3070 src_y -= 2 * linesize;
3071 src_cb -= 2 * uvlinesize;
3072 src_cr -= 2 * uvlinesize;
3074 // There are two lines saved, the line above the the top macroblock of a pair,
3075 // and the line above the bottom macroblock
// Left column: corner samples from both saved top rows, then 32 luma rows.
3076 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3077 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3078 for(i=2; i<34; i++){
3079 h->left_border[i]= src_y[15+i* linesize];
// Bottom two rows of the pair become the "top" rows for the MB pair below.
3082 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3083 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3084 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3085 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
// Chroma borders, skipped for grayscale decoding.
3087 if(!(s->flags&CODEC_FLAG_GRAY)){
3088 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3089 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3090 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3091 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3092 for(i=2; i<18; i++){
3093 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3094 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3096 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3097 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3098 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3099 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
// MBAFF-pair version of xchg_mb_border: exchange the border samples of a
// macroblock PAIR (two top rows via h->top_borders[0]/[1], double-height
// left column) with the picture planes, so intra prediction of the pair
// sees undeblocked neighbours. deblock_top tests mb_y > 1 because a pair
// occupies two macroblock rows.
// NOTE(review): extract has elided lines (XCHG macro body, braces).
3103 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3104 MpegEncContext * const s = &h->s;
3107 int deblock_left = (s->mb_x > 0);
3108 int deblock_top = (s->mb_y > 1);
3110 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// Step back two rows (one per saved line) and one column.
3112 src_y -= 2 * linesize + 1;
3113 src_cb -= 2 * uvlinesize + 1;
3114 src_cr -= 2 * uvlinesize + 1;
3116 #define XCHG(a,b,t,xchg)\
// Double-height luma left column; skip the first two rows when no top pair.
3123 for(i = (!deblock_top)<<1; i<34; i++){
3124 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Two saved top rows, 8 bytes at a time.
3129 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3130 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3131 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3132 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3133 if(s->mb_x+1 < s->mb_width){
3134 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3135 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
// Chroma, skipped for grayscale decoding.
3139 if(!(s->flags&CODEC_FLAG_GRAY)){
3141 for(i = (!deblock_top) << 1; i<18; i++){
3142 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3143 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3147 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3148 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3149 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3150 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// Reconstruct one macroblock into the current picture: compute destination
// pointers, run intra prediction or inter motion compensation, add the
// residual via the selected IDCT functions, and finally apply the
// deblocking filter. `simple` is a compile-time flag (the function is
// av_always_inline) that strips the expensive uncommon paths — MBAFF,
// intra-PCM, grayscale, non-H.264 (SVQ3) — for the fast caller.
// NOTE(review): this extract has many elided lines (else-arms, closing
// braces, some declarations) — verify any change against the full file.
3155 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
3156 MpegEncContext * const s = &h->s;
3157 const int mb_x= s->mb_x;
3158 const int mb_y= s->mb_y;
3159 const int mb_xy= mb_x + mb_y*s->mb_stride;
3160 const int mb_type= s->current_picture.mb_type[mb_xy];
3161 uint8_t *dest_y, *dest_cb, *dest_cr;
3162 int linesize, uvlinesize /*dct_offset*/;
3164 int *block_offset = &h->block_offset[0];
// bottom = 1 when this is the bottom macroblock of an MBAFF pair.
3165 const unsigned int bottom = mb_y & 1;
3166 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
// Residual-add functions, chosen below from bypass / 8x8 / 4x4 transform.
3167 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3168 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Destination of this MB in the three picture planes (16x16 luma, 8x8 chroma).
3170 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3171 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3172 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3174 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3175 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblock: double the strides and use the interleaved block
// offsets; the bottom field starts one line down.
3177 if (!simple && MB_FIELD) {
3178 linesize = h->mb_linesize = s->linesize * 2;
3179 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3180 block_offset = &h->block_offset[24];
3181 if(mb_y&1){ //FIXME move out of this func?
3182 dest_y -= s->linesize*15;
3183 dest_cb-= s->uvlinesize*7;
3184 dest_cr-= s->uvlinesize*7;
// Remap the reference cache so field references encode their parity
// (the 16+ref^(mb_y&1) trick).
3188 for(list=0; list<h->list_count; list++){
3189 if(!USES_LIST(mb_type, list))
3191 if(IS_16X16(mb_type)){
3192 int8_t *ref = &h->ref_cache[list][scan8[0]];
3193 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3195 for(i=0; i<16; i+=4){
3196 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3197 int ref = h->ref_cache[list][scan8[i]];
3199 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// Frame macroblock: normal strides.
3205 linesize = h->mb_linesize = s->linesize;
3206 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3207 // dct_offset = s->linesize * 16;
// Select residual-add implementations.
3210 if(transform_bypass){
3212 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3213 }else if(IS_8x8DCT(mb_type)){
3214 idct_dc_add = s->dsp.h264_idct8_dc_add;
3215 idct_add = s->dsp.h264_idct8_add;
3217 idct_dc_add = s->dsp.h264_idct_dc_add;
3218 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblocking + intra: swap in the undeblocked pair borders before
// predicting (done once per pair, hence the !bottom / top-not-intra test).
3221 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3222 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3223 int mbt_y = mb_y&~1;
3224 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3225 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3226 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3227 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// Intra-PCM: raw samples were parsed into h->mb; copy them out 4x4-block-wise.
3230 if (!simple && IS_INTRA_PCM(mb_type)) {
3233 // The pixels are stored in h->mb array in the same order as levels,
3234 // copy them in output in the correct order.
3235 for(i=0; i<16; i++) {
3236 for (y=0; y<4; y++) {
3237 for (x=0; x<4; x++) {
3238 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3242 for(i=16; i<16+4; i++) {
3243 for (y=0; y<4; y++) {
3244 for (x=0; x<4; x++) {
3245 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3249 for(i=20; i<20+4; i++) {
3250 for (y=0; y<4; y++) {
3251 for (x=0; x<4; x++) {
3252 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblock: borders in, predict chroma, then luma (4x4/8x8/16x16).
3257 if(IS_INTRA(mb_type)){
3258 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3259 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
3261 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3262 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3263 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3266 if(IS_INTRA4x4(mb_type)){
3267 if(simple || !s->encoding){
3268 if(IS_8x8DCT(mb_type)){
// 8x8 intra: predict and add residual per 8x8 block (i steps by 4).
3269 for(i=0; i<16; i+=4){
3270 uint8_t * const ptr= dest_y + block_offset[i];
3271 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3272 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3273 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3274 (h->topright_samples_available<<i)&0x4000, linesize);
// DC-only blocks take the cheaper dc_add path.
3276 if(nnz == 1 && h->mb[i*16])
3277 idct_dc_add(ptr, h->mb + i*16, linesize);
3279 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 intra: per-block prediction; synthesize top-right samples when
// the real ones are unavailable (replicate the last top pixel).
3283 for(i=0; i<16; i++){
3284 uint8_t * const ptr= dest_y + block_offset[i];
3286 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3289 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3290 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3291 assert(mb_y || linesize <= block_offset[i]);
3292 if(!topright_avail){
3293 tr= ptr[3 - linesize]*0x01010101;
3294 topright= (uint8_t*) &tr;
3296 topright= ptr + 4 - linesize;
3300 h->pred4x4[ dir ](ptr, topright, linesize);
3301 nnz = h->non_zero_count_cache[ scan8[i] ];
3304 if(nnz == 1 && h->mb[i*16])
3305 idct_dc_add(ptr, h->mb + i*16, linesize);
3307 idct_add(ptr, h->mb + i*16, linesize);
// SVQ3 path uses its own residual transform.
3309 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra16x16: whole-MB luma prediction + separate DC transform.
3314 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3316 if(!transform_bypass)
3317 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3319 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// Restore the deblocked borders swapped in above.
3321 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3322 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// Inter macroblock: motion compensation (with optional weighted prediction).
3324 hl_motion(h, dest_y, dest_cb, dest_cr,
3325 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3326 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3327 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add luma residual for non-intra4x4 MBs (intra4x4 added it inline above).
3331 if(!IS_INTRA4x4(mb_type)){
3333 if(IS_INTRA16x16(mb_type)){
3334 for(i=0; i<16; i++){
3335 if(h->non_zero_count_cache[ scan8[i] ])
3336 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3337 else if(h->mb[i*16])
3338 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3341 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3342 for(i=0; i<16; i+=di){
3343 int nnz = h->non_zero_count_cache[ scan8[i] ];
3345 if(nnz==1 && h->mb[i*16])
3346 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3348 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3353 for(i=0; i<16; i++){
3354 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3355 uint8_t * const ptr= dest_y + block_offset[i];
3356 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: dequant the 2x2 chroma DC blocks, then add per 4x4 block.
3362 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3363 uint8_t *dest[2] = {dest_cb, dest_cr};
3364 if(transform_bypass){
3365 idct_add = idct_dc_add = s->dsp.add_pixels4;
3367 idct_add = s->dsp.h264_idct_add;
3368 idct_dc_add = s->dsp.h264_idct_dc_add;
3369 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3370 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3373 for(i=16; i<16+8; i++){
3374 if(h->non_zero_count_cache[ scan8[i] ])
3375 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3376 else if(h->mb[i*16])
3377 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3380 for(i=16; i<16+8; i++){
3381 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3382 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3383 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking. In MBAFF mode the whole pair is filtered once, after the
// bottom macroblock of the pair has been reconstructed.
3389 if(h->deblocking_filter) {
3390 if (!simple && FRAME_MBAFF) {
3391 //FIXME try deblocking one mb at a time?
3392 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3393 const int mb_y = s->mb_y - 1;
3394 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3395 const int mb_xy= mb_x + mb_y*s->mb_stride;
3396 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3397 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// Top MB of a pair: defer filtering until the bottom MB is done.
3398 if (!bottom) return;
3399 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3400 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3401 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3403 if(IS_INTRA(mb_type_top | mb_type_bottom))
3404 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3406 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// Filter both macroblocks of the pair, refreshing caches/QP for each.
3410 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3411 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3412 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3413 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3416 tprintf(h->s.avctx, "call mbaff filter_mb\n");
3417 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3418 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3419 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// Non-MBAFF: back up borders then run the fast per-MB filter.
3421 tprintf(h->s.avctx, "call filter_mb\n");
3422 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
3423 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3424 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3430  * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast path: hl_decode_mb_internal with simple=1, which (being
// av_always_inline) compiles out MBAFF/PCM/grayscale/SVQ3 handling.
3432 static void hl_decode_mb_simple(H264Context *h){
3433     hl_decode_mb_internal(h, 1);
3437  * Process a macroblock; this handles edge cases, such as interlacing.
// Slow path: hl_decode_mb_internal with simple=0; av_noinline keeps this
// rare path out of the hot caller's code size.
3439 static void av_noinline hl_decode_mb_complex(H264Context *h){
3440     hl_decode_mb_internal(h, 0);
// Dispatch macroblock reconstruction to the simple or complex variant,
// based on features that the fast path does not support (MBAFF, field MBs,
// intra-PCM, non-H.264 codec id, grayscale, encoding).
// NOTE(review): lines between is_complex and the call are elided in this
// extract — the condition guarding hl_decode_mb_complex is not visible.
3443 static void hl_decode_mb(H264Context *h){
3444     MpegEncContext * const s = &h->s;
3445     const int mb_x= s->mb_x;
3446     const int mb_y= s->mb_y;
3447     const int mb_xy= mb_x + mb_y*s->mb_stride;
3448     const int mb_type= s->current_picture.mb_type[mb_xy];
3449     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (s->flags&CODEC_FLAG_GRAY) || s->encoding;
3455        hl_decode_mb_complex(h);
3456     else hl_decode_mb_simple(h);
3460  * fills the default_ref_list.
// Build the default reference picture lists (h->default_ref_list):
// - P slices: short-term refs in decoding order, then long-term refs.
// - B slices: short-term refs sorted by POC relative to the current
//   picture (L0: past then future; L1: future then past), then long-term;
//   the first two L1 entries are swapped if L0 and L1 would be identical.
// NOTE(review): several lines are elided in this extract (loop closers,
// some else-arms); the field-picture path is not visible.
3462 static int fill_default_ref_list(H264Context *h){
3463     MpegEncContext * const s = &h->s;
3465     int smallest_poc_greater_than_current = -1;
3466     Picture sorted_short_ref[32];
3468     if(h->slice_type==B_TYPE){
3472         /* sort frame according to poc in B slice */
// Selection sort of short-term refs by ascending POC into sorted_short_ref;
// also record where the first future picture (poc >= current) lands.
3473         for(out_i=0; out_i<h->short_ref_count; out_i++){
3475             int best_poc=INT_MAX;
3477             for(i=0; i<h->short_ref_count; i++){
3478                 const int poc= h->short_ref[i]->poc;
3479                 if(poc > limit && poc < best_poc){
3485             assert(best_i != INT_MIN);
3488             sorted_short_ref[out_i]= *h->short_ref[best_i];
3489             tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3490             if (-1 == smallest_poc_greater_than_current) {
3491                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3492                     smallest_poc_greater_than_current = out_i;
3498     if(s->picture_structure == PICT_FRAME){
3499         if(h->slice_type==B_TYPE){
3501             tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3503             // find the largest poc
// Walk the sorted array outwards from the current POC; L0 walks toward
// the past (step +1 from the split going down), L1 toward the future.
3504             for(list=0; list<2; list++){
3507                 int step= list ? -1 : 1;
3509                 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3510                     while(j<0 || j>= h->short_ref_count){
3511                         if(j != -99 && step == (list ? -1 : 1))
3514                         j= smallest_poc_greater_than_current + (step>>1);
// Only frame references (both fields marked, reference==3) are usable here.
3516                     if(sorted_short_ref[j].reference != 3) continue;
3517                     h->default_ref_list[list][index  ]= sorted_short_ref[j];
3518                     h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Long-term refs follow, identified by their long-term index.
3521                 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3522                     if(h->long_ref[i] == NULL) continue;
3523                     if(h->long_ref[i]->reference != 3) continue;
3525                     h->default_ref_list[ list ][index  ]= *h->long_ref[i];
3526                     h->default_ref_list[ list ][index++].pic_id= i;;
3529                 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3530                     // swap the two first elements of L1 when
3531                     // L0 and L1 are identical
3532                     Picture temp= h->default_ref_list[1][0];
3533                     h->default_ref_list[1][0] = h->default_ref_list[1][1];
3534                     h->default_ref_list[1][1] = temp;
3537                 if(index < h->ref_count[ list ])
3538                     memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P slices: short-term refs in decoding order, then long-term refs.
3542             for(i=0; i<h->short_ref_count; i++){
3543                 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3544                 h->default_ref_list[0][index  ]= *h->short_ref[i];
3545                 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3547             for(i = 0; i < 16; i++){
3548                 if(h->long_ref[i] == NULL) continue;
3549                 if(h->long_ref[i]->reference != 3) continue;
3550                 h->default_ref_list[0][index  ]= *h->long_ref[i];
3551                 h->default_ref_list[0][index++].pic_id= i;;
3553             if(index < h->ref_count[0])
3554                 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3557     if(h->slice_type==B_TYPE){
3559         //FIXME second field balh
// Debug dump of the final lists.
3563     for (i=0; i<h->ref_count[0]; i++) {
3564         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3566     if(h->slice_type==B_TYPE){
3567         for (i=0; i<h->ref_count[1]; i++) {
3568             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3575 static void print_short_term(H264Context *h);
3576 static void print_long_term(H264Context *h);
// Parse the ref_pic_list_reordering() slice-header syntax and apply it:
// starting from the default lists, move the signalled short-term
// (by frame_num delta) or long-term (by index) picture to the current
// position in h->ref_list[list], shifting the displaced entries down.
// Returns 0 on success, negative (elided lines) on syntax errors.
// NOTE(review): several lines are elided in this extract (returns, braces,
// the long_term_pic_idx bound check's condition).
3578 static int decode_ref_pic_list_reordering(H264Context *h){
3579     MpegEncContext * const s = &h->s;
3582     print_short_term(h);
3584     if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3586     for(list=0; list<h->list_count; list++){
// Start from the default list; reordering edits it in place.
3587         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3589         if(get_bits1(&s->gb)){
// pred holds the running picNumPred used for the modulo delta arithmetic.
3590             int pred= h->curr_pic_num;
3592             for(index=0; ; index++){
3593                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3594                 unsigned int pic_id;
3596                 Picture *ref = NULL;
// idc 3 terminates the reordering loop for this list.
3598                 if(reordering_of_pic_nums_idc==3)
3601                 if(index >= h->ref_count[list]){
3602                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3606                 if(reordering_of_pic_nums_idc<3){
3607                     if(reordering_of_pic_nums_idc<2){
// idc 0/1: short-term, signalled as abs_diff_pic_num (subtract/add).
3608                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3610                         if(abs_diff_pic_num >= h->max_pic_num){
3611                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3615                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3616                         else                                pred+= abs_diff_pic_num;
3617                         pred &= h->max_pic_num - 1;
// Search short-term refs for the computed frame_num.
3619                         for(i= h->short_ref_count-1; i>=0; i--){
3620                             ref = h->short_ref[i];
3621                             assert(ref->reference == 3);
3622                             assert(!ref->long_ref);
3623                             if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3627                             ref->pic_id= ref->frame_num;
// idc 2: long-term, signalled directly by long_term_pic_idx.
3629                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3631                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3634                         ref = h->long_ref[pic_id];
3636                             ref->pic_id= pic_id;
3637                             assert(ref->reference == 3);
3638                             assert(ref->long_ref);
3646                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3647                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Shift the list down to insert the found picture at `index`.
3649                         for(i=index; i+1<h->ref_count[list]; i++){
3650                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3653                         for(; i > index; i--){
3654                             h->ref_list[list][i]= h->ref_list[list][i-1];
3656                         h->ref_list[list][index]= *ref;
3659                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Replace any hole (missing reference) with the current picture as a
// last-resort error concealment.
3665     for(list=0; list<h->list_count; list++){
3666         for(index= 0; index < h->ref_count[list]; index++){
3667             if(!h->ref_list[list][index].data[0])
3668                 h->ref_list[list][index]= s->current_picture;
3672     if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3673         direct_dist_scale_factor(h);
3674     direct_ref_list_init(h);
// For MBAFF decoding, derive per-field reference entries from each frame
// reference: ref_list slots 16+2*i / 16+2*i+1 become the two fields of
// frame reference i (doubled linesize, bottom field offset by one line),
// and the weighted-prediction tables are duplicated accordingly.
// NOTE(review): some inner loop headers/closers are elided in this extract.
3678 static void fill_mbaff_ref_list(H264Context *h){
3680     for(list=0; list<2; list++){ //FIXME try list_count
3681         for(i=0; i<h->ref_count[list]; i++){
3682             Picture *frame = &h->ref_list[list][i];
3683             Picture *field = &h->ref_list[list][16+2*i];
// Field pictures use double the frame stride; the second field starts
// one frame line below the first.
3686                 field[0].linesize[j] <<= 1;
3687             field[1] = field[0];
3689                 field[1].data[j] += frame->linesize[j];
3691             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3692             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3694                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3695                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Duplicate implicit weights for the field reference index space too.
3699     for(j=0; j<h->ref_count[1]; j++){
3700         for(i=0; i<h->ref_count[0]; i++)
3701             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3702         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
3703         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parse the pred_weight_table() slice-header syntax (explicit weighted
// prediction): per list and reference, optional luma and chroma
// weight/offset pairs; absent entries get the defaults derived from the
// log2 weight denominators. Sets h->use_weight / h->use_weight_chroma
// when any entry differs from the default.
// NOTE(review): some lines elided (inner chroma j-loop header, closers,
// the h->use_weight assignments inside the flag branches).
3707 static int pred_weight_table(H264Context *h){
3708     MpegEncContext * const s = &h->s;
3710     int luma_def, chroma_def;
3713     h->use_weight_chroma= 0;
3714     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3715     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is 1.0 in fixed point, i.e. 1 << denom; default offset 0.
3716     luma_def = 1<<h->luma_log2_weight_denom;
3717     chroma_def = 1<<h->chroma_log2_weight_denom;
3719     for(list=0; list<2; list++){
3720         for(i=0; i<h->ref_count[list]; i++){
3721             int luma_weight_flag, chroma_weight_flag;
3723             luma_weight_flag= get_bits1(&s->gb);
3724             if(luma_weight_flag){
3725                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3726                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3727                 if(   h->luma_weight[list][i] != luma_def
3728                    || h->luma_offset[list][i] != 0)
3731                 h->luma_weight[list][i]= luma_def;
3732                 h->luma_offset[list][i]= 0;
3735             chroma_weight_flag= get_bits1(&s->gb);
3736             if(chroma_weight_flag){
// One weight/offset pair per chroma plane (Cb, Cr).
3739                     h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3740                     h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3741                     if(   h->chroma_weight[list][i][j] != chroma_def
3742                        || h->chroma_offset[list][i][j] != 0)
3743                         h->use_weight_chroma= 1;
3748                     h->chroma_weight[list][i][j]= chroma_def;
3749                     h->chroma_offset[list][i][j]= 0;
// L1 weights only exist for B slices.
3753         if(h->slice_type != B_TYPE) break;
3755     h->use_weight= h->use_weight || h->use_weight_chroma;
// Compute the implicit B-slice bi-prediction weights from POC distances
// (H.264 clause on implicit weighted prediction): weight = 64 - dist_scale_factor
// for each (ref0, ref1) pair, clamped back to the 32/32 default when the
// scale factor leaves [-64, 128] or the temporal distance is degenerate.
// NOTE(review): elided lines include the early-return for the special
// ref_count 1/1 case and the td==0 guard around the division.
3759 static void implicit_weight_table(H264Context *h){
3760     MpegEncContext * const s = &h->s;
3762     int cur_poc = s->current_picture_ptr->poc;
// Single symmetric reference pair: weighting degenerates to the default.
3764     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
3765        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3767         h->use_weight_chroma= 0;
// use_weight==2 marks "implicit" mode for the motion-compensation code.
3772     h->use_weight_chroma= 2;
3773     h->luma_log2_weight_denom= 5;
3774     h->chroma_log2_weight_denom= 5;
3776     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3777         int poc0 = h->ref_list[0][ref0].poc;
3778         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3779             int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/dist_scale_factor follow the spec's fixed-point derivation.
3780             int td = av_clip(poc1 - poc0, -128, 127);
3782                 int tb = av_clip(cur_poc - poc0, -128, 127);
3783                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3784                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3785                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3786                     h->implicit_weight[ref0][ref1] = 32;
3788                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3790                 h->implicit_weight[ref0][ref1] = 32;
// Drop a picture's reference status unless it is still pending output
// (delayed_output_pic or in the delayed_pic list).
// NOTE(review): the body is truncated in this extract — the actual
// reference-clearing statement(s) after these guards are not visible.
3795 static inline void unreference_pic(H264Context *h, Picture *pic){
3798     if(pic == h->delayed_output_pic)
3801         for(i = 0; h->delayed_pic[i]; i++)
3802             if(pic == h->delayed_pic[i]){
3810  * instantaneous decoder refresh.
// Handle an IDR: release every long-term and short-term reference picture
// and reset both reference counts, as required before decoding an IDR slice.
3812 static void idr(H264Context *h){
3815     for(i=0; i<16; i++){
3816         if (h->long_ref[i] != NULL) {
3817             unreference_pic(h, h->long_ref[i]);
3818             h->long_ref[i]= NULL;
3821     h->long_ref_count=0;
3823     for(i=0; i<h->short_ref_count; i++){
3824         unreference_pic(h, h->short_ref[i]);
3825         h->short_ref[i]= NULL;
3827     h->short_ref_count=0;
3830 /* forget old pics after a seek */
// AVCodec.flush callback: clear the delayed-output queue and drop the
// reference flags of all pending pictures so the decoder restarts cleanly
// after a seek. (Tail of the function — e.g. the idr()/mpeg flush calls —
// is elided in this extract.)
3831 static void flush_dpb(AVCodecContext *avctx){
3832     H264Context *h= avctx->priv_data;
3834     for(i=0; i<16; i++) {
3835         if(h->delayed_pic[i])
3836             h->delayed_pic[i]->reference= 0;
3837         h->delayed_pic[i]= NULL;
3839     if(h->delayed_output_pic)
3840         h->delayed_output_pic->reference= 0;
3841     h->delayed_output_pic= NULL;
3843     if(h->s.current_picture_ptr)
3844         h->s.current_picture_ptr->reference= 0;
3849  * @return the removed picture or NULL if an error occurs
// Remove the short-term reference with the given frame_num from
// h->short_ref, compacting the array. Does NOT release the picture —
// callers pair this with unreference_pic(). (The return statements are
// elided in this extract.)
3851 static Picture * remove_short(H264Context *h, int frame_num){
3852     MpegEncContext * const s = &h->s;
3855     if(s->avctx->debug&FF_DEBUG_MMCO)
3856         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3858     for(i=0; i<h->short_ref_count; i++){
3859         Picture *pic= h->short_ref[i];
3860         if(s->avctx->debug&FF_DEBUG_MMCO)
3861             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3862         if(pic->frame_num == frame_num){
3863             h->short_ref[i]= NULL;
// Compact: shift the remaining pointers down over the freed slot.
3864             memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3865             h->short_ref_count--;
3874  * @return the removed picture or NULL if an error occurs
// Detach and return the long-term reference at index i (NULL slot allowed);
// decrements long_ref_count only when a picture was actually present.
3876 static Picture * remove_long(H264Context *h, int i){
3879     pic= h->long_ref[i];
3880     h->long_ref[i]= NULL;
3881     if(pic) h->long_ref_count--;
3887  * print short term list
// Debug helper: dump the short-term reference list when FF_DEBUG_MMCO is set.
3889 static void print_short_term(H264Context *h) {
3891     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3892         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3893         for(i=0; i<h->short_ref_count; i++){
3894             Picture *pic= h->short_ref[i];
3895             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3901  * print long term list
// Debug helper: dump the 16-slot long-term reference array when
// FF_DEBUG_MMCO is set (the NULL-slot guard line is elided here).
3903 static void print_long_term(H264Context *h) {
3905     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3906         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3907         for(i = 0; i < 16; i++){
3908             Picture *pic= h->long_ref[i];
3910                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3917  * Executes the reference picture marking (memory management control operations).
// Apply the parsed MMCO list to the reference buffers (H.264 "decoded
// reference picture marking"): unmark short/long refs, convert short to
// long, set the max long-term index, reset everything, or mark the current
// picture long-term. Afterwards, if the current picture was not made a
// long-term reference, insert it at the head of the short-term list.
// NOTE(review): case labels for MMCO_LONG / MMCO_RESET and several braces
// are elided in this extract.
3919 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3920     MpegEncContext * const s = &h->s;
3922     int current_is_long=0;
3925     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3926         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3928     for(i=0; i<mmco_count; i++){
3929         if(s->avctx->debug&FF_DEBUG_MMCO)
3930             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3932         switch(mmco[i].opcode){
3933         case MMCO_SHORT2UNUSED:
3934             pic= remove_short(h, mmco[i].short_frame_num);
3936                 unreference_pic(h, pic);
3937             else if(s->avctx->debug&FF_DEBUG_MMCO)
3938                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
3940         case MMCO_SHORT2LONG:
// Free whatever occupied the target long-term slot, then move the
// short-term picture into it.
3941             pic= remove_long(h, mmco[i].long_index);
3942             if(pic) unreference_pic(h, pic);
3944             h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3945             if (h->long_ref[ mmco[i].long_index ]){
3946                 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3947                 h->long_ref_count++;
3950         case MMCO_LONG2UNUSED:
3951             pic= remove_long(h, mmco[i].long_index);
3953                 unreference_pic(h, pic);
3954             else if(s->avctx->debug&FF_DEBUG_MMCO)
3955                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// (Elided case label — presumably MMCO_LONG): mark the CURRENT picture
// as a long-term reference at the given index.
3958             pic= remove_long(h, mmco[i].long_index);
3959             if(pic) unreference_pic(h, pic);
3961             h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3962             h->long_ref[ mmco[i].long_index ]->long_ref=1;
3963             h->long_ref_count++;
3967         case MMCO_SET_MAX_LONG:
3968             assert(mmco[i].long_index <= 16);
3969             // just remove the long term which index is greater than new max
3970             for(j = mmco[i].long_index; j<16; j++){
3971                 pic = remove_long(h, j);
3972                 if (pic) unreference_pic(h, pic);
// (Elided case label — presumably MMCO_RESET): drop ALL references.
3976             while(h->short_ref_count){
3977                 pic= remove_short(h, h->short_ref[0]->frame_num);
3978                 if(pic) unreference_pic(h, pic);
3980             for(j = 0; j < 16; j++) {
3981                 pic= remove_long(h, j);
3982                 if(pic) unreference_pic(h, pic);
// Default ("sliding window" path): current picture joins the short-term
// list at position 0; a colliding frame_num is an error we clean up.
3989     if(!current_is_long){
3990         pic= remove_short(h, s->current_picture_ptr->frame_num);
3992             unreference_pic(h, pic);
3993             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3996         if(h->short_ref_count)
3997             memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3999         h->short_ref[0]= s->current_picture_ptr;
4000         h->short_ref[0]->long_ref=0;
4001         h->short_ref_count++;
4004     print_short_term(h);
// Parse the dec_ref_pic_marking() slice-header syntax into h->mmco[]:
// IDR slices get the implicit flush/long-term marking; otherwise read
// explicit MMCO opcodes (adaptive mode) or synthesize a sliding-window
// MMCO_SHORT2UNUSED when the reference buffer is full.
// NOTE(review): returns, brace closers and the mmco-count bookkeeping are
// elided in this extract.
4009 static int decode_ref_pic_marking(H264Context *h){
4010     MpegEncContext * const s = &h->s;
4013     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag and long_term_reference_flag.
4014         s->broken_link= get_bits1(&s->gb) -1;
4015         h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4016         if(h->mmco[0].long_index == -1)
4019             h->mmco[0].opcode= MMCO_LONG;
4023         if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4024             for(i= 0; i<MAX_MMCO_COUNT; i++) {
4025                 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4027                 h->mmco[i].opcode= opcode;
4028                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1, wrapped modulo max_frame_num.
4029                     h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4030 /*                    if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4031                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4035                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4036                     unsigned int long_index= get_ue_golomb(&s->gb);
4037                     if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
4038                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4041                     h->mmco[i].long_index= long_index;
4044                 if(opcode > (unsigned)MMCO_LONG){
4045                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4048                 if(opcode == MMCO_END)
// Sliding-window mode: evict the oldest short-term ref when full.
4053             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4055             if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4056                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4057                 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Compute the picture order count (POC) of the current picture per
// H.264 8.2.1, for all three poc_type modes:
//  type 0 — explicit poc_lsb with MSB wraparound tracking,
//  type 1 — derived from frame_num via the SPS offset_for_ref_frame cycle,
//  type 2 — POC follows decoding order (2*frame_num, -1 for non-refs).
// Stores field_poc[] and the frame poc into the current picture.
// NOTE(review): several lines (field_poc[0] assignments, else-arms,
// closers) are elided in this extract.
4067 static int init_poc(H264Context *h){
4068     MpegEncContext * const s = &h->s;
4069     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset accumulates max_frame_num on every frame_num wrap.
4072     if(h->nal_unit_type == NAL_IDR_SLICE){
4073         h->frame_num_offset= 0;
4075         if(h->frame_num < h->prev_frame_num)
4076             h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4078             h->frame_num_offset= h->prev_frame_num_offset;
4081     if(h->sps.poc_type==0){
4082         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4084         if(h->nal_unit_type == NAL_IDR_SLICE){
// Detect poc_lsb wraparound in either direction to update poc_msb.
4089         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4090             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4091         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4092             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4094             h->poc_msb = h->prev_poc_msb;
4095 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4097         field_poc[1] = h->poc_msb + h->poc_lsb;
4098         if(s->picture_structure == PICT_FRAME)
4099             field_poc[1] += h->delta_poc_bottom;
4100     }else if(h->sps.poc_type==1){
4101         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4104         if(h->sps.poc_cycle_length != 0)
4105             abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures sit between cycle positions (abs_frame_num - 1).
4109         if(h->nal_ref_idc==0 && abs_frame_num > 0)
4112         expected_delta_per_poc_cycle = 0;
4113         for(i=0; i < h->sps.poc_cycle_length; i++)
4114             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4116         if(abs_frame_num > 0){
4117             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4118             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4120             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4121             for(i = 0; i <= frame_num_in_poc_cycle; i++)
4122                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4126         if(h->nal_ref_idc == 0)
4127             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4129         field_poc[0] = expectedpoc + h->delta_poc[0];
4130         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4132         if(s->picture_structure == PICT_FRAME)
4133             field_poc[1] += h->delta_poc[1];
// poc_type 2: decoding order; non-reference pictures get odd POCs below.
4136         if(h->nal_unit_type == NAL_IDR_SLICE){
4139             if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4140             else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Publish the per-field POCs; the frame POC is the smaller of the two.
4146     if(s->picture_structure != PICT_BOTTOM_FIELD)
4147         s->current_picture_ptr->field_poc[0]= field_poc[0];
4148     if(s->picture_structure != PICT_TOP_FIELD)
4149         s->current_picture_ptr->field_poc[1]= field_poc[1];
4150     if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4151         s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4158 * initialize scan tables
4160 static void init_scan_tables(H264Context *h){
4161 MpegEncContext * const s = &h->s;
4163 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4164 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4165 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4167 for(i=0; i<16; i++){
4168 #define T(x) (x>>2) | ((x<<2) & 0xF)
4169 h->zigzag_scan[i] = T(zigzag_scan[i]);
4170 h-> field_scan[i] = T( field_scan[i]);
4174 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4175 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4176 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4177 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4178 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4180 for(i=0; i<64; i++){
4181 #define T(x) (x>>3) | ((x&7)<<3)
4182 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4183 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4184 h->field_scan8x8[i] = T(field_scan8x8[i]);
4185 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
4189 if(h->sps.transform_bypass){ //FIXME same ugly
4190 h->zigzag_scan_q0 = zigzag_scan;
4191 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4192 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4193 h->field_scan_q0 = field_scan;
4194 h->field_scan8x8_q0 = field_scan8x8;
4195 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4197 h->zigzag_scan_q0 = h->zigzag_scan;
4198 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4199 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4200 h->field_scan_q0 = h->field_scan;
4201 h->field_scan8x8_q0 = h->field_scan8x8;
4202 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4206 * decodes a slice header.
4207 * this will also call MPV_common_init() and frame_start() as needed
4209 static int decode_slice_header(H264Context *h){
4210 MpegEncContext * const s = &h->s;
4211 unsigned int first_mb_in_slice;
4212 unsigned int pps_id;
4213 int num_ref_idx_active_override_flag;
4214 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4215 unsigned int slice_type, tmp;
4216 int default_ref_list_done = 0;
4218 s->current_picture.reference= h->nal_ref_idc != 0;
4219 s->dropable= h->nal_ref_idc == 0;
4221 first_mb_in_slice= get_ue_golomb(&s->gb);
4223 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
4225 s->current_picture_ptr= NULL;
4228 slice_type= get_ue_golomb(&s->gb);
4230 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4235 h->slice_type_fixed=1;
4237 h->slice_type_fixed=0;
4239 slice_type= slice_type_map[ slice_type ];
4240 if (slice_type == I_TYPE
4241 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4242 default_ref_list_done = 1;
4244 h->slice_type= slice_type;
4246 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4248 pps_id= get_ue_golomb(&s->gb);
4249 if(pps_id>=MAX_PPS_COUNT){
4250 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4253 if(!h->pps_buffers[pps_id]) {
4254 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4257 h->pps= *h->pps_buffers[pps_id];
4259 if(!h->sps_buffers[h->pps.sps_id]) {
4260 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4263 h->sps = *h->sps_buffers[h->pps.sps_id];
4265 if(h->dequant_coeff_pps != pps_id){
4266 h->dequant_coeff_pps = pps_id;
4267 init_dequant_tables(h);
4270 s->mb_width= h->sps.mb_width;
4271 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4273 h->b_stride= s->mb_width*4;
4274 h->b8_stride= s->mb_width*2;
4276 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4277 if(h->sps.frame_mbs_only_flag)
4278 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4280 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4282 if (s->context_initialized
4283 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4287 if (!s->context_initialized) {
4288 if (MPV_common_init(s) < 0)
4291 init_scan_tables(h);
4294 s->avctx->width = s->width;
4295 s->avctx->height = s->height;
4296 s->avctx->sample_aspect_ratio= h->sps.sar;
4297 if(!s->avctx->sample_aspect_ratio.den)
4298 s->avctx->sample_aspect_ratio.den = 1;
4300 if(h->sps.timing_info_present_flag){
4301 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4302 if(h->x264_build > 0 && h->x264_build < 44)
4303 s->avctx->time_base.den *= 2;
4304 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4305 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4309 if(h->slice_num == 0){
4310 if(frame_start(h) < 0)
4314 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4315 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4318 h->mb_aff_frame = 0;
4319 if(h->sps.frame_mbs_only_flag){
4320 s->picture_structure= PICT_FRAME;
4322 if(get_bits1(&s->gb)) { //field_pic_flag
4323 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4324 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4326 s->picture_structure= PICT_FRAME;
4327 h->mb_aff_frame = h->sps.mb_aff;
4330 assert(s->mb_num == s->mb_width * s->mb_height);
4331 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
4332 first_mb_in_slice >= s->mb_num){
4333 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4336 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4337 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4338 assert(s->mb_y < s->mb_height);
4340 if(s->picture_structure==PICT_FRAME){
4341 h->curr_pic_num= h->frame_num;
4342 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4344 h->curr_pic_num= 2*h->frame_num;
4345 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4348 if(h->nal_unit_type == NAL_IDR_SLICE){
4349 get_ue_golomb(&s->gb); /* idr_pic_id */
4352 if(h->sps.poc_type==0){
4353 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4355 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4356 h->delta_poc_bottom= get_se_golomb(&s->gb);
4360 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4361 h->delta_poc[0]= get_se_golomb(&s->gb);
4363 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4364 h->delta_poc[1]= get_se_golomb(&s->gb);
4369 if(h->pps.redundant_pic_cnt_present){
4370 h->redundant_pic_count= get_ue_golomb(&s->gb);
4373 //set defaults, might be overriden a few line later
4374 h->ref_count[0]= h->pps.ref_count[0];
4375 h->ref_count[1]= h->pps.ref_count[1];
4377 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4378 if(h->slice_type == B_TYPE){
4379 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4380 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4381 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4383 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4385 if(num_ref_idx_active_override_flag){
4386 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4387 if(h->slice_type==B_TYPE)
4388 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4390 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4391 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4392 h->ref_count[0]= h->ref_count[1]= 1;
4396 if(h->slice_type == B_TYPE)
4403 if(!default_ref_list_done){
4404 fill_default_ref_list(h);
4407 if(decode_ref_pic_list_reordering(h) < 0)
4410 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4411 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4412 pred_weight_table(h);
4413 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4414 implicit_weight_table(h);
4418 if(s->current_picture.reference)
4419 decode_ref_pic_marking(h);
4422 fill_mbaff_ref_list(h);
4424 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4425 tmp = get_ue_golomb(&s->gb);
4427 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4430 h->cabac_init_idc= tmp;
4433 h->last_qscale_diff = 0;
4434 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4436 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4440 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4441 //FIXME qscale / qp ... stuff
4442 if(h->slice_type == SP_TYPE){
4443 get_bits1(&s->gb); /* sp_for_switch_flag */
4445 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4446 get_se_golomb(&s->gb); /* slice_qs_delta */
4449 h->deblocking_filter = 1;
4450 h->slice_alpha_c0_offset = 0;
4451 h->slice_beta_offset = 0;
4452 if( h->pps.deblocking_filter_parameters_present ) {
4453 tmp= get_ue_golomb(&s->gb);
4455 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4458 h->deblocking_filter= tmp;
4459 if(h->deblocking_filter < 2)
4460 h->deblocking_filter^= 1; // 1<->0
4462 if( h->deblocking_filter ) {
4463 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4464 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4467 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4468 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4469 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4470 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4471 h->deblocking_filter= 0;
4474 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4475 slice_group_change_cycle= get_bits(&s->gb, ?);
4480 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4481 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4483 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4484 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4486 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4488 av_get_pict_type_char(h->slice_type),
4489 pps_id, h->frame_num,
4490 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4491 h->ref_count[0], h->ref_count[1],
4493 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4495 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4499 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4500 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4501 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4503 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4504 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
4513 static inline int get_level_prefix(GetBitContext *gb){
4517 OPEN_READER(re, gb);
4518 UPDATE_CACHE(re, gb);
4519 buf=GET_CACHE(re, gb);
4521 log= 32 - av_log2(buf);
4523 print_bin(buf>>(32-log), log);
4524 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4527 LAST_SKIP_BITS(re, gb, log);
4528 CLOSE_READER(re, gb);
4533 static inline int get_dct8x8_allowed(H264Context *h){
4536 if(!IS_SUB_8X8(h->sub_mb_type[i])
4537 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4544 * decodes a residual block.
4545 * @param n block index
4546 * @param scantable scantable
4547 * @param max_coeff number of coefficients in the block
4548 * @return <0 if an error occured
4550 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4551 MpegEncContext * const s = &h->s;
4552 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4554 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4556 //FIXME put trailing_onex into the context
4558 if(n == CHROMA_DC_BLOCK_INDEX){
4559 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4560 total_coeff= coeff_token>>2;
4562 if(n == LUMA_DC_BLOCK_INDEX){
4563 total_coeff= pred_non_zero_count(h, 0);
4564 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4565 total_coeff= coeff_token>>2;
4567 total_coeff= pred_non_zero_count(h, n);
4568 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4569 total_coeff= coeff_token>>2;
4570 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4574 //FIXME set last_non_zero?
4578 if(total_coeff > (unsigned)max_coeff) {
4579 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4583 trailing_ones= coeff_token&3;
4584 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4585 assert(total_coeff<=16);
4587 for(i=0; i<trailing_ones; i++){
4588 level[i]= 1 - 2*get_bits1(gb);
4592 int level_code, mask;
4593 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4594 int prefix= get_level_prefix(gb);
4596 //first coefficient has suffix_length equal to 0 or 1
4597 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4599 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4601 level_code= (prefix<<suffix_length); //part
4602 }else if(prefix==14){
4604 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4606 level_code= prefix + get_bits(gb, 4); //part
4607 }else if(prefix==15){
4608 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4609 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4611 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4615 if(trailing_ones < 3) level_code += 2;
4620 mask= -(level_code&1);
4621 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4624 //remaining coefficients have suffix_length > 0
4625 for(;i<total_coeff;i++) {
4626 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4627 prefix = get_level_prefix(gb);
4629 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4630 }else if(prefix==15){
4631 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4633 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4636 mask= -(level_code&1);
4637 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4638 if(level_code > suffix_limit[suffix_length])
4643 if(total_coeff == max_coeff)
4646 if(n == CHROMA_DC_BLOCK_INDEX)
4647 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4649 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4652 coeff_num = zeros_left + total_coeff - 1;
4653 j = scantable[coeff_num];
4655 block[j] = level[0];
4656 for(i=1;i<total_coeff;i++) {
4659 else if(zeros_left < 7){
4660 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4662 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4664 zeros_left -= run_before;
4665 coeff_num -= 1 + run_before;
4666 j= scantable[ coeff_num ];
4671 block[j] = (level[0] * qmul[j] + 32)>>6;
4672 for(i=1;i<total_coeff;i++) {
4675 else if(zeros_left < 7){
4676 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4678 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4680 zeros_left -= run_before;
4681 coeff_num -= 1 + run_before;
4682 j= scantable[ coeff_num ];
4684 block[j]= (level[i] * qmul[j] + 32)>>6;
4689 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4696 static void predict_field_decoding_flag(H264Context *h){
4697 MpegEncContext * const s = &h->s;
4698 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4699 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4700 ? s->current_picture.mb_type[mb_xy-1]
4701 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4702 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4704 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4708 * decodes a P_SKIP or B_SKIP macroblock
4710 static void decode_mb_skip(H264Context *h){
4711 MpegEncContext * const s = &h->s;
4712 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4715 memset(h->non_zero_count[mb_xy], 0, 16);
4716 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4719 mb_type|= MB_TYPE_INTERLACED;
4721 if( h->slice_type == B_TYPE )
4723 // just for fill_caches. pred_direct_motion will set the real mb_type
4724 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4726 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4727 pred_direct_motion(h, &mb_type);
4728 mb_type|= MB_TYPE_SKIP;
4733 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4735 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4736 pred_pskip_motion(h, &mx, &my);
4737 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4738 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4741 write_back_motion(h, mb_type);
4742 s->current_picture.mb_type[mb_xy]= mb_type;
4743 s->current_picture.qscale_table[mb_xy]= s->qscale;
4744 h->slice_table[ mb_xy ]= h->slice_num;
4745 h->prev_mb_skipped= 1;
4749 * decodes a macroblock
4750 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4752 static int decode_mb_cavlc(H264Context *h){
4753 MpegEncContext * const s = &h->s;
4754 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4755 int partition_count;
4756 unsigned int mb_type, cbp;
4757 int dct8x8_allowed= h->pps.transform_8x8_mode;
4759 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4761 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4762 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4764 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4765 if(s->mb_skip_run==-1)
4766 s->mb_skip_run= get_ue_golomb(&s->gb);
4768 if (s->mb_skip_run--) {
4769 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4770 if(s->mb_skip_run==0)
4771 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4773 predict_field_decoding_flag(h);
4780 if( (s->mb_y&1) == 0 )
4781 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4783 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4785 h->prev_mb_skipped= 0;
4787 mb_type= get_ue_golomb(&s->gb);
4788 if(h->slice_type == B_TYPE){
4790 partition_count= b_mb_type_info[mb_type].partition_count;
4791 mb_type= b_mb_type_info[mb_type].type;
4794 goto decode_intra_mb;
4796 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4798 partition_count= p_mb_type_info[mb_type].partition_count;
4799 mb_type= p_mb_type_info[mb_type].type;
4802 goto decode_intra_mb;
4805 assert(h->slice_type == I_TYPE);
4808 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4812 cbp= i_mb_type_info[mb_type].cbp;
4813 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4814 mb_type= i_mb_type_info[mb_type].type;
4818 mb_type |= MB_TYPE_INTERLACED;
4820 h->slice_table[ mb_xy ]= h->slice_num;
4822 if(IS_INTRA_PCM(mb_type)){
4825 // We assume these blocks are very rare so we do not optimize it.
4826 align_get_bits(&s->gb);
4828 // The pixels are stored in the same order as levels in h->mb array.
4829 for(y=0; y<16; y++){
4830 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4831 for(x=0; x<16; x++){
4832 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4833 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4837 const int index= 256 + 4*(y&3) + 32*(y>>2);
4839 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4840 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4844 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4846 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4847 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4851 // In deblocking, the quantizer is 0
4852 s->current_picture.qscale_table[mb_xy]= 0;
4853 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4854 // All coeffs are present
4855 memset(h->non_zero_count[mb_xy], 16, 16);
4857 s->current_picture.mb_type[mb_xy]= mb_type;
4862 h->ref_count[0] <<= 1;
4863 h->ref_count[1] <<= 1;
4866 fill_caches(h, mb_type, 0);
4869 if(IS_INTRA(mb_type)){
4871 // init_top_left_availability(h);
4872 if(IS_INTRA4x4(mb_type)){
4875 if(dct8x8_allowed && get_bits1(&s->gb)){
4876 mb_type |= MB_TYPE_8x8DCT;
4880 // fill_intra4x4_pred_table(h);
4881 for(i=0; i<16; i+=di){
4882 int mode= pred_intra_mode(h, i);
4884 if(!get_bits1(&s->gb)){
4885 const int rem_mode= get_bits(&s->gb, 3);
4886 mode = rem_mode + (rem_mode >= mode);
4890 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4892 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4894 write_back_intra_pred_mode(h);
4895 if( check_intra4x4_pred_mode(h) < 0)
4898 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4899 if(h->intra16x16_pred_mode < 0)
4903 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4906 h->chroma_pred_mode= pred_mode;
4907 }else if(partition_count==4){
4908 int i, j, sub_partition_count[4], list, ref[2][4];
4910 if(h->slice_type == B_TYPE){
4912 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4913 if(h->sub_mb_type[i] >=13){
4914 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4917 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4918 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4920 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4921 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4922 pred_direct_motion(h, &mb_type);
4923 h->ref_cache[0][scan8[4]] =
4924 h->ref_cache[1][scan8[4]] =
4925 h->ref_cache[0][scan8[12]] =
4926 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4929 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4931 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4932 if(h->sub_mb_type[i] >=4){
4933 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4936 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4937 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4941 for(list=0; list<h->list_count; list++){
4942 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4944 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4945 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4946 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4948 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4960 dct8x8_allowed = get_dct8x8_allowed(h);
4962 for(list=0; list<h->list_count; list++){
4964 if(IS_DIRECT(h->sub_mb_type[i])) {
4965 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4968 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4969 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4971 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4972 const int sub_mb_type= h->sub_mb_type[i];
4973 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4974 for(j=0; j<sub_partition_count[i]; j++){
4976 const int index= 4*i + block_width*j;
4977 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4978 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4979 mx += get_se_golomb(&s->gb);
4980 my += get_se_golomb(&s->gb);
4981 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4983 if(IS_SUB_8X8(sub_mb_type)){
4985 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4987 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4988 }else if(IS_SUB_8X4(sub_mb_type)){
4989 mv_cache[ 1 ][0]= mx;
4990 mv_cache[ 1 ][1]= my;
4991 }else if(IS_SUB_4X8(sub_mb_type)){
4992 mv_cache[ 8 ][0]= mx;
4993 mv_cache[ 8 ][1]= my;
4995 mv_cache[ 0 ][0]= mx;
4996 mv_cache[ 0 ][1]= my;
4999 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5005 }else if(IS_DIRECT(mb_type)){
5006 pred_direct_motion(h, &mb_type);
5007 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5009 int list, mx, my, i;
5010 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5011 if(IS_16X16(mb_type)){
5012 for(list=0; list<h->list_count; list++){
5014 if(IS_DIR(mb_type, 0, list)){
5015 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5016 if(val >= h->ref_count[list]){
5017 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5021 val= LIST_NOT_USED&0xFF;
5022 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5024 for(list=0; list<h->list_count; list++){
5026 if(IS_DIR(mb_type, 0, list)){
5027 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5028 mx += get_se_golomb(&s->gb);
5029 my += get_se_golomb(&s->gb);
5030 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5032 val= pack16to32(mx,my);
5035 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
5038 else if(IS_16X8(mb_type)){
5039 for(list=0; list<h->list_count; list++){
5042 if(IS_DIR(mb_type, i, list)){
5043 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5044 if(val >= h->ref_count[list]){
5045 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5049 val= LIST_NOT_USED&0xFF;
5050 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5053 for(list=0; list<h->list_count; list++){
5056 if(IS_DIR(mb_type, i, list)){
5057 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5058 mx += get_se_golomb(&s->gb);
5059 my += get_se_golomb(&s->gb);
5060 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5062 val= pack16to32(mx,my);
5065 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
5069 assert(IS_8X16(mb_type));
5070 for(list=0; list<h->list_count; list++){
5073 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5074 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5075 if(val >= h->ref_count[list]){
5076 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5080 val= LIST_NOT_USED&0xFF;
5081 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5084 for(list=0; list<h->list_count; list++){
5087 if(IS_DIR(mb_type, i, list)){
5088 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5089 mx += get_se_golomb(&s->gb);
5090 my += get_se_golomb(&s->gb);
5091 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5093 val= pack16to32(mx,my);
5096 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
5102 if(IS_INTER(mb_type))
5103 write_back_motion(h, mb_type);
5105 if(!IS_INTRA16x16(mb_type)){
5106 cbp= get_ue_golomb(&s->gb);
5108 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
5112 if(IS_INTRA4x4(mb_type))
5113 cbp= golomb_to_intra4x4_cbp[cbp];
5115 cbp= golomb_to_inter_cbp[cbp];
5119 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5120 if(get_bits1(&s->gb))
5121 mb_type |= MB_TYPE_8x8DCT;
5123 s->current_picture.mb_type[mb_xy]= mb_type;
5125 if(cbp || IS_INTRA16x16(mb_type)){
5126 int i8x8, i4x4, chroma_idx;
5127 int chroma_qp, dquant;
5128 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5129 const uint8_t *scan, *scan8x8, *dc_scan;
5131 // fill_non_zero_count_cache(h);
5133 if(IS_INTERLACED(mb_type)){
5134 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5135 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5136 dc_scan= luma_dc_field_scan;
5138 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5139 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5140 dc_scan= luma_dc_zigzag_scan;
5143 dquant= get_se_golomb(&s->gb);
5145 if( dquant > 25 || dquant < -26 ){
5146 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5150 s->qscale += dquant;
5151 if(((unsigned)s->qscale) > 51){
5152 if(s->qscale<0) s->qscale+= 52;
5153 else s->qscale-= 52;
5156 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5157 if(IS_INTRA16x16(mb_type)){
5158 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5159 return -1; //FIXME continue if partitioned and other return -1 too
5162 assert((cbp&15) == 0 || (cbp&15) == 15);
5165 for(i8x8=0; i8x8<4; i8x8++){
5166 for(i4x4=0; i4x4<4; i4x4++){
5167 const int index= i4x4 + 4*i8x8;
5168 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5174 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5177 for(i8x8=0; i8x8<4; i8x8++){
5178 if(cbp & (1<<i8x8)){
5179 if(IS_8x8DCT(mb_type)){
5180 DCTELEM *buf = &h->mb[64*i8x8];
5182 for(i4x4=0; i4x4<4; i4x4++){
5183 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5184 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5187 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5188 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5190 for(i4x4=0; i4x4<4; i4x4++){
5191 const int index= i4x4 + 4*i8x8;
5193 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5199 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5200 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5206 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5207 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5213 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5214 for(i4x4=0; i4x4<4; i4x4++){
5215 const int index= 16 + 4*chroma_idx + i4x4;
5216 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5222 uint8_t * const nnz= &h->non_zero_count_cache[0];
5223 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5224 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5227 uint8_t * const nnz= &h->non_zero_count_cache[0];
5228 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5229 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5230 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5232 s->current_picture.qscale_table[mb_xy]= s->qscale;
5233 write_back_non_zero_count(h);
5236 h->ref_count[0] >>= 1;
5237 h->ref_count[1] >>= 1;
/* Decode the CABAC mb_field_decoding_flag (MBAFF frames): the context index
 * 70+ctx is built from whether the left (A) and above (B) macroblock pairs
 * in the same slice are field-coded.
 * NOTE(review): this chunk has lines elided (ctx increments / closing braces
 * between the visible ifs are missing) — verify against the full source. */
5243 static int decode_cabac_field_decoding_flag(H264Context *h) {
5244 MpegEncContext * const s = &h->s;
5245 const int mb_x = s->mb_x;
5246 const int mb_y = s->mb_y & ~1;
/* neighbours are addressed at MB-pair granularity: left pair, above pair */
5247 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5248 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5250 unsigned int ctx = 0;
/* left neighbour in same slice and interlaced -> presumably ctx++ (elided) */
5252 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
/* above neighbour in same slice and interlaced -> presumably ctx++ (elided) */
5255 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5259 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decode an intra macroblock type with CABAC.
 * Returns 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I_16x16 variant
 * (cbp_luma, cbp_chroma and the 16x16 prediction mode are folded into the
 * returned index). ctx_base selects the state offset (3 for I slices,
 * 17/32 when called for intra MBs inside P/B slices); intra_slice selects
 * the per-slice-type context layout.
 * NOTE(review): lines are elided here (ctx computation, the declaration of
 * mb_type and the final return are missing from this view). */
5262 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5263 uint8_t *state= &h->cabac_state[ctx_base];
5267 MpegEncContext * const s = &h->s;
5268 const int mba_xy = h->left_mb_xy[0];
5269 const int mbb_xy = h->top_mb_xy;
/* context depends on whether left/top neighbours are non-I4x4 intra */
5271 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5273 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5275 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5276 return 0;   /* I4x4 */
5279 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5280 return 0;   /* I4x4 */
/* terminate bin distinguishes I_PCM from I_16x16 */
5283 if( get_cabac_terminate( &h->cabac ) )
5284 return 25; /* PCM */
5286 mb_type = 1; /* I16x16 */
5287 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5288 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5289 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
/* final two bins: the 16x16 intra prediction mode (2 bits) */
5290 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5291 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decode the macroblock type for the current slice type with CABAC.
 * I slices delegate straight to decode_cabac_intra_mb_type; P slices decode
 * the inter partitioning or fall back to intra+5; B slices decode a prefix
 * tree over states 27..32 (the 4-bit 'bits' code distinguishes the B
 * partition shapes, with 13/14/15 as escape values).
 * NOTE(review): lines elided (declarations of ctx/bits, several closing
 * braces and else lines are missing from this view). */
5295 static int decode_cabac_mb_type( H264Context *h ) {
5296 MpegEncContext * const s = &h->s;
5298 if( h->slice_type == I_TYPE ) {
5299 return decode_cabac_intra_mb_type(h, 3, 1);
5300 } else if( h->slice_type == P_TYPE ) {
5301 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5303 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5304 /* P_L0_D16x16, P_8x8 */
5305 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5307 /* P_L0_D8x16, P_L0_D16x8 */
5308 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra MB inside a P slice: offset result by 5 past the inter types */
5311 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5313 } else if( h->slice_type == B_TYPE ) {
5314 const int mba_xy = h->left_mb_xy[0];
5315 const int mbb_xy = h->top_mb_xy;
/* ctx 0..2 from how many neighbours are coded non-direct */
5319 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5321 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5324 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5325 return 0; /* B_Direct_16x16 */
5327 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5328 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selecting among the remaining B partition types */
5331 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5332 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5333 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5334 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5336 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5337 else if( bits == 13 ) {
/* escape: intra MB inside a B slice, offset past the 23 inter types */
5338 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5339 } else if( bits == 14 )
5340 return 11; /* B_L1_L0_8x16 */
5341 else if( bits == 15 )
5342 return 22; /* B_8x8 */
/* one more bin extends the code for the remaining bi-pred shapes */
5344 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5345 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5347 /* TODO SI/SP frames? */
/* Decode the CABAC mb_skip_flag for the MB at (mb_x, mb_y).
 * Context (11+ctx for P, shifted for B — the B offset line is elided) counts
 * how many of the left/top neighbours in the same slice are NOT skipped.
 * The FRAME_MBAFF branch adjusts the neighbour addresses to the matching
 * field/frame MB of the pair.
 * NOTE(review): declarations of mba_xy/mbb_xy/ctx and several if-headers
 * are elided from this view. */
5352 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5353 MpegEncContext * const s = &h->s;
5357 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5358 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* pick the bottom MB of the left pair when its field/frame mode matches */
5361 && h->slice_table[mba_xy] == h->slice_num
5362 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5363 mba_xy += s->mb_stride;
5365 mbb_xy = mb_xy - s->mb_stride;
5367 && h->slice_table[mbb_xy] == h->slice_num
5368 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5369 mbb_xy -= s->mb_stride;
5371 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5373 int mb_xy = mb_x + mb_y*s->mb_stride;
5375 mbb_xy = mb_xy - s->mb_stride;
/* ctx += 1 per non-skipped same-slice neighbour */
5378 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5380 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5383 if( h->slice_type == B_TYPE )
5385 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decode the intra4x4 prediction mode for one 4x4 block.
 * State 68 codes "use most probable mode"; otherwise 3 bins from state 69
 * build rem_intra4x4_pred_mode, which is remapped around pred_mode per the
 * H.264 spec (the mode++ / return lines are elided from this view). */
5388 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5391 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
/* 3-bit fixed-length remainder, LSB first */
5394 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5395 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5396 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so every mode stays representable */
5398 if( mode >= pred_mode )
/* Decode intra_chroma_pred_mode (0..3) with CABAC.
 * Context 64+ctx counts same-slice neighbours whose stored chroma mode is
 * non-zero; the remaining modes are coded as a truncated unary with state
 * 64+3. (Return statements between the visible ifs are elided.) */
5404 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5405 const int mba_xy = h->left_mb_xy[0];
5406 const int mbb_xy = h->top_mb_xy;
5410 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5411 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5414 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5417 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5420 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5422 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping a 4x4 luma block index (decoding order) to its
 * (x, y) position within the 4x4 grid of the macroblock, and the inverse
 * map from (x, y) back to the block index (initializers for block_idx_xy
 * are elided from this view). Used by decode_cabac_mb_cbp_luma. */
5428 static const uint8_t block_idx_x[16] = {
5429 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5431 static const uint8_t block_idx_y[16] = {
5432 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5434 static const uint8_t block_idx_xy[4][4] = {
/* Decode the 4-bit luma coded_block_pattern with CABAC.
 * For each 8x8 block, the context (states 73..76) is built from whether the
 * left/top neighbouring 8x8 blocks — possibly inside the left/top MB via
 * left_cbp/top_cbp — were coded. Neighbour cbp is fetched per-MB via the
 * block_idx_xy tables.
 * NOTE(review): heavily elided (cbp accumulation, declarations of
 * cbp/cbp_a/cbp_b/ctx/x/y, and the return are missing from this view). */
5441 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5446 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5448 tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
5451 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* position of this 8x8 block within the MB's 4x4 grid */
5456 x = block_idx_x[4*i8x8];
5457 y = block_idx_y[4*i8x8];
5461 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5462 cbp_a = h->left_cbp;
5463 tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
5469 /* No need to test for skip as we put 0 for skip block */
5470 /* No need to test for IPCM as we put 1 for IPCM block */
5472 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5473 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5478 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5479 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5483 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decode the chroma coded_block_pattern (0: none, 1: DC only, 2: DC+AC)
 * with CABAC. Two bins from states 77..80, each conditioned on the chroma
 * cbp of the left/top neighbours (bits 4-5 of the stored cbp).
 * (Declarations of cbp_a/cbp_b/ctx and the intermediate return/ctx reset
 * are elided from this view.) */
5489 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5493 cbp_a = (h->left_cbp>>4)&0x03;
5494 cbp_b = (h-> top_cbp>>4)&0x03;
/* first bin: any chroma coefficients at all? */
5497 if( cbp_a > 0 ) ctx++;
5498 if( cbp_b > 0 ) ctx += 2;
5499 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: AC too? context from neighbours having full chroma cbp */
5503 if( cbp_a == 2 ) ctx++;
5504 if( cbp_b == 2 ) ctx += 2;
5505 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decode mb_qp_delta as a CABAC unary code (states 60..63) and map the
 * unary value to a signed delta: even -> +(val+1)/2 (elided), odd ->
 * -(val+1)/2. Context depends on the previous MB's delta being non-zero.
 * The val>102 guard aborts a runaway unary string; presumably the elided
 * branch returns INT_MIN there (decode_mb_cabac checks for INT_MIN). */
5507 static int decode_cabac_mb_dqp( H264Context *h) {
5508 MpegEncContext * const s = &h->s;
/* previous MB in raster order, wrapping to the end of the previous row */
5514 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5516 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5518 if( h->last_qscale_diff != 0 )
5521 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5527 if(val > 102) //prevent infinite loop
5534 return -(val + 1)/2;
/* Decode a P-slice sub_mb_type (8x8 sub-partitioning) from a 3-level
 * binary tree over CABAC states 21..23; the leaf return values are elided
 * from this view. Result indexes p_sub_mb_type_info. */
5536 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5537 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5539 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5541 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode a B-slice sub_mb_type from a prefix tree over CABAC states 36..39.
 * Returns an index into b_sub_mb_type_info (0 = B_Direct_8x8, up to the
 * 4x4 bi-predictive types). The 'type' base assignment and final return
 * are elided from this view. */
5545 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5547 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5548 return 0; /* B_Direct_8x8 */
5549 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5550 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5552 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5553 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5554 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* two suffix bins refine the remaining 8x4 / 4x8 / bi shapes */
5557 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5558 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; context 399..401 is selected by how many
 * neighbours already use the 8x8 transform (neighbor_transform_size). */
5562 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5563 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode a reference index ref_idx_lX for block n as a CABAC unary code
 * (states 54+). Context is derived from the neighbouring blocks' cached
 * ref indices; in B slices, neighbours coded as direct do not count.
 * Clamps runaway values at 32 to avoid an unbounded loop on broken streams.
 * NOTE(review): ctx computation and the 'ref' declaration/return are
 * elided from this view. */
5566 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5567 int refa = h->ref_cache[list][scan8[n] - 1];
5568 int refb = h->ref_cache[list][scan8[n] - 8];
5572 if( h->slice_type == B_TYPE) {
/* direct-coded neighbours are treated as ref 0 for context purposes */
5573 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5575 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5584 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5590 if(ref >= 32 /*h->ref_list[list]*/){
5591 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5592 return 0; //FIXME we should return -1 and check the return everywhere
/* Decode one motion vector difference component (l: 0=x, 1=y) for block n.
 * UEG3 binarization: a context-coded truncated unary prefix (ctxbase 40/47,
 * context chosen from the summed neighbour |mvd|), then a bypass-coded
 * exp-Golomb suffix for magnitudes >= 9, then a bypass sign.
 * The overflow av_log guards against runaway exp-Golomb prefixes.
 * NOTE(review): the ctx selection, 'mvd'/'k' declarations and several
 * loop bodies are elided from this view. */
5598 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5599 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5600 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5601 int ctxbase = (l == 0) ? 40 : 47;
5606 else if( amvd > 32 )
5611 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* truncated-unary prefix, at most 9 context-coded bins */
5616 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exp-Golomb suffix */
5624 while( get_cabac_bypass( &h->cabac ) ) {
5628 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5633 if( get_cabac_bypass( &h->cabac ) )
5637 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Compute the context index for the coded_block_flag of block idx in
 * category cat (0: luma DC, 1/2: luma AC/4x4, 3: chroma DC, 4: chroma AC).
 * nza/nzb are the "neighbour has non-zero coeffs" flags, taken from either
 * the cached per-block non_zero_count or the packed left/top cbp bits;
 * final context is nza + 2*nzb (elided here) offset by 4 per category.
 * NOTE(review): the leading cat==0 branch head and the ctx combination
 * lines are elided from this view. */
5640 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* luma DC: non-zero flag stored as bit 8 of the packed cbp */
5645 nza = h->left_cbp&0x100;
5646 nzb = h-> top_cbp&0x100;
5647 } else if( cat == 1 || cat == 2 ) {
5648 nza = h->non_zero_count_cache[scan8[idx] - 1];
5649 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5650 } else if( cat == 3 ) {
/* chroma DC: per-component flag in bits 6..7 of the packed cbp */
5651 nza = (h->left_cbp>>(6+idx))&0x01;
5652 nzb = (h-> top_cbp>>(6+idx))&0x01;
5655 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5656 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5665 return ctx + 4 * cat;
/* Maps a scan position (0..62) in an 8x8 block to the context offset used
 * for its last_significant_coeff_flag (H.264 spec Table 9-43 grouping). */
5668 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5669 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5670 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5671 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5672 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one residual block with CABAC:
 *   1. coded_block_flag (context from get_cabac_cbf_ctx) — early out if 0;
 *   2. significance map (significant_coeff_flag / last_significant_coeff_flag),
 *      with separate context tables for frame/field (MB_FIELD) and 8x8 blocks;
 *   3. coefficient levels (coeff_abs_level_minus1 + sign), scanned in reverse,
 *      optionally dequantized through qmul (NULL for DC blocks).
 * block receives the coefficients at positions given by scantable; cat/n are
 * the block category/index documented inline below; max_coeff is 4/15/16/64.
 * CABAC_ON_STACK copies the decoder state into a local for better codegen and
 * writes it back on every exit path.
 * NOTE(review): many lines are elided from this view (the 'index' array,
 * 'last'/'coeff_abs' declarations, #else/#endif lines, several braces). */
5675 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5676 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context-table base offsets, indexed [MB_FIELD][cat] (spec Table 9-40) */
5677 static const int significant_coeff_flag_offset[2][6] = {
5678 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5679 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5681 static const int last_coeff_flag_offset[2][6] = {
5682 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5683 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5685 static const int coeff_abs_level_m1_offset[6] = {
5686 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* scan-position -> context offset for 8x8 significance, frame vs field */
5688 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5689 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5690 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5691 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5692 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5693 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5694 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5695 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5696 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5702 int coeff_count = 0;
5705 int abslevelgt1 = 0;
5707 uint8_t *significant_coeff_ctx_base;
5708 uint8_t *last_coeff_ctx_base;
5709 uint8_t *abs_level_m1_ctx_base;
/* work on a stack copy of the CABAC state; CC abstracts over the choice */
5712 #define CABAC_ON_STACK
5714 #ifdef CABAC_ON_STACK
5717 cc.range = h->cabac.range;
5718 cc.low = h->cabac.low;
5719 cc.bytestream= h->cabac.bytestream;
5721 #define CC &h->cabac
5725 /* cat: 0-> DC 16x16 n = 0
5726 * 1-> AC 16x16 n = luma4x4idx
5727 * 2-> Luma4x4 n = luma4x4idx
5728 * 3-> DC Chroma n = iCbCr
5729 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5730 * 5-> Luma8x8 n = 4 * luma8x8idx
5733 /* read coded block flag */
5735 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients: clear the nnz cache entry and restore CABAC state */
5736 if( cat == 1 || cat == 2 )
5737 h->non_zero_count_cache[scan8[n]] = 0;
5739 h->non_zero_count_cache[scan8[16+n]] = 0;
5740 #ifdef CABAC_ON_STACK
5741 h->cabac.range = cc.range ;
5742 h->cabac.low = cc.low ;
5743 h->cabac.bytestream= cc.bytestream;
5749 significant_coeff_ctx_base = h->cabac_state
5750 + significant_coeff_flag_offset[MB_FIELD][cat];
5751 last_coeff_ctx_base = h->cabac_state
5752 + last_coeff_flag_offset[MB_FIELD][cat];
5753 abs_level_m1_ctx_base = h->cabac_state
5754 + coeff_abs_level_m1_offset[cat];
/* significance-map loop: records indices of non-zero coeffs; stops at the
 * "last" flag or at the final position (which is implicitly significant) */
5757 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5758 for(last= 0; last < coefs; last++) { \
5759 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5760 if( get_cabac( CC, sig_ctx )) { \
5761 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5762 index[coeff_count++] = last; \
5763 if( get_cabac( CC, last_ctx ) ) { \
5769 if( last == max_coeff -1 ) {\
5770 index[coeff_count++] = last;\
5772 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths when available; C macro otherwise */
5773 #if defined(ARCH_X86) && defined(CONFIG_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5774 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5776 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5778 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5780 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5783 assert(coeff_count > 0);
/* record non-zero status: packed cbp bits for DC, nnz cache otherwise */
5786 h->cbp_table[mb_xy] |= 0x100;
5787 else if( cat == 1 || cat == 2 )
5788 h->non_zero_count_cache[scan8[n]] = coeff_count;
5790 h->cbp_table[mb_xy] |= 0x40 << n;
5792 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5795 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* level decoding, highest scan position first */
5798 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5799 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5800 int j= scantable[index[coeff_count]];
5802 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: just the sign (with dequant when qmul is provided) */
5804 block[j] = get_cabac_bypass_sign( CC, -1);
5806 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
5812 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5813 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* |level| >= 15: bypass-coded exp-Golomb escape */
5817 if( coeff_abs >= 15 ) {
5819 while( get_cabac_bypass( CC ) ) {
5825 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5831 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5832 else block[j] = coeff_abs;
5834 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5835 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* write the (possibly advanced) local CABAC state back */
5841 #ifdef CABAC_ON_STACK
5842 h->cabac.range = cc.range ;
5843 h->cabac.low = cc.low ;
5844 h->cabac.bytestream= cc.bytestream;
/* Fill h->top_mb_xy / h->left_mb_xy[0] with the raster indices of the
 * above and left neighbour MBs. The default (frame) neighbours are set
 * first; the MBAFF branch then re-derives them at MB-pair granularity
 * when the current and neighbour pairs differ in field/frame coding.
 * NOTE(review): the enclosing FRAME_MBAFF if, the top_mb_xy condition
 * expression head and several braces are elided from this view. */
5849 static inline void compute_mb_neighbors(H264Context *h)
5851 MpegEncContext * const s = &h->s;
5852 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5853 h->top_mb_xy = mb_xy - s->mb_stride;
5854 h->left_mb_xy[0] = mb_xy - 1;
5856 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5857 const int top_pair_xy = pair_xy - s->mb_stride;
5858 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5859 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5860 const int curr_mb_frame_flag = !MB_FIELD;
5861 const int bottom = (s->mb_y & 1);
5863 ? !curr_mb_frame_flag // bottom macroblock
5864 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5866 h->top_mb_xy -= s->mb_stride;
/* mixed field/frame pairs: the left neighbour is the pair's top MB */
5868 if (left_mb_frame_flag != curr_mb_frame_flag) {
5869 h->left_mb_xy[0] = pair_xy - 1;
/* (doxygen header continues an elided opening comment)
 * Top-level CABAC macroblock decode: skip flags (with MBAFF pair handling),
 * mb_type, intra prediction modes or inter refs/MVs, cbp, transform size,
 * and finally residuals via decode_cabac_residual. Writes back per-MB state
 * (mb_type, qscale, non_zero_count, motion).
 * NOTE(review): this whole function has many elided lines (declarations,
 * else/brace lines, MBAFF ref_count restore paths) — comments below only
 * describe what the visible lines establish. */
5876 * decodes a macroblock
5877 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5879 static int decode_mb_cabac(H264Context *h) {
5880 MpegEncContext * const s = &h->s;
5881 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5882 int mb_type, partition_count, cbp = 0;
5883 int dct8x8_allowed= h->pps.transform_8x8_mode;
5885 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5887 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (P/B slices only) --- */
5888 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5890 /* a skipped mb needs the aff flag from the following mb */
5891 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5892 predict_field_decoding_flag(h);
5893 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5894 skip = h->next_mb_skipped;
5896 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5897 /* read skip flags */
5899 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5900 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* top MB of the pair skipped: peek the bottom MB's skip flag now */
5901 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5902 if(h->next_mb_skipped)
5903 predict_field_decoding_flag(h);
5905 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* reset per-MB state for the skipped MB */
5910 h->cbp_table[mb_xy] = 0;
5911 h->chroma_pred_mode_table[mb_xy] = 0;
5912 h->last_qscale_diff = 0;
5919 if( (s->mb_y&1) == 0 )
5921 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5923 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5925 h->prev_mb_skipped = 0;
/* --- mb_type --- */
5927 compute_mb_neighbors(h);
5928 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5929 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5933 if( h->slice_type == B_TYPE ) {
5935 partition_count= b_mb_type_info[mb_type].partition_count;
5936 mb_type= b_mb_type_info[mb_type].type;
5939 goto decode_intra_mb;
5941 } else if( h->slice_type == P_TYPE ) {
5943 partition_count= p_mb_type_info[mb_type].partition_count;
5944 mb_type= p_mb_type_info[mb_type].type;
5947 goto decode_intra_mb;
5950 assert(h->slice_type == I_TYPE);
/* intra path: cbp and 16x16 pred mode come packed in the mb_type index */
5952 partition_count = 0;
5953 cbp= i_mb_type_info[mb_type].cbp;
5954 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5955 mb_type= i_mb_type_info[mb_type].type;
5958 mb_type |= MB_TYPE_INTERLACED;
5960 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow, bypassing CABAC --- */
5962 if(IS_INTRA_PCM(mb_type)) {
5966 // We assume these blocks are very rare so we do not optimize it.
5967 // FIXME The two following lines get the bitstream position in the cabac
5968 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5969 ptr= h->cabac.bytestream;
5970 if(h->cabac.low&0x1) ptr--;
5972 if(h->cabac.low&0x1FF) ptr--;
5975 // The pixels are stored in the same order as levels in h->mb array.
5976 for(y=0; y<16; y++){
5977 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5978 for(x=0; x<16; x++){
5979 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5980 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5984 const int index= 256 + 4*(y&3) + 32*(y>>2);
5986 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5987 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5991 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5993 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5994 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC engine after the raw PCM bytes */
5998 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6000 // All blocks are present
6001 h->cbp_table[mb_xy] = 0x1ef;
6002 h->chroma_pred_mode_table[mb_xy] = 0;
6003 // In deblocking, the quantizer is 0
6004 s->current_picture.qscale_table[mb_xy]= 0;
6005 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6006 // All coeffs are present
6007 memset(h->non_zero_count[mb_xy], 16, 16);
6008 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF: ref counts are doubled while decoding (restored near the end) */
6013 h->ref_count[0] <<= 1;
6014 h->ref_count[1] <<= 1;
6017 fill_caches(h, mb_type, 0);
/* --- prediction info: intra modes or inter refs/MVs --- */
6019 if( IS_INTRA( mb_type ) ) {
6021 if( IS_INTRA4x4( mb_type ) ) {
6022 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6023 mb_type |= MB_TYPE_8x8DCT;
/* 8x8 transform: one pred mode per 8x8 block, replicated over 4x4 cells */
6024 for( i = 0; i < 16; i+=4 ) {
6025 int pred = pred_intra_mode( h, i );
6026 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6027 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6030 for( i = 0; i < 16; i++ ) {
6031 int pred = pred_intra_mode( h, i );
6032 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6034 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6037 write_back_intra_pred_mode(h);
6038 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6040 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6041 if( h->intra16x16_pred_mode < 0 ) return -1;
6043 h->chroma_pred_mode_table[mb_xy] =
6044 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6046 pred_mode= check_intra_pred_mode( h, pred_mode );
6047 if( pred_mode < 0 ) return -1;
6048 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-partitioned inter MB --- */
6049 } else if( partition_count == 4 ) {
6050 int i, j, sub_partition_count[4], list, ref[2][4];
6052 if( h->slice_type == B_TYPE ) {
6053 for( i = 0; i < 4; i++ ) {
6054 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6055 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6056 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6058 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6059 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6060 pred_direct_motion(h, &mb_type);
6061 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
/* mark direct 8x8 blocks so ref-idx context decoding can skip them */
6062 for( i = 0; i < 4; i++ )
6063 if( IS_DIRECT(h->sub_mb_type[i]) )
6064 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6068 for( i = 0; i < 4; i++ ) {
6069 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6070 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6071 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 block, then motion vectors per partition */
6075 for( list = 0; list < h->list_count; list++ ) {
6076 for( i = 0; i < 4; i++ ) {
6077 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6078 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6079 if( h->ref_count[list] > 1 )
6080 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6086 h->ref_cache[list][ scan8[4*i]+1 ]=
6087 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6092 dct8x8_allowed = get_dct8x8_allowed(h);
6094 for(list=0; list<h->list_count; list++){
6096 if(IS_DIRECT(h->sub_mb_type[i])){
6097 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6100 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6102 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6103 const int sub_mb_type= h->sub_mb_type[i];
6104 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6105 for(j=0; j<sub_partition_count[i]; j++){
6108 const int index= 4*i + block_width*j;
6109 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6110 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6111 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6113 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6114 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6115 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the decoded mv/mvd over the 4x4 cells of the sub-partition */
6117 if(IS_SUB_8X8(sub_mb_type)){
6119 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6121 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6124 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6126 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6127 }else if(IS_SUB_8X4(sub_mb_type)){
6128 mv_cache[ 1 ][0]= mx;
6129 mv_cache[ 1 ][1]= my;
6131 mvd_cache[ 1 ][0]= mx - mpx;
6132 mvd_cache[ 1 ][1]= my - mpy;
6133 }else if(IS_SUB_4X8(sub_mb_type)){
6134 mv_cache[ 8 ][0]= mx;
6135 mv_cache[ 8 ][1]= my;
6137 mvd_cache[ 8 ][0]= mx - mpx;
6138 mvd_cache[ 8 ][1]= my - mpy;
6140 mv_cache[ 0 ][0]= mx;
6141 mv_cache[ 0 ][1]= my;
6143 mvd_cache[ 0 ][0]= mx - mpx;
6144 mvd_cache[ 0 ][1]= my - mpy;
/* list not used for this block: zero the packed mv/mvd cache rows */
6147 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6148 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6149 p[0] = p[1] = p[8] = p[9] = 0;
6150 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B direct 16x16 --- */
6154 } else if( IS_DIRECT(mb_type) ) {
6155 pred_direct_motion(h, &mb_type);
6156 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6157 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6158 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- whole-MB inter partitions: 16x16, 16x8, 8x16 --- */
6160 int list, mx, my, i, mpx, mpy;
6161 if(IS_16X16(mb_type)){
6162 for(list=0; list<h->list_count; list++){
6163 if(IS_DIR(mb_type, 0, list)){
6164 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6165 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6167 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
6169 for(list=0; list<h->list_count; list++){
6170 if(IS_DIR(mb_type, 0, list)){
6171 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6173 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6174 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6175 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6177 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6178 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6180 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6183 else if(IS_16X8(mb_type)){
6184 for(list=0; list<h->list_count; list++){
6186 if(IS_DIR(mb_type, i, list)){
6187 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6188 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6190 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6193 for(list=0; list<h->list_count; list++){
6195 if(IS_DIR(mb_type, i, list)){
6196 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6197 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6198 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6199 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6201 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6202 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6204 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6205 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6210 assert(IS_8X16(mb_type));
6211 for(list=0; list<h->list_count; list++){
6213 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6214 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6215 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6217 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6220 for(list=0; list<h->list_count; list++){
6222 if(IS_DIR(mb_type, i, list)){
6223 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6224 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6225 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6227 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6228 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6229 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6231 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6232 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6239 if( IS_INTER( mb_type ) ) {
6240 h->chroma_pred_mode_table[mb_xy] = 0;
6241 write_back_motion( h, mb_type );
/* --- cbp (intra16x16 carries cbp in the mb_type index instead) --- */
6244 if( !IS_INTRA16x16( mb_type ) ) {
6245 cbp = decode_cabac_mb_cbp_luma( h );
6246 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6249 h->cbp_table[mb_xy] = h->cbp = cbp;
6251 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6252 if( decode_cabac_mb_transform_size( h ) )
6253 mb_type |= MB_TYPE_8x8DCT;
6255 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals --- */
6257 if( cbp || IS_INTRA16x16( mb_type ) ) {
6258 const uint8_t *scan, *scan8x8, *dc_scan;
/* select frame/field scans; _q0 variants apply when qscale is 0 */
6261 if(IS_INTERLACED(mb_type)){
6262 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6263 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6264 dc_scan= luma_dc_field_scan;
6266 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6267 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6268 dc_scan= luma_dc_zigzag_scan;
6271 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6272 if( dqp == INT_MIN ){
6273 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec */
6277 if(((unsigned)s->qscale) > 51){
6278 if(s->qscale<0) s->qscale+= 52;
6279 else s->qscale-= 52;
6281 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6283 if( IS_INTRA16x16( mb_type ) ) {
6285 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6286 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6289 for( i = 0; i < 16; i++ ) {
6290 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6291 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6295 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6299 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6300 if( cbp & (1<<i8x8) ) {
6301 if( IS_8x8DCT(mb_type) ) {
6302 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6303 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6306 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6307 const int index = 4*i8x8 + i4x4;
6308 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6310 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6312 //STOP_TIMER("decode_residual")
6315 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6316 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6323 for( c = 0; c < 2; c++ ) {
6324 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6325 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6332 for( c = 0; c < 2; c++ ) {
6333 for( i = 0; i < 4; i++ ) {
6334 const int index = 16 + 4 * c + i;
6335 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6336 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6341 uint8_t * const nnz= &h->non_zero_count_cache[0];
6342 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6343 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual at all: clear the whole nnz cache */
6346 uint8_t * const nnz= &h->non_zero_count_cache[0];
6347 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6348 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6349 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6350 h->last_qscale_diff = 0;
6353 s->current_picture.qscale_table[mb_xy]= s->qscale;
6354 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling from above */
6357 h->ref_count[0] >>= 1;
6358 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels tall) at pix.
 * bS < 4: delegates to the DSP h264_h_loop_filter_luma with per-edge tc0
 * clipping values; bS == 4 (intra MB edge): the strong filter is run in C
 * over all 16 rows. alpha/beta thresholds come from the spec tables indexed
 * by qp plus the slice offsets (+52 bias for negative indices).
 * NOTE(review): elided lines include the tc[] declaration, the bS<4/==4
 * branch heads, the per-row pix += stride advance and closing braces. */
6365 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6367 const int index_a = qp + h->slice_alpha_c0_offset;
6368 const int alpha = (alpha_table+52)[index_a];
6369 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* normal filter: tc0 per 4-row group, -1 marks "skip" for the DSP code */
6374 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6375 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6377 /* 16px edge length, because bS=4 is triggered by being at
6378 * the edge of an intra MB, so all 4 bS are the same */
6379 for( d = 0; d < 16; d++ ) {
6380 const int p0 = pix[-1];
6381 const int p1 = pix[-2];
6382 const int p2 = pix[-3];
6384 const int q0 = pix[0];
6385 const int q1 = pix[1];
6386 const int q2 = pix[2];
6388 if( FFABS( p0 - q0 ) < alpha &&
6389 FFABS( p1 - p0 ) < beta &&
6390 FFABS( q1 - q0 ) < beta ) {
6392 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* strong filter on the p side (3 taps) when p2 is close to p0 */
6393 if( FFABS( p2 - p0 ) < beta)
6395 const int p3 = pix[-4];
6397 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6398 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6399 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6402 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* strong filter on the q side */
6404 if( FFABS( q2 - q0 ) < beta)
6406 const int q3 = pix[3];
6408 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6409 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6410 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6413 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6417 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6418 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6420 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge. Chroma uses tc0+1 as clip value (0 means
 * "skip this segment") and a separate intra filter for the bS==4 case.
 * NOTE(review): branch/brace lines are elided in this excerpt. */
6426 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6428 const int index_a = qp + h->slice_alpha_c0_offset;
6429 const int alpha = (alpha_table+52)[index_a];
6430 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6435 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6436 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6438 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* MBAFF variant of the vertical luma edge filter: a frame MB may face a field
 * MB pair (or vice versa) across its left edge, so there are 8 boundary
 * strengths and 2 QPs, selected per row depending on MB_FIELD.
 * NOTE(review): several declaration/else/brace lines are elided here. */
6442 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6444 for( i = 0; i < 16; i++, pix += stride) {
/* Map the row index to one of the 8 bS entries (pairing differs for field MBs). */
6450 int bS_index = (i >> 1);
6453 bS_index |= (i & 1);
6456 if( bS[bS_index] == 0 ) {
/* Pick the QP of whichever neighbour this row borders. */
6460 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6461 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6462 alpha = (alpha_table+52)[index_a];
6463 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6465 if( bS[bS_index] < 4 ) {
/* Normal filtering: clip the delta applied to p0/q0 by tc (tc0 grown by
 * one for each of p1/q1 that also gets filtered — growth lines elided). */
6466 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6467 const int p0 = pix[-1];
6468 const int p1 = pix[-2];
6469 const int p2 = pix[-3];
6470 const int q0 = pix[0];
6471 const int q1 = pix[1];
6472 const int q2 = pix[2];
6474 if( FFABS( p0 - q0 ) < alpha &&
6475 FFABS( p1 - p0 ) < beta &&
6476 FFABS( q1 - q0 ) < beta ) {
6480 if( FFABS( p2 - p0 ) < beta ) {
6481 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6484 if( FFABS( q2 - q0 ) < beta ) {
6485 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6489 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6490 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6491 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6492 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4 path: strong filtering, same formulas as filter_mb_edgev. */
6495 const int p0 = pix[-1];
6496 const int p1 = pix[-2];
6497 const int p2 = pix[-3];
6499 const int q0 = pix[0];
6500 const int q1 = pix[1];
6501 const int q2 = pix[2];
6503 if( FFABS( p0 - q0 ) < alpha &&
6504 FFABS( p1 - p0 ) < beta &&
6505 FFABS( q1 - q0 ) < beta ) {
6507 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6508 if( FFABS( p2 - p0 ) < beta)
6510 const int p3 = pix[-4];
6512 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6513 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6514 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6517 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6519 if( FFABS( q2 - q0 ) < beta)
6521 const int q3 = pix[3];
6523 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6524 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6525 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6528 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6532 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6533 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6535 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* MBAFF vertical chroma edge filter: 8 chroma rows, 8 bS values, 2 QPs.
 * Chroma only ever touches p0/q0; tc is tc0+1 as in filter_mb_edgecv.
 * NOTE(review): some declaration/brace lines are elided in this excerpt. */
6540 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6542 for( i = 0; i < 8; i++, pix += stride) {
6550 if( bS[bS_index] == 0 ) {
6554 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6555 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6556 alpha = (alpha_table+52)[index_a];
6557 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6559 if( bS[bS_index] < 4 ) {
6560 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6561 const int p0 = pix[-1];
6562 const int p1 = pix[-2];
6563 const int q0 = pix[0];
6564 const int q1 = pix[1];
6566 if( FFABS( p0 - q0 ) < alpha &&
6567 FFABS( p1 - p0 ) < beta &&
6568 FFABS( q1 - q0 ) < beta ) {
6569 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6571 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6572 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6573 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4: intra-strength chroma filter (no clipping, fixed formulas). */
6576 const int p0 = pix[-1];
6577 const int p1 = pix[-2];
6578 const int q0 = pix[0];
6579 const int q1 = pix[1];
6581 if( FFABS( p0 - q0 ) < alpha &&
6582 FFABS( p1 - p0 ) < beta &&
6583 FFABS( q1 - q0 ) < beta ) {
6585 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6586 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6587 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge: same logic as filter_mb_edgev but the
 * samples across the edge are stride apart, so p/q are addressed via
 * pix_next multiples instead of +-1 offsets.
 * NOTE(review): some declaration/else/brace lines are elided in this excerpt. */
6593 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6595 const int index_a = qp + h->slice_alpha_c0_offset;
6596 const int alpha = (alpha_table+52)[index_a];
6597 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6598 const int pix_next = stride;
/* bS < 4 path: per-segment tc0 clip (-1 disables), then DSP filter. */
6603 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6604 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6606 /* 16px edge length, see filter_mb_edgev */
6607 for( d = 0; d < 16; d++ ) {
6608 const int p0 = pix[-1*pix_next];
6609 const int p1 = pix[-2*pix_next];
6610 const int p2 = pix[-3*pix_next];
6611 const int q0 = pix[0];
6612 const int q1 = pix[1*pix_next];
6613 const int q2 = pix[2*pix_next];
6615 if( FFABS( p0 - q0 ) < alpha &&
6616 FFABS( p1 - p0 ) < beta &&
6617 FFABS( q1 - q0 ) < beta ) {
6619 const int p3 = pix[-4*pix_next];
6620 const int q3 = pix[ 3*pix_next];
/* Strong (bS==4) filtering, same formulas as the vertical case. */
6622 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6623 if( FFABS( p2 - p0 ) < beta) {
6625 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6626 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6627 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6630 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6632 if( FFABS( q2 - q0 ) < beta) {
6634 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6635 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6636 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6639 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6643 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6644 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6646 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge; chroma tc = tc0+1 (0 skips a segment),
 * with the intra DSP filter used for the bS==4 case.
 * NOTE(review): branch/brace lines are elided in this excerpt. */
6653 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6655 const int index_a = qp + h->slice_alpha_c0_offset;
6656 const int alpha = (alpha_table+52)[index_a];
6657 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6662 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6663 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6665 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast per-MB deblocking path: computes all boundary strengths with the DSP
 * h264_loop_filter_strength helper instead of the general per-edge logic in
 * filter_mb(). Falls back to filter_mb() at picture borders, when the DSP
 * helper is unavailable, or when deblocking==2 requires slice-boundary checks.
 * NOTE(review): several brace/else lines are elided in this excerpt. */
6669 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6670 MpegEncContext * const s = &h->s;
6672 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6674 mb_xy = mb_x + mb_y*s->mb_stride;
6676 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength ||
6677 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6678 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6679 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6682 assert(!FRAME_MBAFF);
/* Average QP with the left (qp0) and top (qp1) neighbours, for luma and chroma. */
6684 mb_type = s->current_picture.mb_type[mb_xy];
6685 qp = s->current_picture.qscale_table[mb_xy];
6686 qp0 = s->current_picture.qscale_table[mb_xy-1];
6687 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6688 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
6689 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
6690 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
6691 qp0 = (qp + qp0 + 1) >> 1;
6692 qp1 = (qp + qp1 + 1) >> 1;
6693 qpc0 = (qpc + qpc0 + 1) >> 1;
6694 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this QP threshold the filter provably changes nothing — skip the MB. */
6695 qp_thresh = 15 - h->slice_alpha_c0_offset;
6696 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6697 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MBs always use the fixed strengths bS=4 (MB border) / bS=3 (internal). */
6700 if( IS_INTRA(mb_type) ) {
6701 int16_t bS4[4] = {4,4,4,4};
6702 int16_t bS3[4] = {3,3,3,3};
6703 if( IS_8x8DCT(mb_type) ) {
6704 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6705 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6706 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6707 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6709 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6710 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6711 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6712 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6713 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6714 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6715 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6716 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6718 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6719 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6720 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6721 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6722 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6723 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6724 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6725 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: bS[dir][edge][seg]; bSv aliases the same storage as 4x16bit words
 * so a whole edge can be tested/assigned at once. */
6728 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6729 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6731 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6733 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6735 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6736 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6737 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6738 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6740 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6741 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6742 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6743 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Intra neighbours force bS=4 on the shared edge regardless of the above. */
6745 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6746 bSv[0][0] = 0x0004000400040004ULL;
6747 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6748 bSv[1][0] = 0x0004000400040004ULL;
/* dir 0 = vertical edges, dir 1 = horizontal; edge 0 uses the averaged
 * neighbour QP (qp0/qp1), internal edges use this MB's QP. */
6750 #define FILTER(hv,dir,edge)\
6751 if(bSv[dir][edge]) {\
6752 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6754 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6755 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6761 } else if( IS_8x8DCT(mb_type) ) {
/* General (slow-path) deblocking of one macroblock: computes boundary
 * strengths edge by edge, including the MBAFF frame/field special cases the
 * fast path cannot handle, then calls the edge filters above.
 * NOTE(review): many brace/else/declaration lines are elided in this excerpt;
 * comments below describe only what the visible lines establish. */
6780 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6781 MpegEncContext * const s = &h->s;
6782 const int mb_xy= mb_x + mb_y*s->mb_stride;
6783 const int mb_type = s->current_picture.mb_type[mb_xy];
6784 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6785 int first_vertical_edge_done = 0;
6787 /* FIXME: A given frame may occupy more than one position in
6788 * the reference list. So ref2frm should be populated with
6789 * frame numbers, not indices. */
6790 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6791 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6793 //for sufficiently low qp, filtering wouldn't do anything
6794 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6796 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
6797 int qp = s->current_picture.qscale_table[mb_xy];
6799 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6800 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: the left neighbour pair has the opposite frame/field
 * coding, so the first vertical edge needs 8 bS values and 2 QPs. */
6806 // left mb is in picture
6807 && h->slice_table[mb_xy-1] != 255
6808 // and current and left pair do not have the same interlaced type
6809 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6810 // and left mb is in the same slice if deblocking_filter == 2
6811 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6812 /* First vertical edge is different in MBAFF frames
6813 * There are 8 different bS to compute and 2 different Qp
6815 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6816 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6820 int mb_qp, mbn0_qp, mbn1_qp;
6822 first_vertical_edge_done = 1;
6824 if( IS_INTRA(mb_type) )
6825 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6827 for( i = 0; i < 8; i++ ) {
6828 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6830 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6832 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6833 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6834 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Average the QP with each of the two left-pair MBs (luma and chroma). */
6841 mb_qp = s->current_picture.qscale_table[mb_xy];
6842 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6843 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6844 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6845 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
6846 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
6847 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6848 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
6849 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
6852 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6853 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6854 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6855 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6856 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6858 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6859 for( dir = 0; dir < 2; dir++ )
6862 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6863 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6864 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6866 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6867 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6868 // how often to recheck mv-based bS when iterating between edges
6869 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6870 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6871 // how often to recheck mv-based bS when iterating along each edge
6872 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6874 if (first_vertical_edge_done) {
6876 first_vertical_edge_done = 0;
6879 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6882 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6883 && !IS_INTERLACED(mb_type)
6884 && IS_INTERLACED(mbm_type)
6886 // This is a special case in the norm where the filtering must
6887 // be done twice (one each of the field) even if we are in a
6888 // frame macroblock.
6890 static const int nnz_idx[4] = {4,5,6,3};
6891 unsigned int tmp_linesize = 2 * linesize;
6892 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6893 int mbn_xy = mb_xy - 2 * s->mb_stride;
6898 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6899 if( IS_INTRA(mb_type) ||
6900 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6901 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6903 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6904 for( i = 0; i < 4; i++ ) {
6905 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6906 mbn_nnz[nnz_idx[i]] != 0 )
6912 // Do not use s->qscale as luma quantizer because it has not the same
6913 // value in IPCM macroblocks.
6914 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6915 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6916 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6917 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6918 chroma_qp = ( h->chroma_qp +
6919 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6920 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6921 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Main loop: edge 0 borders the neighbour MB (mbm_xy), edges 1..3 are internal. */
6928 for( edge = start; edge < edges; edge++ ) {
6929 /* mbn_xy: neighbor macroblock */
6930 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6931 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6935 if( (edge&1) && IS_8x8DCT(mb_type) )
6938 if( IS_INTRA(mb_type) ||
6939 IS_INTRA(mbn_type) ) {
6942 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6943 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6952 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Non-intra: bS from residual (nnz) and motion differences. */
6957 if( edge & mask_edge ) {
6958 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6961 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6962 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6965 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6966 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6967 int bn_idx= b_idx - (dir ? 8:1);
6969 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6970 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6971 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6972 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6974 bS[0] = bS[1] = bS[2] = bS[3] = v;
6980 for( i = 0; i < 4; i++ ) {
6981 int x = dir == 0 ? edge : i;
6982 int y = dir == 0 ? i : edge;
6983 int b_idx= 8 + 4 + x + 8*y;
6984 int bn_idx= b_idx - (dir ? 8:1);
6986 if( h->non_zero_count_cache[b_idx] != 0 ||
6987 h->non_zero_count_cache[bn_idx] != 0 ) {
6993 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6994 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6995 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6996 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7004 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7009 // Do not use s->qscale as luma quantizer because it has not the same
7010 // value in IPCM macroblocks.
7011 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7012 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7013 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7014 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
7016 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7017 if( (edge&1) == 0 ) {
7018 int chroma_qp = ( h->chroma_qp +
7019 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7020 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7021 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7024 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7025 if( (edge&1) == 0 ) {
7026 int chroma_qp = ( h->chroma_qp +
7027 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7028 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7029 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decode every macroblock of the current slice, dispatching to the CABAC or
 * CAVLC entropy path depending on pps.cabac; reports decoded/errored MB
 * ranges to the error concealment via ff_er_add_slice().
 * Fix: line 7194 contained stray '?' characters ("s->?gb", "s->gb?.")
 * — apparently transmission corruption — restored to match the parallel
 * tests on lines 7182/7195 in the same branch.
 * NOTE(review): many brace/else lines are elided in this excerpt. */
7036 static int decode_slice(H264Context *h){
7037 MpegEncContext * const s = &h->s;
7038 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7042 if( h->pps.cabac ) {
/* CABAC path: byte-align, then hand the remaining bytes to the CABAC reader. */
7046 align_get_bits( &s->gb );
7049 ff_init_cabac_states( &h->cabac);
7050 ff_init_cabac_decoder( &h->cabac,
7051 s->gb.buffer + get_bits_count(&s->gb)/8,
7052 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7053 /* calculate pre-state */
7054 for( i= 0; i < 460; i++ ) {
7056 if( h->slice_type == I_TYPE )
7057 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7059 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte: states 1..63 -> MPS 0, 64..126 -> MPS 1. */
7062 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7064 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7069 int ret = decode_mb_cabac(h);
7071 //STOP_TIMER("decode_mb_cabac")
7073 if(ret>=0) hl_decode_mb(h);
7075 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7078 if(ret>=0) ret = decode_mb_cabac(h);
7080 if(ret>=0) hl_decode_mb(h);
7083 eos = get_cabac_terminate( &h->cabac );
7085 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7086 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7087 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7091 if( ++s->mb_x >= s->mb_width ) {
7093 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7100 if( eos || s->mb_y >= s->mb_height ) {
7101 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7102 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path. */
7109 int ret = decode_mb_cavlc(h);
7111 if(ret>=0) hl_decode_mb(h);
7113 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7115 ret = decode_mb_cavlc(h);
7117 if(ret>=0) hl_decode_mb(h);
7122 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7123 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7128 if(++s->mb_x >= s->mb_width){
7130 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7135 if(s->mb_y >= s->mb_height){
7136 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* An exact bit-count match means the slice ended cleanly; otherwise error. */
7138 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7139 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7143 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7150 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7151 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7152 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7153 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7157 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Historical third path (note: uses s->gb without '&', consistent with the
 * originally disabled code it belongs to). */
7166 for(;s->mb_y < s->mb_height; s->mb_y++){
7167 for(;s->mb_x < s->mb_width; s->mb_x++){
7168 int ret= decode_mb(h);
7173 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7174 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7179 if(++s->mb_x >= s->mb_width){
7181 if(++s->mb_y >= s->mb_height){
7182 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7183 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7187 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7194 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7195 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7196 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7200 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7207 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7210 return -1; //not reached
/* Parse an SEI "unregistered user data" payload: copies up to 271 bytes,
 * then sscanf's past the 16-byte UUID for an "x264 - core %d" tag to record
 * the encoder build (used elsewhere for bug workarounds); remaining payload
 * bytes are skipped.
 * NOTE(review): the line that NUL-terminates user_data after the copy loop
 * appears elided from this excerpt — confirm against the full source. */
7213 static int decode_unregistered_user_data(H264Context *h, int size){
7214 MpegEncContext * const s = &h->s;
7215 uint8_t user_data[16+256];
7221 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7222 user_data[i]= get_bits(&s->gb, 8);
7226 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7227 if(e==1 && build>=0)
7228 h->x264_build= build;
7230 if(s->avctx->debug & FF_DEBUG_BUGS)
7231 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond the local buffer capacity. */
7234 skip_bits(&s->gb, 8);
/* Parse the SEI NAL unit: each message carries a type and a size, both coded
 * as runs of 0xFF bytes plus a final byte (ff_byte escaping per the spec).
 * Only unregistered user data is interpreted; other payloads are skipped. */
7239 static int decode_sei(H264Context *h){
7240 MpegEncContext * const s = &h->s;
7242 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* Accumulate type: each 0xFF byte adds 255 and continues the run. */
7247 type+= show_bits(&s->gb, 8);
7248 }while(get_bits(&s->gb, 8) == 255);
/* Same escaping scheme for the payload size (in bytes). */
7252 size+= show_bits(&s->gb, 8);
7253 }while(get_bits(&s->gb, 8) == 255);
7257 if(decode_unregistered_user_data(h, size) < 0)
7261 skip_bits(&s->gb, 8*size);
7264 //FIXME check bits here
7265 align_get_bits(&s->gb);
/* Parse and discard HRD (hypothetical reference decoder) parameters from the
 * VUI (H.264 Annex E); nothing is stored, the fields are only consumed to
 * keep the bitstream reader in sync. */
7271 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7272 MpegEncContext * const s = &h->s;
7274 cpb_count = get_ue_golomb(&s->gb) + 1;
7275 get_bits(&s->gb, 4); /* bit_rate_scale */
7276 get_bits(&s->gb, 4); /* cpb_size_scale */
7277 for(i=0; i<cpb_count; i++){
7278 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7279 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7280 get_bits1(&s->gb); /* cbr_flag */
7282 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7283 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7284 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7285 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse VUI (video usability information, H.264 Annex E) from the SPS.
 * Stores sample aspect ratio, timing info, and the bitstream restriction's
 * num_reorder_frames; other fields are read and discarded.
 * NOTE(review): some brace/return lines are elided in this excerpt. */
7288 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7289 MpegEncContext * const s = &h->s;
7290 int aspect_ratio_info_present_flag;
7291 unsigned int aspect_ratio_idc;
7292 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7294 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7296 if( aspect_ratio_info_present_flag ) {
7297 aspect_ratio_idc= get_bits(&s->gb, 8);
7298 if( aspect_ratio_idc == EXTENDED_SAR ) {
7299 sps->sar.num= get_bits(&s->gb, 16);
7300 sps->sar.den= get_bits(&s->gb, 16);
7301 }else if(aspect_ratio_idc < 14){
7302 sps->sar= pixel_aspect[aspect_ratio_idc];
7304 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7311 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7313 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7314 get_bits1(&s->gb); /* overscan_appropriate_flag */
7317 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7318 get_bits(&s->gb, 3); /* video_format */
7319 get_bits1(&s->gb); /* video_full_range_flag */
7320 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7321 get_bits(&s->gb, 8); /* colour_primaries */
7322 get_bits(&s->gb, 8); /* transfer_characteristics */
7323 get_bits(&s->gb, 8); /* matrix_coefficients */
7327 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7328 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7329 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7332 sps->timing_info_present_flag = get_bits1(&s->gb);
7333 if(sps->timing_info_present_flag){
7334 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7335 sps->time_scale = get_bits_long(&s->gb, 32);
7336 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice (NAL and VCL variants). */
7339 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7340 if(nal_hrd_parameters_present_flag)
7341 decode_hrd_parameters(h, sps);
7342 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7343 if(vcl_hrd_parameters_present_flag)
7344 decode_hrd_parameters(h, sps);
7345 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7346 get_bits1(&s->gb); /* low_delay_hrd_flag */
7347 get_bits1(&s->gb); /* pic_struct_present_flag */
7349 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7350 if(sps->bitstream_restriction_flag){
7351 unsigned int num_reorder_frames;
7352 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7353 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7354 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7355 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7356 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7357 num_reorder_frames= get_ue_golomb(&s->gb);
7358 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Sanity-check before storing: 16 is the DPB upper bound. */
7360 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7361 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7365 sps->num_reorder_frames= num_reorder_frames;
/* Read one scaling list (16 or 64 entries) from the bitstream. Three cases:
 * flag==0 -> copy fallback_list (prediction from the previous list/SPS);
 * first delta yields next==0 -> use the JVT default list; otherwise decode
 * delta-coded values in zigzag order, repeating the last value once the
 * stream signals no more changes.
 * NOTE(review): else/break/brace lines are elided in this excerpt. */
7371 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7372 const uint8_t *jvt_list, const uint8_t *fallback_list){
7373 MpegEncContext * const s = &h->s;
7374 int i, last = 8, next = 8;
7375 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7376 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7377 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7379 for(i=0;i<size;i++){
7381 next = (last + get_se_golomb(&s->gb)) & 0xff;
7382 if(!i && !next){ /* matrix not written, we use the preset one */
7383 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7386 last = factors[scan[i]] = next ? next : last; /* next==0 repeats last */
/* Read the full set of scaling matrices for an SPS or PPS: six 4x4 lists and
 * (when 8x8 transform is possible) two 8x8 lists. Fallbacks chain each list
 * to the previous one of the same class, and PPS lists fall back to the SPS
 * lists when those were present (fallback_sps). */
7390 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7391 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7392 MpegEncContext * const s = &h->s;
7393 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7394 const uint8_t *fallback[4] = {
7395 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7396 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7397 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7398 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7400 if(get_bits1(&s->gb)){ /* scaling_matrix present in this parameter set */
7401 sps->scaling_matrix_present |= is_sps;
7402 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7403 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7404 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7405 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7406 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7407 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7408 if(is_sps || pps->transform_8x8_mode){
7409 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7410 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7412 } else if(fallback_sps) {
/* Nothing coded: inherit the SPS matrices wholesale. */
7413 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7414 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7419  * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Range-checks 'id' against 'max' and zero-allocates vec[id] on first use.
 * NOTE(review): the return type line and error-return paths are elided in
 * this excerpt; presumably returns vec[id] or NULL on failure — confirm
 * against the full source. */
7422 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7423                     const size_t size, const char *name)
7426         av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7431         vec[id] = av_mallocz(size);
7433             av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Decode a Sequence Parameter Set NAL unit (spec 7.3.2.1) from s->gb into
 * h->sps_buffers[sps_id].
 * NOTE(review): several error "return -1" paths and closing braces are
 * elided in this excerpt; comments describe only the visible statements.
 */
7438 static inline int decode_seq_parameter_set(H264Context *h){
7439     MpegEncContext * const s = &h->s;
7440     int profile_idc, level_idc;
7441     unsigned int sps_id, tmp, mb_width, mb_height;
7445     profile_idc= get_bits(&s->gb, 8);
7446     get_bits1(&s->gb);   //constraint_set0_flag
7447     get_bits1(&s->gb);   //constraint_set1_flag
7448     get_bits1(&s->gb);   //constraint_set2_flag
7449     get_bits1(&s->gb);   //constraint_set3_flag
7450     get_bits(&s->gb, 4); // reserved
7451     level_idc= get_bits(&s->gb, 8);
7452     sps_id= get_ue_golomb(&s->gb);
7454     sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7458     sps->profile_idc= profile_idc;
7459     sps->level_idc= level_idc;
     /* High profile (and above) adds chroma format, bit depth, transform
      * bypass and optional scaling matrices */
7461     if(sps->profile_idc >= 100){ //high profile
7462         if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7463             get_bits1(&s->gb);  //residual_color_transform_flag
7464         get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
7465         get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
7466         sps->transform_bypass = get_bits1(&s->gb);
7467         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7469         sps->scaling_matrix_present = 0;
7471     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7472     sps->poc_type= get_ue_golomb(&s->gb);
     /* Picture order count: type 0 codes poc_lsb explicitly, type 1 derives
      * POC from per-cycle offsets, type 2 ties POC to decode order */
7474     if(sps->poc_type == 0){ //FIXME #define
7475         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7476     } else if(sps->poc_type == 1){//FIXME #define
7477         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7478         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7479         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7480         tmp= get_ue_golomb(&s->gb);
         /* bound the cycle length by the fixed offset_for_ref_frame array */
7482         if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7483             av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7486         sps->poc_cycle_length= tmp;
7488         for(i=0; i<sps->poc_cycle_length; i++)
7489             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7490     }else if(sps->poc_type != 2){
7491         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7495     tmp= get_ue_golomb(&s->gb);
     /* ref frames are stored in the shared picture table, hence the cap */
7496     if(tmp > MAX_PICTURE_COUNT-2){
7497         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7499     sps->ref_frame_count= tmp;
7500     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7501     mb_width= get_ue_golomb(&s->gb) + 1;
7502     mb_height= get_ue_golomb(&s->gb) + 1;
     /* guard 16*mb_{width,height} against integer overflow before the
      * generic dimension sanity check */
7503     if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7504        avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7505         av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7508     sps->mb_width = mb_width;
7509     sps->mb_height= mb_height;
7511     sps->frame_mbs_only_flag= get_bits1(&s->gb);
7512     if(!sps->frame_mbs_only_flag)
7513         sps->mb_aff= get_bits1(&s->gb);
7517     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7519 #ifndef ALLOW_INTERLACE
7521         av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7523     if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7524         av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7526     sps->crop= get_bits1(&s->gb);
7528         sps->crop_left  = get_ue_golomb(&s->gb);
7529         sps->crop_right = get_ue_golomb(&s->gb);
7530         sps->crop_top   = get_ue_golomb(&s->gb);
7531         sps->crop_bottom= get_ue_golomb(&s->gb);
         /* left/top cropping shifts the picture origin, which this decoder
          * does not fully handle — warn but continue */
7532         if(sps->crop_left || sps->crop_top){
7533             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7539         sps->crop_bottom= 0;
7542     sps->vui_parameters_present_flag= get_bits1(&s->gb);
7543     if( sps->vui_parameters_present_flag )
7544         decode_vui_parameters(h, sps);
7546     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7547         av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7548                sps_id, sps->profile_idc, sps->level_idc,
7550                sps->ref_frame_count,
7551                sps->mb_width, sps->mb_height,
7552                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7553                sps->direct_8x8_inference_flag ? "8B8" : "",
7554                sps->crop_left, sps->crop_right,
7555                sps->crop_top, sps->crop_bottom,
7556                sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Decode a Picture Parameter Set NAL unit (spec 7.3.2.2) from s->gb into
 * h->pps_buffers[pps_id]. bit_length is the RBSP size in bits, used to
 * detect the optional trailing high-profile fields.
 * NOTE(review): error "return" paths and some braces are elided in this
 * excerpt.
 */
7562 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7563     MpegEncContext * const s = &h->s;
7564     unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7567     pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
     /* the referenced SPS must already have been decoded */
7571     tmp= get_ue_golomb(&s->gb);
7572     if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7573         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7578     pps->cabac= get_bits1(&s->gb);
7579     pps->pic_order_present= get_bits1(&s->gb);
7580     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
     /* Flexible Macroblock Ordering: only the map type is read; the
      * per-type syntax below is kept as spec text because FMO is unsupported */
7581     if(pps->slice_group_count > 1 ){
7582         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7583         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7584         switch(pps->mb_slice_group_map_type){
7587 |           for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7588 |               run_length[ i ]                             |1  |ue(v)   |
7593 |           for( i = 0; i < num_slice_groups_minus1; i++ ) |   |        |
7595 |               top_left_mb[ i ]                            |1  |ue(v)   |
7596 |               bottom_right_mb[ i ]                        |1  |ue(v)   |
7604 |           slice_group_change_direction_flag               |1  |u(1)    |
7605 |           slice_group_change_rate_minus1                  |1  |ue(v)   |
7610 |           slice_group_id_cnt_minus1                       |1  |ue(v)   |
7611 |           for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
7613 |               slice_group_id[ i ]                         |1  |u(v)    |
7618     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7619     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
     /* clamp insane reference counts to 1 instead of failing outright */
7620     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7621         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7622         pps->ref_count[0]= pps->ref_count[1]= 1;
7626     pps->weighted_pred= get_bits1(&s->gb);
7627     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7628     pps->init_qp= get_se_golomb(&s->gb) + 26;
7629     pps->init_qs= get_se_golomb(&s->gb) + 26;
7630     pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7631     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7632     pps->constrained_intra_pred= get_bits1(&s->gb);
7633     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7635     pps->transform_8x8_mode= 0;
7636     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
     /* default all scaling factors to flat 16 before optional matrices */
7637     memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7638     memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
     /* trailing high-profile extension is present iff bits remain */
7640     if(get_bits_count(&s->gb) < bit_length){
7641         pps->transform_8x8_mode= get_bits1(&s->gb);
7642         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7643         get_se_golomb(&s->gb);  //second_chroma_qp_index_offset
7646     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7647         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7648                pps_id, pps->sps_id,
7649                pps->cabac ? "CABAC" : "CAVLC",
7650                pps->slice_group_count,
7651                pps->ref_count[0], pps->ref_count[1],
7652                pps->weighted_pred ? "weighted" : "",
7653                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7654                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7655                pps->constrained_intra_pred ? "CONSTR" : "",
7656                pps->redundant_pic_cnt_present ? "REDU" : "",
7657                pps->transform_8x8_mode ? "8x8DCT" : ""
/**
 * Split buf into NAL units and dispatch each to the appropriate decoder
 * (slice header/data, SPS, PPS, SEI, ...).
 * Handles both AVC (h->is_avc: nal_length_size-byte length prefixes) and
 * Annex-B (00 00 01 start codes) framing.
 * NOTE(review): the enclosing NAL loop header and several returns are
 * elided in this excerpt.
 */
7664 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7665     MpegEncContext * const s = &h->s;
7666     AVCodecContext * const avctx= s->avctx;
     /* debug hexdump of the first bytes of the input buffer */
7670         for(i=0; i<50; i++){
7671             av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* when not fed partial chunks, each call starts a fresh picture */
7674     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7676         s->current_picture_ptr= NULL;
7687             if(buf_index >= buf_size) break;
         /* AVC framing: big-endian length prefix of h->nal_length_size bytes */
7689             for(i = 0; i < h->nal_length_size; i++)
7690                 nalsize = (nalsize << 8) | buf[buf_index++];
7691             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7696                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7701             // start code prefix search
7702             for(; buf_index + 3 < buf_size; buf_index++){
7703                 // This should always succeed in the first iteration.
7704                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7708             if(buf_index+3 >= buf_size) break;
     /* unescape the RBSP (removes emulation-prevention bytes) */
7713         ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7714         if (ptr==NULL || dst_length < 0){
     /* strip trailing zero bytes, then drop the rbsp_stop_one_bit */
7717         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7719         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7721         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7722             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7725         if (h->is_avc && (nalsize != consumed))
7726             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7728         buf_index += consumed;
     /* skip non-reference NALs when the caller asked to drop them */
7730         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7731            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7734         switch(h->nal_unit_type){
7736             idr(h); //FIXME ensure we don't loose some frames if there is reordering
         /* regular (non-partitioned) slice */
7738             init_get_bits(&s->gb, ptr, bit_length);
7740             h->inter_gb_ptr= &s->gb;
7741             s->data_partitioning = 0;
7743             if(decode_slice_header(h) < 0){
7744                 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7747             s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
         /* decode the slice only if it survives all skip/discard filters */
7748             if(h->redundant_pic_count==0 && s->hurry_up < 5
7749                && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7750                && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
7751                && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7752                && avctx->skip_frame < AVDISCARD_ALL)
         /* data partition A: carries the slice header */
7756             init_get_bits(&s->gb, ptr, bit_length);
7758             h->inter_gb_ptr= NULL;
7759             s->data_partitioning = 1;
7761             if(decode_slice_header(h) < 0){
7762                 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
         /* data partition B: intra residual */
7766             init_get_bits(&h->intra_gb, ptr, bit_length);
7767             h->intra_gb_ptr= &h->intra_gb;
         /* data partition C: inter residual */
7770             init_get_bits(&h->inter_gb, ptr, bit_length);
7771             h->inter_gb_ptr= &h->inter_gb;
7773             if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7774                && s->context_initialized
7776                && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7777                && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
7778                && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7779                && avctx->skip_frame < AVDISCARD_ALL)
7783             init_get_bits(&s->gb, ptr, bit_length);
7787             init_get_bits(&s->gb, ptr, bit_length);
7788             decode_seq_parameter_set(h);
7790             if(s->flags& CODEC_FLAG_LOW_DELAY)
7793             if(avctx->has_b_frames < 2)
7794                 avctx->has_b_frames= !s->low_delay;
7797             init_get_bits(&s->gb, ptr, bit_length);
7799             decode_picture_parameter_set(h, bit_length);
7803         case NAL_END_SEQUENCE:
7804         case NAL_END_STREAM:
7805         case NAL_FILLER_DATA:
7807         case NAL_AUXILIARY_SLICE:
7810             av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7818  * returns the number of bytes consumed for building the current frame
/* With CODEC_FLAG_TRUNCATED the parser may have buffered part of the input,
 * so the consumed count is adjusted by the parse context's last_index.
 * NOTE(review): the actual return statements are elided in this excerpt. */
7820 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7821     if(s->flags&CODEC_FLAG_TRUNCATED){
7822         pos -= s->parse_context.last_index;
7823         if(pos<0) pos=0; // FIXME remove (unneeded?)
7827         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7828         if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback: consumes one input buffer, decodes the
 * contained NAL units and, once a picture completes, reorders delayed
 * pictures into display order and returns one in *pict.
 * Returns the number of bytes consumed (via get_consumed_bytes).
 * NOTE(review): many lines (returns, braces, some declarations) are elided
 * in this excerpt; comments describe only the visible code.
 */
7834 static int decode_frame(AVCodecContext *avctx,
7835                              void *data, int *data_size,
7836                              uint8_t *buf, int buf_size)
7838     H264Context *h = avctx->priv_data;
7839     MpegEncContext *s = &h->s;
7840     AVFrame *pict = data;
7843     s->flags= avctx->flags;
7844     s->flags2= avctx->flags2;
7846    /* no supplementary picture */
7847     if (buf_size == 0) {
     /* end of stream: flush one delayed picture per call, lowest POC first */
7851 //FIXME factorize this with the output code below
7852         out = h->delayed_pic[0];
7854         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7855             if(h->delayed_pic[i]->poc < out->poc){
7856                 out = h->delayed_pic[i];
7860         for(i=out_idx; h->delayed_pic[i]; i++)
7861             h->delayed_pic[i] = h->delayed_pic[i+1];
7864             *data_size = sizeof(AVFrame);
7865             *pict= *(AVFrame*)out;
     /* truncated mode: accumulate input until a full frame is available */
7871     if(s->flags&CODEC_FLAG_TRUNCATED){
7872         int next= ff_h264_find_frame_end(h, buf, buf_size);
7874         if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7876 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
     /* first call with AVC (MP4-style) extradata: parse the avcC box once */
7879     if(h->is_avc && !h->got_avcC) {
7880         int i, cnt, nalsize;
7881         unsigned char *p = avctx->extradata;
7882         if(avctx->extradata_size < 7) {
7883             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7887             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7890         /* sps and pps in the avcC always have length coded with 2 bytes,
7891            so put a fake nal_length_size = 2 while parsing them */
7892         h->nal_length_size = 2;
7893         // Decode sps from avcC
7894         cnt = *(p+5) & 0x1f; // Number of sps
7896         for (i = 0; i < cnt; i++) {
7897             nalsize = AV_RB16(p) + 2;
7898             if(decode_nal_units(h, p, nalsize) < 0) {
7899                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7904         // Decode pps from avcC
7905         cnt = *(p++); // Number of pps
7906         for (i = 0; i < cnt; i++) {
7907             nalsize = AV_RB16(p) + 2;
7908             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7909                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7914         // Now store right nal length size, that will be use to parse all other nals
7915         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7916         // Do not reparse avcC
     /* Annex-B extradata (e.g. from a demuxer) is decoded once up front */
7920     if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7921         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7925     buf_index=decode_nal_units(h, buf, buf_size);
7929     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7930         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* a picture is complete: finish it and pick one for output */
7934     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7935         Picture *out = s->current_picture_ptr;
7936         Picture *cur = s->current_picture_ptr;
7937         Picture *prev = h->delayed_output_pic;
7938         int i, pics, cross_idr, out_of_order, out_idx;
7942         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7943         s->current_picture_ptr->pict_type= s->pict_type;
     /* save POC state for the next picture's POC derivation */
7945         h->prev_frame_num_offset= h->frame_num_offset;
7946         h->prev_frame_num= h->frame_num;
7947         if(s->current_picture_ptr->reference){
7948             h->prev_poc_msb= h->poc_msb;
7949             h->prev_poc_lsb= h->poc_lsb;
7951         if(s->current_picture_ptr->reference)
7952             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7958 //FIXME do something with unavailable reference frames
7960 #if 0 //decode order
7961         *data_size = sizeof(AVFrame);
7963         /* Sort B-frames into display order */
7965         if(h->sps.bitstream_restriction_flag
7966            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7967             s->avctx->has_b_frames = h->sps.num_reorder_frames;
7972         while(h->delayed_pic[pics]) pics++;
7974         assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7976         h->delayed_pic[pics++] = cur;
7977         if(cur->reference == 0)
7981         for(i=0; h->delayed_pic[i]; i++)
7982             if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
     /* choose the delayed picture with the smallest POC for output */
7985         out = h->delayed_pic[0];
7987         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7988             if(h->delayed_pic[i]->poc < out->poc){
7989                 out = h->delayed_pic[i];
7993         out_of_order = !cross_idr && prev && out->poc < prev->poc;
7994         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7996         else if(prev && pics <= s->avctx->has_b_frames)
7998         else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8000            || (s->low_delay &&
8001             ((!cross_idr && prev && out->poc > prev->poc + 2)
             /* out-of-order output detected: grow the reorder buffer */
8004             s->avctx->has_b_frames++;
8007         else if(out_of_order)
8010         if(out_of_order || pics > s->avctx->has_b_frames){
8011             for(i=out_idx; h->delayed_pic[i]; i++)
8012                 h->delayed_pic[i] = h->delayed_pic[i+1];
8018             *data_size = sizeof(AVFrame);
8019             if(prev && prev != out && prev->reference == 1)
8020                 prev->reference = 0;
8021             h->delayed_output_pic = out;
8025         *pict= *(AVFrame*)out;
8027             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8030     assert(pict->data[0] || !*data_size);
8031     ff_print_debug_info(s, pict);
8032 //printf("out %d\n", (int)pict->data[0]);
8035     /* Return the Picture timestamp as the frame number */
8036     /* we substract 1 because it is added on utils.c */
8037     avctx->frame_number = s->picture_number - 1;
8039     return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: [0]=top-left, [1]=top, [2]=top-right, [3]=left.
 * A neighbour counts as available only if it lies inside the picture and
 * belongs to the same slice (matching slice_table entry).
 */
8042 static inline void fill_mb_avail(H264Context *h){
8043     MpegEncContext * const s = &h->s;
8044     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8047         h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8048         h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
8049         h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8055     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8056     h->mb_avail[4]= 1; //FIXME move out
8057     h->mb_avail[5]= 0; //FIXME move out
/* Built-in self-test body (the enclosing test entry point is elided in this
 * excerpt). Exercises, in order: unsigned/signed Exp-Golomb write+read
 * round trips, the 4x4 (I)DCT error versus a reference, the quantizer over
 * all 52 QPs, and the NAL escaping/unescaping layer. */
8063 #define SIZE (COUNT*40)
8069 //    int int_temp[10000];
8071     AVCodecContext avctx;
8073     dsputil_init(&dsp, &avctx);
     /* --- unsigned Exp-Golomb round trip: write 0..COUNT-1, read back --- */
8075     init_put_bits(&pb, temp, SIZE);
8076     printf("testing unsigned exp golomb\n");
8077     for(i=0; i<COUNT; i++){
8079         set_ue_golomb(&pb, i);
8080         STOP_TIMER("set_ue_golomb");
8082     flush_put_bits(&pb);
8084     init_get_bits(&gb, temp, 8*SIZE);
8085     for(i=0; i<COUNT; i++){
8088         s= show_bits(&gb, 24);
8091         j= get_ue_golomb(&gb);
8093             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8096         STOP_TIMER("get_ue_golomb");
     /* --- signed Exp-Golomb round trip, centred on zero --- */
8100     init_put_bits(&pb, temp, SIZE);
8101     printf("testing signed exp golomb\n");
8102     for(i=0; i<COUNT; i++){
8104         set_se_golomb(&pb, i - COUNT/2);
8105         STOP_TIMER("set_se_golomb");
8107     flush_put_bits(&pb);
8109     init_get_bits(&gb, temp, 8*SIZE);
8110     for(i=0; i<COUNT; i++){
8113         s= show_bits(&gb, 24);
8116         j= get_se_golomb(&gb);
8117         if(j != i - COUNT/2){
8118             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8121         STOP_TIMER("get_se_golomb");
     /* --- 4x4 DCT/IDCT round trip on random blocks, accumulate error --- */
8124     printf("testing 4x4 (I)DCT\n");
8127         uint8_t src[16], ref[16];
8128         uint64_t error= 0, max_error=0;
8130         for(i=0; i<COUNT; i++){
8132 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8133             for(j=0; j<16; j++){
8134                 ref[j]= random()%255;
8135                 src[j]= random()%255;
8138             h264_diff_dct_c(block, src, ref, 4);
         /* dequant-like scaling of the coefficients before the inverse */
8141             for(j=0; j<16; j++){
8142 //                printf("%d ", block[j]);
8143                 block[j]= block[j]*4;
8144                 if(j&1) block[j]= (block[j]*4 + 2)/5;
8145                 if(j&4) block[j]= (block[j]*4 + 2)/5;
8149             s->dsp.h264_idct_add(ref, block, 4);
8150 /*            for(j=0; j<16; j++){
8151                 printf("%d ", ref[j]);
8155             for(j=0; j<16; j++){
8156                 int diff= FFABS(src[j] - ref[j]);
8159                 max_error= FFMAX(max_error, diff);
8162         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
     /* --- quantizer: random block per QP --- */
8164     printf("testing quantizer\n");
8165     for(qp=0; qp<52; qp++){
8167             src1_block[i]= src2_block[i]= random()%255;
     /* --- NAL layer: escape a random bitstream, decode, compare --- */
8171     printf("Testing NAL layer\n");
8173     uint8_t bitstream[COUNT];
8174     uint8_t nal[COUNT*2];
8176     memset(&h, 0, sizeof(H264Context));
8178     for(i=0; i<COUNT; i++){
         /* random non-zero payload, then punch in some zero runs */
8186         for(j=0; j<COUNT; j++){
8187             bitstream[j]= (random() % 255) + 1;
8190         for(j=0; j<zeros; j++){
8191             int pos= random() % COUNT;
8192             while(bitstream[pos] == 0){
8201         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8203             printf("encoding failed\n");
8207         out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8211         if(out_length != COUNT){
8212             printf("incorrect length %d %d\n", out_length, COUNT);
8216         if(consumed != nal_length){
8217             printf("incorrect consumed length %d %d\n", nal_length, consumed);
8221         if(memcmp(bitstream, out, COUNT)){
8222             printf("mismatch\n");
8227     printf("Testing RBSP\n");
/**
 * AVCodec close callback: release the per-context RBSP buffer and the
 * decoder tables. NOTE(review): the return statement is elided in this
 * excerpt.
 */
8235 static int decode_end(AVCodecContext *avctx)
8237     H264Context *h = avctx->priv_data;
8238     MpegEncContext *s = &h->s;
8240     av_freep(&h->rbsp_buffer);
8241     free_tables(h); //FIXME cleanup init stuff perhaps
8244 //    memset(h, 0, sizeof(H264Context));
8250 AVCodec h264_decoder = {
8254 sizeof(H264Context),
8259 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,