2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
// File-scope VLC tables. The names suggest the coeff_token, total_zeros and run
// codes of the residual syntax - TODO confirm at the place where they are initialised.
41 static VLC coeff_token_vlc[4];
42 static VLC chroma_dc_coeff_token_vlc;
44 static VLC total_zeros_vlc[15];
45 static VLC chroma_dc_total_zeros_vlc[3];
47 static VLC run_vlc[6];
// Prototypes of file-local functions; their definitions are not part of the visible listing.
50 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
51 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
52 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
53 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Combines two values into a single 32-bit word.
 *
 * With WORDS_BIGENDIAN defined, the low 16 bits of b form the low half and a is
 * shifted into bits 16 and up. The second return statement swaps the roles of a and b.
 * NOTE(review): the preprocessor lines separating the two returns are not visible
 * in this listing; presumably the second return is the little-endian branch.
 *
 * @param a first value
 * @param b second value
 * @return the packed 32-bit word
 */
55 static av_always_inline uint32_t pack16to32(int a, int b){
56 #ifdef WORDS_BIGENDIAN
57 return (b&0xFFFF) + (a<<16);
59 return (a&0xFFFF) + (b<<16);
// Lookup tables with 52 entries each: for every index q in 0..51 the listed values
// are ff_rem6[q] == q % 6 and ff_div6[q] == q / 6.
63 const uint8_t ff_rem6[52]={
64 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
67 const uint8_t ff_div6[52]={
68 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
74 * @param h height of the rectangle, should be a constant
75 * @param w width of the rectangle, should be a constant
76 * @param size the size of val (1 or 4), should be a constant
/**
 * Fills a small rectangle in memory with a constant value using wide stores.
 *
 * The visible stores touch rows 0 to 3 only and use 16-, 32- or 64-bit writes;
 * a one-byte val (size==1) is replicated across the store width by multiplication.
 * NOTE(review): the lines that select a store group from w and h, and any lines
 * that rescale w or stride, are not visible in this listing.
 *
 * @param vp     start of the rectangle; the assert below requires it to be aligned
 *               to FFMIN(w, STRIDE_ALIGN)
 * @param w      width of the rectangle
 * @param h      height of the rectangle
 * @param stride distance between rows; the assert below requires it to be a multiple of w
 * @param val    value to store
 * @param size   size of val, must be 1 or 4 (asserted)
 */
78 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
79 uint8_t *p= (uint8_t*)vp;
80 assert(size==1 || size==4);
86 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
87 assert((stride&(w-1))==0);
// rows written with one 16-bit store each
89 const uint16_t v= size==4 ? val : val*0x0101;
90 *(uint16_t*)(p + 0*stride)= v;
92 *(uint16_t*)(p + 1*stride)= v;
94 *(uint16_t*)(p + 2*stride)=
95 *(uint16_t*)(p + 3*stride)= v;
// rows written with one 32-bit store each
97 const uint32_t v= size==4 ? val : val*0x01010101;
98 *(uint32_t*)(p + 0*stride)= v;
100 *(uint32_t*)(p + 1*stride)= v;
102 *(uint32_t*)(p + 2*stride)=
103 *(uint32_t*)(p + 3*stride)= v;
105 //gcc can't optimize 64bit math on x86_32
106 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
// 64-bit variant: val is duplicated into both 32-bit halves, one store per row
107 const uint64_t v= val*0x0100000001ULL;
108 *(uint64_t*)(p + 0*stride)= v;
110 *(uint64_t*)(p + 1*stride)= v;
112 *(uint64_t*)(p + 2*stride)=
113 *(uint64_t*)(p + 3*stride)= v;
// 64-bit variant with two stores (16 bytes) per row
115 const uint64_t v= val*0x0100000001ULL;
116 *(uint64_t*)(p + 0+0*stride)=
117 *(uint64_t*)(p + 8+0*stride)=
118 *(uint64_t*)(p + 0+1*stride)=
119 *(uint64_t*)(p + 8+1*stride)= v;
121 *(uint64_t*)(p + 0+2*stride)=
122 *(uint64_t*)(p + 8+2*stride)=
123 *(uint64_t*)(p + 0+3*stride)=
124 *(uint64_t*)(p + 8+3*stride)= v;
// 32-bit stores of val itself: two per row (8 bytes)
126 *(uint32_t*)(p + 0+0*stride)=
127 *(uint32_t*)(p + 4+0*stride)= val;
129 *(uint32_t*)(p + 0+1*stride)=
130 *(uint32_t*)(p + 4+1*stride)= val;
132 *(uint32_t*)(p + 0+2*stride)=
133 *(uint32_t*)(p + 4+2*stride)=
134 *(uint32_t*)(p + 0+3*stride)=
135 *(uint32_t*)(p + 4+3*stride)= val;
// 32-bit stores of val itself: four per row (16 bytes)
137 *(uint32_t*)(p + 0+0*stride)=
138 *(uint32_t*)(p + 4+0*stride)=
139 *(uint32_t*)(p + 8+0*stride)=
140 *(uint32_t*)(p +12+0*stride)=
141 *(uint32_t*)(p + 0+1*stride)=
142 *(uint32_t*)(p + 4+1*stride)=
143 *(uint32_t*)(p + 8+1*stride)=
144 *(uint32_t*)(p +12+1*stride)= val;
146 *(uint32_t*)(p + 0+2*stride)=
147 *(uint32_t*)(p + 4+2*stride)=
148 *(uint32_t*)(p + 8+2*stride)=
149 *(uint32_t*)(p +12+2*stride)=
150 *(uint32_t*)(p + 0+3*stride)=
151 *(uint32_t*)(p + 4+3*stride)=
152 *(uint32_t*)(p + 8+3*stride)=
153 *(uint32_t*)(p +12+3*stride)= val;
/**
 * Loads data of the neighbouring macroblocks (top, left, top-left, top-right)
 * into the per-macroblock caches of the context.
 *
 * From the visible code this covers: the neighbour addresses (h->top_mb_xy,
 * h->left_mb_xy[]), the intra sample availability masks, intra4x4_pred_mode_cache,
 * non_zero_count_cache, top_cbp and left_cbp, the border entries of mv_cache,
 * ref_cache, mvd_cache and direct_cache, and finally neighbor_transform_size,
 * which counts how many of the top and first left neighbour use the 8x8 transform.
 *
 * @param h           decoder context; the current position is read from h->s.mb_x and h->s.mb_y
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero selects the deblocking variant of the neighbour handling
 *                    (see the tests on for_deblock below)
 *
 * NOTE(review): many control-flow lines of this function (if/else, loop headers,
 * closing braces) are not visible in this listing, so the nesting of the statements
 * below cannot be read off directly.
 */
160 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
161 MpegEncContext * const s = &h->s;
162 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
163 int topleft_xy, top_xy, topright_xy, left_xy[2];
164 int topleft_type, top_type, topright_type, left_type[2];
168 //FIXME deblocking could skip the intra and nnz parts.
169 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
172 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// default neighbour addresses: one macroblock row up, and one macroblock to the left
174 top_xy = mb_xy - s->mb_stride;
175 topleft_xy = top_xy - 1;
176 topright_xy= top_xy + 1;
177 left_xy[1] = left_xy[0] = mb_xy-1;
// addresses of the macroblock pair containing the current MB (mb_y with bit 0 cleared)
// and of the neighbouring pairs, plus their frame/field flags
187 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
188 const int top_pair_xy = pair_xy - s->mb_stride;
189 const int topleft_pair_xy = top_pair_xy - 1;
190 const int topright_pair_xy = top_pair_xy + 1;
191 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
192 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
193 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
194 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
195 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
196 const int bottom = (s->mb_y & 1);
197 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
199 ? !curr_mb_frame_flag // bottom macroblock
200 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
202 top_xy -= s->mb_stride;
205 ? !curr_mb_frame_flag // bottom macroblock
206 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
208 topleft_xy -= s->mb_stride;
211 ? !curr_mb_frame_flag // bottom macroblock
212 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
214 topright_xy -= s->mb_stride;
216 if (left_mb_frame_flag != curr_mb_frame_flag) {
217 left_xy[1] = left_xy[0] = pair_xy - 1;
218 if (curr_mb_frame_flag) {
239 left_xy[1] += s->mb_stride;
// remember the top and left neighbour addresses in the context
252 h->top_mb_xy = top_xy;
253 h->left_mb_xy[0] = left_xy[0];
254 h->left_mb_xy[1] = left_xy[1];
// neighbour types; 0 when the slice_table entry of the neighbour is 255 or larger
258 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
259 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
260 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
262 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// bit i of the 16-bit word stored at non_zero_count[mb_xy][14] becomes the cache entry of block i
264 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
266 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
267 for(list=0; list<h->list_count; list++){
268 if(USES_LIST(mb_type,list)){
269 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
270 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
271 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
272 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
278 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
279 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
281 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
282 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
284 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
285 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// neighbour types; 0 unless the slice_table entry of the neighbour equals h->slice_num
290 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
291 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
292 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
293 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
294 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// intra macroblock: start from default availability masks, then clear bits for every
// neighbour that is missing (type 0) or non-intra while constrained_intra_pred is set
297 if(IS_INTRA(mb_type)){
298 h->topleft_samples_available=
299 h->top_samples_available=
300 h->left_samples_available= 0xFFFF;
301 h->topright_samples_available= 0xEEEA;
303 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
304 h->topleft_samples_available= 0xB3FF;
305 h->top_samples_available= 0x33FF;
306 h->topright_samples_available= 0x26EA;
309 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
310 h->topleft_samples_available&= 0xDF5F;
311 h->left_samples_available&= 0x5F5F;
315 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
316 h->topleft_samples_available&= 0x7FFF;
318 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
319 h->topright_samples_available&= 0xFBFF;
// intra4x4: import the prediction modes bordering the current macroblock
321 if(IS_INTRA4x4(mb_type)){
322 if(IS_INTRA4x4(top_type)){
323 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
324 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
325 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
326 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
329 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
334 h->intra4x4_pred_mode_cache[4+8*0]=
335 h->intra4x4_pred_mode_cache[5+8*0]=
336 h->intra4x4_pred_mode_cache[6+8*0]=
337 h->intra4x4_pred_mode_cache[7+8*0]= pred;
340 if(IS_INTRA4x4(left_type[i])){
341 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
342 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
345 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
350 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
351 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
366 //FIXME constrained_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
// non-zero counts of the top neighbour go into cache row 0 (luma) and rows 0 and 3 (chroma columns 1 and 2)
368 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
369 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
370 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
371 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
373 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
374 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
376 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
377 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
// fallback value for the same entries: 0 for non-intra macroblocks when pps.cabac is set, otherwise 64
380 h->non_zero_count_cache[4+8*0]=
381 h->non_zero_count_cache[5+8*0]=
382 h->non_zero_count_cache[6+8*0]=
383 h->non_zero_count_cache[7+8*0]=
385 h->non_zero_count_cache[1+8*0]=
386 h->non_zero_count_cache[2+8*0]=
388 h->non_zero_count_cache[1+8*3]=
389 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// the same for the two left neighbours, selected through left_block[]
393 for (i=0; i<2; i++) {
395 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
396 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
397 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
398 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
400 h->non_zero_count_cache[3+8*1 + 2*8*i]=
401 h->non_zero_count_cache[3+8*2 + 2*8*i]=
402 h->non_zero_count_cache[0+8*1 + 8*i]=
403 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// coded block patterns of the top and left neighbours
410 h->top_cbp = h->cbp_table[top_xy];
411 } else if(IS_INTRA(mb_type)) {
418 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
419 } else if(IS_INTRA(mb_type)) {
425 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
428 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// motion vectors and reference indices of the neighbours, per reference list
433 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
435 for(list=0; list<h->list_count; list++){
436 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
437 /*if(!h->mv_cache_clean[list]){
438 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
439 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
440 h->mv_cache_clean[list]= 1;
444 h->mv_cache_clean[list]= 0;
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
449 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
450 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
451 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
452 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
453 h->ref_cache[list][scan8[0] + 0 - 1*8]=
454 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
455 h->ref_cache[list][scan8[0] + 2 - 1*8]=
456 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
458 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
459 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
460 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
461 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
462 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
466 int cache_idx = scan8[0] - 1 + i*2*8;
467 if(USES_LIST(left_type[i], list)){
468 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
469 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
470 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
471 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
472 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
473 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
475 *(uint32_t*)h->mv_cache [list][cache_idx ]=
476 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
477 h->ref_cache[list][cache_idx ]=
478 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
482 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
485 if(USES_LIST(topleft_type, list)){
486 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
487 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
488 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
489 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
491 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
492 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
495 if(USES_LIST(topright_type, list)){
496 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
497 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
498 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
499 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
501 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
502 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
505 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
508 h->ref_cache[list][scan8[5 ]+1] =
509 h->ref_cache[list][scan8[7 ]+1] =
510 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
511 h->ref_cache[list][scan8[4 ]] =
512 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
513 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
514 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
515 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
516 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
517 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
520 /* XXX yuck, Load mvd */
521 if(USES_LIST(top_type, list)){
522 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
523 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
524 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
525 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
526 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
528 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
529 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
530 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
531 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
533 if(USES_LIST(left_type[0], list)){
534 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
535 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
536 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
538 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
539 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
541 if(USES_LIST(left_type[1], list)){
542 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
543 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
544 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
546 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
547 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
549 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
550 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
551 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
552 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
553 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: clear the direct flags of the current macroblock and import those of the top and left neighbours
555 if(h->slice_type == B_TYPE){
556 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
558 if(IS_DIRECT(top_type)){
559 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
560 }else if(IS_8X8(top_type)){
561 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
562 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
563 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
565 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
568 if(IS_DIRECT(left_type[0]))
569 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
570 else if(IS_8X8(left_type[0]))
571 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
573 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
575 if(IS_DIRECT(left_type[1]))
576 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
577 else if(IS_8X8(left_type[1]))
578 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
580 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
586 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
587 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
588 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
589 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
590 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
591 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
592 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
593 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
594 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
595 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
597 #define MAP_F2F(idx, mb_type)\
598 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
599 h->ref_cache[list][idx] <<= 1;\
600 h->mv_cache[list][idx][1] /= 2;\
601 h->mvd_cache[list][idx][1] /= 2;\
606 #define MAP_F2F(idx, mb_type)\
607 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
608 h->ref_cache[list][idx] >>= 1;\
609 h->mv_cache[list][idx][1] <<= 1;\
610 h->mvd_cache[list][idx][1] <<= 1;\
620 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Stores seven entries of h->intra4x4_pred_mode_cache into h->intra4x4_pred_mode
 * for the current macroblock (address mb_x + mb_y*mb_stride).
 *
 * With cache indices read as column + 8*row, entries 0..3 come from column 7 of
 * rows 1..4 and entries 4..6 from columns 4..6 of row 4.
 */
623 static inline void write_back_intra_pred_mode(H264Context *h){
624 MpegEncContext * const s = &h->s;
625 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
627 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
628 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
629 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
630 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
631 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
632 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
633 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
637 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validates the cached intra4x4 prediction modes against neighbour availability.
 *
 * When bit 0x8000 of h->top_samples_available (resp. h->left_samples_available) is
 * clear, cached modes are looked up in the local table top[] (resp. left[]); the
 * result is either reported through av_log as an error or written back to the cache.
 * NOTE(review): the loop headers, the conditions choosing between the error path and
 * the write-back, and the return statements are not visible in this listing.
 *
 * @param h decoder context
 * @return status code; the returned values are not visible in this listing
 */
639 static inline int check_intra4x4_pred_mode(H264Context *h){
640 MpegEncContext * const s = &h->s;
// replacement tables indexed by the cached prediction mode
641 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
642 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
645 if(!(h->top_samples_available&0x8000)){
647 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
// NOTE(review): the message is formatted with status (the table result), not with the requested mode
649 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
652 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
657 if(!(h->left_samples_available&0x8000)){
659 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
661 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
664 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
670 } //FIXME cleanup like next
673 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validates an intra prediction mode against neighbour availability.
 *
 * The visible code logs an error for an out-of-range mode and for a mode that needs
 * the top or left neighbour while bit 0x8000 of h->top_samples_available or
 * h->left_samples_available is clear.
 * NOTE(review): the range test, the use of the local tables top[] and left[] and
 * the return statements are not visible in this listing.
 *
 * @param h    decoder context
 * @param mode prediction mode to check
 * @return presumably the possibly replaced mode or a negative value on error - TODO confirm
 */
675 static inline int check_intra_pred_mode(H264Context *h, int mode){
676 MpegEncContext * const s = &h->s;
677 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
678 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
681 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
685 if(!(h->top_samples_available&0x8000)){
688 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
693 if(!(h->left_samples_available&0x8000)){
696 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
705 * gets the predicted intra4x4 prediction mode.
/**
 * Predicts the intra4x4 mode of block n from the cached modes of its left and top
 * neighbours (cache index scan8[n] - 1 and scan8[n] - 8).
 *
 * @param h decoder context
 * @param n block index
 * @return DC_PRED when the smaller of the two neighbour modes is negative;
 *         the other return is not visible in this listing (presumably that minimum)
 */
707 static inline int pred_intra_mode(H264Context *h, int n){
708 const int index8= scan8[n];
709 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
710 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
711 const int min= FFMIN(left, top);
713 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
715 if(min<0) return DC_PRED;
/**
 * Stores entries of h->non_zero_count_cache into h->non_zero_count for the
 * current macroblock (address mb_x + mb_y*mb_stride).
 *
 * Entries 0..6 come from column 7 of rows 1..4 and columns 4..6 of row 4,
 * entries 7..9 and 10..12 from columns 1 and 2 of rows 1..2 and 4..5 (cache index
 * read as column + 8*row). A 16-bit word with bit i set when cache entry scan8[i]
 * is nonzero is stored at entry 14.
 * NOTE(review): the loop header and any condition around the bit mask code are not
 * visible in this listing.
 */
719 static inline void write_back_non_zero_count(H264Context *h){
720 MpegEncContext * const s = &h->s;
721 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
723 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
724 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
725 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
726 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
727 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
728 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
729 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
731 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
732 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
733 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
735 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
736 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
737 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
740 // store all luma nnzs, for deblocking
743 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
744 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
749 * gets the predicted number of non zero coefficients.
750 * @param n block index
/**
 * Predicts the number of non-zero coefficients of block n from the cached counts
 * of its left and top neighbours (cache index scan8[n] - 1 and scan8[n] - 8).
 *
 * NOTE(review): the line computing i and the return statement are not visible in
 * this listing; presumably i is left + top and the result is i&31, the value that
 * the trace output prints.
 *
 * @param h decoder context
 * @param n block index
 */
752 static inline int pred_non_zero_count(H264Context *h, int n){
753 const int index8= scan8[n];
754 const int left= h->non_zero_count_cache[index8 - 1];
755 const int top = h->non_zero_count_cache[index8 - 8];
// values below 64 are halved with rounding up
758 if(i<64) i= (i+1)>>1;
760 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the diagonal neighbour motion vector used for motion vector prediction.
 *
 * In the visible general case, *C points at the cached vector at i - 8 + part_width
 * (top-right) when its reference is not PART_NOT_AVAILABLE; otherwise *C points at
 * the cached vector at i - 8 - 1 (top-left) and that reference index is returned.
 * The earlier part handles special cases through SET_DIAG_MV, which reads the
 * vector from the picture arrays, writes a scaled copy into the scratch cache slot
 * scan8[0]-2 and returns the scaled reference index.
 * NOTE(review): the conditions guarding the special cases are only partly visible
 * in this listing.
 *
 * @param h          decoder context
 * @param C          receives a pointer to the selected motion vector
 * @param i          cache index of the block (scan8 layout)
 * @param list       reference list
 * @param part_width partition width in cache columns
 * @return reference index belonging to *C
 */
765 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
766 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
767 MpegEncContext *s = &h->s;
769 /* there is no consistent mapping of mvs to neighboring locations that will
770 * make mbaff happy, so we can't move all this logic to fill_caches */
772 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// scratch slot scan8[0]-2 is zeroed and used as the default target of *C
774 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
775 *C = h->mv_cache[list][scan8[0]-2];
778 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
779 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
780 if(IS_INTERLACED(mb_types[topright_xy])){
781 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
782 const int x4 = X4, y4 = Y4;\
783 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
784 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
785 return LIST_NOT_USED;\
786 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
787 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
788 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
789 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
791 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
794 if(topright_ref == PART_NOT_AVAILABLE
795 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
796 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
798 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
799 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
802 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
804 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
805 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// general case: top-right neighbour if available, else top-left
811 if(topright_ref != PART_NOT_AVAILABLE){
812 *C= h->mv_cache[list][ i - 8 + part_width ];
815 tprintf(s->avctx, "topright MV not available\n");
817 *C= h->mv_cache[list][ i - 8 - 1 ];
818 return h->ref_cache[list][ i - 8 - 1 ];
823 * gets the predicted MV.
824 * @param n the block index
825 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
826 * @param mx the x component of the predicted motion vector
827 * @param my the y component of the predicted motion vector
/**
 * Computes the predicted motion vector of block n.
 *
 * A, B and C are the left, top and diagonal neighbour vectors (C and its reference
 * come from fetch_diagonal_mv). match_count is the number of neighbours whose
 * reference index equals ref: with more than one match the component-wise median
 * (mid_pred) is used; with exactly one match a single neighbour is selected; with
 * none there is a special case when only the left neighbour is available, otherwise
 * the median is used again.
 * NOTE(review): the assignments of the single-neighbour branches are not visible in
 * this listing.
 *
 * @param h          decoder context
 * @param n          block index
 * @param part_width partition width, must be 1, 2 or 4 (asserted)
 * @param list       reference list
 * @param ref        reference index of the block being predicted
 * @param mx         receives the x component of the prediction
 * @param my         receives the y component of the prediction
 */
829 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
830 const int index8= scan8[n];
831 const int top_ref= h->ref_cache[list][ index8 - 8 ];
832 const int left_ref= h->ref_cache[list][ index8 - 1 ];
833 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
834 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
836 int diagonal_ref, match_count;
838 assert(part_width==1 || part_width==2 || part_width==4);
848 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
849 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
850 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
851 if(match_count > 1){ //most common
852 *mx= mid_pred(A[0], B[0], C[0]);
853 *my= mid_pred(A[1], B[1], C[1]);
854 }else if(match_count==1){
858 }else if(top_ref==ref){
866 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
870 *mx= mid_pred(A[0], B[0], C[0]);
871 *my= mid_pred(A[1], B[1], C[1]);
875 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
879 * gets the directionally predicted 16x8 MV.
880 * @param n the block index
881 * @param mx the x component of the predicted motion vector
882 * @param my the y component of the predicted motion vector
/**
 * Directional motion vector prediction for a 16x8 partition.
 *
 * The visible code inspects the top neighbour of block 0 (cache index scan8[0] - 8)
 * and the left neighbour of block 8 (cache index scan8[8] - 1), and falls back to
 * pred_motion with part_width 4.
 * NOTE(review): the conditions choosing between these candidates and the
 * assignments to mx and my are not visible in this listing.
 *
 * @param h    decoder context
 * @param n    block index
 * @param list reference list
 * @param ref  reference index of the partition
 * @param mx   receives the x component of the prediction
 * @param my   receives the y component of the prediction
 */
884 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
886 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
887 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
889 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
897 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
898 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
900 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
910 pred_motion(h, n, 4, list, ref, mx, my);
914 * gets the directionally predicted 8x16 MV.
915 * @param n the block index
916 * @param mx the x component of the predicted motion vector
917 * @param my the y component of the predicted motion vector
/**
 * Directional motion vector prediction for an 8x16 partition.
 *
 * The visible code inspects the left neighbour of block 0 (cache index
 * scan8[0] - 1) and the diagonal neighbour of block 4 obtained from
 * fetch_diagonal_mv with part_width 2, and falls back to pred_motion with
 * part_width 2.
 * NOTE(review): the conditions choosing between these candidates and the
 * assignments to mx and my are not visible in this listing.
 *
 * @param h    decoder context
 * @param n    block index
 * @param list reference list
 * @param ref  reference index of the partition
 * @param mx   receives the x component of the prediction
 * @param my   receives the y component of the prediction
 */
919 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
921 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
922 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
924 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
935 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
937 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
939 if(diagonal_ref == ref){
947 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Motion vector prediction for a skipped P macroblock (list 0, reference 0).
 *
 * A special case applies when the top or left neighbour is PART_NOT_AVAILABLE, or
 * when either of them has reference index 0 together with an all-zero motion
 * vector; otherwise the regular pred_motion prediction for block 0 with
 * part_width 4 is used.
 * NOTE(review): the body of the special case is not visible in this listing
 * (presumably it sets the vector to zero).
 *
 * @param h  decoder context
 * @param mx receives the x component of the prediction
 * @param my receives the y component of the prediction
 */
950 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
951 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
952 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
954 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
956 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
957 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
958 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
964 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Fills h->dist_scale_factor[] for every list 0 reference from picture order counts.
 *
 * With poc the POC of the current picture, poc1 the POC of h->ref_list[1][0] and
 * poc0 the POC of list 0 reference i:
 *   td = clip(poc1 - poc0, -128, 127), tb = clip(poc - poc0, -128, 127),
 *   tx = (16384 + (|td| >> 1)) / td,
 *   factor = clip((tb*tx + 32) >> 6, -1024, 1023),
 * and the factor is 256 when td is 0. The second loop copies each factor into the
 * entries 2*i and 2*i+1 of h->dist_scale_factor_field.
 * NOTE(review): any condition around the second loop is not visible in this listing.
 */
969 static inline void direct_dist_scale_factor(H264Context * const h){
970 const int poc = h->s.current_picture_ptr->poc;
971 const int poc1 = h->ref_list[1][0].poc;
973 for(i=0; i<h->ref_count[0]; i++){
974 int poc0 = h->ref_list[0][i].poc;
975 int td = av_clip(poc1 - poc0, -128, 127);
// td == 0 would divide by zero below, so a fixed factor is used instead
976 if(td == 0 /* FIXME || pic0 is a long-term ref */){
977 h->dist_scale_factor[i] = 256;
979 int tb = av_clip(poc - poc0, -128, 127);
980 int tx = (16384 + (FFABS(td) >> 1)) / td;
981 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
985 for(i=0; i<h->ref_count[0]; i++){
986 h->dist_scale_factor_field[2*i] =
987 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Records the reference lists of the current picture and builds the mapping from
 * the reference indices of the first list 1 picture to the current reference lists.
 *
 * The reference counts and POCs of both lists are stored in the current picture.
 * Then, for every reference i that ref1 (h->ref_list[1][0]) recorded for a list,
 * h->map_col_to_list0[list][i] is set to the index j of the current reference with
 * the same POC, or stays 0 when none matches. The last loop derives
 * h->map_col_to_list0_field entries 2*i and 2*i+1 as 2*j and 2*j+1.
 * NOTE(review): the statement after the test on pict_type and direct_spatial_mv_pred
 * (presumably an early return) and any condition around the last loop are not
 * visible in this listing.
 */
991 static inline void direct_ref_list_init(H264Context * const h){
992 MpegEncContext * const s = &h->s;
993 Picture * const ref1 = &h->ref_list[1][0];
994 Picture * const cur = s->current_picture_ptr;
// NOTE(review): these resets are overwritten by the unconditional assignment of
// cur->ref_count[list] in the loop that follows - confirm this is intended
996 if(cur->pict_type == I_TYPE)
997 cur->ref_count[0] = 0;
998 if(cur->pict_type != B_TYPE)
999 cur->ref_count[1] = 0;
1000 for(list=0; list<2; list++){
1001 cur->ref_count[list] = h->ref_count[list];
1002 for(j=0; j<h->ref_count[list]; j++)
1003 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1005 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1007 for(list=0; list<2; list++){
1008 for(i=0; i<ref1->ref_count[list]; i++){
1009 const int poc = ref1->ref_poc[list][i];
1010 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1011 for(j=0; j<h->ref_count[list]; j++)
1012 if(h->ref_list[list][j].poc == poc){
1013 h->map_col_to_list0[list][i] = j;
1019 for(list=0; list<2; list++){
1020 for(i=0; i<ref1->ref_count[list]; i++){
1021 j = h->map_col_to_list0[list][i];
1022 h->map_col_to_list0_field[list][2*i] = 2*j;
1023 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives reference indices and motion vectors of a direct-predicted
 * macroblock (or of its direct 8x8 sub-blocks) and writes them to
 * h->ref_cache / h->mv_cache. *mb_type and h->sub_mb_type[] are updated to
 * the partitioning that was chosen.
 *
 * Two modes are implemented: spatial prediction (h->direct_spatial_mv_pred)
 * from the neighbouring cache entries, and temporal prediction, which scales
 * the motion vectors of the colocated macroblock in h->ref_list[1][0].
 *
 * @param h       decoder context
 * @param mb_type in/out macroblock type of the current macroblock
 */
1029 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1030 MpegEncContext * const s = &h->s;
/* positions of the current macroblock in macroblock, 8x8-block and 4x4-block units */
1031 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1032 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1033 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* type, motion vectors and reference indices of the colocated macroblock in the first list-1 picture */
1034 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1035 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1036 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1037 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1038 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1039 const int is_b8x8 = IS_8X8(*mb_type);
1040 unsigned int sub_mb_type;
/* colocated types for which a single 16x16 partition is used */
1043 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1044 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1045 /* FIXME save sub mb types from previous frames (or derive from MVs)
1046 * so we know exactly what block size to use */
1047 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1048 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1049 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1050 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1051 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1053 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1057 *mb_type |= MB_TYPE_DIRECT2;
1059 *mb_type |= MB_TYPE_INTERLACED;
1061 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1063 if(h->direct_spatial_mv_pred){
1068 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1070 /* ref = min(neighbors) */
1071 for(list=0; list<2; list++){
/* left, top and top-right neighbours; the top-left entry replaces refc below */
1072 int refa = h->ref_cache[list][scan8[0] - 1];
1073 int refb = h->ref_cache[list][scan8[0] - 8];
1074 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1076 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1078 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1080 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no usable neighbour reference in either list: reference 0 with zero vectors */
1086 if(ref[0] < 0 && ref[1] < 0){
1087 ref[0] = ref[1] = 0;
1088 mv[0][0] = mv[0][1] =
1089 mv[1][0] = mv[1][1] = 0;
1091 for(list=0; list<2; list++){
1093 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1095 mv[list][0] = mv[list][1] = 0;
/* drop the prediction flag of a list whose reference is negative */
1100 *mb_type &= ~MB_TYPE_P0L1;
1101 sub_mb_type &= ~MB_TYPE_P0L1;
1102 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_P0L0;
1104 sub_mb_type &= ~MB_TYPE_P0L0;
1107 if(IS_16X16(*mb_type)){
1110 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* colocated block is inter, uses reference 0 and has both vector components within +-1 */
1112 if(!IS_INTRA(mb_type_col)
1113 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1114 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1115 && (h->x264_build>33 || !h->x264_build)))){
1117 a= pack16to32(mv[0][0],mv[0][1]);
1119 b= pack16to32(mv[1][0],mv[1][1]);
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 b= pack16to32(mv[1][0],mv[1][1]);
1124 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* per 8x8 sub-block handling */
1127 for(i8=0; i8<4; i8++){
1128 const int x8 = i8&1;
1129 const int y8 = i8>>1;
1131 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1133 h->sub_mb_type[i8] = sub_mb_type;
1135 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1137 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1138 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1141 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1142 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1143 && (h->x264_build>33 || !h->x264_build)))){
1144 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1145 if(IS_SUB_8X8(sub_mb_type)){
/* x8*3 / y8*3 select the outer corner 4x4 block of this 8x8 quadrant */
1146 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1147 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1149 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1151 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1154 for(i4=0; i4<4; i4++){
1155 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1156 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1158 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1160 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1166 }else{ /* direct temporal mv pred */
1167 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1168 const int *dist_scale_factor = h->dist_scale_factor;
/* interlaced macroblocks use the field variants of the maps and scale factors */
1171 if(IS_INTERLACED(*mb_type)){
1172 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1173 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1174 dist_scale_factor = h->dist_scale_factor_field;
/* current and colocated macroblock differ in interlacing: vertical scaling is required */
1176 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1177 /* FIXME assumes direct_8x8_inference == 1 */
1178 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1179 int mb_types_col[2];
1182 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1183 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1184 | (*mb_type & MB_TYPE_INTERLACED);
1185 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1187 if(IS_INTERLACED(*mb_type)){
1188 /* frame to field scaling */
1189 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1190 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1192 l1ref0 -= 2*h->b8_stride;
1193 l1ref1 -= 2*h->b8_stride;
1194 l1mv0 -= 4*h->b_stride;
1195 l1mv1 -= 4*h->b_stride;
1199 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1200 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1202 *mb_type |= MB_TYPE_16x8;
1204 *mb_type |= MB_TYPE_8x8;
1206 /* field to frame scaling */
1207 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1208 * but in MBAFF, top and bottom POC are equal */
1209 int dy = (s->mb_y&1) ? 1 : 2;
1211 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1212 l1ref0 += dy*h->b8_stride;
1213 l1ref1 += dy*h->b8_stride;
1214 l1mv0 += 2*dy*h->b_stride;
1215 l1mv1 += 2*dy*h->b_stride;
1218 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1220 *mb_type |= MB_TYPE_16x16;
1222 *mb_type |= MB_TYPE_8x8;
1225 for(i8=0; i8<4; i8++){
1226 const int x8 = i8&1;
1227 const int y8 = i8>>1;
1229 const int16_t (*l1mv)[2]= l1mv0;
1231 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1233 h->sub_mb_type[i8] = sub_mb_type;
/* list 1 always uses reference 0; an intra colocated block gives reference 0 and zero vectors in both lists */
1235 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1236 if(IS_INTRA(mb_types_col[y8])){
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1238 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1239 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1243 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1245 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1247 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1250 scale = dist_scale_factor[ref0];
1251 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* list-0 vector = colocated vector * scale / 256 (rounded); list-1 vector = list-0 vector - colocated vector */
1254 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1255 int my_col = (mv_col[1]<<y_shift)/2;
1256 int mx = (scale * mv_col[0] + 128) >> 8;
1257 int my = (scale * my_col + 128) >> 8;
1258 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1259 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1266 /* one-to-one mv scaling */
1268 if(IS_16X16(*mb_type)){
1271 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1272 if(IS_INTRA(mb_type_col)){
/* take the colocated list-0 reference when it is valid, else the list-1 one, mapped to a current list-0 index */
1275 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1276 : map_col_to_list0[1][l1ref1[0]];
1277 const int scale = dist_scale_factor[ref0];
1278 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1280 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1281 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1283 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1284 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1286 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1290 for(i8=0; i8<4; i8++){
1291 const int x8 = i8&1;
1292 const int y8 = i8>>1;
1294 const int16_t (*l1mv)[2]= l1mv0;
1296 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1298 h->sub_mb_type[i8] = sub_mb_type;
1299 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1300 if(IS_INTRA(mb_type_col)){
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1302 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1303 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1307 ref0 = l1ref0[x8 + y8*h->b8_stride];
1309 ref0 = map_col_to_list0[0][ref0];
1311 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1314 scale = dist_scale_factor[ref0];
1316 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* one vector per 8x8 block, otherwise one per 4x4 block */
1317 if(IS_SUB_8X8(sub_mb_type)){
1318 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1319 int mx = (scale * mv_col[0] + 128) >> 8;
1320 int my = (scale * mv_col[1] + 128) >> 8;
1321 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1322 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1324 for(i4=0; i4<4; i4++){
1325 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1326 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1327 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1328 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1329 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1330 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the motion data of the current macroblock from the per-macroblock
 * caches (mv_cache, mvd_cache, ref_cache) into the picture-wide tables
 * (motion_val, mvd_table, ref_index) and updates h->direct_table for
 * CABAC-coded B slices.
 *
 * @param h       decoder context
 * @param mb_type type of the macroblock being written back
 */
1337 static inline void write_back_motion(H264Context *h, int mb_type){
1338 MpegEncContext * const s = &h->s;
/* position of the macroblock in 4x4-block and 8x8-block units */
1339 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1340 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* list 0 unused: mark its four reference indices as LIST_NOT_USED */
1343 if(!USES_LIST(mb_type, 0))
1344 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1346 for(list=0; list<h->list_count; list++){
1348 if(!USES_LIST(mb_type, list))
/* each 64-bit copy moves two motion vectors (2 x 2 x int16) of row y */
1352 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1353 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* motion vector differences are stored only when CABAC is in use; skipped macroblocks store zeros */
1355 if( h->pps.cabac ) {
1356 if(IS_SKIP(mb_type))
1357 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1360 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1361 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block, taken from cache entries 0, 4, 8 and 12 */
1366 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1367 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1368 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1369 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1370 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* record which 8x8 sub-blocks 1..3 are direct-coded */
1374 if(h->slice_type == B_TYPE && h->pps.cabac){
1375 if(IS_8X8(mb_type)){
1376 uint8_t *direct_table = &h->direct_table[b8_xy];
1377 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1378 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1379 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1385 * Decodes a network abstraction layer unit.
1386 * @param consumed is the number of bytes used as input
1387 * @param length is the length of the array
1388 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1389 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the one-byte NAL header (nal_ref_idc, nal_unit_type), scans the
 * payload for 00 00 0x (x <= 3) sequences and, when such a sequence is found,
 * copies the payload into h->rbsp_buffer[] with the escape bytes removed. */
1391 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1396 // src[0]&0x80; //forbidden bit
/* header byte: bits 5-6 are nal_ref_idc, bits 0-4 are nal_unit_type */
1397 h->nal_ref_idc= src[0]>>5;
1398 h->nal_unit_type= src[0]&0x1F;
1402 for(i=0; i<length; i++)
1403 printf("%2X ", src[i]);
/* fast scan in steps of two bytes for a zero byte that may start 00 00 0x */
1405 for(i=0; i+1<length; i+=2){
1406 if(src[i]) continue;
1407 if(i>0 && src[i-1]==0) i--;
1408 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1410 /* startcode, so we must be past the end */
/* nothing to unescape: report the input length unchanged */
1417 if(i>=length-1){ //no escaped 0
1418 *dst_length= length;
1419 *consumed= length+1; //+1 for the header
1423 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/* NOTE(review): the av_fast_realloc() result overwrites the stored pointer
 * directly -- confirm that a NULL result is checked before dst is written */
1424 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1425 dst= h->rbsp_buffer[bufidx];
1431 //printf("decoding esc\n");
1434 //remove escapes (very rare 1:2^22)
1435 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1436 if(src[si+2]==3){ //escape
1441 }else //next start code
1445 dst[di++]= src[si++];
1449 *consumed= si + 1;//+1 for the header
1450 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1455 * identifies the exact end of the bitstream
1456 * @return the length of the trailing, or 0 if damaged
/* @param h   decoder context, used here for trace logging
 * @param src input bytes. NOTE(review): presumably points at the last byte of the RBSP -- confirm against the caller */
1458 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
/* debug trace of v in hexadecimal */
1462 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1472 * IDCT-transforms the 16 DC values and dequantizes them.
1473 * @param qp quantization parameter
/* Two-pass 4x4 butterfly over the 16 DC coefficients stored at strided
 * positions inside block. The second pass multiplies by qmul and rounds
 * with (+128) >> 8 while writing the results back in place.
 * @param block coefficient array, modified in place
 * @param qp    NOTE(review): not referenced in the lines visible here
 * @param qmul  dequantization multiplier applied in the second pass */
1475 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1478 int temp[16]; //FIXME check if this is a good idea
/* offsets of the DC coefficients inside block, in units of stride */
1479 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1480 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1482 //memset(block, 64, 2*256);
/* first pass: sums and differences of four coefficients read from block */
1485 const int offset= y_offset[i];
1486 const int z0= block[offset+stride*0] + block[offset+stride*4];
1487 const int z1= block[offset+stride*0] - block[offset+stride*4];
1488 const int z2= block[offset+stride*1] - block[offset+stride*5];
1489 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: same butterfly on temp, then scale and round */
1498 const int offset= x_offset[i];
1499 const int z0= temp[4*0+i] + temp[4*2+i];
1500 const int z1= temp[4*0+i] - temp[4*2+i];
1501 const int z2= temp[4*1+i] - temp[4*3+i];
1502 const int z3= temp[4*1+i] + temp[4*3+i];
1504 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1505 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1506 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1507 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1513 * DCT-transforms the 16 DC values.
1514 * @param qp quantization parameter ??? FIXME
/* Forward counterpart of the luma DC transform: the same two-pass butterfly
 * over the 16 strided DC positions, with the second-pass results halved
 * (>> 1) instead of being scaled by a quantizer.
 * @param block coefficient array, modified in place */
1516 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1517 // const int qmul= dequant_coeff[qp][0];
1519 int temp[16]; //FIXME check if this is a good idea
/* offsets of the DC coefficients inside block, in units of stride */
1520 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1521 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: sums and differences of four coefficients read from block */
1524 const int offset= y_offset[i];
1525 const int z0= block[offset+stride*0] + block[offset+stride*4];
1526 const int z1= block[offset+stride*0] - block[offset+stride*4];
1527 const int z2= block[offset+stride*1] - block[offset+stride*5];
1528 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: same butterfly on temp, results halved */
1537 const int offset= x_offset[i];
1538 const int z0= temp[4*0+i] + temp[4*2+i];
1539 const int z1= temp[4*0+i] - temp[4*2+i];
1540 const int z2= temp[4*1+i] - temp[4*3+i];
1541 const int z3= temp[4*1+i] + temp[4*3+i];
1543 block[stride*0 +offset]= (z0 + z3)>>1;
1544 block[stride*2 +offset]= (z1 + z2)>>1;
1545 block[stride*8 +offset]= (z1 - z2)>>1;
1546 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 transform of the four chroma DC coefficients, followed by
 * multiplication with qmul and a right shift by 7; results are written
 * back in place.
 * @param block coefficient array; the four values sit at offsets 0, 16, 32 and 48
 * @param qp    NOTE(review): not referenced in the lines visible here
 * @param qmul  dequantization multiplier */
1554 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1555 const int stride= 16*2;
1556 const int xStride= 16;
/* load the 2x2 coefficients */
1559 a= block[stride*0 + xStride*0];
1560 b= block[stride*0 + xStride*1];
1561 c= block[stride*1 + xStride*0];
1562 d= block[stride*1 + xStride*1];
/* NOTE(review): a, b, c and e are presumably recombined from the loaded values before this point -- confirm */
1569 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1570 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1571 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1572 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* 2x2 transform of the four chroma DC coefficients without any scaling;
 * results are written back in place.
 * @param block coefficient array; the four values sit at offsets 0, 16, 32 and 48 */
1576 static void chroma_dc_dct_c(DCTELEM *block){
1577 const int stride= 16*2;
1578 const int xStride= 16;
/* load the 2x2 coefficients */
1581 a= block[stride*0 + xStride*0];
1582 b= block[stride*0 + xStride*1];
1583 c= block[stride*1 + xStride*0];
1584 d= block[stride*1 + xStride*1];
/* NOTE(review): a, b, c and e are presumably recombined from the loaded values before this point -- confirm */
1591 block[stride*0 + xStride*0]= (a+c);
1592 block[stride*0 + xStride*1]= (e+b);
1593 block[stride*1 + xStride*0]= (a-c);
1594 block[stride*1 + xStride*1]= (e-b);
1599 * gets the chroma qp.
/* @param h      decoder context holding pps.chroma_qp_table
 * @param t      selects which of the chroma QP tables is used
 * @param qscale luma quantizer; only its low 8 bits index the table
 * @return the table entry chroma_qp_table[t][qscale & 0xff] */
1601 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1602 return h->pps.chroma_qp_table[t][qscale & 0xff];
1605 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1606 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Quantizes the coefficients of block with the table quant_coeff[qscale].
 * A coefficient whose scaled value lies inside the dead zone
 * [-threshold1, threshold1] is detected with a single unsigned compare;
 * others are rounded with a bias of 1/3 (intra) or 1/6 (inter) of a
 * quantization step.
 * @param block       coefficients, visited in scantable order
 * @param scantable   maps scan position i to coefficient index j
 * @param qscale      quantizer, index into quant_coeff
 * @param intra       nonzero selects the larger rounding bias
 * @param separate_dc NOTE(review): presumably selects which of the two DC variants below is used -- confirm
 * @return last_non_zero */
1607 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1609 const int * const quant_table= quant_coeff[qscale];
1610 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* (unsigned)(level+threshold1) > threshold2 is true exactly when level is outside [-threshold1, threshold1] */
1611 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1612 const unsigned int threshold2= (threshold1<<1);
/* DC variant with shift QUANT_SHIFT-2, using the table of qscale+18 */
1618 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1619 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1620 const unsigned int dc_threshold2= (dc_threshold1<<1);
1622 int level= block[0]*quant_coeff[qscale+18][0];
1623 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1625 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1628 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1631 // last_non_zero = i;
/* DC variant with shift QUANT_SHIFT+1, using the regular table */
1636 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1637 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1638 const unsigned int dc_threshold2= (dc_threshold1<<1);
1640 int level= block[0]*quant_table[0];
1641 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1643 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1646 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1649 // last_non_zero = i;
/* remaining coefficients, in scan order */
1662 const int j= scantable[i];
1663 int level= block[j]*quant_table[j];
1665 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1666 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1667 if(((unsigned)(level+threshold1))>threshold2){
1669 level= (bias + level)>>QUANT_SHIFT;
1672 level= (bias - level)>>QUANT_SHIFT;
1681 return last_non_zero;
/* 4x4 vertical intra prediction: copies the four pixels of the row above
 * the block into each of the four rows, one 32-bit word per row.
 * @param src      top-left pixel of the block; the row at src-stride is read
 * @param topright not referenced by this predictor
 * @param stride   distance in bytes between two rows */
1684 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
/* the four pixels above the block, loaded as one 32-bit word */
1685 const uint32_t a= ((uint32_t*)(src-stride))[0];
1686 ((uint32_t*)(src+0*stride))[0]= a;
1687 ((uint32_t*)(src+1*stride))[0]= a;
1688 ((uint32_t*)(src+2*stride))[0]= a;
1689 ((uint32_t*)(src+3*stride))[0]= a;
/**
 * 4x4 horizontal intra prediction: fills each of the four rows with the
 * pixel immediately to the left of that row.
 *
 * @param src      top-left pixel of the block; src[-1 + y*stride] is read for
 *                 y = 0..3 and every row is written with one 32-bit store
 * @param topright not referenced by this predictor
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    /* The replication constant is unsigned on purpose: with a signed int
     * constant a left pixel >= 128 would overflow int (undefined behaviour). */
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101U;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101U;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101U;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101U;
}
/**
 * 4x4 DC intra prediction: fills the block with the rounded mean of the four
 * pixels above it and the four pixels to its left.
 *
 * @param src      top-left pixel of the block; the row at src-stride and the
 *                 column at src-1 are read, every row is written with one
 *                 32-bit store
 * @param topright not referenced by this predictor
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    /* Unsigned replication constant: dc can reach 255, and 255*0x01010101
     * does not fit in a signed int (undefined behaviour). */
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101U;
}
/**
 * 4x4 DC intra prediction from the left edge only: fills the block with the
 * rounded mean of the four pixels to its left.
 *
 * @param src      top-left pixel of the block; src[-1 + y*stride] is read for
 *                 y = 0..3, every row is written with one 32-bit store
 * @param topright not referenced by this predictor
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    /* Unsigned replication constant: dc can reach 255, and 255*0x01010101
     * does not fit in a signed int (undefined behaviour). */
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101U;
}
/**
 * 4x4 DC intra prediction from the top edge only: fills the block with the
 * rounded mean of the four pixels above it.
 *
 * @param src      top-left pixel of the block; src[x - stride] is read for
 *                 x = 0..3, every row is written with one 32-bit store
 * @param topright not referenced by this predictor
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    /* Unsigned replication constant: dc can reach 255, and 255*0x01010101
     * does not fit in a signed int (undefined behaviour). */
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101U;
}
/* 4x4 DC intra prediction without neighbours: fills the block with the
 * constant value 128, one 32-bit store per row.
 * @param src      top-left pixel of the block
 * @param topright not referenced by this predictor
 * @param stride   distance in bytes between two rows */
1727 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
1728 ((uint32_t*)(src+0*stride))[0]=
1729 ((uint32_t*)(src+1*stride))[0]=
1730 ((uint32_t*)(src+2*stride))[0]=
/* unsigned operands keep the byte replication free of signed overflow */
1731 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Edge-loading helpers for the directional 4x4 predictors. Each macro
 * declares four const int locals (marked av_unused) in the calling scope:
 *   LOAD_TOP_RIGHT_EDGE: t4..t7 from topright[0..3]
 *   LOAD_LEFT_EDGE:      l0..l3 from the column left of the block (src[-1 + y*stride])
 *   LOAD_TOP_EDGE:       t0..t3 from the row above the block (src[x - stride])
 * They expect src, topright and stride to be in scope. */
1735 #define LOAD_TOP_RIGHT_EDGE\
1736 const int av_unused t4= topright[0];\
1737 const int av_unused t5= topright[1];\
1738 const int av_unused t6= topright[2];\
1739 const int av_unused t7= topright[3];\
1741 #define LOAD_LEFT_EDGE\
1742 const int av_unused l0= src[-1+0*stride];\
1743 const int av_unused l1= src[-1+1*stride];\
1744 const int av_unused l2= src[-1+2*stride];\
1745 const int av_unused l3= src[-1+3*stride];\
1747 #define LOAD_TOP_EDGE\
1748 const int av_unused t0= src[ 0-1*stride];\
1749 const int av_unused t1= src[ 1-1*stride];\
1750 const int av_unused t2= src[ 2-1*stride];\
1751 const int av_unused t3= src[ 3-1*stride];\
/* 4x4 diagonal intra predictor (down-right, per its name). Every written
 * pixel is a (a + 2*b + c + 2) >> 2 filter of three adjacent edge samples
 * taken from the left column (l0..l3), the top-left corner (lt) and the top
 * row (t0..t3).
 * NOTE(review): l* and t* are presumably declared via LOAD_LEFT_EDGE and LOAD_TOP_EDGE -- confirm.
 * @param src      top-left pixel of the block
 * @param topright not referenced in the visible statements
 * @param stride   distance in bytes between two rows */
1753 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
/* top-left corner sample */
1754 const int lt= src[-1-1*stride];
1758 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1760 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1763 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1767 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1770 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1772 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1773 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* 4x4 diagonal intra predictor (down-left, per its name). Every written
 * pixel is a three-tap filter of the top row (t0..t3) and the top-right
 * samples (t4..t7); the last pixel uses t7 with weight 3.
 * NOTE(review): t0..t7 are presumably declared via LOAD_TOP_EDGE and LOAD_TOP_RIGHT_EDGE -- confirm.
 * @param src      top-left pixel of the block
 * @param topright source of t4..t7 (see LOAD_TOP_RIGHT_EDGE)
 * @param stride   distance in bytes between two rows */
1776 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1781 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1783 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1786 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1790 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1793 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1795 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
/* end of the edge: t7 stands in for the missing sample beyond it */
1796 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* 4x4 directional intra predictor (vertical-right, per its name). Pixels are
 * either two-tap averages (a + b + 1) >> 1 or three-tap filters
 * (a + 2*b + c + 2) >> 2 of the top-left corner (lt), the top row (t0..t3)
 * and the left column (l0..l2).
 * NOTE(review): l* and t* are presumably declared via LOAD_LEFT_EDGE and LOAD_TOP_EDGE -- confirm.
 * @param src      top-left pixel of the block
 * @param topright not referenced in the visible statements
 * @param stride   distance in bytes between two rows */
1799 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
/* top-left corner sample */
1800 const int lt= src[-1-1*stride];
1805 src[1+2*stride]=(lt + t0 + 1)>>1;
1807 src[2+2*stride]=(t0 + t1 + 1)>>1;
1809 src[3+2*stride]=(t1 + t2 + 1)>>1;
1810 src[3+0*stride]=(t2 + t3 + 1)>>1;
1812 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1814 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1816 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1817 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1818 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1819 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 directional intra predictor (vertical-left, per its name). Pixels are
 * either two-tap averages (a + b + 1) >> 1 or three-tap filters
 * (a + 2*b + c + 2) >> 2 of the top row (t0..t3) and top-right samples (t4..t6).
 * NOTE(review): t0..t6 are presumably declared via LOAD_TOP_EDGE and LOAD_TOP_RIGHT_EDGE -- confirm.
 * @param src      top-left pixel of the block
 * @param topright source of t4.. (see LOAD_TOP_RIGHT_EDGE)
 * @param stride   distance in bytes between two rows */
1822 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
1826 src[0+0*stride]=(t0 + t1 + 1)>>1;
1828 src[0+2*stride]=(t1 + t2 + 1)>>1;
1830 src[1+2*stride]=(t2 + t3 + 1)>>1;
1832 src[2+2*stride]=(t3 + t4+ 1)>>1;
1833 src[3+2*stride]=(t4 + t5+ 1)>>1;
1834 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1836 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1838 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
1840 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
1841 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 directional intra predictor (horizontal-up, per its name). The visible
 * statements use only the left column l0..l3, alternating two-tap averages
 * and three-tap filters; past the end of the column l3 is repeated.
 * NOTE(review): l0..l3 are presumably declared via LOAD_LEFT_EDGE -- confirm.
 * @param src      top-left pixel of the block
 * @param topright not referenced in the visible statements
 * @param stride   distance in bytes between two rows */
1844 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
1847 src[0+0*stride]=(l0 + l1 + 1)>>1;
1848 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1850 src[0+1*stride]=(l1 + l2 + 1)>>1;
1852 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
1854 src[0+2*stride]=(l2 + l3 + 1)>>1;
/* l3 appears twice: it stands in for the sample below the block */
1856 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* 4x4 directional intra predictor (horizontal-down, per its name). Pixels
 * are either two-tap averages (a + b + 1) >> 1 or three-tap filters
 * (a + 2*b + c + 2) >> 2 of the left column (l0..l3), the top-left corner
 * (lt) and the top row (t0..t2).
 * NOTE(review): l* and t* are presumably declared via LOAD_LEFT_EDGE and LOAD_TOP_EDGE -- confirm.
 * @param src      top-left pixel of the block
 * @param topright not referenced in the visible statements
 * @param stride   distance in bytes between two rows */
1865 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
/* top-left corner sample */
1866 const int lt= src[-1-1*stride];
1871 src[2+1*stride]=(lt + l0 + 1)>>1;
1873 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
1874 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
1875 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1877 src[2+2*stride]=(l0 + l1 + 1)>>1;
1879 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1881 src[2+3*stride]=(l1 + l2+ 1)>>1;
1883 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1884 src[0+3*stride]=(l2 + l3 + 1)>>1;
1885 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical intra prediction: copies the 16 pixels of the row above the
 * block into each of the 16 rows, as four 32-bit words per row.
 * @param src    top-left pixel of the block; the row at src-stride is read
 * @param stride distance in bytes between two rows */
1888 void ff_pred16x16_vertical_c(uint8_t *src, int stride){
/* the 16 pixels above the block, loaded as four 32-bit words */
1890 const uint32_t a= ((uint32_t*)(src-stride))[0];
1891 const uint32_t b= ((uint32_t*)(src-stride))[1];
1892 const uint32_t c= ((uint32_t*)(src-stride))[2];
1893 const uint32_t d= ((uint32_t*)(src-stride))[3];
1895 for(i=0; i<16; i++){
1896 ((uint32_t*)(src+i*stride))[0]= a;
1897 ((uint32_t*)(src+i*stride))[1]= b;
1898 ((uint32_t*)(src+i*stride))[2]= c;
1899 ((uint32_t*)(src+i*stride))[3]= d;
/**
 * 16x16 horizontal intra prediction: fills each of the 16 rows with the
 * pixel immediately to the left of that row.
 *
 * @param src    top-left pixel of the block; src[-1 + y*stride] is read for
 *               y = 0..15 and every row is written with four 32-bit stores
 * @param stride distance in bytes between two rows
 */
void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        /* Unsigned replication constant: a left pixel >= 128 multiplied by a
         * signed 0x01010101 would overflow int (undefined behaviour). */
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101U;
    }
}
/* 16x16 DC intra prediction: accumulates edge pixels into dc, converts the
 * sum to a rounded mean with (dc + 16) >> 5 and fills all 16 rows with it.
 * NOTE(review): the >> 5 implies 32 samples, presumably 16 left plus 16 top
 * pixels; only the left-column accumulation is visible here -- confirm.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows */
1914 void ff_pred16x16_dc_c(uint8_t *src, int stride){
1918 dc+= src[-1+i*stride];
/* replicate the mean into all four bytes of a 32-bit word.
 * NOTE(review): 0x01010101 is a signed constant; if dc is a signed int, a
 * mean >= 128 overflows it -- consider 0x01010101U as in ff_pred16x16_128_dc_c */
1925 dc= 0x01010101*((dc + 16)>>5);
1927 for(i=0; i<16; i++){
1928 ((uint32_t*)(src+i*stride))[0]=
1929 ((uint32_t*)(src+i*stride))[1]=
1930 ((uint32_t*)(src+i*stride))[2]=
1931 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC intra prediction from the left edge: sums the pixels of the column
 * left of the block, converts the sum to a rounded mean with (dc + 8) >> 4
 * and fills all 16 rows with it.
 * @param src    top-left pixel of the block; src[-1 + i*stride] is read
 * @param stride distance in bytes between two rows */
1935 void ff_pred16x16_left_dc_c(uint8_t *src, int stride){
1939 dc+= src[-1+i*stride];
/* replicate the mean into all four bytes of a 32-bit word.
 * NOTE(review): 0x01010101 is a signed constant; if dc is a signed int, a
 * mean >= 128 overflows it -- consider 0x01010101U as in ff_pred16x16_128_dc_c */
1942 dc= 0x01010101*((dc + 8)>>4);
1944 for(i=0; i<16; i++){
1945 ((uint32_t*)(src+i*stride))[0]=
1946 ((uint32_t*)(src+i*stride))[1]=
1947 ((uint32_t*)(src+i*stride))[2]=
1948 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC intra prediction from the top edge (per its name): converts the
 * accumulated sum to a rounded mean with (dc + 8) >> 4 and fills all 16 rows
 * with it.
 * NOTE(review): the accumulation loop is presumably over the row above the block -- confirm.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows */
1952 void ff_pred16x16_top_dc_c(uint8_t *src, int stride){
/* replicate the mean into all four bytes of a 32-bit word.
 * NOTE(review): 0x01010101 is a signed constant; if dc is a signed int, a
 * mean >= 128 overflows it -- consider 0x01010101U as in ff_pred16x16_128_dc_c */
1958 dc= 0x01010101*((dc + 8)>>4);
1960 for(i=0; i<16; i++){
1961 ((uint32_t*)(src+i*stride))[0]=
1962 ((uint32_t*)(src+i*stride))[1]=
1963 ((uint32_t*)(src+i*stride))[2]=
1964 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC intra prediction without neighbours: fills all 16 rows with the
 * constant value 128, four 32-bit stores per row.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows */
1968 void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
1971 for(i=0; i<16; i++){
1972 ((uint32_t*)(src+i*stride))[0]=
1973 ((uint32_t*)(src+i*stride))[1]=
1974 ((uint32_t*)(src+i*stride))[2]=
/* unsigned operands keep the byte replication free of signed overflow */
1975 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane intra prediction. Horizontal (H) and vertical (V) gradients are
 * estimated from weighted differences of the top row and left column, and the
 * block is filled with the resulting plane; values are clipped through the
 * ff_cropTbl lookup.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param svq3   NOTE(review): presumably selects the alternative gradient
 *               scaling (division-based, with H and V swapped) -- confirm */
1979 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
/* clipping table, offset so that negative indices are valid */
1982 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* src0: centre of the row above; src1/src2: left-column pixels that move apart symmetrically */
1983 const uint8_t * const src0 = src+7-stride;
1984 const uint8_t *src1 = src+8*stride-1;
1985 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
1986 int H = src0[1] - src0[-1];
1987 int V = src1[0] - src2[ 0];
/* accumulate differences weighted by their distance k from the centre */
1988 for(k=2; k<=8; ++k) {
1989 src1 += stride; src2 -= stride;
1990 H += k*(src0[k] - src0[-k]);
1991 V += k*(src1[0] - src2[ 0]);
1994 H = ( 5*(H/4) ) / 16;
1995 V = ( 5*(V/4) ) / 16;
1997 /* required for 100% accuracy */
1998 i = H; H = V; V = i;
1999 H = ( 5*H+32 ) >> 6;
2001 V = ( 5*V+32 ) >> 6;
/* a: plane value (scaled by 32) from which the rows are stepped; src1 and src2 now point at the last left pixel and the row above */
2004 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2005 for(j=16; j>0; --j) {
2008 for(i=-16; i<0; i+=4) {
2009 src[16+i] = cm[ (b ) >> 5 ];
2010 src[17+i] = cm[ (b+ H) >> 5 ];
2011 src[18+i] = cm[ (b+2*H) >> 5 ];
2012 src[19+i] = cm[ (b+3*H) >> 5 ];
/* 16x16 plane intra prediction: calls pred16x16_plane_compat_c() with svq3 = 0.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows */
2019 void ff_pred16x16_plane_c(uint8_t *src, int stride){
2020 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 vertical intra prediction: copies the 8 pixels of the row above the
 * block (two 32-bit words) into the rows of the block.
 * @param src    top-left pixel of the block; the row at src-stride is read
 * @param stride distance in bytes between two rows */
2023 void ff_pred8x8_vertical_c(uint8_t *src, int stride){
2025 const uint32_t a= ((uint32_t*)(src-stride))[0];
2026 const uint32_t b= ((uint32_t*)(src-stride))[1];
2029 ((uint32_t*)(src+i*stride))[0]= a;
2030 ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 horizontal intra prediction: fills a row with the pixel immediately to
 * its left, two 32-bit stores per row.
 * @param src    top-left pixel of the block; src[-1 + i*stride] is read
 * @param stride distance in bytes between two rows */
2034 void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
/* NOTE(review): the pixel promotes to int and 0x01010101 is signed, so a
 * pixel >= 128 overflows int -- consider 0x01010101U as in ff_pred8x8_128_dc_c */
2038 ((uint32_t*)(src+i*stride))[0]=
2039 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 DC intra prediction without neighbours: fills a row with the constant
 * value 128, two 32-bit stores per row.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows */
2043 void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
2047 ((uint32_t*)(src+i*stride))[0]=
/* unsigned operands keep the byte replication free of signed overflow */
2048 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 DC intra prediction from the left edge, computed separately for the
 * upper and lower half: dc0 is the rounded mean of left pixels i, dc2 that of
 * left pixels i+4; each mean fills the full width of the rows it is stored to.
 * @param src    top-left pixel of the block; src[-1 + y*stride] is read
 * @param stride distance in bytes between two rows */
2052 void ff_pred8x8_left_dc_c(uint8_t *src, int stride){
2058 dc0+= src[-1+i*stride];
2059 dc2+= src[-1+(i+4)*stride];
/* (sum + 2) >> 2 is the rounded mean of four samples, replicated to four bytes.
 * NOTE(review): signed 0x01010101 overflows for a mean >= 128 if dc0/dc2 are signed int -- confirm */
2061 dc0= 0x01010101*((dc0 + 2)>>2);
2062 dc2= 0x01010101*((dc2 + 2)>>2);
2065 ((uint32_t*)(src+i*stride))[0]=
2066 ((uint32_t*)(src+i*stride))[1]= dc0;
2069 ((uint32_t*)(src+i*stride))[0]=
2070 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 DC intra prediction from the top edge, computed separately for the left
 * and right half: dc0 is the rounded mean of top pixels i, dc1 that of top
 * pixels 4+i; word 0 of a row receives dc0 and word 1 receives dc1.
 * @param src    top-left pixel of the block; the row at src-stride is read
 * @param stride distance in bytes between two rows */
2074 void ff_pred8x8_top_dc_c(uint8_t *src, int stride){
2080 dc0+= src[i-stride];
2081 dc1+= src[4+i-stride];
/* (sum + 2) >> 2 is the rounded mean of four samples, replicated to four bytes.
 * NOTE(review): signed 0x01010101 overflows for a mean >= 128 if dc0/dc1 are signed int -- confirm */
2083 dc0= 0x01010101*((dc0 + 2)>>2);
2084 dc1= 0x01010101*((dc1 + 2)>>2);
2087 ((uint32_t*)(src+i*stride))[0]= dc0;
2088 ((uint32_t*)(src+i*stride))[1]= dc1;
2091 ((uint32_t*)(src+i*stride))[0]= dc0;
2092 ((uint32_t*)(src+i*stride))[1]= dc1;
/* 8x8 DC intra prediction with a separate mean for each 4x4 quadrant:
 *   dc0 = left pixels 0..3 plus top pixels 0..3 (8 samples),
 *   dc1 = top pixels 4..7, dc2 = left pixels 4..7 (4 samples each),
 *   dc3 = mean over the dc1 and dc2 samples together (8 samples).
 * Stored as dc0|dc1 in one group of rows and dc2|dc3 in the other.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows */
2097 void ff_pred8x8_dc_c(uint8_t *src, int stride){
2099 int dc0, dc1, dc2, dc3;
2103 dc0+= src[-1+i*stride] + src[i-stride];
2104 dc1+= src[4+i-stride];
2105 dc2+= src[-1+(i+4)*stride];
/* dc3 must be derived before dc1 and dc2 are overwritten with their replicated means.
 * NOTE(review): signed 0x01010101 times a mean >= 128 overflows int -- consider 0x01010101U */
2107 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2108 dc0= 0x01010101*((dc0 + 4)>>3);
2109 dc1= 0x01010101*((dc1 + 2)>>2);
2110 dc2= 0x01010101*((dc2 + 2)>>2);
2113 ((uint32_t*)(src+i*stride))[0]= dc0;
2114 ((uint32_t*)(src+i*stride))[1]= dc1;
2117 ((uint32_t*)(src+i*stride))[0]= dc2;
2118 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 plane intra prediction. Horizontal (H) and vertical (V) gradients are
 * estimated from weighted differences of the top row and left column, scaled
 * with (17*x + 16) >> 5, and the block is filled with the resulting plane;
 * values are clipped through the ff_cropTbl lookup.
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows */
2122 void ff_pred8x8_plane_c(uint8_t *src, int stride){
/* clipping table, offset so that negative indices are valid */
2125 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
/* src0: centre of the row above; src1/src2: left-column pixels that move apart symmetrically */
2126 const uint8_t * const src0 = src+3-stride;
2127 const uint8_t *src1 = src+4*stride-1;
2128 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2129 int H = src0[1] - src0[-1];
2130 int V = src1[0] - src2[ 0];
/* accumulate differences weighted by their distance k from the centre */
2131 for(k=2; k<=4; ++k) {
2132 src1 += stride; src2 -= stride;
2133 H += k*(src0[k] - src0[-k]);
2134 V += k*(src1[0] - src2[ 0]);
2136 H = ( 17*H+16 ) >> 5;
2137 V = ( 17*V+16 ) >> 5;
/* a: plane value (scaled by 32) from which the rows are stepped */
2139 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2140 for(j=8; j>0; --j) {
/* one row: step by H per pixel, drop the 5 fractional bits and clip */
2143 src[0] = cm[ (b ) >> 5 ];
2144 src[1] = cm[ (b+ H) >> 5 ];
2145 src[2] = cm[ (b+2*H) >> 5 ];
2146 src[3] = cm[ (b+3*H) >> 5 ];
2147 src[4] = cm[ (b+4*H) >> 5 ];
2148 src[5] = cm[ (b+5*H) >> 5 ];
2149 src[6] = cm[ (b+6*H) >> 5 ];
2150 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (pred8x8l_*) predictors. They expect src,
 * stride, has_topleft and has_topright to be in scope and declare low-pass
 * filtered edge samples: l0..l7 (left), t0..t7 (top), t8..t15 (top-right)
 * and lt (top-left). Interior samples use a (a + 2*b + c + 2) >> 2 filter;
 * at the ends of an edge the missing neighbour is replaced by the nearest
 * available sample. */
/* SRC(x,y): pixel at column x, row y relative to the top-left of the block */
2155 #define SRC(x,y) src[(x)+(y)*stride]
/* filtered left-edge sample y (used through PL(y) below) */
2157 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* l0 uses the top-left pixel only when has_topleft is set; l7 weights the last pixel by 3 */
2158 #define PREDICT_8x8_LOAD_LEFT \
2159 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2160 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2161 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2162 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* filtered top-edge sample x (used through PT(x) below) */
2165 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* t0 depends on has_topleft, t7 on has_topright */
2166 #define PREDICT_8x8_LOAD_TOP \
2167 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2168 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2169 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2170 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2171 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* filtered top-right sample x (used through PTR(x) below) */
2174 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* without a top-right block, t8..t15 all repeat the unfiltered pixel SRC(7,-1) */
2175 #define PREDICT_8x8_LOAD_TOPRIGHT \
2176 int t8, t9, t10, t11, t12, t13, t14, t15; \
2177 if(has_topright) { \
2178 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2179 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2180 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* filtered top-left corner sample */
2182 #define PREDICT_8x8_LOAD_TOPLEFT \
2183 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* fills 8 rows with the 32-bit pattern v, two words per row */
2185 #define PREDICT_8x8_DC(v) \
2187 for( y = 0; y < 8; y++ ) { \
2188 ((uint32_t*)src)[0] = \
2189 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction without neighbours: fills the block with the byte
 * value 0x80 (128) through PREDICT_8x8_DC. has_topleft and has_topright are
 * not referenced in the visible statements. */
2193 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2195 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the left edge: dc is the rounded mean of the
 * eight filtered left samples l0..l7, replicated into four bytes.
 * NOTE(review): dc is presumably passed to PREDICT_8x8_DC afterwards -- confirm. */
2197 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2199 PREDICT_8x8_LOAD_LEFT;
/* NOTE(review): the mean is an int and 0x01010101 is signed; a mean >= 128 overflows int -- consider 0x01010101U */
2200 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the top edge: dc is the rounded mean of the
 * eight filtered top samples t0..t7, replicated into four bytes.
 * NOTE(review): dc is presumably passed to PREDICT_8x8_DC afterwards -- confirm. */
2203 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2205 PREDICT_8x8_LOAD_TOP;
/* NOTE(review): the mean is an int and 0x01010101 is signed; a mean >= 128 overflows int -- consider 0x01010101U */
2206 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/**
 * 8x8 luma DC prediction from both the left column and the top row.
 * dc is the rounded mean of the sixteen filtered samples l0..l7 and t0..t7
 * (+8, >>4), replicated into all four bytes of a 32-bit word.
 */
2209 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2211 PREDICT_8x8_LOAD_LEFT;
2212 PREDICT_8x8_LOAD_TOP;
2213 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2214 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/**
 * 8x8 luma horizontal prediction: row y is filled with the filtered left
 * sample l<y>.
 * ROW(y) writes the two 32-bit words of row y with l<y> replicated into
 * every byte (0x01010101 * l<y>).
 */
2217 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2219 PREDICT_8x8_LOAD_LEFT;
2220 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2221 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2222 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/**
 * 8x8 luma vertical prediction.
 * The visible code loads the filtered top row t0..t7 and then copies row 0
 * of the block, as one 64-bit word, into rows 1..7.
 * NOTE(review): the stores that put t0..t7 into row 0 are not visible in this
 * excerpt -- confirm against the full file.
 */
2225 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2228 PREDICT_8x8_LOAD_TOP;
/* replicate the first row downwards, 8 bytes at a time */
2237 for( y = 1; y < 8; y++ )
2238 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/**
 * 8x8 luma diagonal down-left prediction.
 * Uses the filtered top (t0..t7) and top-right (t8..t15) samples. All pixels
 * with the same x+y share one value: a rounded (1,2,1)/4 average of three
 * consecutive t samples starting at t<x+y>. The last pixel (7,7) uses a
 * (1,3)/4 weighting of t14 and t15.
 */
2240 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2242 PREDICT_8x8_LOAD_TOP;
2243 PREDICT_8x8_LOAD_TOPRIGHT;
2244 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2245 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2246 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2247 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2248 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2249 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2250 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2251 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2252 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2253 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2254 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2255 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2256 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2257 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2258 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/**
 * 8x8 luma diagonal down-right prediction.
 * Uses the filtered top row (t0..t7), left column (l0..l7) and top-left
 * corner (lt). All pixels with the same x-y share one value: a rounded
 * (1,2,1)/4 average of three consecutive samples along the path
 * l7..l0, lt, t0..t7. The assignments run from the bottom-left diagonal to
 * the top-right one.
 */
2260 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2262 PREDICT_8x8_LOAD_TOP;
2263 PREDICT_8x8_LOAD_LEFT;
2264 PREDICT_8x8_LOAD_TOPLEFT;
2265 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2266 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2267 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2268 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2269 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2270 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2271 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2272 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2273 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2274 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2275 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2276 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2277 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2278 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2279 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/**
 * 8x8 luma vertical-right prediction.
 * Uses the filtered top row, left column and top-left corner. Each value is
 * shared by pixels one column right and two rows down from each other.
 * Values alternate between a rounded 2-tap average ((a+b+1)>>1) and a rounded
 * (1,2,1)/4 average of neighbouring filtered samples; the left-column
 * derived values use only the 3-tap form.
 */
2282 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2284 PREDICT_8x8_LOAD_TOP;
2285 PREDICT_8x8_LOAD_LEFT;
2286 PREDICT_8x8_LOAD_TOPLEFT;
2287 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2288 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2289 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2290 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2291 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2292 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2293 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2294 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2295 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2296 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2297 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2298 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2299 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2300 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2301 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2302 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2303 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2304 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2305 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2306 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2307 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2308 SRC(7,0)= (t6 + t7 + 1) >> 1;
/**
 * 8x8 luma horizontal-down prediction.
 * Uses the filtered top row, left column and top-left corner. Each value is
 * shared by pixels two columns right and one row down from each other.
 * Left-column derived values alternate between a rounded 2-tap average and a
 * rounded (1,2,1)/4 average; the top-row derived values use only the 3-tap
 * form.
 */
2310 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2312 PREDICT_8x8_LOAD_TOP;
2313 PREDICT_8x8_LOAD_LEFT;
2314 PREDICT_8x8_LOAD_TOPLEFT;
2315 SRC(0,7)= (l6 + l7 + 1) >> 1;
2316 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2317 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2318 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2319 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2320 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2321 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2322 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2323 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2324 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2325 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2326 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2327 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2328 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2329 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2330 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2331 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2332 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2333 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2334 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2335 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2336 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/**
 * 8x8 luma vertical-left prediction.
 * Uses the filtered top (t0..t7) and top-right (t8..t15; only t8..t12 are
 * read here) samples. Each value is shared by pixels one column left and two
 * rows down from each other. Even rows take a rounded 2-tap average of two
 * adjacent t samples, odd rows a rounded (1,2,1)/4 average of three.
 */
2338 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2340 PREDICT_8x8_LOAD_TOP;
2341 PREDICT_8x8_LOAD_TOPRIGHT;
2342 SRC(0,0)= (t0 + t1 + 1) >> 1;
2343 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2344 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2345 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2346 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2347 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2348 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2349 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2350 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2351 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2352 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2353 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2354 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2355 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2356 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2357 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2358 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2359 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2360 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2361 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2362 SRC(7,6)= (t10 + t11 + 1) >> 1;
2363 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/**
 * 8x8 luma horizontal-up prediction.
 * Uses only the filtered left column l0..l7. Each value is shared by pixels
 * two columns right and one row up from each other, alternating between a
 * rounded 2-tap average and a rounded (1,2,1)/4 average of adjacent l
 * samples. The last interpolated value uses a (1,3)/4 weighting of l6 and l7,
 * and every remaining pixel towards the bottom-right is set to l7.
 */
2365 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2367 PREDICT_8x8_LOAD_LEFT;
2368 SRC(0,0)= (l0 + l1 + 1) >> 1;
2369 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2370 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2371 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2372 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2373 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2374 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2375 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2376 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2377 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2378 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2379 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2380 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2381 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2382 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2383 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2384 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2385 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* The 8x8 luma predictor helper macros are local to the functions above;
 * remove them so they cannot leak into the rest of the file. */
2387 #undef PREDICT_8x8_LOAD_LEFT
2388 #undef PREDICT_8x8_LOAD_TOP
2389 #undef PREDICT_8x8_LOAD_TOPLEFT
2390 #undef PREDICT_8x8_LOAD_TOPRIGHT
2391 #undef PREDICT_8x8_DC
/**
 * Motion-compensates one partition from a single reference picture.
 *
 * The motion vector is read from h->mv_cache[list][scan8[n]] and combined
 * with src_x_offset/src_y_offset (scaled by 8). Luma is produced by
 * qpix_op[luma_xy], where luma_xy is built from the two low bits of mx and
 * my; chroma is produced by chroma_op with the three low bits of mx and my
 * as the fractional position.
 *
 * @param pic           reference picture to read from
 * @param n             block index, translated through scan8[]
 * @param delta         byte offset of the second luma call (NOTE(review): the
 *                      condition guarding that call, presumably !square, is
 *                      not visible in this excerpt)
 * @param chroma_height height passed to chroma_op
 * @param list          reference list (index into mv_cache / ref_cache)
 */
2397 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2398 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2399 int src_x_offset, int src_y_offset,
2400 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2401 MpegEncContext * const s = &h->s;
2402 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2403 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2404 const int luma_xy= (mx&3) + ((my&3)<<2);
2405 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2406 uint8_t * src_cb, * src_cr;
2407 int extra_width= h->emu_edge_width;
2408 int extra_height= h->emu_edge_height;
2410 const int full_mx= mx>>2;
2411 const int full_my= my>>2;
2412 const int pic_width = 16*s->mb_width;
2413 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2415 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* a fractional vector shrinks the usable margin by 3 pixels on that axis */
2418 if(mx&7) extra_width -= 3;
2419 if(my&7) extra_height -= 3;
/* if the 16x16 area reaches beyond the picture plus margin, build the luma
 * source (21x21, starting 2 pixels up/left) in edge_emu_buffer instead */
2421 if( full_mx < 0-extra_width
2422 || full_my < 0-extra_height
2423 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2424 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2425 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2426 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2430 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2432 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* luma only when grayscale decoding is requested */
2435 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
2438 // chroma offset when predicting from a field of opposite parity
2439 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2440 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2442 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2443 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
/* Cb and Cr are handled one after the other; both emulation calls write to
 * the same edge_emu_buffer */
2446 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2447 src_cb= s->edge_emu_buffer;
2449 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2452 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2453 src_cr= s->edge_emu_buffer;
2455 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition.
 *
 * Advances the destination pointers to the partition (x_offset/y_offset are
 * applied doubled for luma and as-is for chroma), converts the offsets to
 * picture coordinates by adding 8 times the macroblock position, and calls
 * mc_dir_part() for list 0 and for list 1. The operators start out as the
 * "put" variants; between the two calls chroma_op is switched to chroma_avg.
 * NOTE(review): the list0/list1 guards and the matching switch of qpix_op are
 * not visible in this excerpt.
 */
2458 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2459 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2460 int x_offset, int y_offset,
2461 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2462 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2463 int list0, int list1){
2464 MpegEncContext * const s = &h->s;
2465 qpel_mc_func *qpix_op= qpix_put;
2466 h264_chroma_mc_func chroma_op= chroma_put;
2468 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2469 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2470 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2471 x_offset += 8*s->mb_x;
2472 y_offset += 8*(s->mb_y >> MB_MBAFF);
2475 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2476 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2477 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2478 qpix_op, chroma_op);
2481 chroma_op= chroma_avg;
2485 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2486 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2487 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2488 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition.
 *
 * Two paths are visible:
 *  - Two references: list 0 is predicted into the destination and list 1
 *    into temporary buffers carved out of s->obmc_scratchpad; the two are
 *    then merged with the biweight functions. With h->use_weight == 2 the
 *    weights come from h->implicit_weight (weight1 = 64 - weight0, log2
 *    denominator 5, offset 0); otherwise the luma/chroma weight, offset and
 *    denominator tables of the context are used.
 *  - One reference: the partition is predicted with the "put" operators and
 *    then scaled in place by luma_weight_op, and by chroma_weight_op when
 *    h->use_weight_chroma is set.
 * NOTE(review): the if/else lines selecting between the paths are not
 * visible in this excerpt.
 */
2492 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2493 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2494 int x_offset, int y_offset,
2495 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2496 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2497 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2498 int list0, int list1){
2499 MpegEncContext * const s = &h->s;
2501 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2502 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2503 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2504 x_offset += 8*s->mb_x;
2505 y_offset += 8*(s->mb_y >> MB_MBAFF);
2508 /* don't optimize for luma-only case, since B-frames usually
2509 * use implicit weights => chroma too. */
2510 uint8_t *tmp_cb = s->obmc_scratchpad;
2511 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2512 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2513 int refn0 = h->ref_cache[0][ scan8[n] ];
2514 int refn1 = h->ref_cache[1][ scan8[n] ];
2516 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2517 dest_y, dest_cb, dest_cr,
2518 x_offset, y_offset, qpix_put, chroma_put);
2519 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2520 tmp_y, tmp_cb, tmp_cr,
2521 x_offset, y_offset, qpix_put, chroma_put);
2523 if(h->use_weight == 2){
2524 int weight0 = h->implicit_weight[refn0][refn1];
2525 int weight1 = 64 - weight0;
2526 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2527 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2528 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2530 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2531 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2532 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2533 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2534 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2535 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2536 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2537 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2538 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* single-reference path: list 1 if list1 is set, else list 0 */
2541 int list = list1 ? 1 : 0;
2542 int refn = h->ref_cache[list][ scan8[n] ];
2543 Picture *ref= &h->ref_list[list][refn];
2544 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2545 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2546 qpix_put, chroma_put);
2548 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2549 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2550 if(h->use_weight_chroma){
2551 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2552 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2553 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2554 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Chooses between weighted and unweighted motion compensation for one
 * partition.
 *
 * mc_part_weighted() is used when h->use_weight is 1, or when it is 2, both
 * lists are used and the implicit weight for the two cached references is
 * not 32. In that case entries 0 and 3 of weight_op / weight_avg are passed
 * as the luma and chroma functions. mc_part_std() is the other branch; the
 * else line itself is not visible in this excerpt.
 */
2559 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2560 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2561 int x_offset, int y_offset,
2562 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2563 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2564 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2565 int list0, int list1){
2566 if((h->use_weight==2 && list0 && list1
2567 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2568 || h->use_weight==1)
2569 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2570 x_offset, y_offset, qpix_put, chroma_put,
2571 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2573 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2574 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture of the given list.
 *
 * The position is derived from the first cached motion vector (scan8[0])
 * and the current macroblock coordinates. One prefetch targets the luma
 * plane and one the chroma planes, the latter using the distance between
 * data[2] and data[1] as its stride argument.
 */
2577 static inline void prefetch_motion(H264Context *h, int list){
2578 /* fetch pixels for estimated mv 4 macroblocks ahead
2579 * optimized for 64byte cache lines */
2580 MpegEncContext * const s = &h->s;
2581 const int refn = h->ref_cache[list][scan8[0]];
2583 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2584 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2585 uint8_t **src= h->ref_list[list][refn].data;
2586 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2587 s->dsp.prefetch(src[0]+off, s->linesize, 4);
2588 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2589 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for the current inter macroblock.
 *
 * Dispatches on the macroblock partitioning (16x16, 16x8, 8x16, or 8x8 with
 * per-sub-block 8x8 / 8x4 / 4x8 / 4x4 splits) and calls mc_part() once per
 * partition. Each call gets the function-table entries, offsets and
 * weight-table slots matching that partition size. prefetch_motion() is
 * called for list 0 before and for list 1 after the compensation.
 */
2593 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2594 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2595 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2596 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2597 MpegEncContext * const s = &h->s;
2598 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2599 const int mb_type= s->current_picture.mb_type[mb_xy];
2601 assert(IS_INTER(mb_type));
2603 prefetch_motion(h, 0);
2605 if(IS_16X16(mb_type)){
2606 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2607 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2608 &weight_op[0], &weight_avg[0],
2609 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2610 }else if(IS_16X8(mb_type)){
/* two 16x8 halves: the second starts at block 8 with y_offset 4 */
2611 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2612 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2613 &weight_op[1], &weight_avg[1],
2614 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2615 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2616 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2617 &weight_op[1], &weight_avg[1],
2618 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2619 }else if(IS_8X16(mb_type)){
/* two 8x16 halves: the second starts at block 4 with x_offset 4 */
2620 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2621 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2622 &weight_op[2], &weight_avg[2],
2623 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2624 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2625 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2626 &weight_op[2], &weight_avg[2],
2627 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2631 assert(IS_8X8(mb_type));
/* per 8x8 sub-block i: offsets are (i&1)<<2 horizontally, (i&2)<<1 vertically */
2634 const int sub_mb_type= h->sub_mb_type[i];
2636 int x_offset= (i&1)<<2;
2637 int y_offset= (i&2)<<1;
2639 if(IS_SUB_8X8(sub_mb_type)){
2640 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2641 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2642 &weight_op[3], &weight_avg[3],
2643 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2644 }else if(IS_SUB_8X4(sub_mb_type)){
2645 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2646 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2647 &weight_op[4], &weight_avg[4],
2648 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2649 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2650 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2651 &weight_op[4], &weight_avg[4],
2652 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2653 }else if(IS_SUB_4X8(sub_mb_type)){
2654 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2655 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2656 &weight_op[5], &weight_avg[5],
2657 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2658 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2659 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2660 &weight_op[5], &weight_avg[5],
2661 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2664 assert(IS_SUB_4X4(sub_mb_type));
/* four 4x4 blocks j inside the sub-block */
2666 int sub_x_offset= x_offset + 2*(j&1);
2667 int sub_y_offset= y_offset + (j&2);
2668 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2669 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2670 &weight_op[6], &weight_avg[6],
2671 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2677 prefetch_motion(h, 1);
/**
 * Builds the file-static VLC tables used for residual decoding: the
 * coefficient-token tables (luma and chroma DC), the total-zeros tables
 * (luma and chroma DC), the run tables and run7_vlc, each from the
 * corresponding *_len / *_bits arrays.
 * NOTE(review): `done` is a static flag, presumably guarding one-time
 * initialisation; the guard itself is not visible in this excerpt.
 */
2680 static void decode_init_vlc(void){
2681 static int done = 0;
2687 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2688 &chroma_dc_coeff_token_len [0], 1, 1,
2689 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2692 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2693 &coeff_token_len [i][0], 1, 1,
2694 &coeff_token_bits[i][0], 1, 1, 1);
2698 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2699 &chroma_dc_total_zeros_len [i][0], 1, 1,
2700 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2702 for(i=0; i<15; i++){
2703 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2704 &total_zeros_len [i][0], 1, 1,
2705 &total_zeros_bits[i][0], 1, 1, 1);
2709 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2710 &run_len [i][0], 1, 1,
2711 &run_bits[i][0], 1, 1, 1);
/* run7_vlc is built from row 6 of the run tables, with 16 codes */
2713 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2714 &run_len [6][0], 1, 1,
2715 &run_bits[6][0], 1, 1, 1);
2720 * Sets the intra prediction function pointers.
/* Fills the four intra prediction function tables of the context (pred4x4,
 * pred8x8l, pred8x8, pred16x16) with the C implementations, indexed by
 * prediction mode. The pred16x16 table is indexed with the same *_PRED8x8
 * mode constants as the pred8x8 table. */
2722 static void init_pred_ptrs(H264Context *h){
2723 // MpegEncContext * const s = &h->s;
2725 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2726 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2727 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2728 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2729 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2730 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2731 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2732 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2733 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2734 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2735 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2736 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2738 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2739 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2740 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2741 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2742 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2743 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2744 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2745 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2746 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2747 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2748 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2749 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2751 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
2752 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
2753 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
2754 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
2755 h->pred8x8[LEFT_DC_PRED8x8]= ff_pred8x8_left_dc_c;
2756 h->pred8x8[TOP_DC_PRED8x8 ]= ff_pred8x8_top_dc_c;
2757 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
2759 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
2760 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
2761 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
2762 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
2763 h->pred16x16[LEFT_DC_PRED8x8]= ff_pred16x16_left_dc_c;
2764 h->pred16x16[TOP_DC_PRED8x8 ]= ff_pred16x16_top_dc_c;
2765 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
/**
 * Releases the per-context tables: the macroblock tables, both top border
 * buffers, the mb2b index maps, the obmc scratchpad and all stored SPS/PPS
 * buffers. Everything goes through av_freep(). slice_table points into
 * slice_table_base, so it is only reset to NULL rather than freed.
 */
2768 static void free_tables(H264Context *h){
2770 av_freep(&h->intra4x4_pred_mode);
2771 av_freep(&h->chroma_pred_mode_table);
2772 av_freep(&h->cbp_table);
2773 av_freep(&h->mvd_table[0]);
2774 av_freep(&h->mvd_table[1]);
2775 av_freep(&h->direct_table);
2776 av_freep(&h->non_zero_count);
2777 av_freep(&h->slice_table_base);
2778 av_freep(&h->top_borders[1]);
2779 av_freep(&h->top_borders[0]);
2780 h->slice_table= NULL;
2782 av_freep(&h->mb2b_xy);
2783 av_freep(&h->mb2b8_xy);
2785 av_freep(&h->s.obmc_scratchpad);
2787 for(i = 0; i < MAX_SPS_COUNT; i++)
2788 av_freep(h->sps_buffers + i);
2790 for(i = 0; i < MAX_PPS_COUNT; i++)
2791 av_freep(h->pps_buffers + i);
/**
 * Builds the 8x8 dequantisation tables for all 52 QP values from the PPS
 * 8x8 scaling matrices.
 *
 * If the second scaling matrix equals the first, dequant8_coeff[1] is
 * pointed at buffer 0 instead of being computed again. Each entry is
 * dequant8_coeff_init[rem6][...] * scaling_matrix8[i][x], shifted left by
 * div6 of the QP. When the DSP's 8x8 IDCT is not the C reference
 * implementation, the coefficient position is stored transposed.
 */
2794 static void init_dequant8_coeff_table(H264Context *h){
2796 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2797 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2798 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2800 for(i=0; i<2; i++ ){
2801 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2802 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2806 for(q=0; q<52; q++){
2807 int shift = ff_div6[q];
2808 int idx = ff_rem6[q];
2810 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2811 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2812 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Builds the 4x4 dequantisation tables for all 52 QP values from the six
 * PPS 4x4 scaling matrices.
 *
 * A matrix identical to one compared against it (index j) reuses that
 * matrix's buffer. Each entry is dequant4_coeff_init[rem6][...] *
 * scaling_matrix4[i][x], shifted left by div6 of the QP plus 2. When the
 * DSP's 4x4 IDCT is not the C reference implementation, the coefficient
 * position is stored transposed.
 */
2817 static void init_dequant4_coeff_table(H264Context *h){
2819 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2820 for(i=0; i<6; i++ ){
2821 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2823 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2824 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2831 for(q=0; q<52; q++){
2832 int shift = ff_div6[q] + 2;
2833 int idx = ff_rem6[q];
2835 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2836 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2837 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)builds all dequantisation tables for the active PPS: always the 4x4
 * tables, and the 8x8 tables only when the PPS enables the 8x8 transform.
 * With SPS transform bypass, the QP 0 entries are overwritten with 1<<6
 * (again for 8x8 only when the 8x8 transform is enabled).
 */
2842 static void init_dequant_tables(H264Context *h){
2844 init_dequant4_coeff_table(h);
2845 if(h->pps.transform_8x8_mode)
2846 init_dequant8_coeff_table(h);
2847 if(h->sps.transform_bypass){
2850 h->dequant4_coeff[i][0][x] = 1<<6;
2851 if(h->pps.transform_8x8_mode)
2854 h->dequant8_coeff[i][0][x] = 1<<6;
2861 * needs width/height
/* Allocates the zero-initialised per-macroblock tables, sized from
 * s->mb_stride, s->mb_height and s->mb_width. big_mb_num covers one
 * macroblock row more than the picture. The CABAC-only tables are allocated
 * only when h->pps.cabac is set. Also fills the mb2b_xy / mb2b8_xy maps,
 * clears the scratchpad pointer and builds the dequant tables on first use.
 * NOTE(review): the failure path of CHECKED_ALLOCZ and the return statements
 * are not visible in this excerpt. */
2863 static int alloc_tables(H264Context *h){
2864 MpegEncContext * const s = &h->s;
2865 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2868 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2870 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2871 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2872 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2873 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2874 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2876 if( h->pps.cabac ) {
2877 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2878 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2879 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2880 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* every slice_table entry starts as 0xFF; slice_table itself begins two
 * macroblock rows plus one entry into the base buffer, so small negative
 * indices stay inside the allocation */
2883 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2884 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2886 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2887 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map each macroblock index to the index of its first 4x4 block (4 per
 * macroblock per axis) and first 8x8 block (2 per macroblock per axis) */
2888 for(y=0; y<s->mb_height; y++){
2889 for(x=0; x<s->mb_width; x++){
2890 const int mb_xy= x + y*s->mb_stride;
2891 const int b_xy = 4*x + 4*y*h->b_stride;
2892 const int b8_xy= 2*x + 2*y*h->b8_stride;
2894 h->mb2b_xy [mb_xy]= b_xy;
2895 h->mb2b8_xy[mb_xy]= b8_xy;
2899 s->obmc_scratchpad = NULL;
2901 if(!h->dequant4_coeff[0])
2902 init_dequant_tables(h);
/**
 * Context initialisation: copies width, height and codec id from the
 * AVCodecContext, sets dequant_coeff_pps to -1, enables unrestricted motion
 * vectors and decoding, and fills every entry of the PPS 4x4 and 8x8
 * scaling matrices with 16.
 */
2910 static void common_init(H264Context *h){
2911 MpegEncContext * const s = &h->s;
2913 s->width = s->avctx->width;
2914 s->height = s->avctx->height;
2915 s->codec_id= s->avctx->codec->id;
2919 h->dequant_coeff_pps= -1;
2920 s->unrestricted_mv=1;
2921 s->decode=1; //FIXME
2923 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2924 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * Decoder init callback. The visible part applies the MpegEncContext
 * decoding defaults, selects the H.264 output format, copies the
 * workaround_bugs setting, enables quarter-sample motion and sets the pixel
 * format to YUV420P. It then tests whether the extradata is non-empty and
 * starts with byte 1.
 * NOTE(review): what that extradata branch does, and the return value, are
 * not visible in this excerpt.
 */
2927 static int decode_init(AVCodecContext *avctx){
2928 H264Context *h= avctx->priv_data;
2929 MpegEncContext * const s = &h->s;
2931 MPV_decode_defaults(s);
2936 s->out_format = FMT_H264;
2937 s->workaround_bugs= avctx->workaround_bugs;
2940 // s->decode_mb= ff_h263_decode_mb;
2941 s->quarter_sample = 1;
2943 avctx->pix_fmt= PIX_FMT_YUV420P;
2947 if(avctx->extradata_size > 0 && avctx->extradata &&
2948 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the frame in the MpegEncContext and the error
 * resilience code, then computes h->block_offset[] from scan8[] and the
 * current line sizes. Entries 0..23 use a row step of 4 lines and entries
 * 24..47 a row step of 8 lines, for luma (linesize) and chroma (uvlinesize).
 * Also allocates the scratchpad on first use and resets the slice table.
 * NOTE(review): the av_malloc() result for obmc_scratchpad is not checked in
 * the visible code -- confirm whether callers tolerate a NULL scratchpad.
 */
2958 static int frame_start(H264Context *h){
2959 MpegEncContext * const s = &h->s;
2962 if(MPV_frame_start(s, s->avctx) < 0)
2964 ff_er_frame_start(s);
2966 assert(s->linesize && s->uvlinesize);
2968 for(i=0; i<16; i++){
2969 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2970 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2973 h->block_offset[16+i]=
2974 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2975 h->block_offset[24+16+i]=
2976 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2979 /* can't be in alloc_tables because linesize isn't known there.
2980 * FIXME: redo bipred weight to not require extra buffer? */
2981 if(!s->obmc_scratchpad)
2982 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2984 /* some macroblocks will be accessed before they're available */
2986 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2988 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the right column and bottom row of the current macroblock so they
 * stay available after the picture data has been modified.
 *
 * The right-most luma column goes to h->left_border[1..16] and the bottom
 * luma row to h->top_borders[0][mb_x][0..15]. left_border[0] is first taken
 * from the previously saved top border. Chroma is handled the same way
 * (left_border[17..] and [26..], top border bytes 16..31) unless grayscale
 * decoding skips it.
 */
2992 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2993 MpegEncContext * const s = &h->s;
2997 src_cb -= uvlinesize;
2998 src_cr -= uvlinesize;
3000 // There are two lines saved, the line above the top macroblock of a pair,
3001 // and the line above the bottom macroblock
3002 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3003 for(i=1; i<17; i++){
3004 h->left_border[i]= src_y[15+i* linesize];
/* bottom luma row, copied as two 64-bit words */
3007 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3008 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3010 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
3011 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3012 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3014 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3015 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3017 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3018 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchanges (or copies, depending on the xchg argument of XCHG) the saved
 * border pixels in h->left_border / h->top_borders with the pixels directly
 * left of and above the current macroblock in the picture.
 *
 * deblock_left / deblock_top say whether the left / top neighbour takes
 * part. With h->deblocking_filter == 2 a neighbour counts only if it has the
 * same slice_table entry as the current macroblock; in the other visible
 * branch it counts whenever the macroblock is not in the first column / row.
 * Chroma is skipped only when `simple` is 0 and grayscale decoding is
 * enabled.
 */
3022 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
3023 MpegEncContext * const s = &h->s;
3030 if(h->deblocking_filter == 2) {
3031 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
3032 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
3033 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
3035 deblock_left = (s->mb_x > 0);
3036 deblock_top = (s->mb_y > 0);
/* step back one row and one column so index 0 addresses the top-left
 * neighbour pixel */
3039 src_y -= linesize + 1;
3040 src_cb -= uvlinesize + 1;
3041 src_cr -= uvlinesize + 1;
3043 #define XCHG(a,b,t,xchg)\
3050 for(i = !deblock_top; i<17; i++){
3051 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3056 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3057 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3058 if(s->mb_x+1 < s->mb_width){
3059 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3063 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
3065 for(i = !deblock_top; i<9; i++){
3066 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3067 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3071 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3072 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Saves border samples of a macroblock pair into h->left_border and
 * h->top_borders[0..1] for the current s->mb_x: the right-most column of the
 * pair and its last two rows, for luma and (unless gray decoding is active)
 * both chroma planes.
 * hl_decode_mb_internal() calls it right before the pair is deblocked.
 * @param src_y  luma pointer to the top-left sample of the pair
 * @param src_cb Cb pointer to the top-left sample of the pair
 * @param src_cr Cr pointer to the top-left sample of the pair
 */
3077 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3078 MpegEncContext * const s = &h->s;
// move two rows up so that row index 2 below is the first row of the pair
3081 src_y -= 2 * linesize;
3082 src_cb -= 2 * uvlinesize;
3083 src_cr -= 2 * uvlinesize;
3085 // There are two lines saved, the line above the top macroblock of a pair,
3086 // and the line above the bottom macroblock
// entries 0 and 1 take the last luma sample of the previously saved top rows, before those rows are overwritten below
3087 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3088 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// entries 2..33: right-most luma column (x=15) of the 32 rows of the pair
3089 for(i=2; i<34; i++){
3090 h->left_border[i]= src_y[15+i* linesize];
// last two luma rows of the pair (rows 32 and 33 relative to the shifted pointer)
3093 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3094 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3095 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3096 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
// chroma: Cb uses left_border[34..51], Cr uses left_border[52..69]
3098 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
3099 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3100 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3101 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3102 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
// right-most chroma column (x=7) of the 16 chroma rows of the pair
3103 for(i=2; i<18; i++){
3104 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3105 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
// last two chroma rows of the pair, Cb at byte offset 16 and Cr at 24
3107 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3108 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3109 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3110 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * Macroblock-pair counterpart of xchg_mb_border(): runs the XCHG macro between
 * h->left_border / h->top_borders[0..1] and the picture samples one column to
 * the left of and two rows above the pair addressed by src_y/src_cb/src_cr.
 * NOTE(review): the body of the XCHG macro is not visible in this excerpt;
 * its last argument presumably selects a two-way swap versus a one-way copy.
 * @param xchg forwarded as last XCHG argument for the left column and the
 *             first 8 top luma samples; all other top samples always use 1
 */
3114 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3115 MpegEncContext * const s = &h->s;
// neighbours are taken as available whenever the pair is not on the left / top picture edge
3118 int deblock_left = (s->mb_x > 0);
3119 int deblock_top = (s->mb_y > 1);
3121 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// step back two rows and one column
3123 src_y -= 2 * linesize + 1;
3124 src_cb -= 2 * uvlinesize + 1;
3125 src_cr -= 2 * uvlinesize + 1;
// in the loops below indices 0 and 1 address the two rows above the pair; both are skipped when !deblock_top
3127 #define XCHG(a,b,t,xchg)\
3134 for(i = (!deblock_top)<<1; i<34; i++){
3135 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// the two luma rows above the pair, 16 samples each, handled as 64-bit words
3140 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3141 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3142 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3143 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// plus the first 8 luma samples of both rows above the pair to the right, when there is one
3144 if(s->mb_x+1 < s->mb_width){
3145 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3146 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
// chroma: Cb uses left_border[34..51], Cr uses left_border[52..69]
3150 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
3152 for(i = (!deblock_top) << 1; i<18; i++){
3153 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3154 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
// the two chroma rows above the pair, Cb at byte offset 16 and Cr at 24
3158 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3159 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3160 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3161 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstructs the macroblock at (s->mb_x, s->mb_y) into s->current_picture:
 * intra prediction or motion compensation, then adding the residual stored
 * in h->mb, then deblocking when h->deblocking_filter is set.
 * @param simple nonzero selects the fast path: every "!simple &&" test below
 *               (field / MBAFF handling, intra PCM) is compiled out, chroma
 *               is always decoded and the codec is treated as H.264
 *               (see is_h264)
 */
3166 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
3167 MpegEncContext * const s = &h->s;
3168 const int mb_x= s->mb_x;
3169 const int mb_y= s->mb_y;
3170 const int mb_xy= mb_x + mb_y*s->mb_stride;
3171 const int mb_type= s->current_picture.mb_type[mb_xy];
3172 uint8_t *dest_y, *dest_cb, *dest_cr;
3173 int linesize, uvlinesize /*dct_offset*/;
3175 int *block_offset = &h->block_offset[0];
// bottom: nonzero for odd macroblock rows
3176 const unsigned int bottom = mb_y & 1;
3177 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
3178 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3179 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma)
3181 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3182 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3183 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3185 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3186 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: doubled line sizes and the block offsets starting at index 24
3188 if (!simple && MB_FIELD) {
3189 linesize = h->mb_linesize = s->linesize * 2;
3190 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3191 block_offset = &h->block_offset[24];
// odd mb_y: rewind to the second picture line of the macroblock row above
3192 if(mb_y&1){ //FIXME move out of this func?
3193 dest_y -= s->linesize*15;
3194 dest_cb-= s->uvlinesize*7;
3195 dest_cr-= s->uvlinesize*7;
// rewrite the cached reference indices as (16+ref) XOR (mb_y&1) for every list this macroblock uses
3199 for(list=0; list<h->list_count; list++){
3200 if(!USES_LIST(mb_type, list))
3202 if(IS_16X16(mb_type)){
3203 int8_t *ref = &h->ref_cache[list][scan8[0]];
3204 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3206 for(i=0; i<16; i+=4){
3207 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3208 int ref = h->ref_cache[list][scan8[i]];
3210 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// otherwise: plain picture line sizes
3216 linesize = h->mb_linesize = s->linesize;
3217 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3218 // dct_offset = s->linesize * 16;
// pick the residual-add functions: plain pixel add for transform bypass, else the 8x8 or 4x4 IDCT
3221 if(transform_bypass){
3223 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3224 }else if(IS_8x8DCT(mb_type)){
3225 idct_dc_add = s->dsp.h264_idct8_dc_add;
3226 idct_add = s->dsp.h264_idct8_add;
3228 idct_dc_add = s->dsp.h264_idct_dc_add;
3229 idct_add = s->dsp.h264_idct_add;
// MBAFF intra macroblock with deblocking: exchange the pair borders first, unless the
// macroblock above an odd-row macroblock is intra as well
3232 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3233 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3234 int mbt_y = mb_y&~1;
3235 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3236 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3237 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3238 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// intra PCM: the samples are copied straight from h->mb, 4x4 block by 4x4 block
3241 if (!simple && IS_INTRA_PCM(mb_type)) {
3244 // The pixels are stored in h->mb array in the same order as levels,
3245 // copy them in output in the correct order.
3246 for(i=0; i<16; i++) {
3247 for (y=0; y<4; y++) {
3248 for (x=0; x<4; x++) {
3249 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
// blocks 16..19 are written to Cb
3253 for(i=16; i<16+4; i++) {
3254 for (y=0; y<4; y++) {
3255 for (x=0; x<4; x++) {
3256 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// blocks 20..23 are written to Cr
3260 for(i=20; i<20+4; i++) {
3261 for (y=0; y<4; y++) {
3262 for (x=0; x<4; x++) {
3263 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// intra macroblock: exchange in the samples that backup_mb_border() saved before deblocking
3268 if(IS_INTRA(mb_type)){
3269 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3270 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
// chroma intra prediction
3272 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
3273 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3274 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
// luma intra prediction done block by block, each block's residual added right after its prediction
3277 if(IS_INTRA4x4(mb_type)){
3278 if(simple || !s->encoding){
// four 8x8 blocks when the 8x8 transform is used
3279 if(IS_8x8DCT(mb_type)){
3280 for(i=0; i<16; i+=4){
3281 uint8_t * const ptr= dest_y + block_offset[i];
3282 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3283 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3284 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3285 (h->topright_samples_available<<i)&0x4000, linesize);
// a single nonzero coefficient that is the DC takes the DC-only add
3287 if(nnz == 1 && h->mb[i*16])
3288 idct_dc_add(ptr, h->mb + i*16, linesize);
3290 idct_add(ptr, h->mb + i*16, linesize);
// sixteen 4x4 blocks
3294 for(i=0; i<16; i++){
3295 uint8_t * const ptr= dest_y + block_offset[i];
3297 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these two modes read top-right samples; when unavailable, replicate the last top sample four times
3300 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3301 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3302 assert(mb_y || linesize <= block_offset[i]);
3303 if(!topright_avail){
3304 tr= ptr[3 - linesize]*0x01010101;
3305 topright= (uint8_t*) &tr;
3307 topright= ptr + 4 - linesize;
3311 h->pred4x4[ dir ](ptr, topright, linesize);
3312 nnz = h->non_zero_count_cache[ scan8[i] ];
3315 if(nnz == 1 && h->mb[i*16])
3316 idct_dc_add(ptr, h->mb + i*16, linesize);
3318 idct_add(ptr, h->mb + i*16, linesize);
// NOTE(review): presumably the non-H.264 branch; the selecting condition is not visible in this excerpt
3320 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// whole-macroblock 16x16 luma prediction, followed by dequantisation of the luma DC coefficients
3325 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3327 if(!transform_bypass)
3328 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3330 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// put the border samples back (xchg=0) now that intra prediction is done
3332 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3333 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// motion compensated prediction
3335 hl_motion(h, dest_y, dest_cb, dest_cr,
3336 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3337 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3338 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// luma residual for everything except intra 4x4, which was handled above
3342 if(!IS_INTRA4x4(mb_type)){
3344 if(IS_INTRA16x16(mb_type)){
3345 for(i=0; i<16; i++){
3346 if(h->non_zero_count_cache[ scan8[i] ])
3347 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3348 else if(h->mb[i*16])
3349 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// step through the 16 block indices by 4 for the 8x8 transform, by 1 otherwise
3352 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3353 for(i=0; i<16; i+=di){
3354 int nnz = h->non_zero_count_cache[ scan8[i] ];
3356 if(nnz==1 && h->mb[i*16])
3357 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3359 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// NOTE(review): presumably the non-H.264 luma residual path; the selecting condition is not visible in this excerpt
3364 for(i=0; i<16; i++){
3365 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3366 uint8_t * const ptr= dest_y + block_offset[i];
3367 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual: blocks 16..19 go to dest[0] (Cb), blocks 20..23 to dest[1] (Cr)
3373 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
3374 uint8_t *dest[2] = {dest_cb, dest_cr};
3375 if(transform_bypass){
3376 idct_add = idct_dc_add = s->dsp.add_pixels4;
3378 idct_add = s->dsp.h264_idct_add;
3379 idct_dc_add = s->dsp.h264_idct_dc_add;
3380 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
3381 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
3384 for(i=16; i<16+8; i++){
3385 if(h->non_zero_count_cache[ scan8[i] ])
3386 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3387 else if(h->mb[i*16])
3388 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
// NOTE(review): presumably the non-H.264 chroma residual path; the selecting condition is not visible in this excerpt
3391 for(i=16; i<16+8; i++){
3392 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3393 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3394 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking
3400 if(h->deblocking_filter) {
// MBAFF: nothing happens on even rows; on odd rows both macroblocks of the pair are filtered
3401 if (!simple && FRAME_MBAFF) {
3402 //FIXME try deblocking one mb at a time?
3403 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
// from here on mb_y / mb_xy are redeclared to refer to the row above the current macroblock
3404 const int mb_y = s->mb_y - 1;
3405 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3406 const int mb_xy= mb_x + mb_y*s->mb_stride;
3407 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3408 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3409 if (!bottom) return;
3410 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3411 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3412 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// if either macroblock is intra, put the exchanged pair borders back (xchg=0)
3414 if(IS_INTRA(mb_type_top | mb_type_bottom))
3415 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
// save the pair's border samples before they get filtered
3417 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// filter the upper macroblock of the pair with its own caches and chroma qp
3421 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3422 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3423 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
3424 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
3425 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
// then the lower macroblock of the pair
3428 tprintf(h->s.avctx, "call mbaff filter_mb\n");
3429 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3430 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3431 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3432 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// non-MBAFF: save the unfiltered border samples, then filter this macroblock
3434 tprintf(h->s.avctx, "call filter_mb\n");
3435 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
3436 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3437 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3443 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Instantiates hl_decode_mb_internal() with simple=1, so its "!simple" branches are not taken.
3445 static void hl_decode_mb_simple(H264Context *h){
3446 hl_decode_mb_internal(h, 1);
3450 * Process a macroblock; this handles edge cases, such as interlacing.
// Instantiates hl_decode_mb_internal() with simple=0; marked av_noinline.
3452 static void av_noinline hl_decode_mb_complex(H264Context *h){
3453 hl_decode_mb_internal(h, 0);
/**
 * Decodes the current macroblock through either hl_decode_mb_complex() or
 * hl_decode_mb_simple(), depending on is_complex.
 * NOTE(review): the statement that tests is_complex is not visible in this
 * excerpt.
 */
3456 static void hl_decode_mb(H264Context *h){
3457 MpegEncContext * const s = &h->s;
3458 const int mb_x= s->mb_x;
3459 const int mb_y= s->mb_y;
3460 const int mb_xy= mb_x + mb_y*s->mb_stride;
3461 const int mb_type= s->current_picture.mb_type[mb_xy];
// complex = MBAFF frame, field macroblock, intra PCM, a codec other than H.264, gray-only decoding or encoding
3462 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
3468 hl_decode_mb_complex(h);
3469 else hl_decode_mb_simple(h);
3473 * fills the default_ref_list.
/**
 * Builds h->default_ref_list[] from h->short_ref[] and h->long_ref[].
 * For B slices the short term pictures are first ordered by POC into
 * sorted_short_ref[]; for other slices they are taken in h->short_ref[] order.
 * Only pictures whose reference field equals 3 are entered; unused tail
 * entries up to h->ref_count[] are zeroed.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
3475 static int fill_default_ref_list(H264Context *h){
3476 MpegEncContext * const s = &h->s;
// index into sorted_short_ref of the first picture with poc >= current poc, -1 while none was found
3478 int smallest_poc_greater_than_current = -1;
3479 Picture sorted_short_ref[32];
3481 if(h->slice_type==B_TYPE){
3485 /* sort frame according to poc in B slice */
// each pass picks the picture with the smallest poc above limit
// NOTE(review): how limit advances between passes is not visible in this excerpt
3486 for(out_i=0; out_i<h->short_ref_count; out_i++){
3488 int best_poc=INT_MAX;
3490 for(i=0; i<h->short_ref_count; i++){
3491 const int poc= h->short_ref[i]->poc;
3492 if(poc > limit && poc < best_poc){
3498 assert(best_i != INT_MIN);
3501 sorted_short_ref[out_i]= *h->short_ref[best_i];
3502 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3503 if (-1 == smallest_poc_greater_than_current) {
3504 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3505 smallest_poc_greater_than_current = out_i;
3511 if(s->picture_structure == PICT_FRAME){
3512 if(h->slice_type==B_TYPE){
3514 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3516 // find the largest poc
3517 for(list=0; list<2; list++){
3520 int step= list ? -1 : 1;
// walk sorted_short_ref starting next to smallest_poc_greater_than_current; the walking
// direction is re-seeded inside the while loop whenever j leaves the valid range
// NOTE(review): part of that while body is not visible in this excerpt
3522 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3523 while(j<0 || j>= h->short_ref_count){
3524 if(j != -99 && step == (list ? -1 : 1))
3527 j= smallest_poc_greater_than_current + (step>>1);
3529 if(sorted_short_ref[j].reference != 3) continue;
3530 h->default_ref_list[list][index ]= sorted_short_ref[j];
3531 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// long term pictures follow, with their long_ref[] index as pic_id
3534 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3535 if(h->long_ref[i] == NULL) continue;
3536 if(h->long_ref[i]->reference != 3) continue;
3538 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3539 h->default_ref_list[ list ][index++].pic_id= i;;
3542 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3543 // swap the two first elements of L1 when
3544 // L0 and L1 are identical
3545 Picture temp= h->default_ref_list[1][0];
3546 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3547 h->default_ref_list[1][1] = temp;
// zero the entries that could not be filled
3550 if(index < h->ref_count[ list ])
3551 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// non-B slices: short term pictures in h->short_ref[] order, then long term pictures
3555 for(i=0; i<h->short_ref_count; i++){
3556 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3557 h->default_ref_list[0][index ]= *h->short_ref[i];
3558 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3560 for(i = 0; i < 16; i++){
3561 if(h->long_ref[i] == NULL) continue;
3562 if(h->long_ref[i]->reference != 3) continue;
3563 h->default_ref_list[0][index ]= *h->long_ref[i];
3564 h->default_ref_list[0][index++].pic_id= i;;
3566 if(index < h->ref_count[0])
3567 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3570 if(h->slice_type==B_TYPE){
3572 //FIXME second field balh
// trace output of the resulting lists
3576 for (i=0; i<h->ref_count[0]; i++) {
3577 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3579 if(h->slice_type==B_TYPE){
3580 for (i=0; i<h->ref_count[1]; i++) {
// NOTE(review): the last argument reads default_ref_list[0][i].data[0] although this is the List1 trace - looks like a copy-paste slip, confirm
3581 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3588 static void print_short_term(H264Context *h);
3589 static void print_long_term(H264Context *h);
/**
 * Initialises h->ref_list[] from h->default_ref_list[] and applies the
 * reordering commands read from s->gb, one list at a time.
 * Returns 0 right away for I and SI slices.
 * NOTE(review): the error returns following the av_log() calls and the final
 * return are not visible in this excerpt.
 */
3591 static int decode_ref_pic_list_reordering(H264Context *h){
3592 MpegEncContext * const s = &h->s;
3595 print_short_term(h);
3597 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3599 for(list=0; list<h->list_count; list++){
3600 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// one flag bit per list tells whether reordering commands follow
3602 if(get_bits1(&s->gb)){
// pred: running picture number prediction, starts at the current picture number
3603 int pred= h->curr_pic_num;
3605 for(index=0; ; index++){
3606 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3607 unsigned int pic_id;
3609 Picture *ref = NULL;
// idc 3 is tested first, before the overflow check on index
3611 if(reordering_of_pic_nums_idc==3)
3614 if(index >= h->ref_count[list]){
3615 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3619 if(reordering_of_pic_nums_idc<3){
// idc 0 / 1: short term picture addressed by a difference relative to pred
3620 if(reordering_of_pic_nums_idc<2){
3621 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3623 if(abs_diff_pic_num >= h->max_pic_num){
3624 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
// idc 0 subtracts, idc 1 adds; the mask assumes max_pic_num is a power of two
3628 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3629 else pred+= abs_diff_pic_num;
3630 pred &= h->max_pic_num - 1;
// search the short term list from its last entry for frame_num == pred
3632 for(i= h->short_ref_count-1; i>=0; i--){
3633 ref = h->short_ref[i];
3634 assert(ref->reference == 3);
3635 assert(!ref->long_ref);
3636 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3640 ref->pic_id= ref->frame_num;
// idc 2: long term picture addressed directly by its index
3642 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3644 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3647 ref = h->long_ref[pic_id];
3649 ref->pic_id= pic_id;
3650 assert(ref->reference == 3);
3651 assert(ref->long_ref);
// picture not found: the slot is cleared
3659 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3660 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// look for the same picture at or after index, shift the entries before it down by one
// and store the picture at index
3662 for(i=index; i+1<h->ref_count[list]; i++){
3663 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3666 for(; i > index; i--){
3667 h->ref_list[list][i]= h->ref_list[list][i-1];
3669 h->ref_list[list][index]= *ref;
3672 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// entries without picture data are replaced by the current picture
3678 for(list=0; list<h->list_count; list++){
3679 for(index= 0; index < h->ref_count[list]; index++){
3680 if(!h->ref_list[list][index].data[0])
3681 h->ref_list[list][index]= s->current_picture;
// the distance scale factors are only computed for B slices that do not use spatial direct prediction
3685 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3686 direct_dist_scale_factor(h);
3687 direct_ref_list_init(h);
/**
 * For every entry i of h->ref_list[list] fills two additional entries at
 * indices 16+2*i and 16+2*i+1 with doubled line sizes, the second one with
 * its data pointers advanced by one line of the frame, and duplicates the
 * explicit and implicit weights for these new indices.
 * NOTE(review): the statement that initialises field[0] from frame is not
 * visible in this excerpt.
 */
3691 static void fill_mbaff_ref_list(H264Context *h){
3693 for(list=0; list<2; list++){ //FIXME try list_count
3694 for(i=0; i<h->ref_count[list]; i++){
3695 Picture *frame = &h->ref_list[list][i];
3696 Picture *field = &h->ref_list[list][16+2*i];
// doubled stride: consecutive rows of the new entry are every other line of the frame
3699 field[0].linesize[j] <<= 1;
3700 field[1] = field[0];
// the second entry starts one frame line further down
3702 field[1].data[j] += frame->linesize[j];
// both new entries inherit the weights and offsets of entry i
3704 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3705 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3707 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3708 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// implicit weights: extend each row to the new second-index entries, then copy the row to the new first-index entries
3712 for(j=0; j<h->ref_count[1]; j++){
3713 for(i=0; i<h->ref_count[0]; i++)
3714 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3715 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3716 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Reads the explicit weighted prediction table from s->gb into
 * h->luma_weight / h->luma_offset / h->chroma_weight / h->chroma_offset.
 * Entries without a weight flag get the default weight 1<<log2_denom and
 * offset 0. List 1 is only read for B slices.
 * NOTE(review): the return statement and the statement executed when a luma
 * weight differs from the default are not visible in this excerpt.
 */
3720 static int pred_weight_table(H264Context *h){
3721 MpegEncContext * const s = &h->s;
3723 int luma_def, chroma_def;
3726 h->use_weight_chroma= 0;
3727 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3728 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// default weights, equal to 1.0 in fixed point with the denominators just read
3729 luma_def = 1<<h->luma_log2_weight_denom;
3730 chroma_def = 1<<h->chroma_log2_weight_denom;
3732 for(list=0; list<2; list++){
3733 for(i=0; i<h->ref_count[list]; i++){
3734 int luma_weight_flag, chroma_weight_flag;
3736 luma_weight_flag= get_bits1(&s->gb);
3737 if(luma_weight_flag){
3738 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3739 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3740 if( h->luma_weight[list][i] != luma_def
3741 || h->luma_offset[list][i] != 0)
// no luma flag: defaults
3744 h->luma_weight[list][i]= luma_def;
3745 h->luma_offset[list][i]= 0;
3748 chroma_weight_flag= get_bits1(&s->gb);
3749 if(chroma_weight_flag){
3752 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3753 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
// any non-default chroma weight or offset switches chroma weighting on
3754 if( h->chroma_weight[list][i][j] != chroma_def
3755 || h->chroma_offset[list][i][j] != 0)
3756 h->use_weight_chroma= 1;
// no chroma flag: defaults
3761 h->chroma_weight[list][i][j]= chroma_def;
3762 h->chroma_offset[list][i][j]= 0;
3766 if(h->slice_type != B_TYPE) break;
3768 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Fills h->implicit_weight[ref0][ref1] from the POC distances between the
 * current picture and every pair of list 0 / list 1 references.
 * NOTE(review): several lines (early return, the td test) are not visible in
 * this excerpt, so the conditions of the two "= 32" fallbacks are partly
 * inferred.
 */
3772 static void implicit_weight_table(H264Context *h){
3773 MpegEncContext * const s = &h->s;
3775 int cur_poc = s->current_picture_ptr->poc;
// one reference per list and the current POC exactly in the middle: chroma weighting is switched off
3777 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3778 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3780 h->use_weight_chroma= 0;
// otherwise implicit weighting with a fixed log2 denominator of 5
3785 h->use_weight_chroma= 2;
3786 h->luma_log2_weight_denom= 5;
3787 h->chroma_log2_weight_denom= 5;
3789 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3790 int poc0 = h->ref_list[0][ref0].poc;
3791 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3792 int poc1 = h->ref_list[1][ref1].poc;
// td: POC distance between the two references, tb: distance from ref0 to the current picture, both clipped to [-128,127]
3793 int td = av_clip(poc1 - poc0, -128, 127);
3795 int tb = av_clip(cur_poc - poc0, -128, 127);
3796 int tx = (16384 + (FFABS(td) >> 1)) / td;
3797 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// scale factors outside [-64,128] fall back to the equal weight 32
3798 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3799 h->implicit_weight[ref0][ref1] = 32;
3801 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3803 h->implicit_weight[ref0][ref1] = 32;
/**
 * Checks whether pic is h->delayed_output_pic or one of the entries of the
 * NULL-terminated h->delayed_pic[] array.
 * NOTE(review): the statements that act on the outcome of these checks are
 * not visible in this excerpt.
 */
3808 static inline void unreference_pic(H264Context *h, Picture *pic){
3811 if(pic == h->delayed_output_pic)
3814 for(i = 0; h->delayed_pic[i]; i++)
3815 if(pic == h->delayed_pic[i]){
3823 * instantaneous decoder refresh.
// Empties both reference lists: every long term and short term picture is passed to
// unreference_pic(), its slot is set to NULL and the two counters are reset to 0.
3825 static void idr(H264Context *h){
3828 for(i=0; i<16; i++){
3829 if (h->long_ref[i] != NULL) {
3830 unreference_pic(h, h->long_ref[i]);
3831 h->long_ref[i]= NULL;
3834 h->long_ref_count=0;
3836 for(i=0; i<h->short_ref_count; i++){
3837 unreference_pic(h, h->short_ref[i]);
3838 h->short_ref[i]= NULL;
3840 h->short_ref_count=0;
3843 /* forget old pics after a seek */
3844 static void flush_dpb(AVCodecContext *avctx){
3845 H264Context *h= avctx->priv_data;
// clear the reference field of every delayed picture and empty the delayed_pic[] array
3847 for(i=0; i<16; i++) {
3848 if(h->delayed_pic[i])
3849 h->delayed_pic[i]->reference= 0;
3850 h->delayed_pic[i]= NULL;
// same for the pending output picture
3852 if(h->delayed_output_pic)
3853 h->delayed_output_pic->reference= 0;
3854 h->delayed_output_pic= NULL;
// the current picture keeps its pointer but loses its reference marking
3856 if(h->s.current_picture_ptr)
3857 h->s.current_picture_ptr->reference= 0;
3862 * @return the removed picture or NULL if an error occurs
// Searches h->short_ref[] for the picture whose frame_num matches, removes it from the
// array by moving the following entries down one slot and decrements h->short_ref_count.
// NOTE(review): the return statements are not visible in this excerpt.
3864 static Picture * remove_short(H264Context *h, int frame_num){
3865 MpegEncContext * const s = &h->s;
3868 if(s->avctx->debug&FF_DEBUG_MMCO)
3869 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3871 for(i=0; i<h->short_ref_count; i++){
3872 Picture *pic= h->short_ref[i];
3873 if(s->avctx->debug&FF_DEBUG_MMCO)
3874 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3875 if(pic->frame_num == frame_num){
3876 h->short_ref[i]= NULL;
// close the gap; memmove because source and destination overlap
3877 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3878 h->short_ref_count--;
3887 * @return the removed picture or NULL if an error occurs
// Clears slot i of h->long_ref[] and decrements h->long_ref_count when the slot was occupied.
// NOTE(review): the return statement is not visible in this excerpt.
3889 static Picture * remove_long(H264Context *h, int i){
3892 pic= h->long_ref[i];
3893 h->long_ref[i]= NULL;
3894 if(pic) h->long_ref_count--;
3900 * print short term list
// Logs index, frame_num, poc and data[0] of every short term reference at debug level;
// does nothing unless FF_DEBUG_MMCO is set in avctx->debug.
3902 static void print_short_term(H264Context *h) {
3904 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3905 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3906 for(i=0; i<h->short_ref_count; i++){
3907 Picture *pic= h->short_ref[i];
3908 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3914 * print long term list
// Logs index, frame_num, poc and data[0] of long term references at debug level, walking
// all 16 slots; does nothing unless FF_DEBUG_MMCO is set in avctx->debug.
// NOTE(review): the check that skips empty slots is not visible in this excerpt.
3916 static void print_long_term(H264Context *h) {
3918 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3919 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3920 for(i = 0; i < 16; i++){
3921 Picture *pic= h->long_ref[i];
3923 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3930 * Executes the reference picture marking (memory management control operations).
// Applies mmco_count memory management control operations from mmco[] to the short term
// and long term reference lists; unless the current picture became a long term reference
// it is afterwards inserted at the head of h->short_ref[].
// NOTE(review): several case labels, break statements and the return are not visible in
// this excerpt.
3932 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3933 MpegEncContext * const s = &h->s;
3935 int current_is_long=0;
3938 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3939 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3941 for(i=0; i<mmco_count; i++){
3942 if(s->avctx->debug&FF_DEBUG_MMCO)
// NOTE(review): this log reads h->mmco[i] while the switch below uses the mmco parameter - confirm both always refer to the same array
3943 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3945 switch(mmco[i].opcode){
// drop a short term picture
3946 case MMCO_SHORT2UNUSED:
3947 pic= remove_short(h, mmco[i].short_frame_num);
3949 unreference_pic(h, pic);
3950 else if(s->avctx->debug&FF_DEBUG_MMCO)
3951 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// move a short term picture into long term slot long_index, evicting the previous occupant
3953 case MMCO_SHORT2LONG:
3954 pic= remove_long(h, mmco[i].long_index);
3955 if(pic) unreference_pic(h, pic);
3957 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3958 if (h->long_ref[ mmco[i].long_index ]){
3959 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3960 h->long_ref_count++;
// drop a long term picture
3963 case MMCO_LONG2UNUSED:
3964 pic= remove_long(h, mmco[i].long_index);
3966 unreference_pic(h, pic);
3967 else if(s->avctx->debug&FF_DEBUG_MMCO)
3968 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// store the current picture in long term slot long_index, evicting the previous occupant
// NOTE(review): the case label for this branch is not visible in this excerpt
3971 pic= remove_long(h, mmco[i].long_index);
3972 if(pic) unreference_pic(h, pic);
3974 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3975 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3976 h->long_ref_count++;
3980 case MMCO_SET_MAX_LONG:
3981 assert(mmco[i].long_index <= 16);
3982 // just remove the long term which index is greater than new max
3983 for(j = mmco[i].long_index; j<16; j++){
3984 pic = remove_long(h, j);
3985 if (pic) unreference_pic(h, pic);
// remove every short term and every long term picture
// NOTE(review): the case label for this branch is not visible in this excerpt
3989 while(h->short_ref_count){
3990 pic= remove_short(h, h->short_ref[0]->frame_num);
3991 if(pic) unreference_pic(h, pic);
3993 for(j = 0; j < 16; j++) {
3994 pic= remove_long(h, j);
3995 if(pic) unreference_pic(h, pic);
// a short term entry with the frame_num of the current picture is removed before the current picture is added
4002 if(!current_is_long){
4003 pic= remove_short(h, s->current_picture_ptr->frame_num);
4005 unreference_pic(h, pic);
4006 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// shift the list up by one and put the current picture in front
4009 if(h->short_ref_count)
4010 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4012 h->short_ref[0]= s->current_picture_ptr;
4013 h->short_ref[0]->long_ref=0;
4014 h->short_ref_count++;
4017 print_short_term(h);
// Parses the reference picture marking syntax from gb into h->mmco[]. For IDR slices two
// flag bits are read; otherwise either an explicit list of up to MAX_MMCO_COUNT operations
// is read, or - when the reference lists are full - a single MMCO_SHORT2UNUSED for the
// last short term picture is generated.
// NOTE(review): the return statements and the place where the operation count is stored
// are not visible in this excerpt.
4022 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
4023 MpegEncContext * const s = &h->s;
4026 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// each flag bit is mapped to 0 (bit set) or -1 (bit clear)
4027 s->broken_link= get_bits1(gb) -1;
4028 h->mmco[0].long_index= get_bits1(gb) - 1; // current_long_term_idx
4029 if(h->mmco[0].long_index == -1)
4032 h->mmco[0].opcode= MMCO_LONG;
4036 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
4037 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4038 MMCOOpcode opcode= get_ue_golomb(gb);
4040 h->mmco[i].opcode= opcode;
4041 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
4042 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4043 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4044 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4048 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4049 unsigned int long_index= get_ue_golomb(gb);
4050 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
4051 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4054 h->mmco[i].long_index= long_index;
// opcodes above MMCO_LONG are rejected
4057 if(opcode > (unsigned)MMCO_LONG){
4058 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4061 if(opcode == MMCO_END)
4066 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// no explicit operations and the lists are full: schedule removal of the last short term entry
4068 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4069 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4070 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Computes the picture order count of the current picture for the three values of
// h->sps.poc_type and stores it in s->current_picture_ptr (field_poc[] and, for frame
// pictures, poc as the smaller of the two field values).
// NOTE(review): several assignments (e.g. field_poc[0] for poc_type 0, the poc_type 2
// result) and the return are not visible in this excerpt.
4080 static int init_poc(H264Context *h){
4081 MpegEncContext * const s = &h->s;
4082 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset: reset at IDR, advanced by max_frame_num whenever frame_num went backwards
4085 if(h->nal_unit_type == NAL_IDR_SLICE){
4086 h->frame_num_offset= 0;
4088 if(h->frame_num < h->prev_frame_num)
4089 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4091 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: poc = poc_msb + poc_lsb, with poc_msb adjusted when poc_lsb jumped by at least half its range
4094 if(h->sps.poc_type==0){
4095 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4097 if(h->nal_unit_type == NAL_IDR_SLICE){
4102 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4103 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4104 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4105 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4107 h->poc_msb = h->prev_poc_msb;
4108 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4110 field_poc[1] = h->poc_msb + h->poc_lsb;
4111 if(s->picture_structure == PICT_FRAME)
4112 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: an expected poc is derived from the frame number and the per-cycle offsets in the SPS
4113 }else if(h->sps.poc_type==1){
4114 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4117 if(h->sps.poc_cycle_length != 0)
4118 abs_frame_num = h->frame_num_offset + h->frame_num;
4122 if(h->nal_ref_idc==0 && abs_frame_num > 0)
// sum of all offsets of one cycle
4125 expected_delta_per_poc_cycle = 0;
4126 for(i=0; i < h->sps.poc_cycle_length; i++)
4127 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// whole cycles plus the offsets of the partially completed cycle
4129 if(abs_frame_num > 0){
4130 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4131 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4133 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4134 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4135 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4139 if(h->nal_ref_idc == 0)
4140 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4142 field_poc[0] = expectedpoc + h->delta_poc[0];
4143 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4145 if(s->picture_structure == PICT_FRAME)
4146 field_poc[1] += h->delta_poc[1];
// remaining poc_type: poc is twice the absolute frame number, minus one when nal_ref_idc is 0
4149 if(h->nal_unit_type == NAL_IDR_SLICE){
4152 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4153 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// store the field values that belong to the picture structure being decoded
4159 if(s->picture_structure != PICT_BOTTOM_FIELD)
4160 s->current_picture_ptr->field_poc[0]= field_poc[0];
4161 if(s->picture_structure != PICT_TOP_FIELD)
4162 s->current_picture_ptr->field_poc[1]= field_poc[1];
4163 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4164 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4171 * initialize scan tables
// Fills the per-context 4x4 and 8x8 scan tables (zigzag and field variants,
// plus the CAVLC 8x8 variants) and sets the _q0 scan pointers.
4173 static void init_scan_tables(H264Context *h){
4174 MpegEncContext * const s = &h->s;
// 4x4 tables: copied verbatim when the C IDCT (ff_h264_idct_add_c) is the
// active implementation; in the other branch every entry is remapped by T().
4176 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4177 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4178 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4180 for(i=0; i<16; i++){
// T swaps the two 2-bit halves of a 4-bit index, i.e. it turns position
// 4*row+col into 4*col+row (a transposed 4x4 position).
4181 #define T(x) (x>>2) | ((x<<2) & 0xF)
4182 h->zigzag_scan[i] = T(zigzag_scan[i]);
4183 h-> field_scan[i] = T( field_scan[i]);
// 8x8 tables: same scheme, keyed on the 8x8 C IDCT (ff_h264_idct8_add_c).
4187 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4188 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4189 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4190 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4191 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4193 for(i=0; i<64; i++){
// T swaps the two 3-bit halves of a 6-bit index, i.e. it turns position
// 8*row+col into 8*col+row (a transposed 8x8 position).
4194 #define T(x) (x>>3) | ((x&7)<<3)
4195 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4196 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4197 h->field_scan8x8[i] = T(field_scan8x8[i]);
4198 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// The _q0 pointers are the tables used when qscale is 0 (see the scan selection
// in decode_mb_cavlc). With sps.transform_bypass they point at the unmodified
// static tables; otherwise they alias the per-context tables filled above.
4202 if(h->sps.transform_bypass){ //FIXME same ugly
4203 h->zigzag_scan_q0 = zigzag_scan;
4204 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4205 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4206 h->field_scan_q0 = field_scan;
4207 h->field_scan8x8_q0 = field_scan8x8;
4208 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4210 h->zigzag_scan_q0 = h->zigzag_scan;
4211 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4212 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4213 h->field_scan_q0 = h->field_scan;
4214 h->field_scan8x8_q0 = h->field_scan8x8;
4215 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4219 * decodes a slice header.
4220 * this will also call MPV_common_init() and frame_start() as needed
// Parses one slice header from s->gb into h and s: slice type, active PPS/SPS,
// picture dimensions, frame number, POC inputs, reference counts, reference
// list setup, weighting tables, QP and deblocking parameters.
// Errors found while parsing are reported through av_log.
4222 static int decode_slice_header(H264Context *h){
4223 MpegEncContext * const s = &h->s;
4224 unsigned int first_mb_in_slice;
4225 unsigned int pps_id;
4226 int num_ref_idx_active_override_flag;
// Maps the coded slice type (after folding into the range 0..4) to the
// internal picture type constants.
4227 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4228 unsigned int slice_type, tmp;
4229 int default_ref_list_done = 0;
// A picture is a reference (and not dropable) exactly when nal_ref_idc != 0.
4231 s->current_picture.reference= h->nal_ref_idc != 0;
4232 s->dropable= h->nal_ref_idc == 0;
4234 first_mb_in_slice= get_ue_golomb(&s->gb);
// In chunked input mode a slice starting at macroblock 0 begins a new picture.
4236 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
4238 s->current_picture_ptr= NULL;
4241 slice_type= get_ue_golomb(&s->gb);
// NOTE(review): this message prints h->slice_type, which still holds the value
// from an earlier slice (it is only assigned further down), not the local
// slice_type that was just read - confirm whether the local was intended.
4243 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4248 h->slice_type_fixed=1;
4250 h->slice_type_fixed=0;
4252 slice_type= slice_type_map[ slice_type ];
// The default reference list does not need rebuilding for I slices, nor for a
// non-first slice whose type equals that of the previous slice.
4253 if (slice_type == I_TYPE
4254 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4255 default_ref_list_done = 1;
4257 h->slice_type= slice_type;
4259 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// Activate the referenced PPS and, through it, the SPS (both copied by value).
4261 pps_id= get_ue_golomb(&s->gb);
4262 if(pps_id>=MAX_PPS_COUNT){
4263 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4266 if(!h->pps_buffers[pps_id]) {
4267 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4270 h->pps= *h->pps_buffers[pps_id];
4272 if(!h->sps_buffers[h->pps.sps_id]) {
4273 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4276 h->sps = *h->sps_buffers[h->pps.sps_id];
// Dequantisation tables are rebuilt only when the active PPS id changes.
4278 if(h->dequant_coeff_pps != pps_id){
4279 h->dequant_coeff_pps = pps_id;
4280 init_dequant_tables(h);
// Picture size in macroblocks; the height is doubled when the SPS is not
// frame_mbs_only (sps.mb_height is then scaled by 2 - frame_mbs_only_flag).
4283 s->mb_width= h->sps.mb_width;
4284 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4286 h->b_stride= s->mb_width*4;
4287 h->b8_stride= s->mb_width*2;
// Pixel dimensions after cropping; crop values count 2 pixels horizontally and
// 2 (frame_mbs_only) or 4 pixels vertically per unit.
4289 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4290 if(h->sps.frame_mbs_only_flag)
4291 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4293 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
// A size change on an already initialised context is detected here.
4295 if (s->context_initialized
4296 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
// First-time (or post-reset) initialisation of the common context, scan tables
// and the stream properties exported through avctx.
4300 if (!s->context_initialized) {
4301 if (MPV_common_init(s) < 0)
4304 init_scan_tables(h);
4307 s->avctx->width = s->width;
4308 s->avctx->height = s->height;
4309 s->avctx->sample_aspect_ratio= h->sps.sar;
4310 if(!s->avctx->sample_aspect_ratio.den)
4311 s->avctx->sample_aspect_ratio.den = 1;
// time_base is num_units_in_tick*2 / time_scale; for x264 builds below 44 the
// denominator is doubled as well.
4313 if(h->sps.timing_info_present_flag){
4314 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4315 if(h->x264_build > 0 && h->x264_build < 44)
4316 s->avctx->time_base.den *= 2;
4317 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4318 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
// The first slice of a picture starts a new frame.
4322 if(h->slice_num == 0){
4323 if(frame_start(h) < 0)
4327 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4328 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// Picture structure: always a frame for frame_mbs_only streams; otherwise a
// field_pic_flag selects top/bottom field (logged as not implemented), and for
// frames mb_aff_frame follows sps.mb_aff.
4331 h->mb_aff_frame = 0;
4332 if(h->sps.frame_mbs_only_flag){
4333 s->picture_structure= PICT_FRAME;
4335 if(get_bits1(&s->gb)) { //field_pic_flag
4336 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4337 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4339 s->picture_structure= PICT_FRAME;
4340 h->mb_aff_frame = h->sps.mb_aff;
// Validate the slice start address (scaled by 2 for MBAFF frames) and convert
// it to macroblock coordinates.
4343 assert(s->mb_num == s->mb_width * s->mb_height);
4344 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
4345 first_mb_in_slice >= s->mb_num){
4346 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4349 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4350 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4351 assert(s->mb_y < s->mb_height);
// Picture numbering: frames use frame_num directly; the other branch uses
// twice the frame number and twice the range.
4353 if(s->picture_structure==PICT_FRAME){
4354 h->curr_pic_num= h->frame_num;
4355 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4357 h->curr_pic_num= 2*h->frame_num;
4358 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4361 if(h->nal_unit_type == NAL_IDR_SLICE){
4362 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC inputs; which syntax elements are present depends on sps.poc_type and
// pps.pic_order_present. They are consumed later by init_poc.
4365 if(h->sps.poc_type==0){
4366 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4368 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4369 h->delta_poc_bottom= get_se_golomb(&s->gb);
4373 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4374 h->delta_poc[0]= get_se_golomb(&s->gb);
4376 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4377 h->delta_poc[1]= get_se_golomb(&s->gb);
4382 if(h->pps.redundant_pic_cnt_present){
4383 h->redundant_pic_count= get_ue_golomb(&s->gb);
4386 //set defaults, might be overridden a few lines later
4387 h->ref_count[0]= h->pps.ref_count[0];
4388 h->ref_count[1]= h->pps.ref_count[1];
// Inter slices may override the PPS reference counts.
4390 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4391 if(h->slice_type == B_TYPE){
4392 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4393 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4394 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4396 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4398 if(num_ref_idx_active_override_flag){
4399 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4400 if(h->slice_type==B_TYPE)
4401 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Counts above 32 are rejected and both counts reset to 1.
// NOTE(review): the "-1" on both sides presumably relies on unsigned
// wrap-around to reject a count of 0 as well - confirm the type of ref_count.
4403 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4404 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4405 h->ref_count[0]= h->ref_count[1]= 1;
4409 if(h->slice_type == B_TYPE)
// Reference list construction: default list (unless already valid), then the
// reordering commands from the bitstream.
4416 if(!default_ref_list_done){
4417 fill_default_ref_list(h);
4420 if(decode_ref_pic_list_reordering(h) < 0)
// Explicit weights for P/SP slices with weighted_pred or B slices with
// weighted_bipred_idc 1; implicit weights for B slices with idc 2.
4423 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4424 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4425 pred_weight_table(h);
4426 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4427 implicit_weight_table(h);
// Reference marking commands are only parsed for reference pictures.
4431 if(s->current_picture.reference)
4432 decode_ref_pic_marking(h, &s->gb);
4435 fill_mbaff_ref_list(h);
// cabac_init_idc is only present for non-intra slices of CABAC streams.
4437 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4438 tmp = get_ue_golomb(&s->gb);
4440 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4443 h->cabac_init_idc= tmp;
// Slice QP is the PPS initial QP plus a signed delta from the slice header.
4446 h->last_qscale_diff = 0;
4447 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4449 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4453 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4454 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4455 //FIXME qscale / qp ... stuff
// SP/SI specific syntax elements are read and discarded.
4456 if(h->slice_type == SP_TYPE){
4457 get_bits1(&s->gb); /* sp_for_switch_flag */
4459 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4460 get_se_golomb(&s->gb); /* slice_qs_delta */
// Deblocking defaults: enabled with zero offsets, overridden below when the
// PPS signals that the parameters are present.
4463 h->deblocking_filter = 1;
4464 h->slice_alpha_c0_offset = 0;
4465 h->slice_beta_offset = 0;
4466 if( h->pps.deblocking_filter_parameters_present ) {
4467 tmp= get_ue_golomb(&s->gb);
4469 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
// Coded values 0 and 1 are swapped so that a non-zero h->deblocking_filter
// means the filter is active (see the test just below).
4472 h->deblocking_filter= tmp;
4473 if(h->deblocking_filter < 2)
4474 h->deblocking_filter^= 1; // 1<->0
// Offsets are stored as twice the coded signed value.
4476 if( h->deblocking_filter ) {
4477 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4478 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// The user-selected skip_loop_filter level can force the filter off for all,
// non-intra-slice, B-slice or non-reference pictures.
4481 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4482 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4483 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4484 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4485 h->deblocking_filter= 0;
// NOTE(review): the next two lines are not valid C ("?" as an argument);
// presumably they sit inside a disabled preprocessor region - confirm.
4488 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4489 slice_group_change_cycle= get_bits(&s->gb, ?);
// Edge emulation margins: 0 when CODEC_FLAG_EMU_EDGE is set, else 16; the
// height margin is additionally 0 for MBAFF frames.
4494 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4495 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
// Optional one-line debug summary of the parsed slice header.
4497 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4498 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4500 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4502 av_get_pict_type_char(h->slice_type),
4503 pps_id, h->frame_num,
4504 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4505 h->ref_count[0], h->ref_count[1],
4507 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4509 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// With CODEC_FLAG2_FAST, non-reference pictures use the 2-tap qpel function
// tables; everything else uses the regular h264 qpel tables.
4513 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4514 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4515 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4517 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4518 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
// Reads a level prefix from gb using the bit-reader macros: log is computed
// from the position of the highest set bit in the cached bits, and exactly log
// bits are then consumed from the stream.
4527 static inline int get_level_prefix(GetBitContext *gb){
4531 OPEN_READER(re, gb);
4532 UPDATE_CACHE(re, gb);
4533 buf=GET_CACHE(re, gb);
// NOTE(review): assumes av_log2 returns the index of the highest set bit, so
// log is the number of leading zero bits plus one - confirm against av_log2.
4535 log= 32 - av_log2(buf);
// Debug output: the consumed bit pattern and log-1 (printed under the "lpr" label).
4537 print_bin(buf>>(32-log), log);
4538 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4541 LAST_SKIP_BITS(re, gb, log);
4542 CLOSE_READER(re, gb);
// Inspects h->sub_mb_type[i]: the condition below matches a sub-macroblock that
// is not 8x8, or one that is direct while sps.direct_8x8_inference_flag is off.
// NOTE(review): presumably a match means the 8x8 transform is not allowed for
// this macroblock - the result handling is not visible here, confirm.
4547 static inline int get_dct8x8_allowed(H264Context *h){
4550 if(!IS_SUB_8X8(h->sub_mb_type[i])
4551 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4558 * decodes a residual block.
4559 * @param n block index
4560 * @param scantable scantable
4561 * @param max_coeff number of coefficients in the block
4562 * @return <0 if an error occurred
// CAVLC residual decoding for one block: reads coeff_token, the trailing-one
// signs, the remaining levels, total_zeros and the run_before values from gb,
// and writes the coefficients into block[] at positions given by scantable.
4564 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4565 MpegEncContext * const s = &h->s;
// Maps a predicted non-zero count (0..16) to one of the four coeff_token VLCs.
4566 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4568 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4570 //FIXME put trailing_onex into the context
// coeff_token packs total_coeff in the upper bits and trailing_ones in the low
// two bits. Chroma DC has its own VLC; other blocks pick the VLC from the
// predicted non-zero count (total_coeff is reused as a temporary for it).
4572 if(n == CHROMA_DC_BLOCK_INDEX){
4573 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4574 total_coeff= coeff_token>>2;
4576 if(n == LUMA_DC_BLOCK_INDEX){
4577 total_coeff= pred_non_zero_count(h, 0);
4578 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4579 total_coeff= coeff_token>>2;
4581 total_coeff= pred_non_zero_count(h, n);
4582 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4583 total_coeff= coeff_token>>2;
// Only regular blocks record their count in the non-zero-count cache.
4584 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4588 //FIXME set last_non_zero?
// More coefficients than the block can hold means a corrupted stream.
4592 if(total_coeff > (unsigned)max_coeff) {
4593 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4597 trailing_ones= coeff_token&3;
4598 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4599 assert(total_coeff<=16);
// Trailing ones carry only a sign bit: bit 0 gives +1, bit 1 gives -1.
4601 for(i=0; i<trailing_ones; i++){
4602 level[i]= 1 - 2*get_bits1(gb);
4606 int level_code, mask;
// Initial suffix_length is 1 only for blocks with more than 10 coefficients
// and fewer than 3 trailing ones, else 0.
4607 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4608 int prefix= get_level_prefix(gb);
4610 //first coefficient has suffix_length equal to 0 or 1
4611 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4613 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4615 level_code= (prefix<<suffix_length); //part
4616 }else if(prefix==14){
4618 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4620 level_code= prefix + get_bits(gb, 4); //part
// Prefix 15 is the escape code with a 12-bit suffix.
4621 }else if(prefix==15){
4622 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4623 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4625 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// With fewer than 3 trailing ones the first level is offset by 2 in level_code.
4629 if(trailing_ones < 3) level_code += 2;
// Branch-free sign handling: mask is -1 for odd level_code, so the magnitude
// (2+level_code)>>1 is negated for odd codes and kept for even ones.
4634 mask= -(level_code&1);
4635 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4638 //remaining coefficients have suffix_length > 0
4639 for(;i<total_coeff;i++) {
// Thresholds on level_code, indexed by the current suffix_length.
4640 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4641 prefix = get_level_prefix(gb);
4643 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4644 }else if(prefix==15){
4645 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4647 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4650 mask= -(level_code&1);
4651 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4652 if(level_code > suffix_limit[suffix_length])
// A completely full block cannot contain zeros; otherwise total_zeros is read
// from the VLC selected by total_coeff.
4657 if(total_coeff == max_coeff)
4660 if(n == CHROMA_DC_BLOCK_INDEX)
4661 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4663 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// coeff_num is the scan position of the first decoded (highest-frequency)
// coefficient; later ones step backwards by 1 + run_before.
4666 coeff_num = zeros_left + total_coeff - 1;
4667 j = scantable[coeff_num];
// First variant: levels are stored unscaled.
// NOTE(review): the condition selecting between the two variants is not
// visible here; decode_mb_cavlc passes qmul as NULL for chroma DC, so that
// call has to end up in this unscaled variant - confirm.
4669 block[j] = level[0];
4670 for(i=1;i<total_coeff;i++) {
4673 else if(zeros_left < 7){
4674 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4676 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4678 zeros_left -= run_before;
4679 coeff_num -= 1 + run_before;
4680 j= scantable[ coeff_num ];
// Second variant: levels are dequantised with qmul[j], rounded (+32) and
// shifted right by 6.
4685 block[j] = (level[0] * qmul[j] + 32)>>6;
4686 for(i=1;i<total_coeff;i++) {
4689 else if(zeros_left < 7){
4690 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4692 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4694 zeros_left -= run_before;
4695 coeff_num -= 1 + run_before;
4696 j= scantable[ coeff_num ];
4698 block[j]= (level[i] * qmul[j] + 32)>>6;
// The runs consumed more zeros than total_zeros announced.
4703 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Predicts the field decoding flag of the current macroblock from a neighbour
// belonging to the same slice: the macroblock at mb_xy-1 is preferred, then the
// one at mb_xy-mb_stride. Sets both h->mb_mbaff and h->mb_field_decoding_flag
// to 1 when the chosen mb_type is interlaced, else 0.
4710 static void predict_field_decoding_flag(H264Context *h){
4711 MpegEncContext * const s = &h->s;
4712 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// slice_table is compared with slice_num so that only macroblocks of the
// current slice are used as predictors.
4713 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4714 ? s->current_picture.mb_type[mb_xy-1]
4715 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4716 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4718 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4722 * decodes a P_SKIP or B_SKIP macroblock
// Handles a skipped macroblock: clears its non-zero counts, derives the motion
// data (direct prediction in B slices, P-skip prediction otherwise) and stores
// the per-macroblock bookkeeping (mb_type, qscale, slice_table, prev_mb_skipped).
4724 static void decode_mb_skip(H264Context *h){
4725 MpegEncContext * const s = &h->s;
4726 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// A skipped macroblock has no coded coefficients.
4729 memset(h->non_zero_count[mb_xy], 0, 16);
4730 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4733 mb_type|= MB_TYPE_INTERLACED;
// B slices: motion comes from direct prediction, which also finalises mb_type.
4735 if( h->slice_type == B_TYPE )
4737 // just for fill_caches. pred_direct_motion will set the real mb_type
4738 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4740 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4741 pred_direct_motion(h, &mb_type);
4742 mb_type|= MB_TYPE_SKIP;
// Other slices: one 16x16 list-0 partition with reference 0 and the motion
// vector returned by pred_pskip_motion, replicated over the whole macroblock.
4747 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4749 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4750 pred_pskip_motion(h, &mx, &my);
4751 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4752 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Publish the results to the picture-level arrays.
4755 write_back_motion(h, mb_type);
4756 s->current_picture.mb_type[mb_xy]= mb_type;
4757 s->current_picture.qscale_table[mb_xy]= s->qscale;
4758 h->slice_table[ mb_xy ]= h->slice_num;
4759 h->prev_mb_skipped= 1;
4763 * decodes a macroblock
4764 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Decodes one CAVLC-coded macroblock at (s->mb_x, s->mb_y): skip-run handling,
// macroblock type, intra prediction modes or motion data, coded block pattern,
// QP delta and finally the residual coefficients via decode_residual.
4766 static int decode_mb_cavlc(H264Context *h){
4767 MpegEncContext * const s = &h->s;
4768 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4769 int partition_count;
4770 unsigned int mb_type, cbp;
// Starts from the PPS setting; narrowed further down for sub-partitioned and
// direct macroblocks.
4771 int dct8x8_allowed= h->pps.transform_8x8_mode;
4773 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4775 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4776 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Non-intra slices: a pending skip run is read when mb_skip_run is -1 and is
// consumed one macroblock at a time.
4778 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4779 if(s->mb_skip_run==-1)
4780 s->mb_skip_run= get_ue_golomb(&s->gb);
4782 if (s->mb_skip_run--) {
// For the top macroblock of an MBAFF pair the field flag is read from the
// stream when the run ends with this macroblock, otherwise it is predicted.
4783 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4784 if(s->mb_skip_run==0)
4785 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4787 predict_field_decoding_flag(h);
// Field decoding flag of a coded macroblock: read for the top macroblock of a
// pair, otherwise derived from the picture structure.
4794 if( (s->mb_y&1) == 0 )
4795 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4797 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4799 h->prev_mb_skipped= 0;
// The coded mb_type is translated through the table matching the slice type
// (b_mb_type_info, p_mb_type_info or i_mb_type_info).
4801 mb_type= get_ue_golomb(&s->gb);
4802 if(h->slice_type == B_TYPE){
4804 partition_count= b_mb_type_info[mb_type].partition_count;
4805 mb_type= b_mb_type_info[mb_type].type;
4808 goto decode_intra_mb;
4810 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4812 partition_count= p_mb_type_info[mb_type].partition_count;
4813 mb_type= p_mb_type_info[mb_type].type;
4816 goto decode_intra_mb;
4819 assert(h->slice_type == I_TYPE);
4822 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra types carry their cbp and 16x16 prediction mode in the type table.
4826 cbp= i_mb_type_info[mb_type].cbp;
4827 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4828 mb_type= i_mb_type_info[mb_type].type;
4832 mb_type |= MB_TYPE_INTERLACED;
4834 h->slice_table[ mb_xy ]= h->slice_num;
// Intra PCM: raw 8-bit samples follow, byte aligned, and are stored straight
// into h->mb in coefficient order.
4836 if(IS_INTRA_PCM(mb_type)){
4839 // We assume these blocks are very rare so we do not optimize it.
4840 align_get_bits(&s->gb);
4842 // The pixels are stored in the same order as levels in h->mb array.
4843 for(y=0; y<16; y++){
4844 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4845 for(x=0; x<16; x++){
4846 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4847 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
// First chroma plane starts at offset 256 in h->mb.
4851 const int index= 256 + 4*(y&3) + 32*(y>>2);
4853 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4854 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
// Second chroma plane starts at offset 256 + 64.
4858 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4860 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4861 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4865 // In deblocking, the quantizer is 0
4866 s->current_picture.qscale_table[mb_xy]= 0;
4867 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4868 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4869 // All coeffs are present
4870 memset(h->non_zero_count[mb_xy], 16, 16);
4872 s->current_picture.mb_type[mb_xy]= mb_type;
// ref_count is doubled here and halved again at the end of this function.
// NOTE(review): presumably this only applies to field macroblocks of MBAFF
// frames - the guarding condition is not visible here, confirm.
4877 h->ref_count[0] <<= 1;
4878 h->ref_count[1] <<= 1;
4881 fill_caches(h, mb_type, 0);
// Intra macroblocks: prediction modes.
4884 if(IS_INTRA(mb_type)){
4886 // init_top_left_availability(h);
4887 if(IS_INTRA4x4(mb_type)){
// One flag selects the 8x8 transform when it is allowed.
4890 if(dct8x8_allowed && get_bits1(&s->gb)){
4891 mb_type |= MB_TYPE_8x8DCT;
4895 // fill_intra4x4_pred_table(h);
4896 for(i=0; i<16; i+=di){
4897 int mode= pred_intra_mode(h, i);
// A zero flag means the predicted mode is not used; the 3-bit rem_mode then
// encodes the mode while skipping over the predicted one.
4899 if(!get_bits1(&s->gb)){
4900 const int rem_mode= get_bits(&s->gb, 3);
4901 mode = rem_mode + (rem_mode >= mode);
4905 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4907 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4909 write_back_intra_pred_mode(h);
4910 if( check_intra4x4_pred_mode(h) < 0)
4913 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4914 if(h->intra16x16_pred_mode < 0)
// Chroma prediction mode, validated through check_intra_pred_mode.
4918 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4921 h->chroma_pred_mode= pred_mode;
// Four 8x8 partitions, each with its own sub-macroblock type.
4922 }else if(partition_count==4){
4923 int i, j, sub_partition_count[4], list, ref[2][4];
4925 if(h->slice_type == B_TYPE){
4927 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4928 if(h->sub_mb_type[i] >=13){
4929 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4932 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4933 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct sub-block triggers direct prediction for the macroblock.
4935 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4936 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4937 pred_direct_motion(h, &mb_type);
4938 h->ref_cache[0][scan8[4]] =
4939 h->ref_cache[1][scan8[4]] =
4940 h->ref_cache[0][scan8[12]] =
4941 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4944 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4946 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4947 if(h->sub_mb_type[i] >=4){
4948 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4951 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4952 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices for every non-direct sub-block that uses the list; REF0
// macroblock types are limited to a single reference.
4956 for(list=0; list<h->list_count; list++){
4957 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4959 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4960 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4961 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4963 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4975 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors: predicted per sub-partition and corrected by a coded delta.
4977 for(list=0; list<h->list_count; list++){
4979 if(IS_DIRECT(h->sub_mb_type[i])) {
4980 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4983 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4984 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4986 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4987 const int sub_mb_type= h->sub_mb_type[i];
4988 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4989 for(j=0; j<sub_partition_count[i]; j++){
4991 const int index= 4*i + block_width*j;
4992 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4993 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4994 mx += get_se_golomb(&s->gb);
4995 my += get_se_golomb(&s->gb);
4996 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// The vector is replicated over all 4x4 cache cells covered by the
// sub-partition shape (cache row stride is 8).
4998 if(IS_SUB_8X8(sub_mb_type)){
5000 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5002 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5003 }else if(IS_SUB_8X4(sub_mb_type)){
5004 mv_cache[ 1 ][0]= mx;
5005 mv_cache[ 1 ][1]= my;
5006 }else if(IS_SUB_4X8(sub_mb_type)){
5007 mv_cache[ 8 ][0]= mx;
5008 mv_cache[ 8 ][1]= my;
5010 mv_cache[ 0 ][0]= mx;
5011 mv_cache[ 0 ][1]= my;
5014 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// Whole-macroblock direct mode.
5020 }else if(IS_DIRECT(mb_type)){
5021 pred_direct_motion(h, &mb_type);
5022 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// 16x16, 16x8 and 8x16 partitions: for each list all reference indices are
// read first, then all motion vectors.
5024 int list, mx, my, i;
5025 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5026 if(IS_16X16(mb_type)){
5027 for(list=0; list<h->list_count; list++){
5029 if(IS_DIR(mb_type, 0, list)){
5030 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5031 if(val >= h->ref_count[list]){
5032 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5036 val= LIST_NOT_USED&0xFF;
5037 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5039 for(list=0; list<h->list_count; list++){
5041 if(IS_DIR(mb_type, 0, list)){
5042 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5043 mx += get_se_golomb(&s->gb);
5044 my += get_se_golomb(&s->gb);
5045 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5047 val= pack16to32(mx,my);
5050 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8: two partitions stacked vertically (cache offset 16*i, 4 wide, 2 high).
5053 else if(IS_16X8(mb_type)){
5054 for(list=0; list<h->list_count; list++){
5057 if(IS_DIR(mb_type, i, list)){
5058 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5059 if(val >= h->ref_count[list]){
5060 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5064 val= LIST_NOT_USED&0xFF;
5065 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5068 for(list=0; list<h->list_count; list++){
5071 if(IS_DIR(mb_type, i, list)){
5072 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5073 mx += get_se_golomb(&s->gb);
5074 my += get_se_golomb(&s->gb);
5075 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5077 val= pack16to32(mx,my);
5080 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: two partitions side by side (cache offset 2*i, 2 wide, 4 high).
5084 assert(IS_8X16(mb_type));
5085 for(list=0; list<h->list_count; list++){
5088 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5089 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5090 if(val >= h->ref_count[list]){
5091 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5095 val= LIST_NOT_USED&0xFF;
5096 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5099 for(list=0; list<h->list_count; list++){
5102 if(IS_DIR(mb_type, i, list)){
5103 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5104 mx += get_se_golomb(&s->gb);
5105 my += get_se_golomb(&s->gb);
5106 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5108 val= pack16to32(mx,my);
5111 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
5117 if(IS_INTER(mb_type))
5118 write_back_motion(h, mb_type);
// Coded block pattern: intra 16x16 got it from the type table above; all other
// types read a golomb code that is mapped through the intra or inter table.
5120 if(!IS_INTRA16x16(mb_type)){
5121 cbp= get_ue_golomb(&s->gb);
5123 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
5127 if(IS_INTRA4x4(mb_type))
5128 cbp= golomb_to_intra4x4_cbp[cbp];
5130 cbp= golomb_to_inter_cbp[cbp];
// Inter macroblocks with coded luma may switch to the 8x8 transform.
5134 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5135 if(get_bits1(&s->gb))
5136 mb_type |= MB_TYPE_8x8DCT;
5138 s->current_picture.mb_type[mb_xy]= mb_type;
// Residual data is present when cbp is non-zero or for intra 16x16.
5140 if(cbp || IS_INTRA16x16(mb_type)){
5141 int i8x8, i4x4, chroma_idx;
// Intra and inter residuals are read through separate bit readers.
5143 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5144 const uint8_t *scan, *scan8x8, *dc_scan;
5146 // fill_non_zero_count_cache(h);
// Scan order: field scans for interlaced macroblocks, zigzag otherwise; the
// _q0 variants are used when qscale is 0.
5148 if(IS_INTERLACED(mb_type)){
5149 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5150 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5151 dc_scan= luma_dc_field_scan;
5153 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5154 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5155 dc_scan= luma_dc_zigzag_scan;
// QP delta: must lie in -26..25; the updated qscale wraps around modulo 52.
5158 dquant= get_se_golomb(&s->gb);
5160 if( dquant > 25 || dquant < -26 ){
5161 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5165 s->qscale += dquant;
5166 if(((unsigned)s->qscale) > 51){
5167 if(s->qscale<0) s->qscale+= 52;
5168 else s->qscale-= 52;
5171 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
5172 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra 16x16: one luma DC block, then 15-coefficient AC blocks (scan + 1
// skips the DC position).
5173 if(IS_INTRA16x16(mb_type)){
5174 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5175 return -1; //FIXME continue if partitioned and other return -1 too
5178 assert((cbp&15) == 0 || (cbp&15) == 15);
5181 for(i8x8=0; i8x8<4; i8x8++){
5182 for(i4x4=0; i4x4<4; i4x4++){
5183 const int index= i4x4 + 4*i8x8;
5184 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5190 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other types: luma is coded per 8x8 block, gated by the low four cbp bits.
5193 for(i8x8=0; i8x8<4; i8x8++){
5194 if(cbp & (1<<i8x8)){
// 8x8 transform: the four 4x4 passes interleave into one 64-coefficient block
// and their non-zero counts are summed into the first cache cell.
5195 if(IS_8x8DCT(mb_type)){
5196 DCTELEM *buf = &h->mb[64*i8x8];
5198 for(i4x4=0; i4x4<4; i4x4++){
5199 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5200 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5203 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5204 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5206 for(i4x4=0; i4x4<4; i4x4++){
5207 const int index= i4x4 + 4*i8x8;
5209 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// Uncoded 8x8 block: its four non-zero counts are cleared.
5215 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5216 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC: one 4-coefficient block per plane, passed without a qmul table.
5222 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5223 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// Chroma AC: four 15-coefficient blocks per plane, dequantised with the
// plane-specific chroma QP.
5229 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5230 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
5231 for(i4x4=0; i4x4<4; i4x4++){
5232 const int index= 16 + 4*chroma_idx + i4x4;
5233 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// No chroma AC coded: clear the chroma non-zero counts.
5239 uint8_t * const nnz= &h->non_zero_count_cache[0];
5240 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5241 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: clear luma and chroma non-zero counts.
5244 uint8_t * const nnz= &h->non_zero_count_cache[0];
5245 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5246 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5247 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5249 s->current_picture.qscale_table[mb_xy]= s->qscale;
5250 write_back_non_zero_count(h);
// Undo the ref_count doubling done before fill_caches.
5253 h->ref_count[0] >>= 1;
5254 h->ref_count[1] >>= 1;
/**
 * Decodes the field decoding flag with CABAC.
 * Two neighbours are examined to pick the context: mba_xy (one column to the
 * left on the even-aligned row) and mbb_xy (same column, two rows above it).
 * Each is tested for belonging to the current slice and being interlaced.
 * @returns the bin decoded with cabac_state[70 + ctx]
 */
5260 static int decode_cabac_field_decoding_flag(H264Context *h) {
5261 MpegEncContext * const s = &h->s;
5262 const int mb_x = s->mb_x;
// clear bit 0 so mb_y is the even row at or above the current macroblock row
5263 const int mb_y = s->mb_y & ~1;
5264 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5265 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5267 unsigned int ctx = 0;
// left neighbour: same slice and interlaced
5269 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
// top neighbour: same slice and interlaced
5272 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5276 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra macroblock type with CABAC.
 * @param ctx_base    index into h->cabac_state of the first context to use
 * @param intra_slice used as an offset when selecting the later contexts
 *                    (state[2+intra_slice], state[3+intra_slice], state[3+2*intra_slice])
 * @returns 0 for I4x4, 25 for PCM, otherwise an I16x16 number built as
 *          1 + 12*(cbp_luma != 0) + chroma part (0, 4 or 8) + 2 more bins,
 *          i.e. a value in 1..24
 */
5279 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5280 uint8_t *state= &h->cabac_state[ctx_base];
5284 MpegEncContext * const s = &h->s;
5285 const int mba_xy = h->left_mb_xy[0];
5286 const int mbb_xy = h->top_mb_xy;
// the left/top neighbours count when they are in this slice and are not I4x4
5288 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5290 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5292 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5293 return 0; /* I4x4 */
// alternative path: first bin decoded with the fixed context state[0]
5296 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5297 return 0; /* I4x4 */
5300 if( get_cabac_terminate( &h->cabac ) )
5301 return 25; /* PCM */
5303 mb_type = 1; /* I16x16 */
5304 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5305 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5306 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
// the last two bins supply the low two bits of the I16x16 number
5307 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5308 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes the macroblock type number with CABAC, depending on h->slice_type.
 * I slices delegate to decode_cabac_intra_mb_type(h, 3, 1).
 * P slices use contexts 14..17 for the inter types and add 5 to the intra type.
 * B slices use contexts 27.. and add 23 to the intra type.
 * @returns the mb_type number for the current slice type
 */
5312 static int decode_cabac_mb_type( H264Context *h ) {
5313 MpegEncContext * const s = &h->s;
5315 if( h->slice_type == I_TYPE ) {
5316 return decode_cabac_intra_mb_type(h, 3, 1);
5317 } else if( h->slice_type == P_TYPE ) {
5318 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5320 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5321 /* P_L0_D16x16, P_8x8 */
5322 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5324 /* P_L0_D8x16, P_L0_D16x8 */
5325 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// intra macroblock inside a P slice: intra numbers are offset by 5
5328 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5330 } else if( h->slice_type == B_TYPE ) {
5331 const int mba_xy = h->left_mb_xy[0];
5332 const int mbb_xy = h->top_mb_xy;
// the left/top neighbours count when they are in this slice and are not direct
5336 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5338 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5341 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5342 return 0; /* B_Direct_16x16 */
5344 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5345 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// collect four more bins, most significant first, into a 4-bit value
5348 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5349 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5350 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5351 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5353 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5354 else if( bits == 13 ) {
// intra macroblock inside a B slice: intra numbers are offset by 23
5355 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5356 } else if( bits == 14 )
5357 return 11; /* B_L1_L0_8x16 */
5358 else if( bits == 15 )
5359 return 22; /* B_8x8 */
// remaining values take one extra bin as the new least significant bit
5361 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5362 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5364 /* TODO SI/SP frames? */
/**
 * Decodes the skip flag of the macroblock at (mb_x, mb_y) with CABAC.
 * The left (mba_xy) and top (mbb_xy) neighbour addresses are computed
 * differently for FRAME_MBAFF and for the plain case. Each neighbour is then
 * tested for being in the current slice and not skipped.
 * @returns the bin decoded with cabac_state[11 + ctx]
 */
5369 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5370 MpegEncContext * const s = &h->s;
5374 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
// address on the even-aligned row (bit 0 of mb_y cleared)
5375 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// move the left neighbour one row down when it is in this slice and its
// interlaced state equals MB_FIELD (the first operand of this condition is not visible here)
5378 && h->slice_table[mba_xy] == h->slice_num
5379 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5380 mba_xy += s->mb_stride;
5382 mbb_xy = mb_xy - s->mb_stride;
// move the top neighbour one more row up when it is in this slice and interlaced
5384 && h->slice_table[mbb_xy] == h->slice_num
5385 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5386 mbb_xy -= s->mb_stride;
5388 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// non-MBAFF: the top neighbour is the macroblock directly above
5390 int mb_xy = mb_x + mb_y*s->mb_stride;
5392 mbb_xy = mb_xy - s->mb_stride;
5395 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5397 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
// B slices adjust ctx further (the adjustment itself is not visible here)
5400 if( h->slice_type == B_TYPE )
5402 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC.
 * @param pred_mode the predicted mode for this block
 * One bin is decoded with context 68. A 3-bit mode value is then read with
 * context 69, least significant bit first, and compared against pred_mode.
 */
5405 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5408 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// 3-bit value, least significant bit first
5411 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5412 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5413 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// NOTE(review): presumably skips over pred_mode, which the flag above already covers - confirm
5415 if( mode >= pred_mode )
/**
 * Decodes the intra chroma prediction mode with CABAC.
 * The first bin uses context 64 + ctx, where ctx depends on whether the left
 * and top macroblocks are in this slice and have a non-zero entry in
 * chroma_pred_mode_table. Up to two more bins use context 64 + 3.
 */
5421 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5422 const int mba_xy = h->left_mb_xy[0];
5423 const int mbb_xy = h->top_mb_xy;
5427 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5428 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5431 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5434 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// both remaining bins share context 64 + 3
5437 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5439 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* x coordinate (0..3) for each of the 16 block indices; decode_cabac_mb_cbp_luma() reads it at index 4*i8x8 */
5445 static const uint8_t block_idx_x[16] = {
5446 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/* y coordinate (0..3) for each of the 16 block indices */
5448 static const uint8_t block_idx_y[16] = {
5449 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* indexed as [x][y]; decode_cabac_mb_cbp_luma() divides the entry by 4 to get an 8x8 block index */
5451 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the luma coded block pattern with CABAC, one bin per 8x8 block.
 * For each 8x8 block the context (73 + ctx) is derived from the cbp bit of the
 * block to the left (cbp_a) and of the block above (cbp_b). Neighbours outside
 * the current macroblock take their bits from h->left_cbp / the top cbp when
 * that macroblock belongs to the current slice.
 */
5458 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5463 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5465 tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
5468 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
// coordinates of the first 4x4 block of this 8x8 block
5473 x = block_idx_x[4*i8x8];
5474 y = block_idx_y[4*i8x8];
5478 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5479 cbp_a = h->left_cbp;
5480 tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
5486 /* No need to test for skip as we put 0 for skip block */
5487 /* No need to test for IPCM as we put 1 for IPCM block */
// 8x8 index of the block one column to the left; (x-1)&0x03 wraps around
5489 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5490 if( ((cbp_a >> i8x8a)&0x01) == 0 )
// 8x8 index of the block one row above; (y-1)&0x03 wraps around
5495 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5496 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5500 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the chroma coded block pattern with CABAC.
 * cbp_a / cbp_b are bits 4..5 of the left / top cbp.
 * The first bin uses a context built from "neighbour value > 0".
 * The second bin uses a context built from "neighbour value == 2" and
 * yields the return value 1 or 2.
 */
5506 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5510 cbp_a = (h->left_cbp>>4)&0x03;
5511 cbp_b = (h-> top_cbp>>4)&0x03;
// first bin: left contributes 1 and top contributes 2 to ctx
5514 if( cbp_a > 0 ) ctx++;
5515 if( cbp_b > 0 ) ctx += 2;
5516 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// second bin: same weighting, now testing for the value 2
5520 if( cbp_a == 2 ) ctx++;
5521 if( cbp_b == 2 ) ctx += 2;
5522 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes the macroblock quantiser delta with CABAC.
 * Bins are read from cabac_state[60 + ctx] in a loop that counts in val, with a
 * guard at val > 102 against an endless loop.
 * The caller in decode_mb_cabac() treats a return of INT_MIN as failure.
 */
5524 static int decode_cabac_mb_dqp( H264Context *h) {
5525 MpegEncContext * const s = &h->s;
// previous macroblock: one to the left of the current address
5531 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
// alternative: last macroblock of the previous row
5533 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5535 if( h->last_qscale_diff != 0 )
5538 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5544 if(val > 102) //prevent infinite loop
// one of the result mappings from the counter: negated (val + 1)/2
5551 return -(val + 1)/2;
/**
 * Decodes a P sub-macroblock type with CABAC.
 * Up to three bins are read with contexts 21, 22 and 23, in that order.
 * The values returned for each outcome are not visible here.
 */
5553 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5554 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5556 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5558 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B sub-macroblock type with CABAC using contexts 36..39.
 * @returns 0 for B_Direct_8x8, 1 or 2 for B_L0_8x8 / B_L1_8x8,
 *          11 or 12 for B_L1_4x4 / B_Bi_4x4, otherwise a value accumulated
 *          in type from two further bins
 */
5562 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5564 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5565 return 0; /* B_Direct_8x8 */
5566 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5567 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5569 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5570 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5571 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// two more bins, most significant first, are added onto type
5574 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5575 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes the transform size flag with CABAC.
 * The context is 399 plus h->neighbor_transform_size.
 * @returns the decoded bin; callers set MB_TYPE_8x8DCT when it is non-zero
 */
5579 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5580 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC.
 * @param list reference list whose ref_cache is consulted
 * @param n    block index; scan8[n] locates the block in the caches
 * refa / refb are the cached references of the left (scan8[n] - 1) and top
 * (scan8[n] - 8) neighbours. In B slices a neighbour only counts when its
 * direct_cache entry is zero. Bins are read from cabac_state[54 + ctx].
 * When ref reaches 32 an error is logged and 0 is returned (see the FIXME).
 */
5583 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5584 int refa = h->ref_cache[list][scan8[n] - 1];
5585 int refb = h->ref_cache[list][scan8[n] - 8];
5589 if( h->slice_type == B_TYPE) {
5590 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5592 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5601 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// overflow guard against the fixed limit 32, not against the list's actual size
5607 if(ref >= 32 /*h->ref_list[list]*/){
5608 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5609 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component with CABAC.
 * @param list reference list whose mvd_cache is consulted
 * @param n    block index; scan8[n] locates the block in the cache
 * @param l    component selector: 0 uses context base 40, anything else uses 47
 * amvd is the sum of the absolute cached differences of the left and top
 * neighbours and selects the first context. Magnitude bins are read with
 * contexts while mvd < 9, then with bypass bins. The sign is applied by
 * get_cabac_bypass_sign().
 */
5615 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5616 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5617 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5618 int ctxbase = (l == 0) ? 40 : 47;
5623 else if( amvd > 32 )
5628 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// context-coded part of the magnitude, limited to values below 9
5633 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// bypass-coded escape part; the overflow check logs the error below
5641 while( get_cabac_bypass( &h->cabac ) ) {
5645 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5650 if( get_cabac_bypass( &h->cabac ) )
5654 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the context offset for the coded block flag.
 * @param cat block category, as listed in decode_cabac_residual()
 * @param idx block index within that category
 * nza / nzb describe whether the left / top neighbour has coefficients.
 * Depending on cat they come from bit 0x100 of the neighbouring cbp, from
 * non_zero_count_cache, or from bit (6+idx) of the neighbouring cbp.
 * @returns ctx + 4 * cat; decode_cabac_residual() adds 85 to it
 */
5657 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5662 nza = h->left_cbp&0x100;
5663 nzb = h-> top_cbp&0x100;
5664 } else if( cat == 1 || cat == 2 ) {
// luma blocks: left is scan8[idx] - 1, top is scan8[idx] - 8 in the cache
5665 nza = h->non_zero_count_cache[scan8[idx] - 1];
5666 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5667 } else if( cat == 3 ) {
5668 nza = (h->left_cbp>>(6+idx))&0x01;
5669 nzb = (h-> top_cbp>>(6+idx))&0x01;
// remaining category: same cache lookup, at scan8[16 + idx]
5672 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5673 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5682 return ctx + 4 * cat;
/* Context offset of the "last coefficient" flag for each of the first 63
 * positions of an 8x8 block; decode_cabac_residual() indexes it by position. */
5685 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5686 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5687 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5688 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5689 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes the residual coefficients of one block with CABAC.
 * @param block     destination for the coefficients
 * @param cat       block category (see the "cat:" table in the body)
 * @param n         block index, whose meaning depends on cat
 * @param scantable maps the decoded coefficient position to an index in block
 * @param qmul      dequantisation factors indexed like block; callers pass NULL
 *                  for the DC categories, where the raw level is stored
 * @param max_coeff number of coefficients in the block
 *
 * Steps visible here: read the coded block flag, decode the significance map
 * into index[], record the coefficient count in non_zero_count_cache or
 * cbp_table, then decode the levels from the last significant coefficient
 * backwards.
 */
5692 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5693 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
// first context of each syntax element, indexed by [MB_FIELD][cat] below
5694 static const int significant_coeff_flag_offset[2][6] = {
5695 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5696 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5698 static const int last_coeff_flag_offset[2][6] = {
5699 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5700 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
// indexed by cat only
5702 static const int coeff_abs_level_m1_offset[6] = {
5703 227+0, 227+10, 227+20, 227+30, 227+39, 426
// per-position context offsets of the significance flag for 8x8 blocks, indexed by [MB_FIELD][position]
5705 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5706 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5707 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5708 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5709 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5710 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5711 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5712 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5713 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5719 int coeff_count = 0;
5722 int abslevelgt1 = 0;
5724 uint8_t *significant_coeff_ctx_base;
5725 uint8_t *last_coeff_ctx_base;
5726 uint8_t *abs_level_m1_ctx_base;
// with CABAC_ON_STACK the decoder state is copied into the local cc and
// range/low/bytestream are written back to h->cabac before leaving
5729 #define CABAC_ON_STACK
5731 #ifdef CABAC_ON_STACK
5734 cc.range = h->cabac.range;
5735 cc.low = h->cabac.low;
5736 cc.bytestream= h->cabac.bytestream;
5738 #define CC &h->cabac
5742 /* cat: 0-> DC 16x16 n = 0
5743 * 1-> AC 16x16 n = luma4x4idx
5744 * 2-> Luma4x4 n = luma4x4idx
5745 * 3-> DC Chroma n = iCbCr
5746 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5747 * 5-> Luma8x8 n = 4 * luma8x8idx
5750 /* read coded block flag */
5752 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
// block not coded: clear its coefficient count in the cache
5753 if( cat == 1 || cat == 2 )
5754 h->non_zero_count_cache[scan8[n]] = 0;
5756 h->non_zero_count_cache[scan8[16+n]] = 0;
5757 #ifdef CABAC_ON_STACK
5758 h->cabac.range = cc.range ;
5759 h->cabac.low = cc.low ;
5760 h->cabac.bytestream= cc.bytestream;
// select the context ranges for this category (and field/frame mode)
5766 significant_coeff_ctx_base = h->cabac_state
5767 + significant_coeff_flag_offset[MB_FIELD][cat];
5768 last_coeff_ctx_base = h->cabac_state
5769 + last_coeff_flag_offset[MB_FIELD][cat];
5770 abs_level_m1_ctx_base = h->cabac_state
5771 + coeff_abs_level_m1_offset[cat];
5774 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5775 for(last= 0; last < coefs; last++) { \
5776 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5777 if( get_cabac( CC, sig_ctx )) { \
5778 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5779 index[coeff_count++] = last; \
5780 if( get_cabac( CC, last_ctx ) ) { \
5786 if( last == max_coeff -1 ) {\
5787 index[coeff_count++] = last;\
5789 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5790 #if defined(ARCH_X86) && defined(CONFIG_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5791 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5793 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
// generic path: 8x8 blocks use per-position offset tables, other blocks use the position itself
5795 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5797 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5800 assert(coeff_count > 0);
// record that the block has coefficients, in a place that depends on cat
5803 h->cbp_table[mb_xy] |= 0x100;
5804 else if( cat == 1 || cat == 2 )
5805 h->non_zero_count_cache[scan8[n]] = coeff_count;
5807 h->cbp_table[mb_xy] |= 0x40 << n;
5809 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
// this variant stores the count for all four cache entries of a 2x2 group
5812 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
// levels are decoded from the last significant coefficient back to the first
5815 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5816 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5817 int j= scantable[index[coeff_count]];
5819 if( get_cabac( CC, ctx ) == 0 ) {
// magnitude 1: store +-1 directly, or the dequantised value with rounding
5821 block[j] = get_cabac_bypass_sign( CC, -1);
5823 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
// larger magnitude: context-coded bins while below 15, then bypass-coded escape bits
5829 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5830 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5834 if( coeff_abs >= 15 ) {
5836 while( get_cabac_bypass( CC ) ) {
5842 coeff_abs += coeff_abs + get_cabac_bypass( CC );
// one bypass bin carries the sign
5848 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5849 else block[j] = coeff_abs;
5851 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5852 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5858 #ifdef CABAC_ON_STACK
5859 h->cabac.range = cc.range ;
5860 h->cabac.low = cc.low ;
5861 h->cabac.bytestream= cc.bytestream;
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The defaults are the macroblock directly above and directly to the left.
 * A second part corrects them from the frame/field state of the current, top
 * and left macroblock pairs; the condition guarding that part is not visible
 * here.
 */
5866 static inline void compute_mb_neighbors(H264Context *h)
5868 MpegEncContext * const s = &h->s;
5869 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5870 h->top_mb_xy = mb_xy - s->mb_stride;
5871 h->left_mb_xy[0] = mb_xy - 1;
// pair_xy: address on the even-aligned row; top_pair_xy: one row above it
5873 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5874 const int top_pair_xy = pair_xy - s->mb_stride;
5875 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5876 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5877 const int curr_mb_frame_flag = !MB_FIELD;
// set when the current macroblock is on an odd row
5878 const int bottom = (s->mb_y & 1);
5880 ? !curr_mb_frame_flag // bottom macroblock
5881 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5883 h->top_mb_xy -= s->mb_stride;
// left pair coded in the other mode: point at the address on the even-aligned row
5885 if (left_mb_frame_flag != curr_mb_frame_flag) {
5886 h->left_mb_xy[0] = pair_xy - 1;
5893 * Decodes one macroblock from the CABAC-coded bitstream.
5894 * @returns 0 if ok, -1 if an error is noticed (the error paths return -1, not AC_ERROR / DC_ERROR / MV_ERROR)
/*
 * Overview of the steps visible in this function:
 *  - clear the coefficient buffer h->mb
 *  - outside I/SI slices, read the skip flag (with extra MBAFF handling)
 *  - decode mb_type and map it through the b_/p_/i_mb_type_info tables
 *  - I_PCM: copy the raw samples from the bytestream and restart the CABAC decoder
 *  - otherwise fill the caches, then decode intra prediction modes or
 *    reference indices and motion vectors for the partition layout
 *  - decode the coded block pattern, the optional 8x8 transform flag, the
 *    quantiser delta and the residual blocks
 *  - store qscale and the non-zero counts for the macroblock
 * The error paths visible here return -1.
 */
5896 static int decode_mb_cabac(H264Context *h) {
5897 MpegEncContext * const s = &h->s;
5898 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5899 int mb_type, partition_count, cbp = 0;
5900 int dct8x8_allowed= h->pps.transform_8x8_mode;
5902 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5904 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5905 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5907 /* a skipped mb needs the aff flag from the following mb */
5908 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5909 predict_field_decoding_flag(h);
// on an odd row after a skipped macroblock, reuse the skip flag that was read ahead
5910 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5911 skip = h->next_mb_skipped;
5913 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5914 /* read skip flags */
5916 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
// read ahead the skip flag of the macroblock one row below
5917 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5918 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5919 if(h->next_mb_skipped)
5920 predict_field_decoding_flag(h);
5922 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
// reset the per-macroblock state that later macroblocks use as context
5927 h->cbp_table[mb_xy] = 0;
5928 h->chroma_pred_mode_table[mb_xy] = 0;
5929 h->last_qscale_diff = 0;
5936 if( (s->mb_y&1) == 0 )
5938 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5940 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5942 h->prev_mb_skipped = 0;
5944 compute_mb_neighbors(h);
5945 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5946 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
// translate the decoded number into partition count and MB_TYPE_* flags
5950 if( h->slice_type == B_TYPE ) {
5952 partition_count= b_mb_type_info[mb_type].partition_count;
5953 mb_type= b_mb_type_info[mb_type].type;
5956 goto decode_intra_mb;
5958 } else if( h->slice_type == P_TYPE ) {
5960 partition_count= p_mb_type_info[mb_type].partition_count;
5961 mb_type= p_mb_type_info[mb_type].type;
5964 goto decode_intra_mb;
5967 assert(h->slice_type == I_TYPE);
// intra types: cbp and the 16x16 prediction mode come from the type table
5969 partition_count = 0;
5970 cbp= i_mb_type_info[mb_type].cbp;
5971 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5972 mb_type= i_mb_type_info[mb_type].type;
5975 mb_type |= MB_TYPE_INTERLACED;
5977 h->slice_table[ mb_xy ]= h->slice_num;
5979 if(IS_INTRA_PCM(mb_type)) {
5983 // We assume these blocks are very rare so we do not optimize it.
5984 // FIXME The two following lines get the bitstream position in the cabac
5985 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5986 ptr= h->cabac.bytestream;
5987 if(h->cabac.low&0x1) ptr--;
5989 if(h->cabac.low&0x1FF) ptr--;
5992 // The pixels are stored in the same order as levels in h->mb array.
5993 for(y=0; y<16; y++){
5994 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5995 for(x=0; x<16; x++){
5996 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5997 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
// first chroma plane is stored from offset 256 in h->mb
6001 const int index= 256 + 4*(y&3) + 32*(y>>2);
6003 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6004 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
// second chroma plane is stored from offset 256 + 64 in h->mb
6008 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6010 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6011 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
// restart the arithmetic decoder right after the raw samples
6015 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6017 // All blocks are present
6018 h->cbp_table[mb_xy] = 0x1ef;
6019 h->chroma_pred_mode_table[mb_xy] = 0;
6020 // In deblocking, the quantizer is 0
6021 s->current_picture.qscale_table[mb_xy]= 0;
6022 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
6023 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
6024 // All coeffs are present
6025 memset(h->non_zero_count[mb_xy], 16, 16);
6026 s->current_picture.mb_type[mb_xy]= mb_type;
// ref_count is doubled here and halved again at the end of the function
6031 h->ref_count[0] <<= 1;
6032 h->ref_count[1] <<= 1;
6035 fill_caches(h, mb_type, 0);
6037 if( IS_INTRA( mb_type ) ) {
6039 if( IS_INTRA4x4( mb_type ) ) {
6040 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
// 8x8 transform: one prediction mode per 8x8 block, copied to its four cache entries
6041 mb_type |= MB_TYPE_8x8DCT;
6042 for( i = 0; i < 16; i+=4 ) {
6043 int pred = pred_intra_mode( h, i );
6044 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6045 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
// 4x4 transform: one prediction mode per 4x4 block
6048 for( i = 0; i < 16; i++ ) {
6049 int pred = pred_intra_mode( h, i );
6050 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6052 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6055 write_back_intra_pred_mode(h);
6056 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6058 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6059 if( h->intra16x16_pred_mode < 0 ) return -1;
// the chroma mode is stored in the table before validation, for use as neighbour context
6061 h->chroma_pred_mode_table[mb_xy] =
6062 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6064 pred_mode= check_intra_pred_mode( h, pred_mode );
6065 if( pred_mode < 0 ) return -1;
6066 h->chroma_pred_mode= pred_mode;
6067 } else if( partition_count == 4 ) {
// four 8x8 partitions: sub types first, then reference indices, then motion vectors
6068 int i, j, sub_partition_count[4], list, ref[2][4];
6070 if( h->slice_type == B_TYPE ) {
6071 for( i = 0; i < 4; i++ ) {
6072 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6073 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6074 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6076 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6077 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6078 pred_direct_motion(h, &mb_type);
6079 h->ref_cache[0][scan8[4]] =
6080 h->ref_cache[1][scan8[4]] =
6081 h->ref_cache[0][scan8[12]] =
6082 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
// mark direct sub-blocks in direct_cache; decode_cabac_mb_ref() reads it
6083 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6084 for( i = 0; i < 4; i++ )
6085 if( IS_DIRECT(h->sub_mb_type[i]) )
6086 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6090 for( i = 0; i < 4; i++ ) {
6091 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6092 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6093 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices for every list and 8x8 block
6097 for( list = 0; list < h->list_count; list++ ) {
6098 for( i = 0; i < 4; i++ ) {
6099 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6100 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6101 if( h->ref_count[list] > 1 )
6102 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6108 h->ref_cache[list][ scan8[4*i]+1 ]=
6109 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6114 dct8x8_allowed = get_dct8x8_allowed(h);
// motion vectors for every list and 8x8 block
6116 for(list=0; list<h->list_count; list++){
6118 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6119 if(IS_DIRECT(h->sub_mb_type[i])){
6120 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6124 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6125 const int sub_mb_type= h->sub_mb_type[i];
6126 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6127 for(j=0; j<sub_partition_count[i]; j++){
6130 const int index= 4*i + block_width*j;
6131 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6132 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6133 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
// final vector = prediction + decoded difference
6135 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6136 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6137 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// replicate the vector and its difference over the cache entries the sub-partition covers
6139 if(IS_SUB_8X8(sub_mb_type)){
6141 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6143 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6146 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6148 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6149 }else if(IS_SUB_8X4(sub_mb_type)){
6150 mv_cache[ 1 ][0]= mx;
6151 mv_cache[ 1 ][1]= my;
6153 mvd_cache[ 1 ][0]= mx - mpx;
6154 mvd_cache[ 1 ][1]= my - mpy;
6155 }else if(IS_SUB_4X8(sub_mb_type)){
6156 mv_cache[ 8 ][0]= mx;
6157 mv_cache[ 8 ][1]= my;
6159 mvd_cache[ 8 ][0]= mx - mpx;
6160 mvd_cache[ 8 ][1]= my - mpy;
6162 mv_cache[ 0 ][0]= mx;
6163 mv_cache[ 0 ][1]= my;
6165 mvd_cache[ 0 ][0]= mx - mpx;
6166 mvd_cache[ 0 ][1]= my - mpy;
// list not used by this 8x8 block: zero its vectors and differences
6169 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6170 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6171 p[0] = p[1] = p[8] = p[9] = 0;
6172 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6176 } else if( IS_DIRECT(mb_type) ) {
// direct macroblock: vectors are predicted, the stored differences are zero
6177 pred_direct_motion(h, &mb_type);
6178 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6179 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6180 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6182 int list, mx, my, i, mpx, mpy;
6183 if(IS_16X16(mb_type)){
// one partition: reference indices for all lists first, then the vectors
6184 for(list=0; list<h->list_count; list++){
6185 if(IS_DIR(mb_type, 0, list)){
6186 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6187 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6189 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
6191 for(list=0; list<h->list_count; list++){
6192 if(IS_DIR(mb_type, 0, list)){
6193 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6195 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6196 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6197 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6199 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6200 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6202 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6205 else if(IS_16X8(mb_type)){
// two partitions stacked vertically: partition i starts 16*i cache entries after scan8[0]
6206 for(list=0; list<h->list_count; list++){
6208 if(IS_DIR(mb_type, i, list)){
6209 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6210 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6212 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6215 for(list=0; list<h->list_count; list++){
6217 if(IS_DIR(mb_type, i, list)){
6218 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6219 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6220 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6221 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6223 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6224 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6226 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6227 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
// two partitions side by side: partition i starts 2*i cache entries after scan8[0]
6232 assert(IS_8X16(mb_type));
6233 for(list=0; list<h->list_count; list++){
6235 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6236 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6237 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6239 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6242 for(list=0; list<h->list_count; list++){
6244 if(IS_DIR(mb_type, i, list)){
6245 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6246 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6247 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6249 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6250 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6251 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6253 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6254 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6261 if( IS_INTER( mb_type ) ) {
6262 h->chroma_pred_mode_table[mb_xy] = 0;
6263 write_back_motion( h, mb_type );
// intra 16x16 already took cbp from the type table; all other types decode it here
6266 if( !IS_INTRA16x16( mb_type ) ) {
6267 cbp = decode_cabac_mb_cbp_luma( h );
6268 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6271 h->cbp_table[mb_xy] = h->cbp = cbp;
// non-intra macroblocks with coded luma may signal the 8x8 transform here
6273 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6274 if( decode_cabac_mb_transform_size( h ) )
6275 mb_type |= MB_TYPE_8x8DCT;
6277 s->current_picture.mb_type[mb_xy]= mb_type;
6279 if( cbp || IS_INTRA16x16( mb_type ) ) {
6280 const uint8_t *scan, *scan8x8, *dc_scan;
// pick field or zigzag scan tables; the *_q0 variants are used when qscale is 0
6283 if(IS_INTERLACED(mb_type)){
6284 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6285 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6286 dc_scan= luma_dc_field_scan;
6288 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6289 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6290 dc_scan= luma_dc_zigzag_scan;
6293 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6294 if( dqp == INT_MIN ){
6295 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// wrap qscale back into the range 0..51
6299 if(((unsigned)s->qscale) > 51){
6300 if(s->qscale<0) s->qscale+= 52;
6301 else s->qscale-= 52;
6303 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6304 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6306 if( IS_INTRA16x16( mb_type ) ) {
// intra 16x16: one DC block (cat 0, no qmul), then up to 16 AC blocks (cat 1)
6308 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6309 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6312 for( i = 0; i < 16; i++ ) {
6313 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6314 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6318 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// other types: luma is decoded per 8x8 block whose cbp bit is set
6322 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6323 if( cbp & (1<<i8x8) ) {
6324 if( IS_8x8DCT(mb_type) ) {
6325 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6326 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6329 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6330 const int index = 4*i8x8 + i4x4;
6331 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6333 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6335 //STOP_TIMER("decode_residual")
// 8x8 block not coded: clear its four non-zero counts
6338 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6339 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC (cat 3), one block per chroma plane, stored from offset 256 in h->mb
6346 for( c = 0; c < 2; c++ ) {
6347 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6348 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
// chroma AC (cat 4), four blocks per plane, dequantised with the plane's chroma_qp
6355 for( c = 0; c < 2; c++ ) {
6356 const uint32_t *qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6357 for( i = 0; i < 4; i++ ) {
6358 const int index = 16 + 4 * c + i;
6359 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6360 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15) < 0)
// no chroma AC: clear the chroma non-zero counts
6365 uint8_t * const nnz= &h->non_zero_count_cache[0];
6366 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6367 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// no residual at all: clear every non-zero count and the remembered quantiser delta
6370 uint8_t * const nnz= &h->non_zero_count_cache[0];
6371 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6372 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6373 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6374 h->last_qscale_diff = 0;
6377 s->current_picture.qscale_table[mb_xy]= s->qscale;
6378 write_back_non_zero_count(h);
// undo the doubling of ref_count done before fill_caches()
6381 h->ref_count[0] >>= 1;
6382 h->ref_count[1] >>= 1;
/**
 * Deblocks one luma edge, filtering across it along each row (pix[-3..2]).
 * @param pix    pointer to the first sample on the q side of the edge
 * @param stride passed on to the DSP filter
 * @param bS     four boundary strengths for the edge
 * @param qp     quantiser used to look up alpha, beta and tc0
 *
 * Two paths are visible. One builds tc[] from tc0_table and calls the DSP
 * routine h264_h_loop_filter_luma. The other filters 16 rows in C for bS=4.
 * The condition that selects between them is not visible here.
 */
6389 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
// the tables are accessed at +52, so the slice offsets may make the index negative
6391 const int index_a = qp + h->slice_alpha_c0_offset;
6392 const int alpha = (alpha_table+52)[index_a];
6393 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// tc is -1 for segments whose bS is 0
6398 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6399 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6401 /* 16px edge length, because bS=4 is triggered by being at
6402 * the edge of an intra MB, so all 4 bS are the same */
6403 for( d = 0; d < 16; d++ ) {
6404 const int p0 = pix[-1];
6405 const int p1 = pix[-2];
6406 const int p2 = pix[-3];
6408 const int q0 = pix[0];
6409 const int q1 = pix[1];
6410 const int q2 = pix[2];
// filter only when the step across the edge and on each side is small enough
6412 if( FFABS( p0 - q0 ) < alpha &&
6413 FFABS( p1 - p0 ) < beta &&
6414 FFABS( q1 - q0 ) < beta ) {
// a small step across the edge enables the longer filters below
6416 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6417 if( FFABS( p2 - p0 ) < beta)
6419 const int p3 = pix[-4];
6421 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6422 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6423 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
// p side not smooth enough: only p0 is modified
6426 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6428 if( FFABS( q2 - q0 ) < beta)
6430 const int q3 = pix[3];
6432 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6433 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6434 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
// q side not smooth enough: only q0 is modified
6437 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// step across the edge too large for the long filters: modify p0 and q0 only
6441 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6442 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6444 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge by delegating to the DSP routines.
 * @param h      decoder context; supplies the slice alpha/beta offsets and the DSP table
 * @param pix    pointer handed to the DSP routine as the edge position
 * @param stride passed through to the DSP routine
 * @param bS     four boundary-strength values for the edge
 * @param qp     chroma quantizer used to index the alpha/beta/tc0 tables
 */
6450 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6452 const int index_a = qp + h->slice_alpha_c0_offset;
6453 const int alpha = (alpha_table+52)[index_a];
6454 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// Chroma clipping value is the table entry plus one; 0 is used when bS is 0.
6459 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6460 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
// Alternative path without tc values.
// NOTE(review): presumably taken for bS == 4 as in the luma variant -- the selecting condition is not shown here.
6462 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the left luma edge of a macroblock for the MBAFF case, where the
 * edge carries eight boundary strengths and two quantizers.
 * Works row by row over 16 rows; each row selects its own bS and qp entry.
 * @param h      decoder context; supplies the slice alpha/beta offsets
 * @param pix    pointer to the q0 sample of the first row (p samples at negative offsets)
 * @param stride distance added to pix after every row
 * @param bS     eight boundary-strength values
 * @param qp     two quantizers, selected per row through qp_index
 */
6466 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6468 for( i = 0; i < 16; i++, pix += stride) {
// Row -> bS mapping; the two statements below are alternative/combined forms
// whose guarding conditions are not shown here.
6474 int bS_index = (i >> 1);
6477 bS_index |= (i & 1);
// A zero boundary strength is tested first, before any table lookup for this row.
6480 if( bS[bS_index] == 0 ) {
// Field macroblocks switch qp halfway down (rows 0-7 vs 8-15); otherwise the
// qp alternates with the row parity.
6484 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6485 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6486 alpha = (alpha_table+52)[index_a];
6487 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// bS 1..3: clipped filter. tc0 bounds the p1/q1 corrections; the p0/q0 delta
// is clipped to +-tc, whose computation is not shown here.
6489 if( bS[bS_index] < 4 ) {
6490 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6491 const int p0 = pix[-1];
6492 const int p1 = pix[-2];
6493 const int p2 = pix[-3];
6494 const int q0 = pix[0];
6495 const int q1 = pix[1];
6496 const int q2 = pix[2];
6498 if( FFABS( p0 - q0 ) < alpha &&
6499 FFABS( p1 - p0 ) < beta &&
6500 FFABS( q1 - q0 ) < beta ) {
6504 if( FFABS( p2 - p0 ) < beta ) {
6505 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6508 if( FFABS( q2 - q0 ) < beta ) {
6509 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
// The same delta is added to p0 and subtracted from q0, each result clipped to 0..255.
6513 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6514 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6515 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6516 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Remaining case (bS not below 4): unclipped filter, same arithmetic as the
// 16-row loop in filter_mb_edgev.
6519 const int p0 = pix[-1];
6520 const int p1 = pix[-2];
6521 const int p2 = pix[-3];
6523 const int q0 = pix[0];
6524 const int q1 = pix[1];
6525 const int q2 = pix[2];
6527 if( FFABS( p0 - q0 ) < alpha &&
6528 FFABS( p1 - p0 ) < beta &&
6529 FFABS( q1 - q0 ) < beta ) {
6531 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6532 if( FFABS( p2 - p0 ) < beta)
6534 const int p3 = pix[-4];
6536 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6537 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6538 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6541 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6543 if( FFABS( q2 - q0 ) < beta)
6545 const int q3 = pix[3];
6547 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6548 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6549 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6552 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6556 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6557 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6559 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the left chroma edge of a macroblock for the MBAFF case.
 * Works row by row over 8 rows; only p0 and q0 are ever modified.
 * @param h      decoder context; supplies the slice alpha/beta offsets
 * @param pix    pointer to the q0 sample of the first row (p samples at negative offsets)
 * @param stride distance added to pix after every row
 * @param bS     eight boundary-strength values
 * @param qp     two chroma quantizers, selected per row through qp_index
 */
6564 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6566 for( i = 0; i < 8; i++, pix += stride) {
// A zero boundary strength is tested first, before any table lookup for this row.
6574 if( bS[bS_index] == 0 ) {
// Field macroblocks switch qp halfway down (rows 0-3 vs 4-7); otherwise the
// qp alternates with the row parity.
6578 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6579 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6580 alpha = (alpha_table+52)[index_a];
6581 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// bS 1..3: delta clipped to +-tc, where tc is the table entry plus one.
6583 if( bS[bS_index] < 4 ) {
6584 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6585 const int p0 = pix[-1];
6586 const int p1 = pix[-2];
6587 const int q0 = pix[0];
6588 const int q1 = pix[1];
6590 if( FFABS( p0 - q0 ) < alpha &&
6591 FFABS( p1 - p0 ) < beta &&
6592 FFABS( q1 - q0 ) < beta ) {
6593 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6595 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6596 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6597 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Remaining case (bS not below 4): fixed 3-tap replacement of p0 and q0, no clipping value.
6600 const int p0 = pix[-1];
6601 const int p1 = pix[-2];
6602 const int q0 = pix[0];
6603 const int q1 = pix[1];
6605 if( FFABS( p0 - q0 ) < alpha &&
6606 FFABS( p1 - p0 ) < beta &&
6607 FFABS( q1 - q0 ) < beta ) {
6609 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6610 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6611 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge.
 * Samples above the edge are read at negative multiples of the stride
 * (pix[-1*stride] is p0), samples below it at non-negative multiples
 * (pix[0] is q0).
 * @param h      decoder context; supplies the slice alpha/beta offsets and the DSP table
 * @param pix    pointer to the q0 sample of the first column of the edge
 * @param stride distance between vertically adjacent samples
 * @param bS     four boundary-strength values, one per segment of the edge
 * @param qp     quantizer used to index the alpha/beta/tc0 tables
 */
6617 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6619 const int index_a = qp + h->slice_alpha_c0_offset;
6620 const int alpha = (alpha_table+52)[index_a];
6621 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6622 const int pix_next = stride;
// Normal filter: per-segment clipping value (-1 when bS is 0) handed to the DSP routine.
6627 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6628 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6630 /* 16px edge length, see filter_mb_edgev */
6631 for( d = 0; d < 16; d++ ) {
6632 const int p0 = pix[-1*pix_next];
6633 const int p1 = pix[-2*pix_next];
6634 const int p2 = pix[-3*pix_next];
6635 const int q0 = pix[0];
6636 const int q1 = pix[1*pix_next];
6637 const int q2 = pix[2*pix_next];
6639 if( FFABS( p0 - q0 ) < alpha &&
6640 FFABS( p1 - p0 ) < beta &&
6641 FFABS( q1 - q0 ) < beta ) {
// Unlike filter_mb_edgev, p3 and q3 are loaded up front for every filtered column.
6643 const int p3 = pix[-4*pix_next];
6644 const int q3 = pix[ 3*pix_next];
// With a small step across the edge each side is handled on its own:
// three samples are rewritten when that side's |x2 - x0| < beta,
// otherwise only the sample next to the edge.
6646 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6647 if( FFABS( p2 - p0 ) < beta) {
6649 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6650 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6651 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6654 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6656 if( FFABS( q2 - q0 ) < beta) {
6658 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6659 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6660 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6663 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Larger step across the edge: only p0 and q0 are replaced.
6667 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6668 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6670 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge by delegating to the DSP routines.
 * @param h      decoder context; supplies the slice alpha/beta offsets and the DSP table
 * @param pix    pointer handed to the DSP routine as the edge position
 * @param stride passed through to the DSP routine
 * @param bS     four boundary-strength values for the edge
 * @param qp     chroma quantizer used to index the alpha/beta/tc0 tables
 */
6677 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6679 const int index_a = qp + h->slice_alpha_c0_offset;
6680 const int alpha = (alpha_table+52)[index_a];
6681 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// Chroma clipping value is the table entry plus one; 0 is used when bS is 0.
6686 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6687 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
// Alternative path without tc values.
// NOTE(review): presumably taken for bS == 4 as in the luma variant -- the selecting condition is not shown here.
6689 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking of one macroblock.
 * Hands the macroblock to filter_mb() for the cases listed in the first
 * condition below; otherwise derives the edge quantizers from the current,
 * left and top macroblocks and filters luma and both chroma planes.
 * @param h          decoder context
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param img_y      luma samples of this macroblock
 * @param img_cb     Cb samples of this macroblock
 * @param img_cr     Cr samples of this macroblock
 * @param linesize   luma stride
 * @param uvlinesize chroma stride
 */
6693 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6694 MpegEncContext * const s = &h->s;
6696 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6698 mb_xy = mb_x + mb_y*s->mb_stride;
// Cases routed to filter_mb(): first column or row, no DSP bS routine,
// a non-zero pps.chroma_qp_diff, or (with deblocking_filter == 2) a left or
// top neighbour that belongs to another slice.
6700 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6701 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6702 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6703 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6706 assert(!FRAME_MBAFF);
6708 mb_type = s->current_picture.mb_type[mb_xy];
// qp/qpc: this macroblock; suffix 0: left neighbour; suffix 1: top neighbour.
// The chroma values are all derived with chroma index 0.
6709 qp = s->current_picture.qscale_table[mb_xy];
6710 qp0 = s->current_picture.qscale_table[mb_xy-1];
6711 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6712 qpc = get_chroma_qp( h, 0, qp );
6713 qpc0 = get_chroma_qp( h, 0, qp0 );
6714 qpc1 = get_chroma_qp( h, 0, qp1 );
// Edge quantizers are the rounded average of the two macroblocks sharing the edge.
6715 qp0 = (qp + qp0 + 1) >> 1;
6716 qp1 = (qp + qp1 + 1) >> 1;
6717 qpc0 = (qpc + qpc0 + 1) >> 1;
6718 qpc1 = (qpc + qpc1 + 1) >> 1;
// NOTE(review): when every quantizer is at or below the threshold the
// function presumably returns without filtering -- the statement under this
// condition is not shown here.
6719 qp_thresh = 15 - h->slice_alpha_c0_offset;
6720 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6721 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// Intra macroblock: strength 4 on the left/top macroblock edges (filtered
// with the averaged quantizers), strength 3 on the internal edges.
6724 if( IS_INTRA(mb_type) ) {
6725 int16_t bS4[4] = {4,4,4,4};
6726 int16_t bS3[4] = {3,3,3,3};
// 8x8 transform: only edges 0 and 2 of each direction are filtered.
6727 if( IS_8x8DCT(mb_type) ) {
6728 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6729 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6730 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6731 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6733 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6734 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6735 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6736 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6737 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6738 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6739 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6740 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
// Chroma: two edges per direction and plane, at sample offsets 0 and 4.
6742 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6743 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6744 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6745 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6746 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6747 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6748 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6749 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// Non-intra macroblock: bS[dir][edge][4] holds the strengths; bSv views each
// group of four int16_t as one uint64_t so a whole edge can be set or tested at once.
// NOTE(review): this reads/writes the int16_t array through a uint64_t pointer
// and relies on the 8-byte alignment requested by DECLARE_ALIGNED_8.
6752 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6753 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6755 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
// Strength 2 on edges 0 and 2 of both directions.
6757 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6759 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6760 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6761 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6762 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6764 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6765 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6766 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6767 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
// An intra left/top neighbour forces strength 4 on the shared macroblock edge.
6769 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6770 bSv[0][0] = 0x0004000400040004ULL;
6771 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6772 bSv[1][0] = 0x0004000400040004ULL;
6774 #define FILTER(hv,dir,edge)\
6775 if(bSv[dir][edge]) {\
6776 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6778 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6779 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6785 } else if( IS_8x8DCT(mb_type) ) {
/**
 * General deblocking of one macroblock, including the MBAFF special cases.
 * For each direction (0: vertical edges, 1: horizontal edges) and each edge
 * it derives four boundary strengths and the averaged quantizers, then calls
 * the filter_mb_edge* helpers for luma and both chroma planes.
 * @param h          decoder context
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param img_y      luma samples of this macroblock
 * @param img_cb     Cb samples of this macroblock
 * @param img_cr     Cr samples of this macroblock
 * @param linesize   luma stride
 * @param uvlinesize chroma stride
 */
6804 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6805 MpegEncContext * const s = &h->s;
6806 const int mb_xy= mb_x + mb_y*s->mb_stride;
6807 const int mb_type = s->current_picture.mb_type[mb_xy];
// Vertical motion-vector difference threshold: 2 for interlaced macroblocks, 4 otherwise.
6808 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6809 int first_vertical_edge_done = 0;
6811 /* FIXME: A given frame may occupy more than one position in
6812 * the reference list. So ref2frm should be populated with
6813 * frame numbers, not indices. */
// Indexed with ref_cache value + 2, so cache values -2 and -1 both map to -1.
6814 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6815 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6817 //for sufficiently low qp, filtering wouldn't do anything
6818 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6820 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6821 int qp = s->current_picture.qscale_table[mb_xy];
6823 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6824 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6830 // left mb is in picture
6831 && h->slice_table[mb_xy-1] != 255
6832 // and current and left pair do not have the same interlaced type
6833 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6834 // and left mb is in the same slice if deblocking_filter == 2
6835 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6836 /* First vertical edge is different in MBAFF frames
6837 * There are 8 different bS to compute and 2 different Qp
6839 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6840 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6845 int mb_qp, mbn0_qp, mbn1_qp;
6847 first_vertical_edge_done = 1;
6849 if( IS_INTRA(mb_type) )
6850 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6852 for( i = 0; i < 8; i++ ) {
6853 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6855 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6857 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6858 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6859 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
// Luma (qp), Cb (bqp) and Cr (rqp) quantizers for the left edge: rounded
// average of this macroblock and each of the two left macroblocks.
6866 mb_qp = s->current_picture.qscale_table[mb_xy];
6867 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6868 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6869 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6870 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6871 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6872 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6873 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6874 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6875 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6876 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6877 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6878 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6881 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6882 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// Left edge of luma, Cb and Cr with the eight strengths and per-plane quantizer pairs.
6883 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6884 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6885 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6887 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6888 for( dir = 0; dir < 2; dir++ )
// mbm: the neighbour across edge 0 (left for dir 0, top for dir 1). A
// slice_table value of 255 for it makes the edge loop start at edge 1.
6891 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6892 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6893 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
// Only one edge is visited when mb_type has both the 16x16 and the SKIP flag set.
6895 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6896 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6897 // how often to recheck mv-based bS when iterating between edges
6898 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6899 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6900 // how often to recheck mv-based bS when iterating along each edge
6901 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6903 if (first_vertical_edge_done) {
6905 first_vertical_edge_done = 0;
6908 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6911 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6912 && !IS_INTERLACED(mb_type)
6913 && IS_INTERLACED(mbm_type)
6915 // This is a special case in the norm where the filtering must
6916 // be done twice (one each of the field) even if we are in a
6917 // frame macroblock.
6919 static const int nnz_idx[4] = {4,5,6,3};
// Doubled strides so each pass touches every second line; pass j starts at line j.
6920 unsigned int tmp_linesize = 2 * linesize;
6921 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6922 int mbn_xy = mb_xy - 2 * s->mb_stride;
6927 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6928 if( IS_INTRA(mb_type) ||
6929 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6930 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6932 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6933 for( i = 0; i < 4; i++ ) {
6934 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6935 mbn_nnz[nnz_idx[i]] != 0 )
6941 // Do not use s->qscale as luma quantizer because it has not the same
6942 // value in IPCM macroblocks.
6943 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6944 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6945 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6946 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6947 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6948 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6949 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6950 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Regular edge loop: edge 0 borders the neighbouring macroblock, edges 1..3 are internal.
6957 for( edge = start; edge < edges; edge++ ) {
6958 /* mbn_xy: neighbor macroblock */
6959 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6960 const int mbn_type = s->current_picture.mb_type[mbn_xy];
// Odd edges are tested separately for 8x8-transform macroblocks.
// NOTE(review): presumably they are skipped -- the statement under this test is not shown here.
6964 if( (edge&1) && IS_8x8DCT(mb_type) )
// Either side intra: one value for all four segments of the edge.
6967 if( IS_INTRA(mb_type) ||
6968 IS_INTRA(mbn_type) ) {
6971 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6972 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6981 bS[0] = bS[1] = bS[2] = bS[3] = value;
// Non-intra: an edge selected by mask_edge starts with all-zero strengths.
6986 if( edge & mask_edge ) {
6987 bS[0] = bS[1] = bS[2] = bS[3] = 0;
// MBAFF with different interlacing on the two sides: strength 1.
6990 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6991 bS[0] = bS[1] = bS[2] = bS[3] = 1;
// One reference/motion-vector comparison decides the strength for the whole edge.
6994 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6995 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6996 int bn_idx= b_idx - (dir ? 8:1);
6998 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6999 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7000 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7001 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7003 bS[0] = bS[1] = bS[2] = bS[3] = v;
// Per-segment pass: b_idx/bn_idx address the two cache entries on either side of the edge.
7009 for( i = 0; i < 4; i++ ) {
7010 int x = dir == 0 ? edge : i;
7011 int y = dir == 0 ? i : edge;
7012 int b_idx= 8 + 4 + x + 8*y;
7013 int bn_idx= b_idx - (dir ? 8:1);
7015 if( h->non_zero_count_cache[b_idx] != 0 ||
7016 h->non_zero_count_cache[bn_idx] != 0 ) {
7022 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7023 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7024 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7025 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
// All four strengths zero is tested before the quantizer is computed.
7033 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7038 // Do not use s->qscale as luma quantizer because it has not the same
7039 // value in IPCM macroblocks.
7040 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7041 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7042 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7043 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// Luma on every visited edge; chroma only on even edges, using the average of
// h->chroma_qp[] and the neighbour's chroma quantizer.
7045 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7046 if( (edge&1) == 0 ) {
7047 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
7048 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
7049 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
7050 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
7053 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7054 if( (edge&1) == 0 ) {
7055 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
7056 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
7057 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
7058 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decodes the macroblocks of one slice, using the CABAC or the CAVLC
 * macroblock decoder depending on h->pps.cabac, and reports the decoded
 * range and its status through ff_er_add_slice().
 * @param h decoder context
 * @return the final statement returns -1 and is marked as not reached; the
 *         other return paths are not shown here
 */
7065 static int decode_slice(H264Context *h){
7066 MpegEncContext * const s = &h->s;
// With partitioned frames only the AC end/error flags are reported; otherwise all flags pass.
7067 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7071 if( h->pps.cabac ) {
// The CABAC decoder is started on the byte-aligned remainder of the bitstream buffer.
7075 align_get_bits( &s->gb );
7078 ff_init_cabac_states( &h->cabac);
7079 ff_init_cabac_decoder( &h->cabac,
7080 s->gb.buffer + get_bits_count(&s->gb)/8,
7081 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7082 /* calculate pre-state */
// 460 context states, initialised from the I table or from the P/B table
// selected by cabac_init_idc, scaled by the slice qscale and clipped to 1..126.
7083 for( i= 0; i < 460; i++ ) {
7085 if( h->slice_type == I_TYPE )
7086 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7088 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
// Two encodings of pre that differ in the low bit (0 vs 1).
// NOTE(review): presumably chosen by whether pre is below 64 -- the condition is not shown here.
7091 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7093 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
// CABAC macroblock loop: decode, reconstruct, and for MBAFF frames decode a
// second macroblock before testing the terminate bin.
7098 int ret = decode_mb_cabac(h);
7100 //STOP_TIMER("decode_mb_cabac")
7102 if(ret>=0) hl_decode_mb(h);
7104 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7107 if(ret>=0) ret = decode_mb_cabac(h);
7109 if(ret>=0) hl_decode_mb(h);
7112 eos = get_cabac_terminate( &h->cabac );
// A decode error, or a read position more than two bytes past the end of the
// CABAC buffer, is reported as an error for the whole range.
7114 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7115 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7116 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7120 if( ++s->mb_x >= s->mb_width ) {
7122 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7129 if( eos || s->mb_y >= s->mb_height ) {
7130 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7131 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC macroblock loop, same decode/reconstruct pattern as above.
7138 int ret = decode_mb_cavlc(h);
7140 if(ret>=0) hl_decode_mb(h);
7142 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7144 ret = decode_mb_cavlc(h);
7146 if(ret>=0) hl_decode_mb(h);
7151 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7152 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7157 if(++s->mb_x >= s->mb_width){
7159 ff_draw_horiz_band(s, 16*s->mb_y, 16);
// Bottom of the picture reached: the *_END flags are reported in both
// branches; they differ in the end column (mb_x-1 when exactly all bits were consumed).
7164 if(s->mb_y >= s->mb_height){
7165 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7167 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7168 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7172 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// Bitstream exhausted with no pending skip run: clean end if exactly all
// bits were consumed, error otherwise.
7179 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7180 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7181 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7182 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7186 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// NOTE(review): the loop below passes s->gb by value to get_bits_count()
// (the code above passes &s->gb) and contains "s->?gb" / "s->gb?.", which is
// not valid C. Presumably this region is compiled out by a preprocessor
// guard that is not shown here -- confirm before enabling or editing it.
7195 for(;s->mb_y < s->mb_height; s->mb_y++){
7196 for(;s->mb_x < s->mb_width; s->mb_x++){
7197 int ret= decode_mb(h);
7202 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7203 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7208 if(++s->mb_x >= s->mb_width){
7210 if(++s->mb_y >= s->mb_height){
7211 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7212 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7216 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7223 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
7224 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7225 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7229 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7236 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7239 return -1; //not reached
/**
 * Reads an unregistered user-data payload and looks for an x264 version
 * banner in it; a recognised build number is stored in h->x264_build.
 * @param h    decoder context
 * @param size payload size in bytes
 * @return return statements are not shown here; the caller treats a negative value as failure
 */
7242 static int decode_unregistered_user_data(H264Context *h, int size){
7243 MpegEncContext * const s = &h->s;
// Holds the first 16 payload bytes plus up to 256 bytes of text.
7244 uint8_t user_data[16+256];
// Copies at most sizeof(user_data)-1 bytes, leaving one byte free at the end of the buffer.
7250 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7251 user_data[i]= get_bits(&s->gb, 8);
// The text after the first 16 bytes is parsed for "x264 - core <build>".
// NOTE(review): user_data is uint8_t but is passed where sscanf/"%s" expect
// char pointers; this also assumes the buffer has been NUL-terminated, which
// is not shown here.
7255 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7256 if(e==1 && build>=0)
7257 h->x264_build= build;
7259 if(s->avctx->debug & FF_DEBUG_BUGS)
7260 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Skips one byte of payload.
// NOTE(review): presumably inside a loop over the bytes that did not fit in user_data -- the loop header is not shown here.
7263 skip_bits(&s->gb, 8);
/**
 * Walks the SEI messages in the current bitstream buffer: reads each
 * message's type and size, hands one kind to decode_unregistered_user_data()
 * and skips the payload of the others.
 * @param h decoder context
 */
7268 static int decode_sei(H264Context *h){
7269 MpegEncContext * const s = &h->s;
// Continue while more than 16 bits remain.
7271 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// Type and size are sums of bytes; a byte of 255 means another byte follows.
// show_bits() peeks the byte, get_bits() in the loop condition consumes it.
7276 type+= show_bits(&s->gb, 8);
7277 }while(get_bits(&s->gb, 8) == 255);
7281 size+= show_bits(&s->gb, 8);
7282 }while(get_bits(&s->gb, 8) == 255);
7286 if(decode_unregistered_user_data(h, size) < 0)
// Payloads of other types are skipped. size comes from the bitstream and is
// not bounded before being multiplied by 8 (see the FIXME below).
7290 skip_bits(&s->gb, 8*size);
7293 //FIXME check bits here
7294 align_get_bits(&s->gb);
/**
 * Reads an HRD parameter structure from the bitstream and discards every
 * field in the lines shown; nothing is stored in sps there.
 * @param h   decoder context (provides the bit reader)
 * @param sps sequence parameter set being parsed
 */
7300 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7301 MpegEncContext * const s = &h->s;
// cpb_count is read as ue(v) plus one and only drives the loop below.
// NOTE(review): the value is not range-checked in the lines shown here.
7303 cpb_count = get_ue_golomb(&s->gb) + 1;
7304 get_bits(&s->gb, 4); /* bit_rate_scale */
7305 get_bits(&s->gb, 4); /* cpb_size_scale */
7306 for(i=0; i<cpb_count; i++){
7307 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7308 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7309 get_bits1(&s->gb); /* cbr_flag */
7311 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7312 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7313 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7314 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses the VUI section of a sequence parameter set.
 * Stores the sample aspect ratio, timing information, the bitstream
 * restriction flag and num_reorder_frames in sps; all other fields are read
 * and discarded.
 * @param h   decoder context (provides the bit reader and the log context)
 * @param sps sequence parameter set to fill in
 * @return return statements are not shown here
 */
7317 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7318 MpegEncContext * const s = &h->s;
7319 int aspect_ratio_info_present_flag;
7320 unsigned int aspect_ratio_idc;
7321 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7323 aspect_ratio_info_present_flag= get_bits1(&s->gb);
// Aspect ratio: explicit 16-bit num/den for EXTENDED_SAR, a pixel_aspect[]
// table entry for idc < 14, otherwise an error is logged.
7325 if( aspect_ratio_info_present_flag ) {
7326 aspect_ratio_idc= get_bits(&s->gb, 8);
7327 if( aspect_ratio_idc == EXTENDED_SAR ) {
7328 sps->sar.num= get_bits(&s->gb, 16);
7329 sps->sar.den= get_bits(&s->gb, 16);
7330 }else if(aspect_ratio_idc < 14){
7331 sps->sar= pixel_aspect[aspect_ratio_idc];
7333 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7340 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7342 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7343 get_bits1(&s->gb); /* overscan_appropriate_flag */
7346 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7347 get_bits(&s->gb, 3); /* video_format */
7348 get_bits1(&s->gb); /* video_full_range_flag */
7349 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7350 get_bits(&s->gb, 8); /* colour_primaries */
7351 get_bits(&s->gb, 8); /* transfer_characteristics */
7352 get_bits(&s->gb, 8); /* matrix_coefficients */
7356 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7357 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7358 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
// Timing information is kept in sps.
7361 sps->timing_info_present_flag = get_bits1(&s->gb);
7362 if(sps->timing_info_present_flag){
7363 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7364 sps->time_scale = get_bits_long(&s->gb, 32);
7365 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// HRD parameters are parsed via decode_hrd_parameters() for both the NAL and the VCL set.
7368 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7369 if(nal_hrd_parameters_present_flag)
7370 decode_hrd_parameters(h, sps);
7371 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7372 if(vcl_hrd_parameters_present_flag)
7373 decode_hrd_parameters(h, sps);
7374 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7375 get_bits1(&s->gb); /* low_delay_hrd_flag */
7376 get_bits1(&s->gb); /* pic_struct_present_flag */
7378 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7379 if(sps->bitstream_restriction_flag){
7380 unsigned int num_reorder_frames;
7381 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7382 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7383 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7384 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7385 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7386 num_reorder_frames= get_ue_golomb(&s->gb);
7387 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// Values above 16 are rejected with an error message. The variable is
// unsigned but is printed with %d.
7389 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7390 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7394 sps->num_reorder_frames= num_reorder_frames;
/**
 * Reads one scaling list from the bitstream into factors.
 * @param h             decoder context (provides the bit reader)
 * @param factors       destination array of size entries
 * @param size          number of entries; 16 selects zigzag_scan, anything else zigzag_scan8x8
 * @param jvt_list      preset list copied when the first decoded value is 0
 * @param fallback_list list copied when the bitstream does not carry this list
 */
7400 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7401 const uint8_t *jvt_list, const uint8_t *fallback_list){
7402 MpegEncContext * const s = &h->s;
7403 int i, last = 8, next = 8;
7404 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7405 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7406 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7408 for(i=0;i<size;i++){
// Entries are coded as signed deltas against the previous value, wrapped to 8 bits.
7410 next = (last + get_se_golomb(&s->gb)) & 0xff;
7411 if(!i && !next){ /* matrix not written, we use the preset one */
7412 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// A zero value means the previous value is repeated; values are stored in scan order.
7415 last = factors[scan[i]] = next ? next : last;
/**
 * Reads the scaling matrices of an SPS or a PPS from the bitstream (h->s.gb).
 * @param h               decoder context supplying the bit reader
 * @param sps             SPS providing fallback matrices; its scaling_matrix_present
 *                        flag is set here when is_sps is nonzero and matrices are coded
 * @param pps             PPS whose transform_8x8_mode is tested; only read when is_sps is 0
 * @param is_sps          nonzero while parsing an SPS, 0 while parsing a PPS
 * @param scaling_matrix4 destination for the six 16-entry lists
 * @param scaling_matrix8 destination for the two 64-entry lists
 */
7419 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7420 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7421 MpegEncContext * const s = &h->s;
// A PPS falls back to the matrices of its SPS when that SPS carried any; otherwise the defaults are used.
7422 int fallback_sps = !is_sps && sps->scaling_matrix_present;
// Fallback sources in the order: first 4x4 list, fourth 4x4 list, first 8x8 list, second 8x8 list.
7423 const uint8_t *fallback[4] = {
7424 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7425 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7426 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7427 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// One bit signals whether any matrices are coded at all.
7429 if(get_bits1(&s->gb)){
7430 sps->scaling_matrix_present |= is_sps;
// Lists 1, 2, 4 and 5 use the list decoded just before them as their fallback.
7431 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7432 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7433 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7434 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7435 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7436 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// The 64-entry lists are always read for an SPS, and for a PPS only with transform_8x8_mode set.
7437 if(is_sps || pps->transform_8x8_mode){
7438 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7439 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// Nothing coded in the PPS but the SPS had matrices: take over the SPS matrices unchanged.
7441 } else if(fallback_sps) {
7442 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7443 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7448 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7451 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7452 const size_t size, const char *name)
7455 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7460 vec[id] = av_mallocz(size);
7462 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parses a sequence parameter set from h->s.gb into the SPS slot selected by
 * the coded sps_id (slot obtained through alloc_parameter_set()).
 * @param h decoder context supplying the bit reader and the sps_buffers array
 * NOTE(review): the return statements are not visible in this listing;
 * presumably a negative value signals an error - confirm.
 */
7467 static inline int decode_seq_parameter_set(H264Context *h){
7468 MpegEncContext * const s = &h->s;
7469 int profile_idc, level_idc;
7470 unsigned int sps_id, tmp, mb_width, mb_height;
// profile/level are read before sps_id, so they are kept in locals until the slot is known.
7474 profile_idc= get_bits(&s->gb, 8);
7475 get_bits1(&s->gb); //constraint_set0_flag
7476 get_bits1(&s->gb); //constraint_set1_flag
7477 get_bits1(&s->gb); //constraint_set2_flag
7478 get_bits1(&s->gb); //constraint_set3_flag
7479 get_bits(&s->gb, 4); // reserved
7480 level_idc= get_bits(&s->gb, 8);
7481 sps_id= get_ue_golomb(&s->gb);
7483 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7487 sps->profile_idc= profile_idc;
7488 sps->level_idc= level_idc;
// Profiles >= 100 carry extra fields; chroma format and bit depths are read but discarded here.
7490 if(sps->profile_idc >= 100){ //high profile
7491 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7492 get_bits1(&s->gb); //residual_color_transform_flag
7493 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7494 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7495 sps->transform_bypass = get_bits1(&s->gb);
// pps is NULL here; decode_scaling_matrices() is called with is_sps = 1.
7496 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7498 sps->scaling_matrix_present = 0;
7500 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7501 sps->poc_type= get_ue_golomb(&s->gb);
// The fields that follow depend on the picture order count type (0, 1 or 2).
7503 if(sps->poc_type == 0){ //FIXME #define
7504 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7505 } else if(sps->poc_type == 1){//FIXME #define
7506 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7507 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7508 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7509 tmp= get_ue_golomb(&s->gb);
// The cycle length must fit the fixed-size offset_for_ref_frame array filled below.
7511 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7512 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7515 sps->poc_cycle_length= tmp;
7517 for(i=0; i<sps->poc_cycle_length; i++)
7518 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7519 }else if(sps->poc_type != 2){
7520 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7524 tmp= get_ue_golomb(&s->gb);
// NOTE(review): only a log call is visible for an oversized count and tmp is stored
// right after it - confirm that too many reference frames are actually rejected.
7525 if(tmp > MAX_PICTURE_COUNT-2){
7526 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7528 sps->ref_frame_count= tmp;
7529 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
// Both dimensions are coded minus one, in macroblock units.
7530 mb_width= get_ue_golomb(&s->gb) + 1;
7531 mb_height= get_ue_golomb(&s->gb) + 1;
// The INT_MAX/16 limits keep the 16*mb_width and 16*mb_height products below from overflowing.
7532 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7533 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7534 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7537 sps->mb_width = mb_width;
7538 sps->mb_height= mb_height;
// mb_aff is only coded when the stream is not frame-macroblocks-only.
7540 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7541 if(!sps->frame_mbs_only_flag)
7542 sps->mb_aff= get_bits1(&s->gb);
7546 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7548 #ifndef ALLOW_INTERLACE
7550 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7552 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7553 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7555 sps->crop= get_bits1(&s->gb);
7557 sps->crop_left = get_ue_golomb(&s->gb);
7558 sps->crop_right = get_ue_golomb(&s->gb);
7559 sps->crop_top = get_ue_golomb(&s->gb);
7560 sps->crop_bottom= get_ue_golomb(&s->gb);
// Left/top cropping is only reported, not rejected.
7561 if(sps->crop_left || sps->crop_top){
7562 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7568 sps->crop_bottom= 0;
7571 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7572 if( sps->vui_parameters_present_flag )
7573 decode_vui_parameters(h, sps);
// Optional one-line summary of the parsed SPS when picture-info debugging is enabled.
7575 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7576 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7577 sps_id, sps->profile_idc, sps->level_idc,
7579 sps->ref_frame_count,
7580 sps->mb_width, sps->mb_height,
7581 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7582 sps->direct_8x8_inference_flag ? "8B8" : "",
7583 sps->crop_left, sps->crop_right,
7584 sps->crop_top, sps->crop_bottom,
7585 sps->vui_parameters_present_flag ? "VUI" : ""
7592 build_qp_table(PPS *pps, int t, int index)
7595 for(i = 0; i < 255; i++)
7596 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parses a picture parameter set from h->s.gb into the PPS slot selected by
 * the coded pps_id (slot obtained through alloc_parameter_set()).
 * @param h          decoder context supplying the bit reader and parameter-set arrays
 * @param bit_length number of payload bits; the trailing optional fields are read
 *                   only while the read position is still below this value
 * NOTE(review): the return statements are not visible in this listing;
 * presumably a negative value signals an error - confirm.
 */
7599 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7600 MpegEncContext * const s = &h->s;
7601 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7604 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
// The referenced SPS id must be in range and that SPS must already be stored.
7608 tmp= get_ue_golomb(&s->gb);
7609 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7610 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7615 pps->cabac= get_bits1(&s->gb);
7616 pps->pic_order_present= get_bits1(&s->gb);
7617 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// More than one slice group means FMO, which is only reported as unsupported.
// The '|' rows below look like syntax-table text kept for reference; presumably
// they are compiled out in the full source - confirm.
7618 if(pps->slice_group_count > 1 ){
7619 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7620 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7621 switch(pps->mb_slice_group_map_type){
7624 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7625 | run_length[ i ] |1 |ue(v) |
7630 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7632 | top_left_mb[ i ] |1 |ue(v) |
7633 | bottom_right_mb[ i ] |1 |ue(v) |
7641 | slice_group_change_direction_flag |1 |u(1) |
7642 | slice_group_change_rate_minus1 |1 |ue(v) |
7647 | slice_group_id_cnt_minus1 |1 |ue(v) |
7648 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7650 | slice_group_id[ i ] |1 |u(v) |
// Both reference counts are coded minus one.
7655 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7656 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// When either count minus one exceeds 31, log it and reset both counts to 1.
7657 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7658 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7659 pps->ref_count[0]= pps->ref_count[1]= 1;
7663 pps->weighted_pred= get_bits1(&s->gb);
7664 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// init_qp and init_qs are coded as signed offsets from 26.
7665 pps->init_qp= get_se_golomb(&s->gb) + 26;
7666 pps->init_qs= get_se_golomb(&s->gb) + 26;
7667 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7668 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7669 pps->constrained_intra_pred= get_bits1(&s->gb);
7670 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
// Defaults used when the optional trailing fields are absent: no 8x8 transform
// and every scaling entry set to 16.
7672 pps->transform_8x8_mode= 0;
7673 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7674 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7675 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// Optional extension fields are present only if unread bits remain.
7677 if(get_bits_count(&s->gb) < bit_length){
7678 pps->transform_8x8_mode= get_bits1(&s->gb);
7679 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7680 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// Without the extension the second chroma offset mirrors the first.
7682 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7685 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7686 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7687 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
// NOTE(review): this sets the flag on h->pps (the context's PPS member), not on the
// 'pps' being parsed here - looks unintended; confirm it should be pps->chroma_qp_diff.
7688 h->pps.chroma_qp_diff= 1;
// Equal offsets: row 1 is a 256-byte copy of row 0.
7690 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
// Optional one-line summary of the parsed PPS when picture-info debugging is enabled.
7692 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7693 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7694 pps_id, pps->sps_id,
7695 pps->cabac ? "CABAC" : "CAVLC",
7696 pps->slice_group_count,
7697 pps->ref_count[0], pps->ref_count[1],
7698 pps->weighted_pred ? "weighted" : "",
7699 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7700 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7701 pps->constrained_intra_pred ? "CONSTR" : "",
7702 pps->redundant_pic_cnt_present ? "REDU" : "",
7703 pps->transform_8x8_mode ? "8x8DCT" : ""
/**
 * Walks the NAL units contained in buf and handles each one according to
 * h->nal_unit_type (slice data, data partitions, SPS, PPS, ...).
 * @param h        decoder context
 * @param buf      input bytes
 * @param buf_size number of bytes in buf
 * NOTE(review): the return statement is not visible in this listing; callers
 * test the result with < 0 and also use it as a buffer index - confirm.
 */
7710 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7711 MpegEncContext * const s = &h->s;
7712 AVCodecContext * const avctx= s->avctx;
// Debug dump of the first 50 input bytes.
// NOTE(review): reads buf[0..49] without looking at buf_size; presumably this
// is disabled in the full source - confirm.
7716 for(i=0; i<50; i++){
7717 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Outside chunk mode every call starts without a current picture.
7720 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7722 s->current_picture_ptr= NULL;
7733 if(buf_index >= buf_size) break;
// Length-prefixed input: the NAL size is a big-endian value of nal_length_size bytes
// (presumably this path is taken only when h->is_avc is set - confirm).
7735 for(i = 0; i < h->nal_length_size; i++)
7736 nalsize = (nalsize << 8) | buf[buf_index++];
// Sizes of at most 1, or sizes reaching past the end of buf, are reported.
7737 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7742 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7747 // start code prefix search
7748 for(; buf_index + 3 < buf_size; buf_index++){
7749 // This should always succeed in the first iteration.
7750 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7754 if(buf_index+3 >= buf_size) break;
// decode_nal() returns the unescaped payload and reports its length and the bytes consumed.
7759 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7760 if (ptr==NULL || dst_length < 0){
// Trailing zero bytes are not part of the payload.
// NOTE(review): ptr[dst_length - 1] is evaluated before dst_length > 0 is tested,
// so dst_length == 0 reads ptr[-1]; the two operands look like they should be swapped.
7763 while(ptr[dst_length - 1] == 0 && dst_length > 0)
// Payload length in bits once the RBSP trailing bits have been subtracted.
7765 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7767 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7768 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7771 if (h->is_avc && (nalsize != consumed))
7772 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7774 buf_index += consumed;
// Non-reference NAL units (nal_ref_idc == 0) are singled out when hurrying or when
// the caller asked to discard non-reference frames.
7776 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7777 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
// Dispatch on the NAL type; most case labels are not visible in this listing.
7780 switch(h->nal_unit_type){
7782 idr(h); //FIXME ensure we don't lose some frames if there is reordering
// Unpartitioned slice: all data is read through s->gb.
7784 init_get_bits(&s->gb, ptr, bit_length);
7786 h->inter_gb_ptr= &s->gb;
7787 s->data_partitioning = 0;
7789 if(decode_slice_header(h) < 0){
7790 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7793 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
// Gate combining redundant-picture, hurry_up and skip_frame settings.
7794 if(h->redundant_pic_count==0 && s->hurry_up < 5
7795 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7796 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7797 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7798 && avctx->skip_frame < AVDISCARD_ALL)
// Data-partitioned slice: the header is parsed here and the intra/inter readers
// are attached by the two blocks that follow.
7802 init_get_bits(&s->gb, ptr, bit_length);
7804 h->inter_gb_ptr= NULL;
7805 s->data_partitioning = 1;
7807 if(decode_slice_header(h) < 0){
7808 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7812 init_get_bits(&h->intra_gb, ptr, bit_length);
7813 h->intra_gb_ptr= &h->intra_gb;
7816 init_get_bits(&h->inter_gb, ptr, bit_length);
7817 h->inter_gb_ptr= &h->inter_gb;
// Same gate as for unpartitioned slices, additionally requiring the intra reader,
// partitioning mode and an initialized context.
7819 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7820 && s->context_initialized
7822 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7823 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7824 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7825 && avctx->skip_frame < AVDISCARD_ALL)
7829 init_get_bits(&s->gb, ptr, bit_length);
// Sequence parameter set; the return value of the parser is not checked here.
7833 init_get_bits(&s->gb, ptr, bit_length);
7834 decode_seq_parameter_set(h);
7836 if(s->flags& CODEC_FLAG_LOW_DELAY)
// has_b_frames is only rewritten while it is below 2.
7839 if(avctx->has_b_frames < 2)
7840 avctx->has_b_frames= !s->low_delay;
// Picture parameter set; the return value of the parser is not checked here.
7843 init_get_bits(&s->gb, ptr, bit_length);
7845 decode_picture_parameter_set(h, bit_length);
7849 case NAL_END_SEQUENCE:
7850 case NAL_END_STREAM:
7851 case NAL_FILLER_DATA:
7853 case NAL_AUXILIARY_SLICE:
7856 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7864 * returns the number of bytes consumed for building the current frame
// Adjusts the raw buffer position 'pos' into the byte count reported to the caller.
// s:        codec context; flags and parse_context.last_index are read
// pos:      position reached inside the input buffer
// buf_size: size of the input buffer
// NOTE(review): the return statements are not visible in this listing;
// presumably the adjusted pos is returned - confirm.
7866 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
// Truncated mode: bytes carried over from earlier calls (last_index) are subtracted.
7867 if(s->flags&CODEC_FLAG_TRUNCATED){
7868 pos -= s->parse_context.last_index;
7869 if(pos<0) pos=0; // FIXME remove (unneeded?)
// Otherwise at least one byte is reported, and a position within 10 bytes of the
// end is rounded up to the whole buffer.
7873 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7874 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decoder entry point: feeds one input buffer to decode_nal_units() and
 * selects the picture to output, delaying pictures to restore display order.
 * @param avctx     codec context; priv_data holds the H264Context
 * @param data      output, used as an AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is output
 * @param buf       input bytes; buf_size == 0 drains the delayed pictures
 * @param buf_size  number of bytes in buf
 * The visible return path yields get_consumed_bytes(); the error returns are
 * not visible in this listing.
 */
7880 static int decode_frame(AVCodecContext *avctx,
7881 void *data, int *data_size,
7882 uint8_t *buf, int buf_size)
7884 H264Context *h = avctx->priv_data;
7885 MpegEncContext *s = &h->s;
7886 AVFrame *pict = data;
// The caller's flags are copied on every call.
7889 s->flags= avctx->flags;
7890 s->flags2= avctx->flags2;
7892 /* no supplementary picture */
7893 if (buf_size == 0) {
7897 //FIXME factorize this with the output code below
// Pick the delayed picture with the lowest poc, scanning up to the next key frame.
7898 out = h->delayed_pic[0];
7900 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7901 if(h->delayed_pic[i]->poc < out->poc){
7902 out = h->delayed_pic[i];
// Close the gap left in the list, starting at out_idx (its assignment is not visible here).
7906 for(i=out_idx; h->delayed_pic[i]; i++)
7907 h->delayed_pic[i] = h->delayed_pic[i+1];
7910 *data_size = sizeof(AVFrame);
7911 *pict= *(AVFrame*)out;
// Truncated input: locate the frame end and merge with data kept from earlier calls.
7917 if(s->flags&CODEC_FLAG_TRUNCATED){
7918 int next= ff_h264_find_frame_end(h, buf, buf_size);
7920 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7922 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
// avcC extradata is parsed once, before the first packet.
7925 if(h->is_avc && !h->got_avcC) {
7926 int i, cnt, nalsize;
7927 unsigned char *p = avctx->extradata;
7928 if(avctx->extradata_size < 7) {
7929 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7933 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7936 /* sps and pps in the avcC always have length coded with 2 bytes,
7937 so put a fake nal_length_size = 2 while parsing them */
7938 h->nal_length_size = 2;
7939 // Decode sps from avcC
7940 cnt = *(p+5) & 0x1f; // Number of sps
// Each entry is a 16-bit big-endian length followed by the NAL; the +2 covers the length field.
// NOTE(review): no check against extradata_size is visible before p is read or
// handed on, so a crafted avcC could be read past its end - confirm.
7942 for (i = 0; i < cnt; i++) {
7943 nalsize = AV_RB16(p) + 2;
7944 if(decode_nal_units(h, p, nalsize) < 0) {
7945 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7950 // Decode pps from avcC
7951 cnt = *(p++); // Number of pps
7952 for (i = 0; i < cnt; i++) {
7953 nalsize = AV_RB16(p) + 2;
// NOTE(review): the SPS loop above tests the result with < 0, this one with != nalsize;
// confirm which check is intended.
7954 if(decode_nal_units(h, p, nalsize) != nalsize) {
7955 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7960 // Now store right nal length size, that will be used to parse all other nals
7961 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7962 // Do not reparse avcC
// Non-avcC streams: extradata is parsed as ordinary NAL units while frame_number is 0.
7966 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7967 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7971 buf_index=decode_nal_units(h, buf, buf_size);
7975 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7976 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Output stage: always outside chunk mode, otherwise once mb_y has reached mb_height.
7980 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7981 Picture *out = s->current_picture_ptr;
7982 Picture *cur = s->current_picture_ptr;
7983 Picture *prev = h->delayed_output_pic;
7984 int i, pics, cross_idr, out_of_order, out_idx;
7988 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7989 s->current_picture_ptr->pict_type= s->pict_type;
// Save frame_num state for the next picture; poc state is saved for reference pictures only.
7991 h->prev_frame_num_offset= h->frame_num_offset;
7992 h->prev_frame_num= h->frame_num;
7993 if(s->current_picture_ptr->reference){
7994 h->prev_poc_msb= h->poc_msb;
7995 h->prev_poc_lsb= h->poc_lsb;
7997 if(s->current_picture_ptr->reference)
7998 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8004 //FIXME do something with unavailable reference frames
8006 #if 0 //decode order
8007 *data_size = sizeof(AVFrame);
8009 /* Sort B-frames into display order */
// With bitstream restrictions present, the delay is raised to at least num_reorder_frames.
8011 if(h->sps.bitstream_restriction_flag
8012 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8013 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Count the queued pictures and append the current one.
8018 while(h->delayed_pic[pics]) pics++;
8020 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
8022 h->delayed_pic[pics++] = cur;
8023 if(cur->reference == 0)
// Look for a queued key frame or a picture with poc 0 (presumably sets cross_idr - confirm).
8027 for(i=0; h->delayed_pic[i]; i++)
8028 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// Candidate for output: lowest poc among the queued pictures up to the next key frame.
8031 out = h->delayed_pic[0];
8033 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8034 if(h->delayed_pic[i]->poc < out->poc){
8035 out = h->delayed_pic[i];
// Out of order: the candidate has a lower poc than the previously output picture.
8039 out_of_order = !cross_idr && prev && out->poc < prev->poc;
// Chain of cases; the visible one increments has_b_frames. Bodies of the other
// branches are not visible in this listing.
8040 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8042 else if(prev && pics <= s->avctx->has_b_frames)
8044 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8046 ((!cross_idr && prev && out->poc > prev->poc + 2)
8047 || cur->pict_type == B_TYPE)))
8050 s->avctx->has_b_frames++;
8053 else if(out_of_order)
// Remove the candidate from the queue when it is out of order or the queue exceeds the delay.
8056 if(out_of_order || pics > s->avctx->has_b_frames){
8057 for(i=out_idx; h->delayed_pic[i]; i++)
8058 h->delayed_pic[i] = h->delayed_pic[i+1];
8064 *data_size = sizeof(AVFrame);
// A previously output picture whose reference field equals 1 has it cleared
// once a different picture is chosen.
8065 if(prev && prev != out && prev->reference == 1)
8066 prev->reference = 0;
8067 h->delayed_output_pic = out;
8071 *pict= *(AVFrame*)out;
8073 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8076 assert(pict->data[0] || !*data_size);
8077 ff_print_debug_info(s, pict);
8078 //printf("out %d\n", (int)pict->data[0]);
8081 /* Return the Picture timestamp as the frame number */
8082 /* we subtract 1 because it is added in utils.c */
8083 avctx->frame_number = s->picture_number - 1;
8085 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] for the current macroblock (s->mb_x, s->mb_y).
 * A neighbour counts as available when its slice_table entry equals h->slice_num.
 */
8088 static inline void fill_mb_avail(H264Context *h){
8089 MpegEncContext * const s = &h->s;
// Index of the current macroblock in the slice_table.
8090 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Entries 0..2 describe the row above: one column left, same column, one column right.
8093 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8094 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8095 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// Entry 3 is the macroblock to the left; entries 4 and 5 are constants.
8101 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8102 h->mb_avail[4]= 1; //FIXME move out
8103 h->mb_avail[5]= 0; //FIXME move out
/*
 * Self-test code: round-trips unsigned and signed exp-golomb codes, runs the
 * 4x4 (I)DCT on random blocks, touches the quantizer and round-trips random
 * data through encode_nal()/decode_nal(), reporting via printf.
 * NOTE(review): the enclosing function header and the definition of COUNT are
 * not visible in this listing.
 */
8110 #define SIZE (COUNT*40)
8116 // int int_temp[10000];
8118 AVCodecContext avctx;
8120 dsputil_init(&dsp, &avctx);
// Write COUNT unsigned exp-golomb codes, then read them back and compare.
8122 init_put_bits(&pb, temp, SIZE);
8123 printf("testing unsigned exp golomb\n");
8124 for(i=0; i<COUNT; i++){
8126 set_ue_golomb(&pb, i);
8127 STOP_TIMER("set_ue_golomb");
8129 flush_put_bits(&pb);
8131 init_get_bits(&gb, temp, 8*SIZE);
8132 for(i=0; i<COUNT; i++){
// The next 24 bits are kept only for the mismatch message.
8135 s= show_bits(&gb, 24);
8138 j= get_ue_golomb(&gb);
8140 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8143 STOP_TIMER("get_ue_golomb");
// Same round trip for signed codes, covering the values i - COUNT/2.
8147 init_put_bits(&pb, temp, SIZE);
8148 printf("testing signed exp golomb\n");
8149 for(i=0; i<COUNT; i++){
8151 set_se_golomb(&pb, i - COUNT/2);
8152 STOP_TIMER("set_se_golomb");
8154 flush_put_bits(&pb);
8156 init_get_bits(&gb, temp, 8*SIZE);
8157 for(i=0; i<COUNT; i++){
8160 s= show_bits(&gb, 24);
8163 j= get_se_golomb(&gb);
8164 if(j != i - COUNT/2){
// NOTE(review): the message prints i as the expected value although the comparison
// above expects i - COUNT/2.
8165 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8168 STOP_TIMER("get_se_golomb");
// Forward transform of random src/ref data, rescaling, inverse transform, then error statistics.
8171 printf("testing 4x4 (I)DCT\n");
8174 uint8_t src[16], ref[16];
8175 uint64_t error= 0, max_error=0;
8177 for(i=0; i<COUNT; i++){
8179 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8180 for(j=0; j<16; j++){
8181 ref[j]= random()%255;
8182 src[j]= random()%255;
8185 h264_diff_dct_c(block, src, ref, 4);
8188 for(j=0; j<16; j++){
8189 // printf("%d ", block[j]);
8190 block[j]= block[j]*4;
8191 if(j&1) block[j]= (block[j]*4 + 2)/5;
8192 if(j&4) block[j]= (block[j]*4 + 2)/5;
// NOTE(review): 's' receives the result of show_bits() above yet is dereferenced here;
// its declaration is not visible - confirm this code still compiles.
8196 s->dsp.h264_idct_add(ref, block, 4);
8197 /* for(j=0; j<16; j++){
8198 printf("%d ", ref[j]);
8202 for(j=0; j<16; j++){
8203 int diff= FFABS(src[j] - ref[j]);
8206 max_error= FFMAX(max_error, diff);
8209 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// Quantizer section: only the random fill of the two source blocks is visible here.
8211 printf("testing quantizer\n");
8212 for(qp=0; qp<52; qp++){
8214 src1_block[i]= src2_block[i]= random()%255;
// NAL layer: random non-zero bytes with zeros inserted at random positions are encoded
// and decoded again; length, consumed size and content are then compared.
8218 printf("Testing NAL layer\n");
8220 uint8_t bitstream[COUNT];
8221 uint8_t nal[COUNT*2];
8223 memset(&h, 0, sizeof(H264Context));
8225 for(i=0; i<COUNT; i++){
8233 for(j=0; j<COUNT; j++){
8234 bitstream[j]= (random() % 255) + 1;
8237 for(j=0; j<zeros; j++){
8238 int pos= random() % COUNT;
8239 while(bitstream[pos] == 0){
8248 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8250 printf("encoding failed\n");
8254 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8258 if(out_length != COUNT){
8259 printf("incorrect length %d %d\n", out_length, COUNT);
8263 if(consumed != nal_length){
8264 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8268 if(memcmp(bitstream, out, COUNT)){
8269 printf("mismatch\n");
8274 printf("Testing RBSP\n");
// Codec close callback: releases the buffers owned by the H264Context stored in
// avctx->priv_data.
// NOTE(review): the return statement is not visible in this listing.
8282 static int decode_end(AVCodecContext *avctx)
8284 H264Context *h = avctx->priv_data;
8285 MpegEncContext *s = &h->s;
// Both RBSP scratch buffers are freed through av_freep(), then the decoder tables.
8287 av_freep(&h->rbsp_buffer[0]);
8288 av_freep(&h->rbsp_buffer[1]);
8289 free_tables(h); //FIXME cleanup init stuff perhaps
8292 // memset(h, 0, sizeof(H264Context));
8298 AVCodec h264_decoder = {
8302 sizeof(H264Context),
8307 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,