/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
41 static VLC coeff_token_vlc[4];
42 static VLC chroma_dc_coeff_token_vlc;
44 static VLC total_zeros_vlc[15];
45 static VLC chroma_dc_total_zeros_vlc[3];
47 static VLC run_vlc[6];
50 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
51 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
52 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
53 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values into one 32-bit word.
 * The halves are ordered so that, once the word is stored to memory, a lies
 * at the lower address and b at the higher one on either byte order.
 * Only the low 16 bits of each argument end up in the result.
 * @param a value for the lower-addressed half
 * @param b value for the higher-addressed half
 * @return the packed word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
    /* The shift is done on an unsigned operand: left-shifting a negative
     * int is undefined behaviour, the unsigned form wraps as intended. */
#ifdef WORDS_BIGENDIAN
    return (uint32_t)(b&0xFFFF) + ((uint32_t)a<<16);
#else
    return (uint32_t)(a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
/** Lookup table: ff_rem6[q] == q % 6 for q in 0..51. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/** Lookup table: ff_div6[q] == q / 6 for q in 0..51. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * fill a rectangle.
 * Rows are written with fixed-width stores, selected by the row width in
 * bytes (w*size): 2, 4, 8 or 16. Any other width trips assert(0).
 * Rows of 2, 4 or 8 bytes accept h = 1, 2 or 4; rows of 16 bytes accept
 * h = 2 or 4.
 * @param vp destination, must be aligned to min(w*size, STRIDE_ALIGN) bytes
 * @param w width of the rectangle in elements (at most 4), should be a constant
 * @param h height of the rectangle, should be a constant
 * @param stride distance between two rows, in elements of size bytes
 * @param val the value to store; with size==1 the byte is replicated across
 *            each 2- or 4-byte store, with size==4 it is stored as is
 * @param size the size of val (1 or 4), should be a constant
 */
static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);
    assert(w<=4);

    /* from here on w and stride are byte counts */
    w      *= size;
    stride *= size;

    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);
    if(w==2){
        const uint16_t v= size==4 ? val : val*0x0101;
        *(uint16_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint16_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint16_t*)(p + 2*stride)=
        *(uint16_t*)(p + 3*stride)= v;
    }else if(w==4){
        const uint32_t v= size==4 ? val : val*0x01010101;
        *(uint32_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint32_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint32_t*)(p + 2*stride)=
        *(uint32_t*)(p + 3*stride)= v;
    }else if(w==8){
    //gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
        /* duplicate the 32-bit value into both halves of a 64-bit store */
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint64_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 2*stride)=
        *(uint64_t*)(p + 3*stride)= v;
    }else if(w==16){
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 0+2*stride)=
        *(uint64_t*)(p + 8+2*stride)=
        *(uint64_t*)(p + 0+3*stride)=
        *(uint64_t*)(p + 8+3*stride)= v;
#else
        *(uint32_t*)(p + 0+0*stride)=
        *(uint32_t*)(p + 4+0*stride)= val;
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)=
        *(uint32_t*)(p + 4+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)=
        *(uint32_t*)(p + 4+2*stride)=
        *(uint32_t*)(p + 0+3*stride)=
        *(uint32_t*)(p + 4+3*stride)= val;
    }else if(w==16){
        *(uint32_t*)(p + 0+0*stride)=
        *(uint32_t*)(p + 4+0*stride)=
        *(uint32_t*)(p + 8+0*stride)=
        *(uint32_t*)(p +12+0*stride)=
        *(uint32_t*)(p + 0+1*stride)=
        *(uint32_t*)(p + 4+1*stride)=
        *(uint32_t*)(p + 8+1*stride)=
        *(uint32_t*)(p +12+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)=
        *(uint32_t*)(p + 4+2*stride)=
        *(uint32_t*)(p + 8+2*stride)=
        *(uint32_t*)(p +12+2*stride)=
        *(uint32_t*)(p + 0+3*stride)=
        *(uint32_t*)(p + 4+3*stride)=
        *(uint32_t*)(p + 8+3*stride)=
        *(uint32_t*)(p +12+3*stride)= val;
#endif
    }else
        assert(0);
    /* every path that reaches this point has written four rows */
    assert(h==4);
}
160 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
161 MpegEncContext * const s = &h->s;
162 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
163 int topleft_xy, top_xy, topright_xy, left_xy[2];
164 int topleft_type, top_type, topright_type, left_type[2];
168 //FIXME deblocking could skip the intra and nnz parts.
169 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
172 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
174 top_xy = mb_xy - s->mb_stride;
175 topleft_xy = top_xy - 1;
176 topright_xy= top_xy + 1;
177 left_xy[1] = left_xy[0] = mb_xy-1;
187 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
188 const int top_pair_xy = pair_xy - s->mb_stride;
189 const int topleft_pair_xy = top_pair_xy - 1;
190 const int topright_pair_xy = top_pair_xy + 1;
191 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
192 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
193 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
194 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
195 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
196 const int bottom = (s->mb_y & 1);
197 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
199 ? !curr_mb_frame_flag // bottom macroblock
200 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
202 top_xy -= s->mb_stride;
205 ? !curr_mb_frame_flag // bottom macroblock
206 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
208 topleft_xy -= s->mb_stride;
211 ? !curr_mb_frame_flag // bottom macroblock
212 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
214 topright_xy -= s->mb_stride;
216 if (left_mb_frame_flag != curr_mb_frame_flag) {
217 left_xy[1] = left_xy[0] = pair_xy - 1;
218 if (curr_mb_frame_flag) {
239 left_xy[1] += s->mb_stride;
252 h->top_mb_xy = top_xy;
253 h->left_mb_xy[0] = left_xy[0];
254 h->left_mb_xy[1] = left_xy[1];
258 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
259 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
260 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
262 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
264 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
266 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
267 for(list=0; list<h->list_count; list++){
268 if(USES_LIST(mb_type,list)){
269 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
270 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
271 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
272 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
278 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
279 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
281 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
282 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
284 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
285 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
290 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
291 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
292 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
293 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
294 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
297 if(IS_INTRA(mb_type)){
298 h->topleft_samples_available=
299 h->top_samples_available=
300 h->left_samples_available= 0xFFFF;
301 h->topright_samples_available= 0xEEEA;
303 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
304 h->topleft_samples_available= 0xB3FF;
305 h->top_samples_available= 0x33FF;
306 h->topright_samples_available= 0x26EA;
309 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
310 h->topleft_samples_available&= 0xDF5F;
311 h->left_samples_available&= 0x5F5F;
315 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
316 h->topleft_samples_available&= 0x7FFF;
318 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
319 h->topright_samples_available&= 0xFBFF;
321 if(IS_INTRA4x4(mb_type)){
322 if(IS_INTRA4x4(top_type)){
323 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
324 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
325 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
326 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
329 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
334 h->intra4x4_pred_mode_cache[4+8*0]=
335 h->intra4x4_pred_mode_cache[5+8*0]=
336 h->intra4x4_pred_mode_cache[6+8*0]=
337 h->intra4x4_pred_mode_cache[7+8*0]= pred;
340 if(IS_INTRA4x4(left_type[i])){
341 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
342 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
345 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
350 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
351 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
366 //FIXME constrained_intra_pred & partitioning & nnz (let's hope this is just a typo in the spec)
368 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
369 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
370 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
371 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
373 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
374 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
376 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
377 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
380 h->non_zero_count_cache[4+8*0]=
381 h->non_zero_count_cache[5+8*0]=
382 h->non_zero_count_cache[6+8*0]=
383 h->non_zero_count_cache[7+8*0]=
385 h->non_zero_count_cache[1+8*0]=
386 h->non_zero_count_cache[2+8*0]=
388 h->non_zero_count_cache[1+8*3]=
389 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
393 for (i=0; i<2; i++) {
395 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
396 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
397 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
398 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
400 h->non_zero_count_cache[3+8*1 + 2*8*i]=
401 h->non_zero_count_cache[3+8*2 + 2*8*i]=
402 h->non_zero_count_cache[0+8*1 + 8*i]=
403 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
410 h->top_cbp = h->cbp_table[top_xy];
411 } else if(IS_INTRA(mb_type)) {
418 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
419 } else if(IS_INTRA(mb_type)) {
425 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
428 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
433 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
435 for(list=0; list<h->list_count; list++){
436 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
437 /*if(!h->mv_cache_clean[list]){
438 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
439 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
440 h->mv_cache_clean[list]= 1;
444 h->mv_cache_clean[list]= 0;
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
449 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
450 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
451 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
452 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
453 h->ref_cache[list][scan8[0] + 0 - 1*8]=
454 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
455 h->ref_cache[list][scan8[0] + 2 - 1*8]=
456 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
458 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
459 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
460 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
461 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
462 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
466 int cache_idx = scan8[0] - 1 + i*2*8;
467 if(USES_LIST(left_type[i], list)){
468 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
469 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
470 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
471 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
472 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
473 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
475 *(uint32_t*)h->mv_cache [list][cache_idx ]=
476 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
477 h->ref_cache[list][cache_idx ]=
478 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
482 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
485 if(USES_LIST(topleft_type, list)){
486 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
487 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
488 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
489 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
491 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
492 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
495 if(USES_LIST(topright_type, list)){
496 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
497 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
498 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
499 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
501 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
502 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
505 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
508 h->ref_cache[list][scan8[5 ]+1] =
509 h->ref_cache[list][scan8[7 ]+1] =
510 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
511 h->ref_cache[list][scan8[4 ]] =
512 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
513 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
514 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
515 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
516 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
517 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
520 /* XXX beurk, Load mvd */
521 if(USES_LIST(top_type, list)){
522 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
523 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
524 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
525 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
526 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
528 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
529 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
530 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
531 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
533 if(USES_LIST(left_type[0], list)){
534 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
535 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
536 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
538 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
539 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
541 if(USES_LIST(left_type[1], list)){
542 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
543 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
544 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
546 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
547 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
549 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
550 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
551 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
552 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
553 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
555 if(h->slice_type == B_TYPE){
556 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
558 if(IS_DIRECT(top_type)){
559 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
560 }else if(IS_8X8(top_type)){
561 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
562 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
563 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
565 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
568 if(IS_DIRECT(left_type[0]))
569 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
570 else if(IS_8X8(left_type[0]))
571 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
573 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
575 if(IS_DIRECT(left_type[1]))
576 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
577 else if(IS_8X8(left_type[1]))
578 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
580 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
586 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
587 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
588 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
589 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
590 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
591 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
592 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
593 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
594 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
595 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
597 #define MAP_F2F(idx, mb_type)\
598 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
599 h->ref_cache[list][idx] <<= 1;\
600 h->mv_cache[list][idx][1] /= 2;\
601 h->mvd_cache[list][idx][1] /= 2;\
606 #define MAP_F2F(idx, mb_type)\
607 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
608 h->ref_cache[list][idx] >>= 1;\
609 h->mv_cache[list][idx][1] <<= 1;\
610 h->mvd_cache[list][idx][1] <<= 1;\
620 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
623 static inline void write_back_intra_pred_mode(H264Context *h){
624 MpegEncContext * const s = &h->s;
625 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
627 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
628 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
629 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
630 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
631 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
632 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
633 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
637 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
639 static inline int check_intra4x4_pred_mode(H264Context *h){
640 MpegEncContext * const s = &h->s;
641 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
642 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
645 if(!(h->top_samples_available&0x8000)){
647 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
649 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
652 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
657 if(!(h->left_samples_available&0x8000)){
659 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
661 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
664 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
670 } //FIXME cleanup like next
673 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
675 static inline int check_intra_pred_mode(H264Context *h, int mode){
676 MpegEncContext * const s = &h->s;
677 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
678 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
681 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
685 if(!(h->top_samples_available&0x8000)){
688 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
693 if(!(h->left_samples_available&0x8000)){
696 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
705 * gets the predicted intra4x4 prediction mode.
707 static inline int pred_intra_mode(H264Context *h, int n){
708 const int index8= scan8[n];
709 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
710 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
711 const int min= FFMIN(left, top);
713 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
715 if(min<0) return DC_PRED;
719 static inline void write_back_non_zero_count(H264Context *h){
720 MpegEncContext * const s = &h->s;
721 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
723 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
724 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
725 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
726 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
727 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
728 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
729 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
731 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
732 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
733 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
735 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
736 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
737 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
740 // store all luma nnzs, for deblocking
743 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
744 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
749 * gets the predicted number of non zero coefficients.
750 * @param n block index
752 static inline int pred_non_zero_count(H264Context *h, int n){
753 const int index8= scan8[n];
754 const int left= h->non_zero_count_cache[index8 - 1];
755 const int top = h->non_zero_count_cache[index8 - 8];
758 if(i<64) i= (i+1)>>1;
760 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
765 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
766 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
767 MpegEncContext *s = &h->s;
769 /* there is no consistent mapping of mvs to neighboring locations that will
770 * make mbaff happy, so we can't move all this logic to fill_caches */
772 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
774 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
775 *C = h->mv_cache[list][scan8[0]-2];
778 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
779 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
780 if(IS_INTERLACED(mb_types[topright_xy])){
781 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
782 const int x4 = X4, y4 = Y4;\
783 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
784 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
785 return LIST_NOT_USED;\
786 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
787 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
788 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
789 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
791 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
794 if(topright_ref == PART_NOT_AVAILABLE
795 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
796 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
798 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
799 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
802 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
804 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
805 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
811 if(topright_ref != PART_NOT_AVAILABLE){
812 *C= h->mv_cache[list][ i - 8 + part_width ];
815 tprintf(s->avctx, "topright MV not available\n");
817 *C= h->mv_cache[list][ i - 8 - 1 ];
818 return h->ref_cache[list][ i - 8 - 1 ];
823 * gets the predicted MV.
824 * @param n the block index
825 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
826 * @param mx the x component of the predicted motion vector
827 * @param my the y component of the predicted motion vector
829 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
830 const int index8= scan8[n];
831 const int top_ref= h->ref_cache[list][ index8 - 8 ];
832 const int left_ref= h->ref_cache[list][ index8 - 1 ];
833 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
834 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
836 int diagonal_ref, match_count;
838 assert(part_width==1 || part_width==2 || part_width==4);
848 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
849 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
850 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
851 if(match_count > 1){ //most common
852 *mx= mid_pred(A[0], B[0], C[0]);
853 *my= mid_pred(A[1], B[1], C[1]);
854 }else if(match_count==1){
858 }else if(top_ref==ref){
866 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
870 *mx= mid_pred(A[0], B[0], C[0]);
871 *my= mid_pred(A[1], B[1], C[1]);
875 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
879 * gets the directionally predicted 16x8 MV.
880 * @param n the block index
881 * @param mx the x component of the predicted motion vector
882 * @param my the y component of the predicted motion vector
884 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
886 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
887 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
889 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
897 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
898 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
900 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
910 pred_motion(h, n, 4, list, ref, mx, my);
914 * gets the directionally predicted 8x16 MV.
915 * @param n the block index
916 * @param mx the x component of the predicted motion vector
917 * @param my the y component of the predicted motion vector
919 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
921 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
922 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
924 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
935 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
937 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
939 if(diagonal_ref == ref){
947 pred_motion(h, n, 2, list, ref, mx, my);
950 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
951 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
952 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
954 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
956 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
957 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
958 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
964 pred_motion(h, 0, 4, 0, 0, mx, my);
969 static inline void direct_dist_scale_factor(H264Context * const h){
970 const int poc = h->s.current_picture_ptr->poc;
971 const int poc1 = h->ref_list[1][0].poc;
973 for(i=0; i<h->ref_count[0]; i++){
974 int poc0 = h->ref_list[0][i].poc;
975 int td = av_clip(poc1 - poc0, -128, 127);
976 if(td == 0 /* FIXME || pic0 is a long-term ref */){
977 h->dist_scale_factor[i] = 256;
979 int tb = av_clip(poc - poc0, -128, 127);
980 int tx = (16384 + (FFABS(td) >> 1)) / td;
981 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
985 for(i=0; i<h->ref_count[0]; i++){
986 h->dist_scale_factor_field[2*i] =
987 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
991 static inline void direct_ref_list_init(H264Context * const h){
992 MpegEncContext * const s = &h->s;
993 Picture * const ref1 = &h->ref_list[1][0];
994 Picture * const cur = s->current_picture_ptr;
996 if(cur->pict_type == I_TYPE)
997 cur->ref_count[0] = 0;
998 if(cur->pict_type != B_TYPE)
999 cur->ref_count[1] = 0;
1000 for(list=0; list<2; list++){
1001 cur->ref_count[list] = h->ref_count[list];
1002 for(j=0; j<h->ref_count[list]; j++)
1003 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1005 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1007 for(list=0; list<2; list++){
1008 for(i=0; i<ref1->ref_count[list]; i++){
1009 const int poc = ref1->ref_poc[list][i];
1010 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1011 for(j=0; j<h->ref_count[list]; j++)
1012 if(h->ref_list[list][j].poc == poc){
1013 h->map_col_to_list0[list][i] = j;
1019 for(list=0; list<2; list++){
1020 for(i=0; i<ref1->ref_count[list]; i++){
1021 j = h->map_col_to_list0[list][i];
1022 h->map_col_to_list0_field[list][2*i] = 2*j;
1023 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
1029 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1030 MpegEncContext * const s = &h->s;
1031 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1032 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1033 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1034 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1035 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1036 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1037 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1038 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1039 const int is_b8x8 = IS_8X8(*mb_type);
1040 unsigned int sub_mb_type;
1043 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1044 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1045 /* FIXME save sub mb types from previous frames (or derive from MVs)
1046 * so we know exactly what block size to use */
1047 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1048 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1049 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1050 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1051 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1053 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1054 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1057 *mb_type |= MB_TYPE_DIRECT2;
1059 *mb_type |= MB_TYPE_INTERLACED;
1061 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1063 if(h->direct_spatial_mv_pred){
1068 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1070 /* ref = min(neighbors) */
1071 for(list=0; list<2; list++){
1072 int refa = h->ref_cache[list][scan8[0] - 1];
1073 int refb = h->ref_cache[list][scan8[0] - 8];
1074 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1076 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1078 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1080 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1086 if(ref[0] < 0 && ref[1] < 0){
1087 ref[0] = ref[1] = 0;
1088 mv[0][0] = mv[0][1] =
1089 mv[1][0] = mv[1][1] = 0;
1091 for(list=0; list<2; list++){
1093 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1095 mv[list][0] = mv[list][1] = 0;
1100 *mb_type &= ~MB_TYPE_P0L1;
1101 sub_mb_type &= ~MB_TYPE_P0L1;
1102 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_P0L0;
1104 sub_mb_type &= ~MB_TYPE_P0L0;
1107 if(IS_16X16(*mb_type)){
1110 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1112 if(!IS_INTRA(mb_type_col)
1113 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1114 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1115 && (h->x264_build>33 || !h->x264_build)))){
1117 a= pack16to32(mv[0][0],mv[0][1]);
1119 b= pack16to32(mv[1][0],mv[1][1]);
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 b= pack16to32(mv[1][0],mv[1][1]);
1124 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1127 for(i8=0; i8<4; i8++){
1128 const int x8 = i8&1;
1129 const int y8 = i8>>1;
1131 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1133 h->sub_mb_type[i8] = sub_mb_type;
1135 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1137 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1138 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1141 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1142 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1143 && (h->x264_build>33 || !h->x264_build)))){
1144 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1145 if(IS_SUB_8X8(sub_mb_type)){
1146 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1147 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1149 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1151 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1154 for(i4=0; i4<4; i4++){
1155 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1156 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1158 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1160 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1166 }else{ /* direct temporal mv pred */
1167 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1168 const int *dist_scale_factor = h->dist_scale_factor;
1171 if(IS_INTERLACED(*mb_type)){
1172 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1173 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1174 dist_scale_factor = h->dist_scale_factor_field;
1176 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1177 /* FIXME assumes direct_8x8_inference == 1 */
1178 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1179 int mb_types_col[2];
1182 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1183 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1184 | (*mb_type & MB_TYPE_INTERLACED);
1185 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1187 if(IS_INTERLACED(*mb_type)){
1188 /* frame to field scaling */
1189 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1190 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1192 l1ref0 -= 2*h->b8_stride;
1193 l1ref1 -= 2*h->b8_stride;
1194 l1mv0 -= 4*h->b_stride;
1195 l1mv1 -= 4*h->b_stride;
1199 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1200 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1202 *mb_type |= MB_TYPE_16x8;
1204 *mb_type |= MB_TYPE_8x8;
1206 /* field to frame scaling */
1207 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1208 * but in MBAFF, top and bottom POC are equal */
1209 int dy = (s->mb_y&1) ? 1 : 2;
1211 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1212 l1ref0 += dy*h->b8_stride;
1213 l1ref1 += dy*h->b8_stride;
1214 l1mv0 += 2*dy*h->b_stride;
1215 l1mv1 += 2*dy*h->b_stride;
1218 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1220 *mb_type |= MB_TYPE_16x16;
1222 *mb_type |= MB_TYPE_8x8;
1225 for(i8=0; i8<4; i8++){
1226 const int x8 = i8&1;
1227 const int y8 = i8>>1;
1229 const int16_t (*l1mv)[2]= l1mv0;
1231 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1233 h->sub_mb_type[i8] = sub_mb_type;
1235 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1236 if(IS_INTRA(mb_types_col[y8])){
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1238 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1239 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1243 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1245 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1247 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1250 scale = dist_scale_factor[ref0];
1251 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1254 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1255 int my_col = (mv_col[1]<<y_shift)/2;
1256 int mx = (scale * mv_col[0] + 128) >> 8;
1257 int my = (scale * my_col + 128) >> 8;
1258 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1259 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1266 /* one-to-one mv scaling */
1268 if(IS_16X16(*mb_type)){
1271 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1272 if(IS_INTRA(mb_type_col)){
1275 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1276 : map_col_to_list0[1][l1ref1[0]];
1277 const int scale = dist_scale_factor[ref0];
1278 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1280 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1281 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1283 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1284 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1286 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1290 for(i8=0; i8<4; i8++){
1291 const int x8 = i8&1;
1292 const int y8 = i8>>1;
1294 const int16_t (*l1mv)[2]= l1mv0;
1296 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1298 h->sub_mb_type[i8] = sub_mb_type;
1299 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1300 if(IS_INTRA(mb_type_col)){
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1302 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1303 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1307 ref0 = l1ref0[x8 + y8*h->b8_stride];
1309 ref0 = map_col_to_list0[0][ref0];
1311 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1314 scale = dist_scale_factor[ref0];
1316 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1317 if(IS_SUB_8X8(sub_mb_type)){
1318 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1319 int mx = (scale * mv_col[0] + 128) >> 8;
1320 int my = (scale * mv_col[1] + 128) >> 8;
1321 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1322 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1324 for(i4=0; i4<4; i4++){
1325 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1326 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1327 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1328 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1329 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1330 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1337 static inline void write_back_motion(H264Context *h, int mb_type){
1338 MpegEncContext * const s = &h->s;
1339 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1340 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1343 if(!USES_LIST(mb_type, 0))
1344 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1346 for(list=0; list<h->list_count; list++){
1348 if(!USES_LIST(mb_type, list))
1352 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1353 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1355 if( h->pps.cabac ) {
1356 if(IS_SKIP(mb_type))
1357 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1360 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1361 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1366 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1367 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1368 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1369 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1370 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1374 if(h->slice_type == B_TYPE && h->pps.cabac){
1375 if(IS_8X8(mb_type)){
1376 uint8_t *direct_table = &h->direct_table[b8_xy];
1377 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1378 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1379 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1385 * Decodes a network abstraction layer unit.
1386 * @param consumed is the number of bytes used as input
1387 * @param length is the length of the array
1388 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1389 * @returns decoded bytes, might be src+1 if no escapes
1391 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1396 // src[0]&0x80; //forbidden bit
1397 h->nal_ref_idc= src[0]>>5;
1398 h->nal_unit_type= src[0]&0x1F;
1402 for(i=0; i<length; i++)
1403 printf("%2X ", src[i]);
1405 for(i=0; i+1<length; i+=2){
1406 if(src[i]) continue;
1407 if(i>0 && src[i-1]==0) i--;
1408 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1410 /* startcode, so we must be past the end */
1417 if(i>=length-1){ //no escaped 0
1418 *dst_length= length;
1419 *consumed= length+1; //+1 for the header
1423 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1424 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1425 dst= h->rbsp_buffer[bufidx];
1431 //printf("decoding esc\n");
1434 //remove escapes (very rare 1:2^22)
1435 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1436 if(src[si+2]==3){ //escape
1441 }else //next start code
1445 dst[di++]= src[si++];
1449 *consumed= si + 1;//+1 for the header
1450 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1455 * identifies the exact end of the bitstream
1456 * @return the length of the trailing, or 0 if damaged
1458 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1462 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1472 * IDCT transforms the 16 dc values and dequantizes them.
1473 * @param qp quantization parameter
1475 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1478 int temp[16]; //FIXME check if this is a good idea
1479 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1480 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1482 //memset(block, 64, 2*256);
1485 const int offset= y_offset[i];
1486 const int z0= block[offset+stride*0] + block[offset+stride*4];
1487 const int z1= block[offset+stride*0] - block[offset+stride*4];
1488 const int z2= block[offset+stride*1] - block[offset+stride*5];
1489 const int z3= block[offset+stride*1] + block[offset+stride*5];
1498 const int offset= x_offset[i];
1499 const int z0= temp[4*0+i] + temp[4*2+i];
1500 const int z1= temp[4*0+i] - temp[4*2+i];
1501 const int z2= temp[4*1+i] - temp[4*3+i];
1502 const int z3= temp[4*1+i] + temp[4*3+i];
1504 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1505 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1506 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1507 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1513 * DCT transforms the 16 dc values.
1514 * @param qp quantization parameter ??? FIXME
1516 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1517 // const int qmul= dequant_coeff[qp][0];
1519 int temp[16]; //FIXME check if this is a good idea
1520 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1521 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1524 const int offset= y_offset[i];
1525 const int z0= block[offset+stride*0] + block[offset+stride*4];
1526 const int z1= block[offset+stride*0] - block[offset+stride*4];
1527 const int z2= block[offset+stride*1] - block[offset+stride*5];
1528 const int z3= block[offset+stride*1] + block[offset+stride*5];
1537 const int offset= x_offset[i];
1538 const int z0= temp[4*0+i] + temp[4*2+i];
1539 const int z1= temp[4*0+i] - temp[4*2+i];
1540 const int z2= temp[4*1+i] - temp[4*3+i];
1541 const int z3= temp[4*1+i] + temp[4*3+i];
1543 block[stride*0 +offset]= (z0 + z3)>>1;
1544 block[stride*2 +offset]= (z1 + z2)>>1;
1545 block[stride*8 +offset]= (z1 - z2)>>1;
1546 block[stride*10+offset]= (z0 - z3)>>1;
1554 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1555 const int stride= 16*2;
1556 const int xStride= 16;
1559 a= block[stride*0 + xStride*0];
1560 b= block[stride*0 + xStride*1];
1561 c= block[stride*1 + xStride*0];
1562 d= block[stride*1 + xStride*1];
1569 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1570 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1571 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1572 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1576 static void chroma_dc_dct_c(DCTELEM *block){
1577 const int stride= 16*2;
1578 const int xStride= 16;
1581 a= block[stride*0 + xStride*0];
1582 b= block[stride*0 + xStride*1];
1583 c= block[stride*1 + xStride*0];
1584 d= block[stride*1 + xStride*1];
1591 block[stride*0 + xStride*0]= (a+c);
1592 block[stride*0 + xStride*1]= (e+b);
1593 block[stride*1 + xStride*0]= (a-c);
1594 block[stride*1 + xStride*1]= (e-b);
1599 * gets the chroma qp.
1601 static inline int get_chroma_qp(H264Context *h, int qscale){
1602 return h->pps.chroma_qp_table[qscale & 0xff];
1605 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1606 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1607 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1609 const int * const quant_table= quant_coeff[qscale];
1610 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1611 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1612 const unsigned int threshold2= (threshold1<<1);
1618 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1619 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1620 const unsigned int dc_threshold2= (dc_threshold1<<1);
1622 int level= block[0]*quant_coeff[qscale+18][0];
1623 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1625 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1628 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1631 // last_non_zero = i;
1636 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1637 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1638 const unsigned int dc_threshold2= (dc_threshold1<<1);
1640 int level= block[0]*quant_table[0];
1641 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1643 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1646 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1649 // last_non_zero = i;
1662 const int j= scantable[i];
1663 int level= block[j]*quant_table[j];
1665 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1666 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1667 if(((unsigned)(level+threshold1))>threshold2){
1669 level= (bias + level)>>QUANT_SHIFT;
1672 level= (bias - level)>>QUANT_SHIFT;
1681 return last_non_zero;
/**
 * 4x4 vertical prediction: copies the 4 pixels of the row directly above
 * the block into each of the 4 rows of the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param topright not used by this predictor
 * @param stride offset in bytes between vertically adjacent pixels
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top_row = *(uint32_t*)(src - stride);
    int y;

    for(y = 0; y < 4; y++)
        *(uint32_t*)(src + y*stride) = top_row;
}
/**
 * 4x4 horizontal prediction: fills every row of the block with the pixel
 * immediately to the left of that row.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param topright not used by this predictor
 * @param stride offset in bytes between vertically adjacent pixels
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y = 0; y < 4; y++){
        /* The multiply must be unsigned: pixel*0x01010101 does not fit in a
         * signed int for pixel >= 128, which would be undefined behavior. */
        *(uint32_t*)(src + y*stride) = src[-1 + y*stride] * 0x01010101U;
    }
}
/**
 * 4x4 DC prediction: fills the block with the rounded average of the 4 pixels
 * above and the 4 pixels left of the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param topright not used by this predictor
 * @param stride offset in bytes between vertically adjacent pixels
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    const uint32_t dc_splat= dc * 0x01010101U;
    int y;

    for(y = 0; y < 4; y++)
        *(uint32_t*)(src + y*stride) = dc_splat;
}
/**
 * 4x4 DC prediction from the left edge only: fills the block with the
 * rounded average of the 4 pixels left of the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param topright not used by this predictor
 * @param stride offset in bytes between vertically adjacent pixels
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    const uint32_t dc_splat= dc * 0x01010101U;
    int y;

    for(y = 0; y < 4; y++)
        *(uint32_t*)(src + y*stride) = dc_splat;
}
/**
 * 4x4 DC prediction from the top edge only: fills the block with the
 * rounded average of the 4 pixels above the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param topright not used by this predictor
 * @param stride offset in bytes between vertically adjacent pixels
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    /* unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128 */
    const uint32_t dc_splat= dc * 0x01010101U;
    int y;

    for(y = 0; y < 4; y++)
        *(uint32_t*)(src + y*stride) = dc_splat;
}
/**
 * 4x4 DC prediction without any neighbours: fills the block with the
 * constant value 128.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param topright not used by this predictor
 * @param stride offset in bytes between vertically adjacent pixels
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t grey = 128U*0x01010101U;
    int y;

    for(y = 3; y >= 0; y--)
        *(uint32_t*)(src + y*stride) = grey;
}
1735 #define LOAD_TOP_RIGHT_EDGE\
1736 const int av_unused t4= topright[0];\
1737 const int av_unused t5= topright[1];\
1738 const int av_unused t6= topright[2];\
1739 const int av_unused t7= topright[3];\
1741 #define LOAD_LEFT_EDGE\
1742 const int av_unused l0= src[-1+0*stride];\
1743 const int av_unused l1= src[-1+1*stride];\
1744 const int av_unused l2= src[-1+2*stride];\
1745 const int av_unused l3= src[-1+3*stride];\
1747 #define LOAD_TOP_EDGE\
1748 const int av_unused t0= src[ 0-1*stride];\
1749 const int av_unused t1= src[ 1-1*stride];\
1750 const int av_unused t2= src[ 2-1*stride];\
1751 const int av_unused t3= src[ 3-1*stride];\
1753 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1754 const int lt= src[-1-1*stride];
1758 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1760 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1763 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1767 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1770 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1772 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1773 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1776 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1781 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1783 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1786 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1790 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1793 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1795 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1796 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
1799 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1800 const int lt= src[-1-1*stride];
1805 src[1+2*stride]=(lt + t0 + 1)>>1;
1807 src[2+2*stride]=(t0 + t1 + 1)>>1;
1809 src[3+2*stride]=(t1 + t2 + 1)>>1;
1810 src[3+0*stride]=(t2 + t3 + 1)>>1;
1812 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1814 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1816 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1817 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1818 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1819 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1822 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
1826 src[0+0*stride]=(t0 + t1 + 1)>>1;
1828 src[0+2*stride]=(t1 + t2 + 1)>>1;
1830 src[1+2*stride]=(t2 + t3 + 1)>>1;
1832 src[2+2*stride]=(t3 + t4+ 1)>>1;
1833 src[3+2*stride]=(t4 + t5+ 1)>>1;
1834 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1836 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1838 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
1840 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
1841 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
1844 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
1847 src[0+0*stride]=(l0 + l1 + 1)>>1;
1848 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1850 src[0+1*stride]=(l1 + l2 + 1)>>1;
1852 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
1854 src[0+2*stride]=(l2 + l3 + 1)>>1;
1856 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
1865 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
1866 const int lt= src[-1-1*stride];
1871 src[2+1*stride]=(lt + l0 + 1)>>1;
1873 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
1874 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
1875 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1877 src[2+2*stride]=(l0 + l1 + 1)>>1;
1879 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1881 src[2+3*stride]=(l1 + l2+ 1)>>1;
1883 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1884 src[0+3*stride]=(l2 + l3 + 1)>>1;
1885 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/**
 * 16x16 vertical prediction: copies the 16 pixels of the row directly above
 * the block into each of the 16 rows of the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t *above = (uint32_t*)(src - stride);
    const uint32_t w0 = above[0];
    const uint32_t w1 = above[1];
    const uint32_t w2 = above[2];
    const uint32_t w3 = above[3];
    int row;

    for(row = 0; row < 16; row++){
        uint32_t *line = (uint32_t*)(src + row*stride);

        line[0] = w0;
        line[1] = w1;
        line[2] = w2;
        line[3] = w3;
    }
}
/**
 * 16x16 horizontal prediction: fills every row of the block with the pixel
 * immediately to the left of that row.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        /* unsigned multiply: pixel*0x01010101 overflows a signed int
         * for pixel >= 128 */
        const uint32_t left= src[-1+i*stride]*0x01010101U;

        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= left;
    }
}
/**
 * 16x16 DC prediction: fills the block with the rounded average of the
 * 16 pixels left of and the 16 pixels above the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0; i<16; i++){
        sum+= src[-1+i*stride];
    }

    for(i=0; i<16; i++){
        sum+= src[i-stride];
    }

    /* 32 samples summed, hence +16 and >>5; the splat is done unsigned as
     * 0x01010101*value overflows a signed int for value >= 128 */
    dc= 0x01010101U*((sum + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC prediction from the left edge only: fills the block with the
 * rounded average of the 16 pixels left of the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0; i<16; i++){
        sum+= src[-1+i*stride];
    }

    /* unsigned splat: 0x01010101*value overflows a signed int for value >= 128 */
    dc= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC prediction from the top edge only: fills the block with the
 * rounded average of the 16 pixels above the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0; i<16; i++){
        sum+= src[i-stride];
    }
    /* unsigned splat: 0x01010101*value overflows a signed int for value >= 128 */
    dc= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC prediction without any neighbours: fills the block with the
 * constant value 128.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey = 0x01010101U*128U;
    int row, word;

    for(row = 0; row < 16; row++){
        uint32_t *line = (uint32_t*)(src + row*stride);

        for(word = 0; word < 4; word++)
            line[word] = grey;
    }
}
1979 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
1982 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1983 const uint8_t * const src0 = src+7-stride;
1984 const uint8_t *src1 = src+8*stride-1;
1985 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
1986 int H = src0[1] - src0[-1];
1987 int V = src1[0] - src2[ 0];
1988 for(k=2; k<=8; ++k) {
1989 src1 += stride; src2 -= stride;
1990 H += k*(src0[k] - src0[-k]);
1991 V += k*(src1[0] - src2[ 0]);
1994 H = ( 5*(H/4) ) / 16;
1995 V = ( 5*(V/4) ) / 16;
1997 /* required for 100% accuracy */
1998 i = H; H = V; V = i;
2000 H = ( 5*H+32 ) >> 6;
2001 V = ( 5*V+32 ) >> 6;
2004 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2005 for(j=16; j>0; --j) {
2008 for(i=-16; i<0; i+=4) {
2009 src[16+i] = cm[ (b ) >> 5 ];
2010 src[17+i] = cm[ (b+ H) >> 5 ];
2011 src[18+i] = cm[ (b+2*H) >> 5 ];
2012 src[19+i] = cm[ (b+3*H) >> 5 ];
/**
 * 16x16 plane prediction.
 * Forwards to pred16x16_plane_compat_c() with its svq3 argument set to 0.
 * @param src pointer handed through unchanged to the shared implementation
 * @param stride stride handed through unchanged to the shared implementation
 */
void ff_pred16x16_plane_c(uint8_t *src, int stride){
    const int svq3 = 0;

    pred16x16_plane_compat_c(src, stride, svq3);
}
/**
 * 8x8 vertical prediction: copies the 8 pixels of the row directly above
 * the block into each of the 8 rows of the block.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t *above = (uint32_t*)(src - stride);
    const uint32_t w0 = above[0];
    const uint32_t w1 = above[1];
    int row;

    for(row = 0; row < 8; row++){
        uint32_t *line = (uint32_t*)(src + row*stride);

        line[0] = w0;
        line[1] = w1;
    }
}
/**
 * 8x8 horizontal prediction: fills every row of the block with the pixel
 * immediately to the left of that row.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        /* unsigned multiply: pixel*0x01010101 overflows a signed int
         * for pixel >= 128 */
        const uint32_t left= src[-1+i*stride]*0x01010101U;

        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= left;
    }
}
/**
 * 8x8 DC prediction without any neighbours: fills the block with the
 * constant value 128.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey = 0x01010101U*128U;
    int row;

    for(row = 0; row < 8; row++){
        uint32_t *line = (uint32_t*)(src + row*stride);

        line[1] = grey;
        line[0] = grey;
    }
}
/**
 * 8x8 DC prediction from the left edge only.
 * Rows 0-3 are filled with the rounded average of the 4 left pixels of
 * rows 0-3, rows 4-7 with the rounded average of the 4 left pixels of
 * rows 4-7.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int sum0, sum2;
    uint32_t dc0, dc2;

    sum0=sum2=0;
    for(i=0; i<4; i++){
        sum0+= src[-1+i*stride];
        sum2+= src[-1+(i+4)*stride];
    }
    /* unsigned splat: 0x01010101*value overflows a signed int for value >= 128 */
    dc0= 0x01010101U*((sum0 + 2)>>2);
    dc2= 0x01010101U*((sum2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
/**
 * 8x8 DC prediction from the top edge only.
 * Columns 0-3 of every row are filled with the rounded average of the
 * 4 pixels above columns 0-3, columns 4-7 with the rounded average of the
 * 4 pixels above columns 4-7.
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int sum0, sum1;
    uint32_t dc0, dc1;

    sum0=sum1=0;
    for(i=0; i<4; i++){
        sum0+= src[i-stride];
        sum1+= src[4+i-stride];
    }
    /* unsigned splat: 0x01010101*value overflows a signed int for value >= 128 */
    dc0= 0x01010101U*((sum0 + 2)>>2);
    dc1= 0x01010101U*((sum1 + 2)>>2);

    /* upper and lower half of the block receive the same two values */
    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
/**
 * 8x8 DC prediction, computed separately for each 4x4 quadrant.
 * - top-left:     average of the 4 left pixels of rows 0-3 and the 4 pixels
 *                 above columns 0-3
 * - top-right:    average of the 4 pixels above columns 4-7
 * - bottom-left:  average of the 4 left pixels of rows 4-7
 * - bottom-right: average of the 4 pixels above columns 4-7 and the 4 left
 *                 pixels of rows 4-7
 * @param src pointer to the top-left pixel of the block (accessed as uint32_t)
 * @param stride offset in bytes between vertically adjacent pixels
 */
void ff_pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum0, sum1, sum2;
    uint32_t dc0, dc1, dc2, dc3;

    sum0=sum1=sum2=0;
    for(i=0; i<4; i++){
        sum0+= src[-1+i*stride] + src[i-stride];
        sum1+= src[4+i-stride];
        sum2+= src[-1+(i+4)*stride];
    }
    /* unsigned splat: 0x01010101*value overflows a signed int for value >= 128 */
    dc3= 0x01010101U*((sum1 + sum2 + 4)>>3);
    dc0= 0x01010101U*((sum0 + 4)>>3);
    dc1= 0x01010101U*((sum1 + 2)>>2);
    dc2= 0x01010101U*((sum2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
2122 void ff_pred8x8_plane_c(uint8_t *src, int stride){
2125 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2126 const uint8_t * const src0 = src+3-stride;
2127 const uint8_t *src1 = src+4*stride-1;
2128 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2129 int H = src0[1] - src0[-1];
2130 int V = src1[0] - src2[ 0];
2131 for(k=2; k<=4; ++k) {
2132 src1 += stride; src2 -= stride;
2133 H += k*(src0[k] - src0[-k]);
2134 V += k*(src1[0] - src2[ 0]);
2136 H = ( 17*H+16 ) >> 5;
2137 V = ( 17*V+16 ) >> 5;
2139 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2140 for(j=8; j>0; --j) {
2143 src[0] = cm[ (b ) >> 5 ];
2144 src[1] = cm[ (b+ H) >> 5 ];
2145 src[2] = cm[ (b+2*H) >> 5 ];
2146 src[3] = cm[ (b+3*H) >> 5 ];
2147 src[4] = cm[ (b+4*H) >> 5 ];
2148 src[5] = cm[ (b+5*H) >> 5 ];
2149 src[6] = cm[ (b+6*H) >> 5 ];
2150 src[7] = cm[ (b+7*H) >> 5 ];
2155 #define SRC(x,y) src[(x)+(y)*stride]
2157 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2158 #define PREDICT_8x8_LOAD_LEFT \
2159 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2160 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2161 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2162 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2165 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2166 #define PREDICT_8x8_LOAD_TOP \
2167 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2168 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2169 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2170 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2171 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2174 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2175 #define PREDICT_8x8_LOAD_TOPRIGHT \
2176 int t8, t9, t10, t11, t12, t13, t14, t15; \
2177 if(has_topright) { \
2178 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2179 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2180 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2182 #define PREDICT_8x8_LOAD_TOPLEFT \
2183 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2185 #define PREDICT_8x8_DC(v) \
2187 for( y = 0; y < 8; y++ ) { \
2188 ((uint32_t*)src)[0] = \
2189 ((uint32_t*)src)[1] = v; \
2193 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2195 PREDICT_8x8_DC(0x80808080);
2197 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2199 PREDICT_8x8_LOAD_LEFT;
2200 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the top row only.
 * dc is the rounded mean ((sum + 4) >> 3) of the eight filtered top samples
 * t0..t7; multiplying by 0x01010101 copies that byte into all four bytes of
 * a 32-bit word. */
2203 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2205 PREDICT_8x8_LOAD_TOP;
2206 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from both the left column and the top row.
 * dc is the rounded mean ((sum + 8) >> 4) of the sixteen filtered neighbour
 * samples l0..l7 and t0..t7, replicated into all four bytes of a 32-bit
 * word by the multiplication with 0x01010101. */
2209 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2211 PREDICT_8x8_LOAD_LEFT;
2212 PREDICT_8x8_LOAD_TOP;
2213 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2214 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: row y is filled with the filtered left
 * sample l<y>.  ROW(y) replicates that byte into a 32-bit word and stores
 * the word twice, covering the 8 bytes of the row. */
2217 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2219 PREDICT_8x8_LOAD_LEFT;
2220 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2221 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2222 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: the filtered top samples are loaded and
 * rows 1..7 are made identical to row 0 with one 64-bit copy per row.
 * NOTE(review): the uint64_t casts assume each row of src may be accessed as
 * a 64-bit object (alignment / aliasing) — confirm for all targets. */
2225 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2228 PREDICT_8x8_LOAD_TOP;
2237 for( y = 1; y < 8; y++ )
2238 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal down-left prediction.
 * Uses the filtered top row t0..t7 and top-right row t8..t15.  All samples
 * with the same x+y share one value: the (1,2,1)-filtered triple starting at
 * t<x+y>.  The last sample (7,7) has no third tap and weights t15 three
 * times instead. */
2240 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2242 PREDICT_8x8_LOAD_TOP;
2243 PREDICT_8x8_LOAD_TOPRIGHT;
2244 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2245 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2246 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2247 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2248 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2249 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2250 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2251 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2252 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2253 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2254 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2255 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2256 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2257 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2258 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal down-right prediction.
 * Uses the filtered left column, top row and corner (l0..l7, t0..t7, lt).
 * All samples with the same x-y share one (1,2,1)-filtered value.  The
 * assignments walk from the bottom-left diagonal (left samples), across the
 * main diagonal (centred on lt), to the top-right diagonal (top samples). */
2260 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2262 PREDICT_8x8_LOAD_TOP;
2263 PREDICT_8x8_LOAD_LEFT;
2264 PREDICT_8x8_LOAD_TOPLEFT;
2265 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2266 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2267 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2268 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2269 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2270 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2271 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2272 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2273 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2274 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2275 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2276 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2277 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2278 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2279 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction.
 * Uses the filtered left column, top row and corner.  Samples lying on the
 * same line (one step right for every two steps down) share one value.
 * Values alternate between a 2-tap average ((a + b + 1) >> 1) and a
 * (1,2,1)-filtered triple; the left-most columns are fed from the left
 * samples, the remainder from lt and the top samples. */
2282 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2284 PREDICT_8x8_LOAD_TOP;
2285 PREDICT_8x8_LOAD_LEFT;
2286 PREDICT_8x8_LOAD_TOPLEFT;
2287 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2288 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2289 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2290 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2291 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2292 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2293 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2294 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2295 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2296 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2297 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2298 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2299 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2300 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2301 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2302 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2303 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2304 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2305 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2306 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2307 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2308 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction.
 * Uses the filtered left column, top row and corner.  Samples lying on the
 * same line (two steps right for every step down) share one value.  The
 * lower-left part alternates 2-tap averages and (1,2,1)-filtered triples of
 * the left samples; the top row of the block to the right of column 1 is
 * fed from lt and the top samples. */
2310 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2312 PREDICT_8x8_LOAD_TOP;
2313 PREDICT_8x8_LOAD_LEFT;
2314 PREDICT_8x8_LOAD_TOPLEFT;
2315 SRC(0,7)= (l6 + l7 + 1) >> 1;
2316 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2317 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2318 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2319 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2320 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2321 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2322 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2323 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2324 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2325 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2326 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2327 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2328 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2329 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2330 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2331 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2332 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2333 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2334 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2335 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2336 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction.
 * Uses only the filtered top row and top-right row (t0..t12 are read).
 * Samples lying on the same line (one step left for every two steps down)
 * share one value; even rows take 2-tap averages ((a + b + 1) >> 1), odd
 * rows take (1,2,1)-filtered triples. */
2338 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2340 PREDICT_8x8_LOAD_TOP;
2341 PREDICT_8x8_LOAD_TOPRIGHT;
2342 SRC(0,0)= (t0 + t1 + 1) >> 1;
2343 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2344 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2345 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2346 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2347 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2348 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2349 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2350 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2351 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2352 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2353 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2354 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2355 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2356 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2357 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2358 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2359 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2360 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2361 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2362 SRC(7,6)= (t10 + t11 + 1) >> 1;
2363 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction.
 * Uses only the filtered left column l0..l7.  Samples lying on the same line
 * (two steps right for every step up) share one value, alternating 2-tap
 * averages and (1,2,1)-filtered triples.  Once the pattern runs past l7,
 * every remaining sample (the lower-right region) is set to l7 itself. */
2365 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2367 PREDICT_8x8_LOAD_LEFT;
2368 SRC(0,0)= (l0 + l1 + 1) >> 1;
2369 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2370 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2371 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2372 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2373 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2374 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2375 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2376 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2377 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2378 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2379 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2380 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2381 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2382 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2383 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2384 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2385 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* The neighbour-loading and DC-fill helper macros are private to the 8x8
 * luma predictors above; remove them so they cannot leak into later code. */
2387 #undef PREDICT_8x8_LOAD_LEFT
2388 #undef PREDICT_8x8_LOAD_TOP
2389 #undef PREDICT_8x8_LOAD_TOPLEFT
2390 #undef PREDICT_8x8_LOAD_TOPRIGHT
2391 #undef PREDICT_8x8_DC
/* Motion-compensates one partition from a single reference picture.
 *
 * h            decoder context; the motion vector comes from
 *              mv_cache[list][scan8[n]]
 * pic          reference picture to read from
 * n            block index used to look up the cached motion vector / ref
 * delta        byte offset at which the luma operation is repeated
 * dest_y/cb/cr destination pointers for the three planes
 * src_x_offset, src_y_offset
 *              block position; multiplied by 8 and added to the vector
 * qpix_op      table of luma interpolation functions, indexed by luma_xy
 * chroma_op    chroma interpolation function
 *
 * The vector is used at quarter-sample precision for luma (>>2 gives the
 * integer position, &3 the fraction that selects qpix_op[]) and at
 * eighth-sample precision for chroma (>>3 and &7). */
2397 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2398 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2399 int src_x_offset, int src_y_offset,
2400 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2401 MpegEncContext * const s = &h->s;
2402 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2403 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2404 const int luma_xy= (mx&3) + ((my&3)<<2);
2405 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2406 uint8_t * src_cb, * src_cr;
2407 int extra_width= h->emu_edge_width;
2408 int extra_height= h->emu_edge_height;
2410 const int full_mx= mx>>2;
2411 const int full_my= my>>2;
2412 const int pic_width = 16*s->mb_width;
2413 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2415 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* A fractional vector component shrinks the usable margin by 3 samples. */
2418 if(mx&7) extra_width -= 3;
2419 if(my&7) extra_height -= 3;
/* If the 16x16 area at the integer position leaves the picture by more than
 * the allowed margin, a 21x21 area starting 2 samples up and left of it is
 * rebuilt in edge_emu_buffer and the luma source is redirected there. */
2421 if( full_mx < 0-extra_width
2422 || full_my < 0-extra_height
2423 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2424 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2425 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2426 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2430 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* Same luma operation again, shifted by delta in both source and dest. */
2432 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* Gray-only decoding stops here: the chroma planes are not touched. */
2435 if(s->flags&CODEC_FLAG_GRAY) return;
2438 // chroma offset when predicting from a field of opposite parity
2439 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2440 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2442 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2443 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
/* Each chroma plane goes through the same edge emulation (9x9 area) before
 * chroma_op; edge_emu_buffer is reused, so Cb is finished before Cr starts. */
2446 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2447 src_cb= s->edge_emu_buffer;
2449 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2452 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2453 src_cr= s->edge_emu_buffer;
2455 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation of one partition.
 *
 * The destination pointers are advanced to the partition (x_offset and
 * y_offset are in chroma-sample units: luma uses twice the offset), then the
 * offsets are made absolute by adding the macroblock position.  The list 0
 * reference is rendered with the "put" functions; the operation pointers are
 * then switched to the "avg" variants for the list 1 reference, so a
 * bi-predicted partition ends up as the average of both.
 * list0 / list1 tell which reference lists this partition uses. */
2458 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2459 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2460 int x_offset, int y_offset,
2461 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2462 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2463 int list0, int list1){
2464 MpegEncContext * const s = &h->s;
2465 qpel_mc_func *qpix_op= qpix_put;
2466 h264_chroma_mc_func chroma_op= chroma_put;
2468 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2469 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2470 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2471 x_offset += 8*s->mb_x;
2472 y_offset += 8*(s->mb_y >> MB_MBAFF);
2475 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2476 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2477 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2478 qpix_op, chroma_op);
2481 chroma_op= chroma_avg;
2485 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2486 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2487 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2488 qpix_op, chroma_op);
/* Weighted motion compensation of one partition.
 *
 * Bi-predicted case: the list 0 prediction is written to the destination and
 * the list 1 prediction to temporaries carved out of s->obmc_scratchpad; the
 * two are then combined in place by luma_weight_avg / chroma_weight_avg.
 *   - h->use_weight == 2: the weights come from h->implicit_weight[][] with
 *     weight1 = 64 - weight0, a log2 denominator of 5 and zero offset.
 *   - otherwise: the per-reference luma/chroma weights, offsets and log2
 *     denominators stored in h are used.
 * Single-list case: the prediction is rendered with the "put" functions and
 * then scaled in place by luma_weight_op; chroma is scaled only when
 * h->use_weight_chroma is set. */
2492 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2493 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2494 int x_offset, int y_offset,
2495 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2496 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2497 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2498 int list0, int list1){
2499 MpegEncContext * const s = &h->s;
2501 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2502 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2503 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2504 x_offset += 8*s->mb_x;
2505 y_offset += 8*(s->mb_y >> MB_MBAFF);
2508 /* don't optimize for luma-only case, since B-frames usually
2509 * use implicit weights => chroma too. */
/* Scratchpad layout: Cb at the start, Cr 8 bytes further along the same
 * rows, luma after 8 chroma rows. */
2510 uint8_t *tmp_cb = s->obmc_scratchpad;
2511 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2512 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2513 int refn0 = h->ref_cache[0][ scan8[n] ];
2514 int refn1 = h->ref_cache[1][ scan8[n] ];
2516 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2517 dest_y, dest_cb, dest_cr,
2518 x_offset, y_offset, qpix_put, chroma_put);
2519 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2520 tmp_y, tmp_cb, tmp_cr,
2521 x_offset, y_offset, qpix_put, chroma_put);
2523 if(h->use_weight == 2){
2524 int weight0 = h->implicit_weight[refn0][refn1];
2525 int weight1 = 64 - weight0;
2526 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2527 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2528 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2530 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2531 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2532 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2533 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2534 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2535 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2536 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2537 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2538 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2541 int list = list1 ? 1 : 0;
2542 int refn = h->ref_cache[list][ scan8[n] ];
2543 Picture *ref= &h->ref_list[list][refn];
2544 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2545 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2546 qpix_put, chroma_put);
2548 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2549 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2550 if(h->use_weight_chroma){
2551 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2552 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2553 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2554 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Chooses between weighted and plain motion compensation for one partition.
 *
 * The weighted path is taken when h->use_weight == 1, or when
 * h->use_weight == 2, both lists are used and the implicit weight for this
 * reference pair differs from 32 (with weight1 = 64 - weight0, 32 means both
 * references count equally, which the plain averaging path already gives).
 * weight_op / weight_avg point into arrays of weighting functions: entry 0
 * is used for luma and entry 3 for chroma. */
2559 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2560 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2561 int x_offset, int y_offset,
2562 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2563 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2564 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2565 int list0, int list1){
2566 if((h->use_weight==2 && list0 && list1
2567 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2568 || h->use_weight==1)
2569 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2570 x_offset, y_offset, qpix_put, chroma_put,
2571 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2573 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2574 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Issues cache prefetches into the reference picture of the given list.
 * The position is derived from the first cached motion vector (integer part)
 * plus the current macroblock position; luma is prefetched from plane 0 and
 * chroma from plane 1, the latter using the distance between planes 2 and 1
 * as its stride argument. */
2577 static inline void prefetch_motion(H264Context *h, int list){
2578 /* fetch pixels for estimated mv 4 macroblocks ahead
2579 * optimized for 64byte cache lines */
2580 MpegEncContext * const s = &h->s;
2581 const int refn = h->ref_cache[list][scan8[0]];
2583 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2584 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2585 uint8_t **src= h->ref_list[list][refn].data;
2586 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2587 s->dsp.prefetch(src[0]+off, s->linesize, 4);
2588 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2589 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Performs inter prediction for the current macroblock.
 *
 * The macroblock type selects the partitioning and each partition is handed
 * to mc_part() with the matching function-table entries:
 *   16x16        one call
 *   16x8 / 8x16  two calls, second partition offset by 4 chroma samples
 *   8x8          per sub-macroblock: 8x8, 8x4, 4x8 or four 4x4 blocks
 * IS_DIR() supplies the list0 / list1 usage flags for each partition.
 * prefetch_motion() is called for list 0 before and list 1 after the work.
 *
 * dest_y/cb/cr  destination of the macroblock in the three planes
 * qpix_put/avg  luma interpolation tables, first index = block size class
 * chroma_put/avg chroma interpolation functions by block size class
 * weight_op/avg weighting functions by partition shape */
2593 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2594 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2595 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2596 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2597 MpegEncContext * const s = &h->s;
2598 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2599 const int mb_type= s->current_picture.mb_type[mb_xy];
2601 assert(IS_INTER(mb_type));
2603 prefetch_motion(h, 0);
2605 if(IS_16X16(mb_type)){
2606 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2607 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2608 &weight_op[0], &weight_avg[0],
2609 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2610 }else if(IS_16X8(mb_type)){
2611 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2612 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2613 &weight_op[1], &weight_avg[1],
2614 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2615 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2616 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2617 &weight_op[1], &weight_avg[1],
2618 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2619 }else if(IS_8X16(mb_type)){
2620 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2621 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2622 &weight_op[2], &weight_avg[2],
2623 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2624 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2625 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2626 &weight_op[2], &weight_avg[2],
2627 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2631 assert(IS_8X8(mb_type));
/* Per 8x8 quadrant i: x/y offsets are 0 or 4 chroma samples, taken from
 * bit 0 and bit 1 of i respectively. */
2634 const int sub_mb_type= h->sub_mb_type[i];
2636 int x_offset= (i&1)<<2;
2637 int y_offset= (i&2)<<1;
2639 if(IS_SUB_8X8(sub_mb_type)){
2640 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2641 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2642 &weight_op[3], &weight_avg[3],
2643 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2644 }else if(IS_SUB_8X4(sub_mb_type)){
2645 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2646 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2647 &weight_op[4], &weight_avg[4],
2648 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2649 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2650 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2651 &weight_op[4], &weight_avg[4],
2652 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2653 }else if(IS_SUB_4X8(sub_mb_type)){
2654 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2655 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2656 &weight_op[5], &weight_avg[5],
2657 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2658 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2659 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2660 &weight_op[5], &weight_avg[5],
2661 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2664 assert(IS_SUB_4X4(sub_mb_type));
2666 int sub_x_offset= x_offset + 2*(j&1);
2667 int sub_y_offset= y_offset + (j&2);
2668 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2669 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2670 &weight_op[6], &weight_avg[6],
2671 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2677 prefetch_motion(h, 1);
/* Builds the file-scope VLC decoding tables (coefficient tokens, total-zeros
 * and run-before tables, plus their chroma-DC variants) from the static
 * length / bit-pattern arrays.  The static flag `done` records state across
 * calls.
 * NOTE(review): `done` is a plain static int — presumably a run-once guard;
 * confirm whether concurrent first calls are possible. */
2680 static void decode_init_vlc(void){
2681 static int done = 0;
2687 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2688 &chroma_dc_coeff_token_len [0], 1, 1,
2689 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2692 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2693 &coeff_token_len [i][0], 1, 1,
2694 &coeff_token_bits[i][0], 1, 1, 1);
2698 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2699 &chroma_dc_total_zeros_len [i][0], 1, 1,
2700 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2702 for(i=0; i<15; i++){
2703 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2704 &total_zeros_len [i][0], 1, 1,
2705 &total_zeros_bits[i][0], 1, 1, 1);
2709 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2710 &run_len [i][0], 1, 1,
2711 &run_bits[i][0], 1, 1, 1);
/* The seventh run table (index 6) has 16 entries and its own VLC object. */
2713 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2714 &run_len [6][0], 1, 1,
2715 &run_bits[6][0], 1, 1, 1);
2720 * Sets the intra prediction function pointers.
/* Fills the four intra prediction dispatch tables of h with the C
 * implementations: pred4x4 and pred8x8l are indexed by the 4x4-style mode
 * constants, pred8x8 and pred16x16 by the *_PRED8x8 mode constants. */
2722 static void init_pred_ptrs(H264Context *h){
2723 // MpegEncContext * const s = &h->s;
2725 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2726 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2727 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2728 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2729 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2730 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2731 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2732 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2733 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2734 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2735 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2736 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2738 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2739 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2740 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2741 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2742 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2743 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2744 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2745 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2746 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2747 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2748 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2749 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2751 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
2752 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
2753 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
2754 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
2755 h->pred8x8[LEFT_DC_PRED8x8]= ff_pred8x8_left_dc_c;
2756 h->pred8x8[TOP_DC_PRED8x8 ]= ff_pred8x8_top_dc_c;
2757 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
2759 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
2760 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
2761 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
2762 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
2763 h->pred16x16[LEFT_DC_PRED8x8]= ff_pred16x16_left_dc_c;
2764 h->pred16x16[TOP_DC_PRED8x8 ]= ff_pred16x16_top_dc_c;
2765 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
/* Releases the per-stream tables of h, including the obmc scratchpad and
 * every stored SPS / PPS buffer.  av_freep() is given the address of each
 * pointer. */
2768 static void free_tables(H264Context *h){
2770 av_freep(&h->intra4x4_pred_mode);
2771 av_freep(&h->chroma_pred_mode_table);
2772 av_freep(&h->cbp_table);
2773 av_freep(&h->mvd_table[0]);
2774 av_freep(&h->mvd_table[1]);
2775 av_freep(&h->direct_table);
2776 av_freep(&h->non_zero_count);
2777 av_freep(&h->slice_table_base);
2778 av_freep(&h->top_borders[1]);
2779 av_freep(&h->top_borders[0]);
/* slice_table points into slice_table_base, which was just freed. */
2780 h->slice_table= NULL;
2782 av_freep(&h->mb2b_xy);
2783 av_freep(&h->mb2b8_xy);
2785 av_freep(&h->s.obmc_scratchpad);
2787 for(i = 0; i < MAX_SPS_COUNT; i++)
2788 av_freep(h->sps_buffers + i);
2790 for(i = 0; i < MAX_PPS_COUNT; i++)
2791 av_freep(h->pps_buffers + i);
/* Builds the 8x8 dequantisation tables from the PPS 8x8 scaling matrices.
 *
 * For each of the two matrices and each qp 0..51 the entry is
 *   dequant8_coeff_init[qp%6][class(x)] * scaling_matrix8[i][x] << (qp/6)
 * using ff_rem6 / ff_div6 for the remainder and quotient.  If both scaling
 * matrices are identical, table 1 simply aliases buffer 0.  When the active
 * 8x8 IDCT is not the C reference function, coefficients are stored at the
 * transposed position. */
2794 static void init_dequant8_coeff_table(H264Context *h){
2796 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2797 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2798 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2800 for(i=0; i<2; i++ ){
2801 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2802 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2806 for(q=0; q<52; q++){
2807 int shift = ff_div6[q];
2808 int idx = ff_rem6[q];
2810 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2811 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2812 h->pps.scaling_matrix8[i][x]) << shift;
/* Builds the 4x4 dequantisation tables from the six PPS 4x4 scaling
 * matrices.
 *
 * A matrix identical to an earlier one (index j) reuses that one's buffer
 * instead of being recomputed.  Otherwise, for each qp 0..51 the entry is
 *   dequant4_coeff_init[qp%6][class(x)] * scaling_matrix4[i][x] << (qp/6 + 2)
 * and is stored at the transposed position when the active 4x4 IDCT is not
 * the C reference function. */
2817 static void init_dequant4_coeff_table(H264Context *h){
2819 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2820 for(i=0; i<6; i++ ){
2821 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2823 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2824 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2831 for(q=0; q<52; q++){
2832 int shift = ff_div6[q] + 2;
2833 int idx = ff_rem6[q];
2835 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2836 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2837 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)builds all dequantisation tables: always the 4x4 ones, and the 8x8
 * ones only when the PPS enables the 8x8 transform.  If the SPS enables
 * transform bypass, the qp == 0 entries are overwritten with the constant
 * 1<<6. */
2842 static void init_dequant_tables(H264Context *h){
2844 init_dequant4_coeff_table(h);
2845 if(h->pps.transform_8x8_mode)
2846 init_dequant8_coeff_table(h);
2847 if(h->sps.transform_bypass){
2850 h->dequant4_coeff[i][0][x] = 1<<6;
2851 if(h->pps.transform_8x8_mode)
2854 h->dequant8_coeff[i][0][x] = 1<<6;
2861 * needs width/height
/* Allocates the per-macroblock tables, sized from s->mb_stride, s->mb_height
 * and s->mb_width (so the picture dimensions must already be known).
 *
 * The CABAC-only tables (chroma_pred_mode_table, mvd_table, direct_table)
 * are allocated only when h->pps.cabac is set.  slice_table_base is filled
 * with 0xFF and slice_table is set to point 2*mb_stride + 1 entries into it.
 * mb2b_xy / mb2b8_xy map a macroblock index to the index of its first 4x4 /
 * 8x8 block.  The dequantisation tables are built here if dequant4_coeff[0]
 * is still unset.
 * NOTE(review): CHECKED_ALLOCZ presumably jumps to a failure path on
 * allocation error; that path is not visible here — confirm it frees what
 * was already allocated. */
2863 static int alloc_tables(H264Context *h){
2864 MpegEncContext * const s = &h->s;
2865 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2868 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2870 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2871 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2872 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2873 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2874 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2876 if( h->pps.cabac ) {
2877 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2878 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2879 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2880 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2883 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2884 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2886 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2887 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2888 for(y=0; y<s->mb_height; y++){
2889 for(x=0; x<s->mb_width; x++){
2890 const int mb_xy= x + y*s->mb_stride;
2891 const int b_xy = 4*x + 4*y*h->b_stride;
2892 const int b8_xy= 2*x + 2*y*h->b8_stride;
2894 h->mb2b_xy [mb_xy]= b_xy;
2895 h->mb2b8_xy[mb_xy]= b8_xy;
/* The scratchpad is allocated later, in frame_start(), once linesize is
 * known. */
2899 s->obmc_scratchpad = NULL;
2901 if(!h->dequant4_coeff[0])
2902 init_dequant_tables(h);
/* Initialisation of the H264Context fields shown here: copies the frame
 * size and codec id from the AVCodecContext, marks the dequant tables as not
 * belonging to any PPS (dequant_coeff_pps = -1), and fills every byte of the
 * PPS 4x4 and 8x8 scaling matrices with 16. */
2910 static void common_init(H264Context *h){
2911 MpegEncContext * const s = &h->s;
2913 s->width = s->avctx->width;
2914 s->height = s->avctx->height;
2915 s->codec_id= s->avctx->codec->id;
2919 h->dequant_coeff_pps= -1;
2920 s->unrestricted_mv=1;
2921 s->decode=1; //FIXME
2923 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2924 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Decoder init callback: applies the MPV decoder defaults, marks the stream
 * as FMT_H264, copies workaround_bugs from the codec context and selects
 * PIX_FMT_YUV420P as output format.  Finally it tests whether extradata is
 * present and starts with the byte value 1.
 * NOTE(review): presumably that byte distinguishes the two extradata /
 * bitstream layouts — confirm against the branch body. */
2927 static int decode_init(AVCodecContext *avctx){
2928 H264Context *h= avctx->priv_data;
2929 MpegEncContext * const s = &h->s;
2931 MPV_decode_defaults(s);
2936 s->out_format = FMT_H264;
2937 s->workaround_bugs= avctx->workaround_bugs;
2940 // s->decode_mb= ff_h263_decode_mb;
2942 avctx->pix_fmt= PIX_FMT_YUV420P;
2946 if(avctx->extradata_size > 0 && avctx->extradata &&
2947 *(char *)avctx->extradata == 1){
/* Per-frame setup: starts the frame in the MPV layer and the error
 * resilience code, then recomputes h->block_offset[] from the current line
 * sizes.  Entries 0..15 are luma and 16..23 chroma block offsets; entries
 * 24.. hold the same offsets with the row step doubled (8 instead of 4
 * lines).  The bi-prediction scratchpad is allocated on first use, and
 * slice_table is reset to 0xFF. */
2957 static int frame_start(H264Context *h){
2958 MpegEncContext * const s = &h->s;
2961 if(MPV_frame_start(s, s->avctx) < 0)
2963 ff_er_frame_start(s);
2965 assert(s->linesize && s->uvlinesize);
2967 for(i=0; i<16; i++){
2968 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2969 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2972 h->block_offset[16+i]=
2973 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2974 h->block_offset[24+16+i]=
2975 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2978 /* can't be in alloc_tables because linesize isn't known there.
2979 * FIXME: redo bipred weight to not require extra buffer? */
/* NOTE(review): the av_malloc() result does not appear to be checked before
 * the scratchpad is used by the weighted prediction code — confirm, and
 * consider failing the frame on NULL. */
2980 if(!s->obmc_scratchpad)
2981 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2983 /* some macroblocks will be accessed before they're available */
2985 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2987 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Saves border samples of the current macroblock for later use.
 *
 * left_border receives the right-most column of the macroblock (luma column
 * 15, chroma column 7); its first entry per plane is taken from the
 * previously saved top border so the corner sample is kept.  top_borders[0]
 * for this mb_x then receives one row of the macroblock: 16 luma bytes at
 * offset 0, 8 Cb bytes at offset 16 and 8 Cr bytes at offset 24.
 * Chroma is skipped when CODEC_FLAG_GRAY is set and `simple` is 0.
 * NOTE(review): the uint64_t loads/stores assume the rows may be accessed as
 * 64-bit objects — confirm alignment for all targets. */
2991 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2992 MpegEncContext * const s = &h->s;
2996 src_cb -= uvlinesize;
2997 src_cr -= uvlinesize;
2999 // There are two lines saved, the line above the top macroblock of a pair,
3000 // and the line above the bottom macroblock
3001 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3002 for(i=1; i<17; i++){
3003 h->left_border[i]= src_y[15+i* linesize];
3006 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3007 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3009 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3010 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3011 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3013 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3014 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3016 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3017 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Moves samples between the saved borders (left_border / top_borders) and
 * the column left of and row above the current macroblock in the picture,
 * through the XCHG macro.
 *
 * Which neighbours take part: with h->deblocking_filter == 2 a neighbour
 * counts only if it belongs to the same slice as the current macroblock
 * (slice_table comparison); otherwise the left / top neighbour counts
 * whenever mb_x > 0 / mb_y > 0.  The source pointers are moved one row up
 * and one column left before the transfers.  Chroma is skipped when
 * CODEC_FLAG_GRAY is set and `simple` is 0.
 * NOTE(review): XCHG's last argument presumably selects swap versus plain
 * copy; several top-row transfers pass a constant 1 rather than xchg —
 * confirm against the macro body. */
3021 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
3022 MpegEncContext * const s = &h->s;
3029 if(h->deblocking_filter == 2) {
3030 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
3031 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
3032 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
3034 deblock_left = (s->mb_x > 0);
3035 deblock_top = (s->mb_y > 0);
3038 src_y -= linesize + 1;
3039 src_cb -= uvlinesize + 1;
3040 src_cr -= uvlinesize + 1;
3042 #define XCHG(a,b,t,xchg)\
/* Starting at i = !deblock_top skips the corner sample (row 0) when the top
 * neighbour does not take part. */
3049 for(i = !deblock_top; i<17; i++){
3050 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3055 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3056 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3057 if(s->mb_x+1 < s->mb_width){
3058 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3062 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3064 for(i = !deblock_top; i<9; i++){
3065 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3066 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3070 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3071 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/*
 * Macroblock-pair variant of backup_mb_border: saves the right-hand pixel
 * column of a 32-row luma / 16-row chroma region into h->left_border and
 * two picture rows per plane into h->top_borders[0] and h->top_borders[1]
 * at s->mb_x. Chroma is skipped when CODEC_FLAG_GRAY is set.
 * NOTE(review): this listing omits some short lines (declaration of i,
 * closing braces).
 */
3076 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3077 MpegEncContext * const s = &h->s;
// Step back two rows, so row index i below addresses row i-2 of the pair.
3080 src_y -= 2 * linesize;
3081 src_cb -= 2 * uvlinesize;
3082 src_cr -= 2 * uvlinesize;
3084 // There are two lines saved, the line above the top macroblock of a pair,
3085 // and the line above the bottom macroblock
// Entries 0 and 1 come from the previously saved top rows, before those rows are overwritten below.
3086 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3087 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3088 for(i=2; i<34; i++){
3089 h->left_border[i]= src_y[15+i* linesize];
// Rows 32 and 33 (relative to the adjusted pointer) go to top_borders[0] and top_borders[1].
3092 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3093 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3094 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3095 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3097 if(!(s->flags&CODEC_FLAG_GRAY)){
// Chroma: cb uses left_border[34..], cr uses left_border[34+18..].
3098 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3099 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3100 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3101 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3102 for(i=2; i<18; i++){
3103 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3104 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3106 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3107 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3108 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3109 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/*
 * Macroblock-pair variant of xchg_mb_border: applies the XCHG macro between
 * the saved borders (h->left_border, h->top_borders[0] and [1]) and the
 * picture pixels left of / in the two rows above the pair at s->mb_x.
 * Chroma is skipped when CODEC_FLAG_GRAY is set.
 * NOTE(review): the body of XCHG and several short lines (declarations,
 * the tests that use deblock_left, closing braces) are not present in this
 * listing.
 */
3113 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3114 MpegEncContext * const s = &h->s;
// Neighbour availability is decided from the position only; the top test uses mb_y > 1.
3117 int deblock_left = (s->mb_x > 0);
3118 int deblock_top = (s->mb_y > 1);
3120 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// Move each pointer two rows up and one column left.
3122 src_y -= 2 * linesize + 1;
3123 src_cb -= 2 * uvlinesize + 1;
3124 src_cr -= 2 * uvlinesize + 1;
// The left-column loops start at row 2 instead of row 0 when deblock_top is false.
3126 #define XCHG(a,b,t,xchg)\
3133 for(i = (!deblock_top)<<1; i<34; i++){
3134 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Two top luma rows: bytes 0-7 honour xchg, every other span passes 1.
3139 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3140 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3141 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3142 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// Unless this is the last column, also process the first 8 bytes of the next pair's saved top rows.
3143 if(s->mb_x+1 < s->mb_width){
3144 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3145 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3149 if(!(s->flags&CODEC_FLAG_GRAY)){
// Chroma: cb uses left_border[34..], cr uses left_border[34+18..].
3151 for(i = (!deblock_top) << 1; i<18; i++){
3152 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3153 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3157 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3158 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3159 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3160 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/*
 * Reconstruct the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: prediction (intra predictors or hl_motion), residual
 * addition for luma and chroma, then border backup and deblocking when
 * h->deblocking_filter is set.
 * @param simple when non-zero, every branch guarded by !simple below
 *               (MB_FIELD / FRAME_MBAFF handling, intra PCM) is skipped,
 *               is_h264 is forced true and the CODEC_FLAG_GRAY and
 *               s->encoding tests are bypassed.
 * NOTE(review): this listing omits many short lines (local declarations,
 * else branches, continue statements, closing braces); the comments below
 * describe only what is visible.
 */
3165 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
3166 MpegEncContext * const s = &h->s;
3167 const int mb_x= s->mb_x;
3168 const int mb_y= s->mb_y;
3169 const int mb_xy= mb_x + mb_y*s->mb_stride;
3170 const int mb_type= s->current_picture.mb_type[mb_xy];
3171 uint8_t *dest_y, *dest_cb, *dest_cr;
3172 int linesize, uvlinesize /*dct_offset*/;
3174 int *block_offset = &h->block_offset[0];
3175 const unsigned int bottom = mb_y & 1;
3176 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
3177 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3178 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma).
3180 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3181 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3182 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3184 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3185 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblock: strides are doubled and the block offsets starting at index 24 are used.
3187 if (!simple && MB_FIELD) {
3188 linesize = h->mb_linesize = s->linesize * 2;
3189 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3190 block_offset = &h->block_offset[24];
// For an odd mb_y the destination pointers are moved up 15 luma / 7 chroma rows.
3191 if(mb_y&1){ //FIXME move out of this func?
3192 dest_y -= s->linesize*15;
3193 dest_cb-= s->uvlinesize*7;
3194 dest_cr-= s->uvlinesize*7;
// Rewrite the cached reference indices of the lists this macroblock uses.
// '+' binds tighter than '^', so the stored value is (16+ref)^(s->mb_y&1).
3198 for(list=0; list<h->list_count; list++){
3199 if(!USES_LIST(mb_type, list))
3201 if(IS_16X16(mb_type)){
3202 int8_t *ref = &h->ref_cache[list][scan8[0]];
3203 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3205 for(i=0; i<16; i+=4){
3206 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3207 int ref = h->ref_cache[list][scan8[i]];
3209 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// Non-field case: plain picture strides.
3215 linesize = h->mb_linesize = s->linesize;
3216 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3217 // dct_offset = s->linesize * 16;
// Select the luma residual-add routines: plain pixel add for transform bypass,
// otherwise the 8x8 or 4x4 inverse transform depending on the macroblock type.
3220 if(transform_bypass){
3222 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3223 }else if(IS_8x8DCT(mb_type)){
3224 idct_dc_add = s->dsp.h264_idct8_dc_add;
3225 idct_add = s->dsp.h264_idct8_add;
3227 idct_dc_add = s->dsp.h264_idct_dc_add;
3228 idct_add = s->dsp.h264_idct_add;
// MBAFF intra macroblock with deblocking enabled: call xchg_pair_border (xchg=1) on the pair's
// top macroblock position, unless this is the bottom macroblock and the one above it is intra too.
3231 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3232 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3233 int mbt_y = mb_y&~1;
3234 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3235 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3236 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3237 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// Intra PCM: h->mb holds the samples themselves; blocks 0-15 are luma, 16-19 cb, 20-23 cr.
3240 if (!simple && IS_INTRA_PCM(mb_type)) {
3243 // The pixels are stored in h->mb array in the same order as levels,
3244 // copy them in output in the correct order.
3245 for(i=0; i<16; i++) {
3246 for (y=0; y<4; y++) {
3247 for (x=0; x<4; x++) {
3248 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3252 for(i=16; i<16+4; i++) {
3253 for (y=0; y<4; y++) {
3254 for (x=0; x<4; x++) {
3255 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3259 for(i=20; i<20+4; i++) {
3260 for (y=0; y<4; y++) {
3261 for (x=0; x<4; x++) {
3262 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblock: border exchange (xchg=1) before prediction when deblocking is on and MBAFF is not in use.
3267 if(IS_INTRA(mb_type)){
3268 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3269 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
// Chroma intra prediction for both planes.
3271 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3272 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3273 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
// Intra 4x4 / 8x8: predict each block, then add its residual right away.
3276 if(IS_INTRA4x4(mb_type)){
3277 if(simple || !s->encoding){
3278 if(IS_8x8DCT(mb_type)){
3279 for(i=0; i<16; i+=4){
3280 uint8_t * const ptr= dest_y + block_offset[i];
3281 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3282 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3283 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3284 (h->topright_samples_available<<i)&0x4000, linesize);
3286 if(nnz == 1 && h->mb[i*16])
3287 idct_dc_add(ptr, h->mb + i*16, linesize);
3289 idct_add(ptr, h->mb + i*16, linesize);
3293 for(i=0; i<16; i++){
3294 uint8_t * const ptr= dest_y + block_offset[i];
3296 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// For these two modes a top-right pointer is prepared: when the top-right samples are
// unavailable, the sample at ptr[3 - linesize] is replicated into four bytes.
3299 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3300 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3301 assert(mb_y || linesize <= block_offset[i]);
3302 if(!topright_avail){
3303 tr= ptr[3 - linesize]*0x01010101;
3304 topright= (uint8_t*) &tr;
3306 topright= ptr + 4 - linesize;
3310 h->pred4x4[ dir ](ptr, topright, linesize);
3311 nnz = h->non_zero_count_cache[ scan8[i] ];
3314 if(nnz == 1 && h->mb[i*16])
3315 idct_dc_add(ptr, h->mb + i*16, linesize);
3317 idct_add(ptr, h->mb + i*16, linesize);
3319 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Other intra macroblocks: whole-block 16x16 prediction, then dequantisation of the luma DC coefficients.
3324 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3326 if(!transform_bypass)
3327 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3329 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// Second xchg_mb_border call, this time with xchg=0.
3331 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3332 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// Inter prediction.
3334 hl_motion(h, dest_y, dest_cb, dest_cr,
3335 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3336 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3337 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Luma residual for everything except intra 4x4 (handled block by block above).
3341 if(!IS_INTRA4x4(mb_type)){
3343 if(IS_INTRA16x16(mb_type)){
3344 for(i=0; i<16; i++){
3345 if(h->non_zero_count_cache[ scan8[i] ])
3346 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3347 else if(h->mb[i*16])
3348 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// Step 4 visits one 8x8 transform block per iteration, step 1 one 4x4 block.
3351 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3352 for(i=0; i<16; i+=di){
3353 int nnz = h->non_zero_count_cache[ scan8[i] ];
3355 if(nnz==1 && h->mb[i*16])
3356 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3358 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// Variant that adds the residual through svq3_add_idct_c.
3363 for(i=0; i<16; i++){
3364 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3365 uint8_t * const ptr= dest_y + block_offset[i];
3366 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual; dest[(i&4)>>2] selects cb for blocks 16-19 and cr for blocks 20-23.
3372 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3373 uint8_t *dest[2] = {dest_cb, dest_cr};
3374 if(transform_bypass){
3375 idct_add = idct_dc_add = s->dsp.add_pixels4;
3377 idct_add = s->dsp.h264_idct_add;
3378 idct_dc_add = s->dsp.h264_idct_dc_add;
3379 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3380 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3383 for(i=16; i<16+8; i++){
3384 if(h->non_zero_count_cache[ scan8[i] ])
3385 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3386 else if(h->mb[i*16])
3387 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3390 for(i=16; i<16+8; i++){
3391 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3392 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3393 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking.
3399 if(h->deblocking_filter) {
3400 if (!simple && FRAME_MBAFF) {
3401 //FIXME try deblocking one mb at a time?
3402 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
// These locals shadow the outer mb_y / mb_xy and refer to the row above the current one.
3403 const int mb_y = s->mb_y - 1;
3404 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3405 const int mb_xy= mb_x + mb_y*s->mb_stride;
3406 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3407 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// Nothing is filtered until the bottom macroblock (odd mb_y) of the pair has been reconstructed.
3408 if (!bottom) return;
3409 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3410 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3411 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3413 if(IS_INTRA(mb_type_top | mb_type_bottom))
3414 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3416 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// Filter the top macroblock of the pair, then the bottom one, reloading caches and chroma qp for each.
3420 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3421 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3422 h->chroma_qp = get_chroma_qp(h, s->current_picture.qscale_table[mb_xy]);
3423 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3426 tprintf(h->s.avctx, "call mbaff filter_mb\n");
3427 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3428 h->chroma_qp = get_chroma_qp(h, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3429 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// Non-MBAFF path: save the unfiltered border, then run the fast filter on this macroblock.
3431 tprintf(h->s.avctx, "call filter_mb\n");
3432 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
3433 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3434 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3440 * Process a macroblock; this case avoids checks for expensive uncommon cases.
3442 static void hl_decode_mb_simple(H264Context *h){
3443 hl_decode_mb_internal(h, 1);
3447 * Process a macroblock; this handles edge cases, such as interlacing.
3449 static void av_noinline hl_decode_mb_complex(H264Context *h){
3450 hl_decode_mb_internal(h, 0);
/*
 * Reconstruct the current macroblock by dispatching to
 * hl_decode_mb_complex() or hl_decode_mb_simple().
 * is_complex is non-zero for MBAFF frames, field macroblocks, intra PCM
 * macroblocks, codec ids other than CODEC_ID_H264, CODEC_FLAG_GRAY, or
 * when s->encoding is set.
 * NOTE(review): the lines between the is_complex computation and the
 * hl_decode_mb_complex() call (including the `if` that the `else` below
 * pairs with) are missing from this listing.
 */
3453 static void hl_decode_mb(H264Context *h){
3454 MpegEncContext * const s = &h->s;
3455 const int mb_x= s->mb_x;
3456 const int mb_y= s->mb_y;
3457 const int mb_xy= mb_x + mb_y*s->mb_stride;
3458 const int mb_type= s->current_picture.mb_type[mb_xy];
3459 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (s->flags&CODEC_FLAG_GRAY) || s->encoding;
3465 hl_decode_mb_complex(h);
3466 else hl_decode_mb_simple(h);
/*
 * Builds h->default_ref_list[0] (and [1] for B slices) from h->short_ref
 * and h->long_ref. Only frame pictures (PICT_FRAME) are populated in the
 * visible code; only entries whose reference field equals 3 are used.
 * Unused tail entries up to h->ref_count[list] are zeroed.
 * NOTE(review): this listing omits many short lines (declarations,
 * assignments inside the sort, return statements, else branches, braces).
 */
3470 * fills the default_ref_list.
3472 static int fill_default_ref_list(H264Context *h){
3473 MpegEncContext * const s = &h->s;
3475 int smallest_poc_greater_than_current = -1;
3476 Picture sorted_short_ref[32];
3478 if(h->slice_type==B_TYPE){
3482 /* sort frame according to poc in B slice */
// Each outer pass searches for the short-term picture with the smallest poc above `limit`
// and copies it to sorted_short_ref[out_i].
3483 for(out_i=0; out_i<h->short_ref_count; out_i++){
3485 int best_poc=INT_MAX;
3487 for(i=0; i<h->short_ref_count; i++){
3488 const int poc= h->short_ref[i]->poc;
3489 if(poc > limit && poc < best_poc){
3495 assert(best_i != INT_MIN);
3498 sorted_short_ref[out_i]= *h->short_ref[best_i];
3499 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// Remember the first sorted position whose poc is >= the current picture's poc.
3500 if (-1 == smallest_poc_greater_than_current) {
3501 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3502 smallest_poc_greater_than_current = out_i;
3508 if(s->picture_structure == PICT_FRAME){
3509 if(h->slice_type==B_TYPE){
3511 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3513 // find the largest poc
3514 for(list=0; list<2; list++){
3517 int step= list ? -1 : 1;
// Walk the sorted short-term pictures; whenever j leaves [0, short_ref_count) it is
// repositioned relative to smallest_poc_greater_than_current.
3519 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3520 while(j<0 || j>= h->short_ref_count){
3521 if(j != -99 && step == (list ? -1 : 1))
3524 j= smallest_poc_greater_than_current + (step>>1);
3526 if(sorted_short_ref[j].reference != 3) continue;
3527 h->default_ref_list[list][index ]= sorted_short_ref[j];
3528 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Append long-term pictures; their pic_id is the long-term index.
3531 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3532 if(h->long_ref[i] == NULL) continue;
3533 if(h->long_ref[i]->reference != 3) continue;
3535 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3536 h->default_ref_list[ list ][index++].pic_id= i;;
3539 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3540 // swap the two first elements of L1 when
3541 // L0 and L1 are identical
3542 Picture temp= h->default_ref_list[1][0];
3543 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3544 h->default_ref_list[1][1] = temp;
// Zero whatever is left of the list.
3547 if(index < h->ref_count[ list ])
3548 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// Single-list variant: short-term pictures in their stored order, then long-term pictures.
3552 for(i=0; i<h->short_ref_count; i++){
3553 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3554 h->default_ref_list[0][index ]= *h->short_ref[i];
3555 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3557 for(i = 0; i < 16; i++){
3558 if(h->long_ref[i] == NULL) continue;
3559 if(h->long_ref[i]->reference != 3) continue;
3560 h->default_ref_list[0][index ]= *h->long_ref[i];
3561 h->default_ref_list[0][index++].pic_id= i;;
3563 if(index < h->ref_count[0])
3564 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3567 if(h->slice_type==B_TYPE){
3569 //FIXME second field balh
// Trace output of the resulting lists.
3573 for (i=0; i<h->ref_count[0]; i++) {
3574 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3576 if(h->slice_type==B_TYPE){
3577 for (i=0; i<h->ref_count[1]; i++) {
// NOTE(review): the last argument below reads default_ref_list[0][i].data[0] although this is the List1 trace - looks like it should index list 1; confirm.
3578 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
// Forward declarations; both functions are defined further down in this file.
3585 static void print_short_term(H264Context *h);
3586 static void print_long_term(H264Context *h);
/*
 * Initialise h->ref_list[] from h->default_ref_list[] and apply the
 * reordering commands read from the bitstream (s->gb), if any.
 * Afterwards, entries without picture data are replaced by the current
 * picture, and the direct-mode helpers are called.
 * Returns 0 immediately for I and SI slices.
 * NOTE(review): this listing omits many short lines (declarations, break
 * and return statements, else branches, braces).
 */
3588 static int decode_ref_pic_list_reordering(H264Context *h){
3589 MpegEncContext * const s = &h->s;
3592 print_short_term(h);
3594 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3596 for(list=0; list<h->list_count; list++){
3597 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// One flag bit per list tells whether reordering commands follow.
3599 if(get_bits1(&s->gb)){
3600 int pred= h->curr_pic_num;
3602 for(index=0; ; index++){
3603 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3604 unsigned int pic_id;
3606 Picture *ref = NULL;
3608 if(reordering_of_pic_nums_idc==3)
3611 if(index >= h->ref_count[list]){
3612 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3616 if(reordering_of_pic_nums_idc<3){
// idc 0 / 1: short-term picture addressed by a difference that is subtracted from / added to pred.
3617 if(reordering_of_pic_nums_idc<2){
3618 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3620 if(abs_diff_pic_num >= h->max_pic_num){
3621 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3625 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3626 else pred+= abs_diff_pic_num;
3627 pred &= h->max_pic_num - 1;
// Search the short-term list from its last entry towards the first for frame_num == pred.
3629 for(i= h->short_ref_count-1; i>=0; i--){
3630 ref = h->short_ref[i];
3631 assert(ref->reference == 3);
3632 assert(!ref->long_ref);
3633 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3637 ref->pic_id= ref->frame_num;
// idc 2: long-term picture addressed directly by its index.
3639 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3641 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3644 ref = h->long_ref[pic_id];
3646 ref->pic_id= pic_id;
3647 assert(ref->reference == 3);
3648 assert(ref->long_ref);
3656 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3657 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Find where the picture currently sits at or after `index`, shift the entries in between
// down by one and place the picture at `index`.
3659 for(i=index; i+1<h->ref_count[list]; i++){
3660 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3663 for(; i > index; i--){
3664 h->ref_list[list][i]= h->ref_list[list][i-1];
3666 h->ref_list[list][index]= *ref;
3669 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Entries that have no data pointer are replaced by the current picture.
3675 for(list=0; list<h->list_count; list++){
3676 for(index= 0; index < h->ref_count[list]; index++){
3677 if(!h->ref_list[list][index].data[0])
3678 h->ref_list[list][index]= s->current_picture;
3682 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3683 direct_dist_scale_factor(h);
3684 direct_ref_list_init(h);
/*
 * For every entry i of h->ref_list[list], fill the two entries at
 * 16+2*i and 16+2*i+1: the first gets doubled line sizes, the second is a
 * copy of the first whose data pointers are advanced by one line of the
 * original frame entry. The explicit and implicit prediction weights of
 * entry i are duplicated to the same two indices.
 * NOTE(review): this listing omits some short lines (declarations, the
 * loops over j, the initial assignment of field[0], braces).
 */
3688 static void fill_mbaff_ref_list(H264Context *h){
3690 for(list=0; list<2; list++){ //FIXME try list_count
3691 for(i=0; i<h->ref_count[list]; i++){
3692 Picture *frame = &h->ref_list[list][i];
3693 Picture *field = &h->ref_list[list][16+2*i];
3696 field[0].linesize[j] <<= 1;
3697 field[1] = field[0];
3699 field[1].data[j] += frame->linesize[j];
// Both derived entries inherit the weights and offsets of the entry they come from.
3701 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3702 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3704 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3705 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Replicate the implicit weights along both indices.
3709 for(j=0; j<h->ref_count[1]; j++){
3710 for(i=0; i<h->ref_count[0]; i++)
3711 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3712 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3713 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/*
 * Parse the explicit weighted-prediction table from s->gb into
 * h->luma_weight / h->luma_offset / h->chroma_weight / h->chroma_offset.
 * Entries whose flag bit is clear get the default weight
 * (1 << log2 denominator) and offset 0. h->use_weight_chroma is set when
 * any chroma entry differs from the default. List 1 is parsed only for
 * B slices.
 * NOTE(review): this listing omits some short lines (declarations, the
 * lines that set h->use_weight, else branches, loops over j, braces).
 */
3717 static int pred_weight_table(H264Context *h){
3718 MpegEncContext * const s = &h->s;
3720 int luma_def, chroma_def;
3723 h->use_weight_chroma= 0;
3724 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3725 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// NOTE(review): the two denominators are used as shift counts here with no visible range check - confirm they are validated elsewhere.
3726 luma_def = 1<<h->luma_log2_weight_denom;
3727 chroma_def = 1<<h->chroma_log2_weight_denom;
3729 for(list=0; list<2; list++){
3730 for(i=0; i<h->ref_count[list]; i++){
3731 int luma_weight_flag, chroma_weight_flag;
3733 luma_weight_flag= get_bits1(&s->gb);
3734 if(luma_weight_flag){
3735 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3736 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3737 if( h->luma_weight[list][i] != luma_def
3738 || h->luma_offset[list][i] != 0)
// Flag clear: default luma weight and zero offset.
3741 h->luma_weight[list][i]= luma_def;
3742 h->luma_offset[list][i]= 0;
3745 chroma_weight_flag= get_bits1(&s->gb);
3746 if(chroma_weight_flag){
3749 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3750 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3751 if( h->chroma_weight[list][i][j] != chroma_def
3752 || h->chroma_offset[list][i][j] != 0)
3753 h->use_weight_chroma= 1;
// Flag clear: default chroma weight and zero offset.
3758 h->chroma_weight[list][i][j]= chroma_def;
3759 h->chroma_offset[list][i][j]= 0;
3763 if(h->slice_type != B_TYPE) break;
3765 h->use_weight= h->use_weight || h->use_weight_chroma;
/*
 * Fill h->implicit_weight[ref0][ref1] for every pair of list-0 / list-1
 * references from the poc distances between the current picture and the
 * two references. The weight is 64 - dist_scale_factor when that factor
 * lies in [-64, 128], otherwise 32.
 * When both lists have exactly one entry and their pocs sum to twice the
 * current poc, h->use_weight_chroma is cleared instead.
 * NOTE(review): this listing omits some short lines (declarations, the
 * lines that set h->use_weight, a return, else branches, braces).
 */
3769 static void implicit_weight_table(H264Context *h){
3770 MpegEncContext * const s = &h->s;
3772 int cur_poc = s->current_picture_ptr->poc;
3774 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3775 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3777 h->use_weight_chroma= 0;
3782 h->use_weight_chroma= 2;
3783 h->luma_log2_weight_denom= 5;
3784 h->chroma_log2_weight_denom= 5;
3786 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3787 int poc0 = h->ref_list[0][ref0].poc;
3788 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3789 int poc1 = h->ref_list[1][ref1].poc;
3790 int td = av_clip(poc1 - poc0, -128, 127);
// NOTE(review): tx divides by td; presumably a td != 0 test sits on a line not shown here - confirm.
3792 int tb = av_clip(cur_poc - poc0, -128, 127);
3793 int tx = (16384 + (FFABS(td) >> 1)) / td;
3794 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3795 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3796 h->implicit_weight[ref0][ref1] = 32;
3798 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3800 h->implicit_weight[ref0][ref1] = 32;
/*
 * Called when a picture is removed from the reference lists.
 * The visible code compares pic against h->delayed_output_pic and against
 * each entry of h->delayed_pic (scanned until the first NULL entry).
 * NOTE(review): the statements that act on these comparisons are missing
 * from this listing; presumably they decide whether pic->reference stays
 * set while the picture is still waiting for output - confirm.
 */
3805 static inline void unreference_pic(H264Context *h, Picture *pic){
3808 if(pic == h->delayed_output_pic)
3811 for(i = 0; h->delayed_pic[i]; i++)
3812 if(pic == h->delayed_pic[i]){
3820 * instantaneous decoder refresh.
3822 static void idr(H264Context *h){
3825 for(i=0; i<16; i++){
3826 if (h->long_ref[i] != NULL) {
3827 unreference_pic(h, h->long_ref[i]);
3828 h->long_ref[i]= NULL;
3831 h->long_ref_count=0;
3833 for(i=0; i<h->short_ref_count; i++){
3834 unreference_pic(h, h->short_ref[i]);
3835 h->short_ref[i]= NULL;
3837 h->short_ref_count=0;
3840 /* forget old pics after a seek */
// Clears the reference flag of every delayed picture, of the delayed output picture and of
// the current picture, and resets the delayed-picture pointers to NULL.
// NOTE(review): one statement between the delayed_output_pic reset and the
// current_picture_ptr test is missing from this listing.
3841 static void flush_dpb(AVCodecContext *avctx){
3842 H264Context *h= avctx->priv_data;
3844 for(i=0; i<16; i++) {
3845 if(h->delayed_pic[i])
3846 h->delayed_pic[i]->reference= 0;
3847 h->delayed_pic[i]= NULL;
3849 if(h->delayed_output_pic)
3850 h->delayed_output_pic->reference= 0;
3851 h->delayed_output_pic= NULL;
3853 if(h->s.current_picture_ptr)
3854 h->s.current_picture_ptr->reference= 0;
3859 * @return the removed picture or NULL if an error occurs
3861 static Picture * remove_short(H264Context *h, int frame_num){
3862 MpegEncContext * const s = &h->s;
3865 if(s->avctx->debug&FF_DEBUG_MMCO)
3866 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3868 for(i=0; i<h->short_ref_count; i++){
3869 Picture *pic= h->short_ref[i];
3870 if(s->avctx->debug&FF_DEBUG_MMCO)
3871 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3872 if(pic->frame_num == frame_num){
3873 h->short_ref[i]= NULL;
3874 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3875 h->short_ref_count--;
3884 * @return the removed picture or NULL if an error occurs
3886 static Picture * remove_long(H264Context *h, int i){
3889 pic= h->long_ref[i];
3890 h->long_ref[i]= NULL;
3891 if(pic) h->long_ref_count--;
3897 * print short term list
3899 static void print_short_term(H264Context *h) {
3901 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3902 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3903 for(i=0; i<h->short_ref_count; i++){
3904 Picture *pic= h->short_ref[i];
3905 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3911 * print long term list
3913 static void print_long_term(H264Context *h) {
3915 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3916 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3917 for(i = 0; i < 16; i++){
3918 Picture *pic= h->long_ref[i];
3920 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/*
 * Applies mmco_count operations from the mmco array to the short-term and
 * long-term reference lists, then (unless the current picture became a
 * long-term reference) inserts the current picture at the front of
 * h->short_ref.
 * NOTE(review): this listing omits many short lines (declarations, some
 * case labels, break statements, if(pic) tests, braces, the return).
 */
3927 * Executes the reference picture marking (memory management control operations).
3929 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3930 MpegEncContext * const s = &h->s;
3932 int current_is_long=0;
3935 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3936 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3938 for(i=0; i<mmco_count; i++){
3939 if(s->avctx->debug&FF_DEBUG_MMCO)
// NOTE(review): this log reads h->mmco[i] while the switch below uses the mmco parameter; they differ if a caller passes another array - confirm.
3940 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3942 switch(mmco[i].opcode){
// Drop one short-term reference.
3943 case MMCO_SHORT2UNUSED:
3944 pic= remove_short(h, mmco[i].short_frame_num);
3946 unreference_pic(h, pic);
3947 else if(s->avctx->debug&FF_DEBUG_MMCO)
3948 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// Move a short-term reference into a long-term slot, evicting the slot's previous occupant first.
3950 case MMCO_SHORT2LONG:
3951 pic= remove_long(h, mmco[i].long_index);
3952 if(pic) unreference_pic(h, pic);
3954 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3955 if (h->long_ref[ mmco[i].long_index ]){
3956 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3957 h->long_ref_count++;
// Drop one long-term reference.
3960 case MMCO_LONG2UNUSED:
3961 pic= remove_long(h, mmco[i].long_index);
3963 unreference_pic(h, pic);
3964 else if(s->avctx->debug&FF_DEBUG_MMCO)
3965 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// Store the current picture in a long-term slot (case label not shown in this listing).
3968 pic= remove_long(h, mmco[i].long_index);
3969 if(pic) unreference_pic(h, pic);
3971 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3972 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3973 h->long_ref_count++;
3977 case MMCO_SET_MAX_LONG:
3978 assert(mmco[i].long_index <= 16);
3979 // just remove the long term which index is greater than new max
3980 for(j = mmco[i].long_index; j<16; j++){
3981 pic = remove_long(h, j);
3982 if (pic) unreference_pic(h, pic);
// Empty both reference lists (case label not shown in this listing).
3986 while(h->short_ref_count){
3987 pic= remove_short(h, h->short_ref[0]->frame_num);
3988 if(pic) unreference_pic(h, pic);
3990 for(j = 0; j < 16; j++) {
3991 pic= remove_long(h, j);
3992 if(pic) unreference_pic(h, pic);
// The current picture becomes the newest short-term reference (index 0); an existing
// entry with the same frame_num is removed first and reported as an error.
3999 if(!current_is_long){
4000 pic= remove_short(h, s->current_picture_ptr->frame_num);
4002 unreference_pic(h, pic);
4003 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4006 if(h->short_ref_count)
4007 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4009 h->short_ref[0]= s->current_picture_ptr;
4010 h->short_ref[0]->long_ref=0;
4011 h->short_ref_count++;
4014 print_short_term(h);
/*
 * Parse the reference picture marking syntax from gb into h->mmco[].
 * IDR slices read two flag bits; other slices either read an explicit list
 * of up to MAX_MMCO_COUNT operations or, when the reference buffers are
 * full, queue a single MMCO_SHORT2UNUSED for the last short-term entry.
 * NOTE(review): this listing omits many short lines (declarations, the
 * lines that set h->mmco_index, else branches, returns, braces).
 */
4019 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
4020 MpegEncContext * const s = &h->s;
4023 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// Each flag bit is mapped to 0 (bit set) or -1 (bit clear).
4024 s->broken_link= get_bits1(gb) -1;
4025 h->mmco[0].long_index= get_bits1(gb) - 1; // current_long_term_idx
4026 if(h->mmco[0].long_index == -1)
4029 h->mmco[0].opcode= MMCO_LONG;
4033 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
4034 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4035 MMCOOpcode opcode= get_ue_golomb(gb);
4037 h->mmco[i].opcode= opcode;
// Short-term operands are stored as a frame_num, computed modulo 2^log2_max_frame_num.
4038 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
4039 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4040 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4041 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4045 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4046 unsigned int long_index= get_ue_golomb(gb);
4047 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
4048 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4051 h->mmco[i].long_index= long_index;
4054 if(opcode > (unsigned)MMCO_LONG){
4055 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4058 if(opcode == MMCO_END)
4063 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// Buffers full: schedule removal of the last short-term entry (new pictures are inserted
// at index 0 by execute_ref_pic_marking, so the last entry is the oldest).
// NOTE(review): no short_ref_count > 0 test is visible; if all references are long-term,
// the index below would be -1 - confirm.
4065 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4066 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4067 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/*
 * Compute the picture order counts of the current picture according to
 * h->sps.poc_type and store them in s->current_picture_ptr->field_poc[]
 * (and ->poc for frame pictures, as the smaller of the two field values).
 * Also updates h->frame_num_offset and, for poc_type 0, h->poc_msb.
 * NOTE(review): this listing omits many short lines (declarations,
 * else branches, several assignments, the return, braces).
 */
4077 static int init_poc(H264Context *h){
4078 MpegEncContext * const s = &h->s;
4079 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset: 0 on IDR; otherwise the previous offset, advanced by max_frame_num
// when frame_num is smaller than the previous frame_num.
4082 if(h->nal_unit_type == NAL_IDR_SLICE){
4083 h->frame_num_offset= 0;
4085 if(h->frame_num < h->prev_frame_num)
4086 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4088 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: poc is split into msb and lsb; the msb moves by max_poc_lsb when the lsb
// jumps by half its range or more relative to the previous picture.
4091 if(h->sps.poc_type==0){
4092 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4094 if(h->nal_unit_type == NAL_IDR_SLICE){
4099 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4100 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4101 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4102 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4104 h->poc_msb = h->prev_poc_msb;
4105 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4107 field_poc[1] = h->poc_msb + h->poc_lsb;
4108 if(s->picture_structure == PICT_FRAME)
4109 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: poc is derived from the frame number and the per-cycle offsets stored in the SPS.
4110 }else if(h->sps.poc_type==1){
4111 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4114 if(h->sps.poc_cycle_length != 0)
4115 abs_frame_num = h->frame_num_offset + h->frame_num;
4119 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4122 expected_delta_per_poc_cycle = 0;
4123 for(i=0; i < h->sps.poc_cycle_length; i++)
4124 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// Whole cycles contribute expected_delta_per_poc_cycle each; the partial cycle adds its leading offsets.
4126 if(abs_frame_num > 0){
4127 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4128 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4130 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4131 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4132 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4136 if(h->nal_ref_idc == 0)
4137 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4139 field_poc[0] = expectedpoc + h->delta_poc[0];
4140 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4142 if(s->picture_structure == PICT_FRAME)
4143 field_poc[1] += h->delta_poc[1];
// Remaining poc_type: poc is twice the absolute frame number, minus one for non-reference pictures.
4146 if(h->nal_unit_type == NAL_IDR_SLICE){
4149 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4150 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Store only the field value(s) that belong to the current picture structure.
4156 if(s->picture_structure != PICT_BOTTOM_FIELD)
4157 s->current_picture_ptr->field_poc[0]= field_poc[0];
4158 if(s->picture_structure != PICT_TOP_FIELD)
4159 s->current_picture_ptr->field_poc[1]= field_poc[1];
4160 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4161 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4168 * initialize scan tables
/**
 * Fills the scan tables stored in the context and sets the *_q0 table
 * pointers (the ones selected elsewhere when qscale is 0).
 * @param h decoder context whose zigzag and field scan tables are written
 */
4170 static void init_scan_tables(H264Context *h){
4171 MpegEncContext * const s = &h->s;
// 4x4 tables: plain copy of the static tables when the C IDCT is in use
4173 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4174 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4175 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
// permuted variant: T swaps the two low bits with the two high bits of each
// 4-bit index, i.e. it exchanges row and column inside the 4x4 block
4177 for(i=0; i<16; i++){
4178 #define T(x) (x>>2) | ((x<<2) & 0xF)
4179 h->zigzag_scan[i] = T(zigzag_scan[i]);
4180 h-> field_scan[i] = T( field_scan[i]);
// 8x8 tables: same scheme, keyed on the 8x8 IDCT implementation
4184 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4185 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4186 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4187 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4188 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
// here T swaps the three low bits with the three high bits of each 6-bit index
4190 for(i=0; i<64; i++){
4191 #define T(x) (x>>3) | ((x&7)<<3)
4192 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4193 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4194 h->field_scan8x8[i] = T(field_scan8x8[i]);
4195 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// with transform bypass the q0 pointers refer to the unpermuted static tables
4199 if(h->sps.transform_bypass){ //FIXME same ugly
4200 h->zigzag_scan_q0 = zigzag_scan;
4201 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4202 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4203 h->field_scan_q0 = field_scan;
4204 h->field_scan8x8_q0 = field_scan8x8;
4205 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
// these assignments make the q0 pointers refer to the context tables filled above
4207 h->zigzag_scan_q0 = h->zigzag_scan;
4208 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4209 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4210 h->field_scan_q0 = h->field_scan;
4211 h->field_scan8x8_q0 = h->field_scan8x8;
4212 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4216 * decodes a slice header.
4217 * this will also call MPV_common_init() and frame_start() as needed
/**
 * Parses a slice header from s->gb into the decoder context.
 * Activates the referenced PPS and SPS, derives the picture dimensions,
 * initializes the MPV context on first use, starts a new frame for the first
 * slice of a picture and reads the per-slice parameters (frame_num, picture
 * structure, POC fields, reference counts, weights, QP, deblocking settings).
 * @param h decoder context; h->nal_ref_idc and h->nal_unit_type must describe
 *          the NAL unit being parsed
 */
4219 static int decode_slice_header(H264Context *h){
4220 MpegEncContext * const s = &h->s;
4221 unsigned int first_mb_in_slice;
4222 unsigned int pps_id;
4223 int num_ref_idx_active_override_flag;
// maps the coded slice_type value to the internal picture type constants
4224 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4225 unsigned int slice_type, tmp;
4226 int default_ref_list_done = 0;
// the picture is a reference, and not dropable, exactly when nal_ref_idc is nonzero
4228 s->current_picture.reference= h->nal_ref_idc != 0;
4229 s->dropable= h->nal_ref_idc == 0;
4231 first_mb_in_slice= get_ue_golomb(&s->gb);
// in chunked input mode a slice starting at macroblock 0 discards the current picture pointer
4233 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
4235 s->current_picture_ptr= NULL;
4238 slice_type= get_ue_golomb(&s->gb);
// NOTE(review): the message prints h->slice_type, which is only assigned further
// down; the value that was just parsed is in the local slice_type - confirm intent
4240 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4245 h->slice_type_fixed=1;
4247 h->slice_type_fixed=0;
4249 slice_type= slice_type_map[ slice_type ];
// the default reference list is not rebuilt for I slices, nor for a later
// slice of the picture that has the same type as the previous one
4250 if (slice_type == I_TYPE
4251 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4252 default_ref_list_done = 1;
4254 h->slice_type= slice_type;
4256 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// look up the PPS and the SPS it refers to, and copy both into the context
4258 pps_id= get_ue_golomb(&s->gb);
4259 if(pps_id>=MAX_PPS_COUNT){
4260 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4263 if(!h->pps_buffers[pps_id]) {
4264 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4267 h->pps= *h->pps_buffers[pps_id];
4269 if(!h->sps_buffers[h->pps.sps_id]) {
4270 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4273 h->sps = *h->sps_buffers[h->pps.sps_id];
// the dequantization tables are rebuilt only when a different PPS becomes active
4275 if(h->dequant_coeff_pps != pps_id){
4276 h->dequant_coeff_pps = pps_id;
4277 init_dequant_tables(h);
// picture size in macroblocks; the height is doubled when frame_mbs_only_flag is 0
4280 s->mb_width= h->sps.mb_width;
4281 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4283 h->b_stride= s->mb_width*4;
4284 h->b8_stride= s->mb_width*2;
// picture size in pixels after cropping: crop values are scaled by 2 horizontally,
// and vertically by 2 (frame_mbs_only_flag set) or by 4
4286 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4287 if(h->sps.frame_mbs_only_flag)
4288 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4290 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
// dimensions differ from those the context was initialized with
4292 if (s->context_initialized
4293 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
// first use: set up the MPV context, the scan tables and the exported stream properties
4297 if (!s->context_initialized) {
4298 if (MPV_common_init(s) < 0)
4301 init_scan_tables(h);
4304 s->avctx->width = s->width;
4305 s->avctx->height = s->height;
4306 s->avctx->sample_aspect_ratio= h->sps.sar;
4307 if(!s->avctx->sample_aspect_ratio.den)
4308 s->avctx->sample_aspect_ratio.den = 1;
// time base from the SPS timing info; the denominator is doubled for x264 builds below 44
4310 if(h->sps.timing_info_present_flag){
4311 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4312 if(h->x264_build > 0 && h->x264_build < 44)
4313 s->avctx->time_base.den *= 2;
4314 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4315 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
// the first slice of a picture starts a new frame
4319 if(h->slice_num == 0){
4320 if(frame_start(h) < 0)
4324 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4325 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// picture structure: frame, single field (reported as not implemented) or MBAFF frame
4328 h->mb_aff_frame = 0;
4329 if(h->sps.frame_mbs_only_flag){
4330 s->picture_structure= PICT_FRAME;
4332 if(get_bits1(&s->gb)) { //field_pic_flag
4333 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4334 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4336 s->picture_structure= PICT_FRAME;
4337 h->mb_aff_frame = h->sps.mb_aff;
// reject a start address outside the picture; the address is doubled for MBAFF frames
4340 assert(s->mb_num == s->mb_width * s->mb_height);
4341 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
4342 first_mb_in_slice >= s->mb_num){
4343 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
// macroblock position where decoding (and resync) of this slice starts
4346 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4347 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4348 assert(s->mb_y < s->mb_height);
// picture numbers: equal to frame_num for frames, twice that range otherwise
4350 if(s->picture_structure==PICT_FRAME){
4351 h->curr_pic_num= h->frame_num;
4352 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4354 h->curr_pic_num= 2*h->frame_num;
4355 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4358 if(h->nal_unit_type == NAL_IDR_SLICE){
4359 get_ue_golomb(&s->gb); /* idr_pic_id */
// picture order count syntax elements, depending on poc_type
4362 if(h->sps.poc_type==0){
4363 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4365 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4366 h->delta_poc_bottom= get_se_golomb(&s->gb);
4370 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4371 h->delta_poc[0]= get_se_golomb(&s->gb);
4373 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4374 h->delta_poc[1]= get_se_golomb(&s->gb);
4379 if(h->pps.redundant_pic_cnt_present){
4380 h->redundant_pic_count= get_ue_golomb(&s->gb);
4383 //set defaults, might be overridden a few lines later
4384 h->ref_count[0]= h->pps.ref_count[0];
4385 h->ref_count[1]= h->pps.ref_count[1];
4387 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4388 if(h->slice_type == B_TYPE){
4389 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4390 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4391 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4393 num_ref_idx_active_override_flag= get_bits1(&s->gb);
// per-slice override of the active reference counts (coded minus 1)
4395 if(num_ref_idx_active_override_flag){
4396 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4397 if(h->slice_type==B_TYPE)
4398 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// counts outside 1..32 are rejected and reset to 1
4400 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4401 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4402 h->ref_count[0]= h->ref_count[1]= 1;
4406 if(h->slice_type == B_TYPE)
// build the default reference list if needed, then apply the coded reordering
4413 if(!default_ref_list_done){
4414 fill_default_ref_list(h);
4417 if(decode_ref_pic_list_reordering(h) < 0)
// explicit weights are parsed from the bitstream, implicit ones are computed
4420 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4421 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4422 pred_weight_table(h);
4423 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4424 implicit_weight_table(h);
// reference picture marking is only parsed for reference pictures
4428 if(s->current_picture.reference)
4429 decode_ref_pic_marking(h, &s->gb);
4432 fill_mbaff_ref_list(h);
// cabac_init_idc is only present in non-intra slices of CABAC streams
4434 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4435 tmp = get_ue_golomb(&s->gb);
4437 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4440 h->cabac_init_idc= tmp;
// slice QP is the PPS initial QP plus the coded signed delta
4443 h->last_qscale_diff = 0;
4444 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4446 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4450 h->chroma_qp = get_chroma_qp(h, s->qscale);
4451 //FIXME qscale / qp ... stuff
4452 if(h->slice_type == SP_TYPE){
4453 get_bits1(&s->gb); /* sp_for_switch_flag */
4455 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4456 get_se_golomb(&s->gb); /* slice_qs_delta */
// deblocking defaults: filter enabled with zero offsets
4459 h->deblocking_filter = 1;
4460 h->slice_alpha_c0_offset = 0;
4461 h->slice_beta_offset = 0;
4462 if( h->pps.deblocking_filter_parameters_present ) {
4463 tmp= get_ue_golomb(&s->gb);
4465 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
// coded values 0 and 1 are swapped so that a nonzero h->deblocking_filter means enabled
4468 h->deblocking_filter= tmp;
4469 if(h->deblocking_filter < 2)
4470 h->deblocking_filter^= 1; // 1<->0
// the offsets are stored as twice the coded value
4472 if( h->deblocking_filter ) {
4473 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4474 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// the skip_loop_filter setting of the user can disable deblocking for this slice
4477 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4478 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4479 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4480 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4481 h->deblocking_filter= 0;
4484 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4485 slice_group_change_cycle= get_bits(&s->gb, ?);
// edge emulation margin: 0 with CODEC_FLAG_EMU_EDGE, else 16; the height margin is 0 for MBAFF frames
4490 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4491 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
// optional one-line summary of the parsed slice header
4493 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4494 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4496 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4498 av_get_pict_type_char(h->slice_type),
4499 pps_id, h->frame_num,
4500 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4501 h->ref_count[0], h->ref_count[1],
4503 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4505 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// with CODEC_FLAG2_FAST, non-reference pictures use the 2-tap qpel functions
// instead of the h264 qpel functions
4509 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4510 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4511 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4513 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4514 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads the level prefix of a CAVLC coefficient level from the bitstream.
 * Looks at the bit reader cache, locates its highest set bit and consumes
 * the bits up to and including that bit.
 * @param gb bit reader positioned at the prefix
 */
4523 static inline int get_level_prefix(GetBitContext *gb){
4527 OPEN_READER(re, gb);
4528 UPDATE_CACHE(re, gb);
4529 buf=GET_CACHE(re, gb);
// number of bits up to and including the highest set bit of the cache
// (assumes a 32 bit cache word - TODO confirm)
4531 log= 32 - av_log2(buf);
// debug output; the value printed as log-1 is the length of the zero run
4533 print_bin(buf>>(32-log), log);
4534 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// consume the prefix
4537 LAST_SKIP_BITS(re, gb, log);
4538 CLOSE_READER(re, gb);
/**
 * Examines the sub macroblock types in h->sub_mb_type to decide whether the
 * 8x8 transform may be used for the current macroblock.
 * @param h decoder context holding the sub macroblock types and the SPS
 */
4543 static inline int get_dct8x8_allowed(H264Context *h){
// condition is true for a sub macroblock that is split below 8x8, or that is
// direct while the SPS does not set direct_8x8_inference_flag
// (presumably these cases forbid the 8x8 transform - TODO confirm)
4546 if(!IS_SUB_8X8(h->sub_mb_type[i])
4547 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4554 * decodes a residual block.
4555 * @param n block index
4556 * @param scantable scantable
4557 * @param max_coeff number of coefficients in the block
4558 * @return <0 if an error occurred
/**
 * Decodes one CAVLC coded residual block into block[].
 * Reads the coefficient token, the trailing one signs, the remaining levels,
 * the total number of zeros and the zero runs, then writes the levels to the
 * positions given by scantable.
 * @param h decoder context
 * @param gb bit reader to parse from
 * @param block destination coefficient array, indexed through scantable
 * @param n block index, or CHROMA_DC_BLOCK_INDEX / LUMA_DC_BLOCK_INDEX
 * @param scantable maps coefficient number to position in block
 * @param qmul dequantization factors indexed by position; one of the two
 *             write loops below stores the levels without using it
 * @param max_coeff number of coefficients in the block
 */
4560 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4561 MpegEncContext * const s = &h->s;
// selects one of the 4 coeff_token VLC tables from the predicted coefficient count (0..16)
4562 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4564 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4566 //FIXME put trailing_ones into the context
// coeff_token packs total_coeff in the upper bits and trailing_ones in the low 2 bits
4568 if(n == CHROMA_DC_BLOCK_INDEX){
4569 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4570 total_coeff= coeff_token>>2;
4572 if(n == LUMA_DC_BLOCK_INDEX){
// luma DC uses the prediction of block 0 to choose the VLC table
4573 total_coeff= pred_non_zero_count(h, 0);
4574 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4575 total_coeff= coeff_token>>2;
4577 total_coeff= pred_non_zero_count(h, n);
4578 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4579 total_coeff= coeff_token>>2;
// remember the count so that neighbouring blocks can predict from it
4580 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4584 //FIXME set last_non_zero?
4588 if(total_coeff > (unsigned)max_coeff) {
4589 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4593 trailing_ones= coeff_token&3;
4594 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4595 assert(total_coeff<=16);
// trailing ones carry only a sign bit: 0 gives +1, 1 gives -1
4597 for(i=0; i<trailing_ones; i++){
4598 level[i]= 1 - 2*get_bits1(gb);
4602 int level_code, mask;
// initial suffix length is 1 for blocks with more than 10 coefficients and
// fewer than 3 trailing ones, otherwise 0
4603 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4604 int prefix= get_level_prefix(gb);
4606 //first coefficient has suffix_length equal to 0 or 1
4607 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4609 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4611 level_code= (prefix<<suffix_length); //part
4612 }else if(prefix==14){
4614 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4616 level_code= prefix + get_bits(gb, 4); //part
4617 }else if(prefix==15){
4618 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4619 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4621 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4625 if(trailing_ones < 3) level_code += 2;
// level_code to signed level: the low bit selects the sign (mask is 0 or -1,
// and (x ^ mask) - mask negates x when mask is -1), the rest is the magnitude
4630 mask= -(level_code&1);
4631 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4634 //remaining coefficients have suffix_length > 0
4635 for(;i<total_coeff;i++) {
// thresholds on level_code, indexed by the current suffix_length
4636 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4637 prefix = get_level_prefix(gb);
4639 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4640 }else if(prefix==15){
4641 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4643 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4646 mask= -(level_code&1);
4647 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4648 if(level_code > suffix_limit[suffix_length])
// a block in which every coefficient is coded
4653 if(total_coeff == max_coeff)
// total number of zeros, from the VLC table selected by total_coeff
4656 if(n == CHROMA_DC_BLOCK_INDEX)
4657 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4659 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// level[0] goes to the highest used scan position; coeff_num then decreases
// by 1 + run_before for every following level
4662 coeff_num = zeros_left + total_coeff - 1;
4663 j = scantable[coeff_num];
// this loop stores the levels as they are
4665 block[j] = level[0];
4666 for(i=1;i<total_coeff;i++) {
4669 else if(zeros_left < 7){
4670 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4672 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4674 zeros_left -= run_before;
4675 coeff_num -= 1 + run_before;
4676 j= scantable[ coeff_num ];
// this loop dequantizes: level times qmul of the position, rounded, shifted right by 6
4681 block[j] = (level[0] * qmul[j] + 32)>>6;
4682 for(i=1;i<total_coeff;i++) {
4685 else if(zeros_left < 7){
4686 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4688 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4690 zeros_left -= run_before;
4691 coeff_num -= 1 + run_before;
4692 j= scantable[ coeff_num ];
4694 block[j]= (level[i] * qmul[j] + 32)>>6;
4699 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Infers the field decoding flag of the current macroblock from a neighbour.
 * Sets h->mb_mbaff and h->mb_field_decoding_flag.
 * @param h decoder context; the current position is taken from h->s
 */
4706 static void predict_field_decoding_flag(H264Context *h){
4707 MpegEncContext * const s = &h->s;
4708 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// type of the left macroblock if it is in the current slice, otherwise of the
// macroblock one row above if that one is in the current slice
4709 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4710 ? s->current_picture.mb_type[mb_xy-1]
4711 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4712 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// both flags become 1 exactly when the chosen neighbour type is interlaced
4714 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4718 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Sets up the current macroblock as a skipped macroblock.
 * Clears its non zero counts, derives the motion (direct prediction in B
 * slices, predicted skip vector with reference 0 otherwise) and writes the
 * motion, type and qscale back into the picture tables.
 * @param h decoder context; the current position is taken from h->s
 */
4720 static void decode_mb_skip(H264Context *h){
4721 MpegEncContext * const s = &h->s;
4722 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// a skipped macroblock has no coefficients
4725 memset(h->non_zero_count[mb_xy], 0, 16);
4726 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4729 mb_type|= MB_TYPE_INTERLACED;
// B slice: motion comes from direct prediction
4731 if( h->slice_type == B_TYPE )
4733 // just for fill_caches. pred_direct_motion will set the real mb_type
4734 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4736 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4737 pred_direct_motion(h, &mb_type);
4738 mb_type|= MB_TYPE_SKIP;
// other slices: 16x16 block from list 0 with the predicted skip motion vector
4743 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4745 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4746 pred_pskip_motion(h, &mx, &my);
// reference index 0 and the vector (mx,my) for the whole 4x4 grid of blocks
4747 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4748 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// publish the result in the per-picture tables
4751 write_back_motion(h, mb_type);
4752 s->current_picture.mb_type[mb_xy]= mb_type;
4753 s->current_picture.qscale_table[mb_xy]= s->qscale;
4754 h->slice_table[ mb_xy ]= h->slice_num;
4755 h->prev_mb_skipped= 1;
4759 * decodes a macroblock
4760 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one CAVLC coded macroblock at the current position.
 * Handles the skip run, parses the macroblock type, then either the raw PCM
 * samples or the prediction data (intra modes, reference indices, motion
 * vectors), the coded block pattern, the QP delta and the residual blocks.
 * @param h decoder context; the bitstream is read from h->s.gb and from the
 *          intra/inter bit reader pointers
 */
4762 static int decode_mb_cavlc(H264Context *h){
4763 MpegEncContext * const s = &h->s;
4764 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4765 int partition_count;
4766 unsigned int mb_type, cbp;
4767 int dct8x8_allowed= h->pps.transform_8x8_mode;
4769 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4771 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4772 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4774 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4775 if(s->mb_skip_run==-1)
4776 s->mb_skip_run= get_ue_golomb(&s->gb);
4778 if (s->mb_skip_run--) {
4779 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4780 if(s->mb_skip_run==0)
4781 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4783 predict_field_decoding_flag(h);
4790 if( (s->mb_y&1) == 0 )
4791 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4793 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4795 h->prev_mb_skipped= 0;
4797 mb_type= get_ue_golomb(&s->gb);
4798 if(h->slice_type == B_TYPE){
4800 partition_count= b_mb_type_info[mb_type].partition_count;
4801 mb_type= b_mb_type_info[mb_type].type;
4804 goto decode_intra_mb;
4806 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4808 partition_count= p_mb_type_info[mb_type].partition_count;
4809 mb_type= p_mb_type_info[mb_type].type;
4812 goto decode_intra_mb;
4815 assert(h->slice_type == I_TYPE);
4818 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// intra types come with a fixed cbp and a 16x16 prediction mode from the table
4822 cbp= i_mb_type_info[mb_type].cbp;
4823 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4824 mb_type= i_mb_type_info[mb_type].type;
4828 mb_type |= MB_TYPE_INTERLACED;
4830 h->slice_table[ mb_xy ]= h->slice_num;
// PCM macroblock: samples are stored raw, byte aligned, 8 bits each
4832 if(IS_INTRA_PCM(mb_type)){
4835 // We assume these blocks are very rare so we do not optimize it.
4836 align_get_bits(&s->gb);
4838 // The pixels are stored in the same order as levels in h->mb array.
4839 for(y=0; y<16; y++){
4840 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4841 for(x=0; x<16; x++){
4842 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4843 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
// first chroma plane, stored from offset 256 in h->mb
4847 const int index= 256 + 4*(y&3) + 32*(y>>2);
4849 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4850 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
// second chroma plane, stored from offset 256 + 64 in h->mb
4854 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4856 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4857 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4861 // In deblocking, the quantizer is 0
4862 s->current_picture.qscale_table[mb_xy]= 0;
4863 h->chroma_qp = get_chroma_qp(h, 0);
4864 // All coeffs are present
4865 memset(h->non_zero_count[mb_xy], 16, 16);
4867 s->current_picture.mb_type[mb_xy]= mb_type;
// the reference counts are doubled here and halved again at the end of the function
4872 h->ref_count[0] <<= 1;
4873 h->ref_count[1] <<= 1;
4876 fill_caches(h, mb_type, 0);
// intra macroblock: parse the prediction modes
4879 if(IS_INTRA(mb_type)){
4881 // init_top_left_availability(h);
4882 if(IS_INTRA4x4(mb_type)){
// optional flag selecting the 8x8 transform for this intra macroblock
4885 if(dct8x8_allowed && get_bits1(&s->gb)){
4886 mb_type |= MB_TYPE_8x8DCT;
4890 // fill_intra4x4_pred_table(h);
4891 for(i=0; i<16; i+=di){
4892 int mode= pred_intra_mode(h, i);
// a 0 bit means the predicted mode is not used; the 3 bit remainder then
// selects among the other modes, skipping over the predicted one
4894 if(!get_bits1(&s->gb)){
4895 const int rem_mode= get_bits(&s->gb, 3);
4896 mode = rem_mode + (rem_mode >= mode);
4900 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4902 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4904 write_back_intra_pred_mode(h);
4905 if( check_intra4x4_pred_mode(h) < 0)
4908 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4909 if(h->intra16x16_pred_mode < 0)
// chroma prediction mode, validated by check_intra_pred_mode
4913 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4916 h->chroma_pred_mode= pred_mode;
// inter macroblock split into four 8x8 sub macroblocks
4917 }else if(partition_count==4){
4918 int i, j, sub_partition_count[4], list, ref[2][4];
4920 if(h->slice_type == B_TYPE){
4922 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4923 if(h->sub_mb_type[i] >=13){
4924 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4927 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4928 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// any direct sub macroblock requires the direct motion prediction to run
4930 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4931 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4932 pred_direct_motion(h, &mb_type);
4933 h->ref_cache[0][scan8[4]] =
4934 h->ref_cache[1][scan8[4]] =
4935 h->ref_cache[0][scan8[12]] =
4936 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4939 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4941 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4942 if(h->sub_mb_type[i] >=4){
4943 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4946 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4947 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices of the sub macroblocks, per list
4951 for(list=0; list<h->list_count; list++){
4952 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4954 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4955 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4956 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4958 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4970 dct8x8_allowed = get_dct8x8_allowed(h);
// motion vectors of the sub macroblock partitions, per list
4972 for(list=0; list<h->list_count; list++){
4974 if(IS_DIRECT(h->sub_mb_type[i])) {
4975 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
// spread the reference index over the 2x2 cache entries of this sub macroblock
4978 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4979 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4981 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4982 const int sub_mb_type= h->sub_mb_type[i];
4983 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4984 for(j=0; j<sub_partition_count[i]; j++){
4986 const int index= 4*i + block_width*j;
4987 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// vector is the prediction plus the two coded signed differences
4988 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4989 mx += get_se_golomb(&s->gb);
4990 my += get_se_golomb(&s->gb);
4991 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// replicate the vector over the cache entries covered by the partition shape
4993 if(IS_SUB_8X8(sub_mb_type)){
4995 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4997 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4998 }else if(IS_SUB_8X4(sub_mb_type)){
4999 mv_cache[ 1 ][0]= mx;
5000 mv_cache[ 1 ][1]= my;
5001 }else if(IS_SUB_4X8(sub_mb_type)){
5002 mv_cache[ 8 ][0]= mx;
5003 mv_cache[ 8 ][1]= my;
5005 mv_cache[ 0 ][0]= mx;
5006 mv_cache[ 0 ][1]= my;
5009 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// direct macroblock: all motion is derived, nothing is parsed
5015 }else if(IS_DIRECT(mb_type)){
5016 pred_direct_motion(h, &mb_type);
5017 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5019 int list, mx, my, i;
5020 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16: one reference index and one vector per used list
5021 if(IS_16X16(mb_type)){
5022 for(list=0; list<h->list_count; list++){
5024 if(IS_DIR(mb_type, 0, list)){
5025 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5026 if(val >= h->ref_count[list]){
5027 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5031 val= LIST_NOT_USED&0xFF;
5032 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5034 for(list=0; list<h->list_count; list++){
5036 if(IS_DIR(mb_type, 0, list)){
5037 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5038 mx += get_se_golomb(&s->gb);
5039 my += get_se_golomb(&s->gb);
5040 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5042 val= pack16to32(mx,my);
5045 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// 16x8: two partitions stacked vertically, cache offset 16*i per partition
5048 else if(IS_16X8(mb_type)){
5049 for(list=0; list<h->list_count; list++){
5052 if(IS_DIR(mb_type, i, list)){
5053 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5054 if(val >= h->ref_count[list]){
5055 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5059 val= LIST_NOT_USED&0xFF;
5060 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5063 for(list=0; list<h->list_count; list++){
5066 if(IS_DIR(mb_type, i, list)){
5067 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5068 mx += get_se_golomb(&s->gb);
5069 my += get_se_golomb(&s->gb);
5070 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5072 val= pack16to32(mx,my);
5075 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// 8x16: two partitions side by side, cache offset 2*i per partition
5079 assert(IS_8X16(mb_type));
5080 for(list=0; list<h->list_count; list++){
5083 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5084 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5085 if(val >= h->ref_count[list]){
5086 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5090 val= LIST_NOT_USED&0xFF;
5091 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5094 for(list=0; list<h->list_count; list++){
5097 if(IS_DIR(mb_type, i, list)){
5098 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5099 mx += get_se_golomb(&s->gb);
5100 my += get_se_golomb(&s->gb);
5101 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5103 val= pack16to32(mx,my);
5106 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
5112 if(IS_INTER(mb_type))
5113 write_back_motion(h, mb_type);
// coded block pattern: parsed for everything except intra 16x16 (which got it
// from the type table) and mapped through the intra or inter table
5115 if(!IS_INTRA16x16(mb_type)){
5116 cbp= get_ue_golomb(&s->gb);
5118 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
5122 if(IS_INTRA4x4(mb_type))
5123 cbp= golomb_to_intra4x4_cbp[cbp];
5125 cbp= golomb_to_inter_cbp[cbp];
// inter macroblocks with coded luma may signal the 8x8 transform here
5129 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5130 if(get_bits1(&s->gb))
5131 mb_type |= MB_TYPE_8x8DCT;
5133 s->current_picture.mb_type[mb_xy]= mb_type;
// residual data is present
5135 if(cbp || IS_INTRA16x16(mb_type)){
5136 int i8x8, i4x4, chroma_idx;
5137 int chroma_qp, dquant;
5138 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5139 const uint8_t *scan, *scan8x8, *dc_scan;
5141 // fill_non_zero_count_cache(h);
// field scans for interlaced macroblocks, zigzag scans otherwise; the q0
// variants are used when qscale is 0
5143 if(IS_INTERLACED(mb_type)){
5144 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5145 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5146 dc_scan= luma_dc_field_scan;
5148 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5149 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5150 dc_scan= luma_dc_zigzag_scan;
5153 dquant= get_se_golomb(&s->gb);
5155 if( dquant > 25 || dquant < -26 ){
5156 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// apply the QP delta and wrap the result back into 0..51
5160 s->qscale += dquant;
5161 if(((unsigned)s->qscale) > 51){
5162 if(s->qscale<0) s->qscale+= 52;
5163 else s->qscale-= 52;
5166 h->chroma_qp= chroma_qp= get_chroma_qp(h, s->qscale);
// intra 16x16: one luma DC block, then the AC blocks with 15 coefficients each
5167 if(IS_INTRA16x16(mb_type)){
5168 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5169 return -1; //FIXME continue if partitioned and other return -1 too
5172 assert((cbp&15) == 0 || (cbp&15) == 15);
5175 for(i8x8=0; i8x8<4; i8x8++){
5176 for(i4x4=0; i4x4<4; i4x4++){
5177 const int index= i4x4 + 4*i8x8;
5178 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5184 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// other types: luma is coded per 8x8 quadrant, gated by the low 4 cbp bits
5187 for(i8x8=0; i8x8<4; i8x8++){
5188 if(cbp & (1<<i8x8)){
5189 if(IS_8x8DCT(mb_type)){
5190 DCTELEM *buf = &h->mb[64*i8x8];
// an 8x8 block is read as four interleaved sets of 16 coefficients
5192 for(i4x4=0; i4x4<4; i4x4++){
5193 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5194 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// the four counts are summed into the first cache entry of the quadrant
5197 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5198 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5200 for(i4x4=0; i4x4<4; i4x4++){
5201 const int index= i4x4 + 4*i8x8;
5203 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// quadrant not coded: its counts are zero
5209 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5210 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC blocks, one per chroma plane, stored without dequantization factors
5216 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5217 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// chroma AC blocks, 15 coefficients each, blocks 16..23
5223 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5224 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp];
5225 for(i4x4=0; i4x4<4; i4x4++){
5226 const int index= 16 + 4*chroma_idx + i4x4;
5227 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// chroma counts are cleared here
5233 uint8_t * const nnz= &h->non_zero_count_cache[0];
5234 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5235 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// luma and chroma counts are all cleared here
5238 uint8_t * const nnz= &h->non_zero_count_cache[0];
5239 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5240 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5241 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5243 s->current_picture.qscale_table[mb_xy]= s->qscale;
5244 write_back_non_zero_count(h);
// undo the doubling of the reference counts done earlier in this function
5247 h->ref_count[0] >>= 1;
5248 h->ref_count[1] >>= 1;
/**
 * Decode the field decoding flag with CABAC.
 * The context index is derived from the left (mba) and upper (mbb)
 * neighbours, each tested for belonging to the current slice and being
 * interlaced; the bin is read from context state 70 + ctx.
 * NOTE(review): the statements that adjust ctx inside the two tests are not
 * visible in this excerpt.
 * @return the value returned by get_cabac_noinline() for that context
 */
5254 static int decode_cabac_field_decoding_flag(H264Context *h) {
5255 MpegEncContext * const s = &h->s;
5256 const int mb_x = s->mb_x;
/* clear the low bit so that both macroblocks of a pair use the same row */
5257 const int mb_y = s->mb_y & ~1;
/* mba: one column to the left on that row; mbb: same column, two rows up */
5258 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5259 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5261 unsigned int ctx = 0;
5263 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5266 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5270 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decode an intra macroblock type with CABAC.
 * @param ctx_base    index of the first context state used (state = &cabac_state[ctx_base])
 * @param intra_slice used as an offset/multiplier when selecting the later
 *                    context states (2+intra_slice, 3+intra_slice, 3+2*intra_slice)
 * @return 0 for I4x4, 25 for PCM, otherwise an I16x16 type built up from 1
 *         NOTE(review): the final return statement and the branch selecting
 *         between the two "I4x4" tests are not visible in this excerpt.
 */
5273 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5274 uint8_t *state= &h->cabac_state[ctx_base];
5278 MpegEncContext * const s = &h->s;
/* left and top neighbours as stored in the context */
5279 const int mba_xy = h->left_mb_xy[0];
5280 const int mbb_xy = h->top_mb_xy;
/* neighbours in the same slice that are not I4x4 influence the context */
5282 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5284 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5286 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5287 return 0; /* I4x4 */
/* alternative test that always uses state[0] (presumably the non-intra-slice path) */
5290 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5291 return 0; /* I4x4 */
5294 if( get_cabac_terminate( &h->cabac ) )
5295 return 25; /* PCM */
/* I16x16: accumulate the type index from the following bins */
5297 mb_type = 1; /* I16x16 */
5298 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5299 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5300 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
/* the last two bins contribute the values 2 and 1 */
5301 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5302 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decode the macroblock type with CABAC, dispatching on h->slice_type.
 * I slices: decode_cabac_intra_mb_type(h, 3, 1).
 * P slices: context states 14..17 select among the inter types; the intra
 *           fallback returns decode_cabac_intra_mb_type(h, 17, 0) + 5.
 * B slices: context states 27..32 are used; the intra fallback returns
 *           decode_cabac_intra_mb_type(h, 32, 0) + 23.
 * NOTE(review): several short statements (ctx updates, else branches, the
 * final return) are not visible in this excerpt.
 */
5306 static int decode_cabac_mb_type( H264Context *h ) {
5307 MpegEncContext * const s = &h->s;
5309 if( h->slice_type == I_TYPE ) {
5310 return decode_cabac_intra_mb_type(h, 3, 1);
5311 } else if( h->slice_type == P_TYPE ) {
5312 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5314 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5315 /* P_L0_D16x16, P_8x8 */
5316 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5318 /* P_L0_D8x16, P_L0_D16x8 */
5319 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra type inside a P slice: offset by 5 */
5322 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5324 } else if( h->slice_type == B_TYPE ) {
5325 const int mba_xy = h->left_mb_xy[0];
5326 const int mbb_xy = h->top_mb_xy;
/* neighbours in the same slice that are not direct influence the context */
5330 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5332 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5335 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5336 return 0; /* B_Direct_16x16 */
5338 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5339 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* read a 4 bit code, most significant bit first */
5342 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5343 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5344 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5345 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5347 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5348 else if( bits == 13 ) {
/* intra type inside a B slice: offset by 23 */
5349 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5350 } else if( bits == 14 )
5351 return 11; /* B_L1_L0_8x16 */
5352 else if( bits == 15 )
5353 return 22; /* B_8x8 */
/* remaining codes take one more bin */
5355 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5356 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5358 /* TODO SI/SP frames? */
/**
 * Decode the skip flag of the macroblock at (mb_x, mb_y) with CABAC.
 * The left (mba) and top (mbb) neighbour positions are computed here, with
 * a dedicated path when FRAME_MBAFF is set; neighbours in the current slice
 * that are not skipped influence the context. The bin is read from context
 * state 11 + ctx.
 * NOTE(review): the ctx updates (including the B_TYPE adjustment) and some
 * condition lines are not visible in this excerpt.
 */
5363 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5364 MpegEncContext * const s = &h->s;
5368 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
/* position of the top macroblock of the current pair */
5369 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* move the left neighbour one row down when its field/frame mode matches MB_FIELD */
5372 && h->slice_table[mba_xy] == h->slice_num
5373 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5374 mba_xy += s->mb_stride;
5376 mbb_xy = mb_xy - s->mb_stride;
/* move the top neighbour one more row up when it is interlaced */
5378 && h->slice_table[mbb_xy] == h->slice_num
5379 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5380 mbb_xy -= s->mb_stride;
5382 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
/* non-MBAFF: plain neighbours of the current macroblock */
5384 int mb_xy = mb_x + mb_y*s->mb_stride;
5386 mbb_xy = mb_xy - s->mb_stride;
5389 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5391 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5394 if( h->slice_type == B_TYPE )
5396 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decode an intra 4x4 prediction mode with CABAC.
 * One bin is read from context state 68; otherwise three bins from context
 * state 69 form a 3 bit value (least significant bit first) that is then
 * compared against pred_mode.
 * NOTE(review): the return statements are not visible in this excerpt.
 * @param pred_mode the predicted mode supplied by the caller
 */
5399 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5402 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5405 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5406 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5407 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5409 if( mode >= pred_mode )
/**
 * Decode the chroma prediction mode with CABAC.
 * The first bin uses context state 64 + ctx, where ctx depends on whether
 * the left/top neighbours (in the current slice) have a non-zero entry in
 * chroma_pred_mode_table; later bins use context state 64 + 3.
 * NOTE(review): the ctx updates and return statements are not visible in
 * this excerpt.
 */
5415 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5416 const int mba_xy = h->left_mb_xy[0];
5417 const int mbb_xy = h->top_mb_xy;
5421 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5422 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5425 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5428 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5431 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5433 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Column (x, 0..3) of each of the 16 block indices. */
5439 static const uint8_t block_idx_x[16] = {
5440 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/* Row (y, 0..3) of each of the 16 block indices. */
5442 static const uint8_t block_idx_y[16] = {
5443 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* Lookup indexed as [x][y] by decode_cabac_mb_cbp_luma(), which divides the entry by 4.
 * NOTE(review): the initializer rows are not visible in this excerpt. */
5445 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decode the luma part of the coded block pattern with CABAC.
 * For each of the four 8x8 blocks a context is derived from the left (cbp_a)
 * and top (cbp_b) coded block pattern bits and one bin is read from context
 * state 73 + ctx.
 * NOTE(review): declarations, the ctx/cbp updates and the return statement
 * are not visible in this excerpt.
 */
5452 static int decode_cabac_mb_cbp_luma( H264Context *h) {
/* the top neighbour is only consulted when it belongs to the current slice */
5457 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5459 tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
5462 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* coordinates of the first block of this 8x8 block */
5467 x = block_idx_x[4*i8x8];
5468 y = block_idx_y[4*i8x8];
5472 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5473 cbp_a = h->left_cbp;
5474 tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
5480 /* No need to test for skip as we put 0 for skip block */
5481 /* No need to test for IPCM as we put 1 for IPCM block */
/* 8x8 index of the block to the left (x-1 wrapped to 0..3) */
5483 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5484 if( ((cbp_a >> i8x8a)&0x01) == 0 )
/* 8x8 index of the block above (y-1 wrapped to 0..3) */
5489 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5490 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5494 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decode the chroma part of the coded block pattern with CABAC.
 * The neighbours' chroma cbp is taken from bits 4..5 of left_cbp/top_cbp.
 * A first bin (context state 77 + ctx) is tested against 0; a second bin
 * then yields 1 or 2.
 * NOTE(review): the ctx initialisations and the early return are not
 * visible in this excerpt.
 */
5500 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5504 cbp_a = (h->left_cbp>>4)&0x03;
5505 cbp_b = (h-> top_cbp>>4)&0x03;
/* first bin: context depends on whether the neighbours have any chroma cbp */
5508 if( cbp_a > 0 ) ctx++;
5509 if( cbp_b > 0 ) ctx += 2;
5510 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: context depends on whether the neighbours' chroma cbp equals 2 */
5514 if( cbp_a == 2 ) ctx++;
5515 if( cbp_b == 2 ) ctx += 2;
5516 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decode the quantiser delta with CABAC.
 * Bins are read from context state 60 + ctx in a loop; a guard on val > 102
 * stops the loop from running forever on damaged data.
 * NOTE(review): the ctx updates, the value returned when the guard triggers
 * and the positive-value return are not visible in this excerpt; the caller
 * in decode_mb_cabac() compares the result against INT_MIN.
 */
5518 static int decode_cabac_mb_dqp( H264Context *h) {
5519 MpegEncContext * const s = &h->s;
/* index of the preceding macroblock: same row, one column to the left ... */
5525 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
/* ... or the last macroblock of the previous row */
5527 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5529 if( h->last_qscale_diff != 0 )
5532 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5538 if(val > 102) //prevent infinite loop
5545 return -(val + 1)/2;
/**
 * Decode a P sub-macroblock type with CABAC using context states 21, 22 and 23.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
5547 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5548 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5550 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5552 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decode a B sub-macroblock type with CABAC using context states 36..39.
 * @return 0 for B_Direct_8x8, 1 or 2 for B_L0_8x8/B_L1_8x8, 11 or 12 for
 *         B_L1_4x4/B_Bi_4x4, otherwise a value accumulated in "type"
 *         NOTE(review): the initial values of "type" and the final return
 *         are not visible in this excerpt.
 */
5556 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5558 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5559 return 0; /* B_Direct_8x8 */
5560 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5561 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5563 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5564 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5565 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* two more bins are added to "type" with the weights 2 and 1 */
5568 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5569 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decode the transform size flag with CABAC.
 * The context state is 399 + h->neighbor_transform_size.
 * @return the decoded bin; callers set MB_TYPE_8x8DCT when it is non-zero
 */
5573 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5574 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decode a reference index with CABAC.
 * The context is derived from the reference indices cached for the left
 * (scan8[n] - 1) and top (scan8[n] - 8) neighbours; in B slices neighbours
 * flagged in direct_cache are ignored. Bins are read from context state
 * 54 + ctx in a loop.
 * @param list reference list the index belongs to
 * @param n    block index, mapped through scan8[]
 * @return the decoded index; 0 is returned (after logging an error) once
 *         the running value reaches 32
 *         NOTE(review): the ctx updates and the normal return are not
 *         visible in this excerpt.
 */
5577 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5578 int refa = h->ref_cache[list][scan8[n] - 1];
5579 int refb = h->ref_cache[list][scan8[n] - 8];
5583 if( h->slice_type == B_TYPE) {
5584 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5586 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5595 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* bound the loop so that damaged data cannot make it run without limit */
5601 if(ref >= 32 /*h->ref_list[list]*/){
5602 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5603 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decode one motion vector difference component with CABAC.
 * @param list reference list
 * @param n    block index, mapped through scan8[]
 * @param l    component selector: 0 uses context base 40, anything else 47
 * The context depends on the sum of the absolute mvd values cached for the
 * left and top neighbours. The magnitude is first read with context-coded
 * bins (while mvd < 9) and then with bypass bins; the sign is applied with
 * get_cabac_bypass_sign().
 * NOTE(review): the ctx selection, the magnitude updates and the value
 * returned on overflow are not visible in this excerpt.
 */
5609 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5610 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5611 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5612 int ctxbase = (l == 0) ? 40 : 47;
5617 else if( amvd > 32 )
5622 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* context-coded prefix */
5627 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded part */
5635 while( get_cabac_bypass( &h->cabac ) ) {
5639 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5644 if( get_cabac_bypass( &h->cabac ) )
5648 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Compute the context offset used for the coded block flag.
 * nza/nzb describe the left/top neighbour and are taken from a source that
 * depends on cat:
 *   first branch (condition not visible here): bit 0x100 of left_cbp/top_cbp
 *   cat 1, 2: non_zero_count_cache around scan8[idx]
 *   cat 3:    bit (6+idx) of left_cbp/top_cbp
 *   otherwise: non_zero_count_cache around scan8[16+idx]
 * @return ctx + 4 * cat
 *         NOTE(review): how ctx is derived from nza/nzb is not visible in
 *         this excerpt.
 */
5651 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5656 nza = h->left_cbp&0x100;
5657 nzb = h-> top_cbp&0x100;
5658 } else if( cat == 1 || cat == 2 ) {
5659 nza = h->non_zero_count_cache[scan8[idx] - 1];
5660 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5661 } else if( cat == 3 ) {
5662 nza = (h->left_cbp>>(6+idx))&0x01;
5663 nzb = (h-> top_cbp>>(6+idx))&0x01;
5666 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5667 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5676 return ctx + 4 * cat;
/* Context offset for the "last coefficient" flag of 8x8 blocks, indexed by
 * coefficient position 0..62; passed to DECODE_SIGNIFICANCE as the last_off
 * argument in decode_cabac_residual(). */
5679 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5680 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5681 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5682 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5683 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decode the residual coefficients of one block with CABAC.
 * @param block     destination for the decoded coefficients
 * @param cat       block category (see the table in the body)
 * @param n         block index; its meaning depends on cat
 * @param scantable maps the decoded coefficient position to an index in block
 * @param qmul      dequantisation multipliers indexed like block; callers
 *                  pass NULL for the DC categories
 * @param max_coeff number of coefficients the block can hold
 * Steps visible below: read the coded block flag, decode the significance
 * map into index[], update non_zero_count_cache / cbp_table, then decode the
 * levels from the last significant coefficient backwards.
 * With CABAC_ON_STACK the decoder state (range, low, bytestream) is copied
 * into a local context "cc" and written back before returning.
 * NOTE(review): many short lines (declarations, #if/#else/#endif, returns)
 * are not visible in this excerpt.
 */
5686 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5687 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context state base offsets, indexed as [MB_FIELD][cat] */
5688 static const int significant_coeff_flag_offset[2][6] = {
5689 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5690 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5692 static const int last_coeff_flag_offset[2][6] = {
5693 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5694 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
/* context state base offsets for the level decoding, indexed by cat */
5696 static const int coeff_abs_level_m1_offset[6] = {
5697 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position significance context offsets for 8x8 blocks, indexed as [MB_FIELD][position] */
5699 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5700 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5701 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5702 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5703 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5704 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5705 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5706 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5707 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5713 int coeff_count = 0;
5716 int abslevelgt1 = 0;
5718 uint8_t *significant_coeff_ctx_base;
5719 uint8_t *last_coeff_ctx_base;
5720 uint8_t *abs_level_m1_ctx_base;
5723 #define CABAC_ON_STACK
5725 #ifdef CABAC_ON_STACK
/* work on a local copy of the decoder state */
5728 cc.range = h->cabac.range;
5729 cc.low = h->cabac.low;
5730 cc.bytestream= h->cabac.bytestream;
5732 #define CC &h->cabac
5736 /* cat: 0-> DC 16x16 n = 0
5737 * 1-> AC 16x16 n = luma4x4idx
5738 * 2-> Luma4x4 n = luma4x4idx
5739 * 3-> DC Chroma n = iCbCr
5740 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5741 * 5-> Luma8x8 n = 4 * luma8x8idx
5744 /* read coded block flag */
5746 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* block not coded: record a zero coefficient count and restore the decoder state */
5747 if( cat == 1 || cat == 2 )
5748 h->non_zero_count_cache[scan8[n]] = 0;
5750 h->non_zero_count_cache[scan8[16+n]] = 0;
5751 #ifdef CABAC_ON_STACK
5752 h->cabac.range = cc.range ;
5753 h->cabac.low = cc.low ;
5754 h->cabac.bytestream= cc.bytestream;
/* select the context state ranges for this category (and field mode) */
5760 significant_coeff_ctx_base = h->cabac_state
5761 + significant_coeff_flag_offset[MB_FIELD][cat];
5762 last_coeff_ctx_base = h->cabac_state
5763 + last_coeff_flag_offset[MB_FIELD][cat];
5764 abs_level_m1_ctx_base = h->cabac_state
5765 + coeff_abs_level_m1_offset[cat];
5768 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5769 for(last= 0; last < coefs; last++) { \
5770 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5771 if( get_cabac( CC, sig_ctx )) { \
5772 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5773 index[coeff_count++] = last; \
5774 if( get_cabac( CC, last_ctx ) ) { \
5780 if( last == max_coeff -1 ) {\
5781 index[coeff_count++] = last;\
5783 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5784 #if defined(ARCH_X86) && defined(CONFIG_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5785 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5787 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5789 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5791 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5794 assert(coeff_count > 0);
/* record that this block has coefficients; where depends on cat */
5797 h->cbp_table[mb_xy] |= 0x100;
5798 else if( cat == 1 || cat == 2 )
5799 h->non_zero_count_cache[scan8[n]] = coeff_count;
5801 h->cbp_table[mb_xy] |= 0x40 << n;
5803 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5806 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* decode the levels, starting with the last significant coefficient */
5809 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5810 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5811 int j= scantable[index[coeff_count]];
5813 if( get_cabac( CC, ctx ) == 0 ) {
/* magnitude 1: only the sign is read; the qmul variant dequantises with rounding */
5815 block[j] = get_cabac_bypass_sign( CC, -1);
5817 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
/* larger magnitude: context-coded prefix up to 15, then a bypass-coded suffix */
5823 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5824 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5828 if( coeff_abs >= 15 ) {
5830 while( get_cabac_bypass( CC ) ) {
5836 coeff_abs += coeff_abs + get_cabac_bypass( CC );
/* one bypass bin selects the sign; the qmul variant dequantises with rounding */
5842 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5843 else block[j] = coeff_abs;
5845 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5846 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5852 #ifdef CABAC_ON_STACK
/* write the local decoder state back */
5853 h->cabac.range = cc.range ;
5854 h->cabac.low = cc.low ;
5855 h->cabac.bytestream= cc.bytestream;
/**
 * Set h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The defaults are the macroblock one row up and the one to the left; a
 * second part adjusts them based on the field/frame mode of the current,
 * top and left macroblock pairs.
 * NOTE(review): the condition guarding the second part is not visible in
 * this excerpt (presumably FRAME_MBAFF — confirm).
 */
5860 static inline void compute_mb_neighbors(H264Context *h)
5862 MpegEncContext * const s = &h->s;
5863 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5864 h->top_mb_xy = mb_xy - s->mb_stride;
5865 h->left_mb_xy[0] = mb_xy - 1;
/* pair_xy: top macroblock of the current pair; top_pair_xy: the row above it */
5867 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5868 const int top_pair_xy = pair_xy - s->mb_stride;
5869 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5870 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5871 const int curr_mb_frame_flag = !MB_FIELD;
5872 const int bottom = (s->mb_y & 1);
5874 ? !curr_mb_frame_flag // bottom macroblock
5875 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5877 h->top_mb_xy -= s->mb_stride;
/* when the left pair's frame/field mode differs, use the top macroblock of that pair */
5879 if (left_mb_frame_flag != curr_mb_frame_flag) {
5880 h->left_mb_xy[0] = pair_xy - 1;
5887 * decodes a macroblock
5888 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* CABAC macroblock decoder. Stages visible below: skip flag and field flag
 * handling, macroblock type decoding, the raw PCM path, prediction modes
 * (intra) or reference indices and motion vectors (inter), the coded block
 * pattern, the quantiser delta and finally the residual blocks.
 * Returns -1 on the error paths that are visible here.
 * NOTE(review): many short lines (braces, returns, some declarations and
 * conditions) are not visible in this excerpt. */
5890 static int decode_mb_cabac(H264Context *h) {
5891 MpegEncContext * const s = &h->s;
5892 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5893 int mb_type, partition_count, cbp = 0;
5894 int dct8x8_allowed= h->pps.transform_8x8_mode;
5896 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5898 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* skip flags are only decoded outside I and SI slices */
5899 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5901 /* a skipped mb needs the aff flag from the following mb */
5902 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5903 predict_field_decoding_flag(h);
/* the bottom macroblock's skip flag was already read while handling the top one */
5904 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5905 skip = h->next_mb_skipped;
5907 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5908 /* read skip flags */
5910 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5911 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5912 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5913 if(h->next_mb_skipped)
5914 predict_field_decoding_flag(h);
5916 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* reset the per-macroblock tables for a skipped macroblock */
5921 h->cbp_table[mb_xy] = 0;
5922 h->chroma_pred_mode_table[mb_xy] = 0;
5923 h->last_qscale_diff = 0;
5930 if( (s->mb_y&1) == 0 )
5932 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5934 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5936 h->prev_mb_skipped = 0;
5938 compute_mb_neighbors(h);
5939 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5940 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the decoded type index through the table of the slice type */
5944 if( h->slice_type == B_TYPE ) {
5946 partition_count= b_mb_type_info[mb_type].partition_count;
5947 mb_type= b_mb_type_info[mb_type].type;
5950 goto decode_intra_mb;
5952 } else if( h->slice_type == P_TYPE ) {
5954 partition_count= p_mb_type_info[mb_type].partition_count;
5955 mb_type= p_mb_type_info[mb_type].type;
5958 goto decode_intra_mb;
5961 assert(h->slice_type == I_TYPE);
5963 partition_count = 0;
5964 cbp= i_mb_type_info[mb_type].cbp;
5965 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5966 mb_type= i_mb_type_info[mb_type].type;
5969 mb_type |= MB_TYPE_INTERLACED;
5971 h->slice_table[ mb_xy ]= h->slice_num;
/* PCM: the samples are read directly from the byte stream */
5973 if(IS_INTRA_PCM(mb_type)) {
5977 // We assume these blocks are very rare so we do not optimize it.
5978 // FIXME The two following lines get the bitstream position in the cabac
5979 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5980 ptr= h->cabac.bytestream;
5981 if(h->cabac.low&0x1) ptr--;
5983 if(h->cabac.low&0x1FF) ptr--;
5986 // The pixels are stored in the same order as levels in h->mb array.
5987 for(y=0; y<16; y++){
5988 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5989 for(x=0; x<16; x++){
5990 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5991 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
/* first chroma plane, stored from h->mb + 256 */
5995 const int index= 256 + 4*(y&3) + 32*(y>>2);
5997 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5998 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* second chroma plane, stored from h->mb + 256 + 64 */
6002 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6004 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6005 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC decoder right after the raw samples */
6009 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6011 // All blocks are present
6012 h->cbp_table[mb_xy] = 0x1ef;
6013 h->chroma_pred_mode_table[mb_xy] = 0;
6014 // In deblocking, the quantizer is 0
6015 s->current_picture.qscale_table[mb_xy]= 0;
6016 h->chroma_qp = get_chroma_qp(h, 0);
6017 // All coeffs are present
6018 memset(h->non_zero_count[mb_xy], 16, 16);
6019 s->current_picture.mb_type[mb_xy]= mb_type;
/* doubled here and halved again at the end of the function; the guarding
 * condition is not visible in this excerpt */
6024 h->ref_count[0] <<= 1;
6025 h->ref_count[1] <<= 1;
6028 fill_caches(h, mb_type, 0);
6030 if( IS_INTRA( mb_type ) ) {
6032 if( IS_INTRA4x4( mb_type ) ) {
/* with the 8x8 transform one mode is decoded per 8x8 block and replicated 2x2 */
6033 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6034 mb_type |= MB_TYPE_8x8DCT;
6035 for( i = 0; i < 16; i+=4 ) {
6036 int pred = pred_intra_mode( h, i );
6037 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6038 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6041 for( i = 0; i < 16; i++ ) {
6042 int pred = pred_intra_mode( h, i );
6043 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6045 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6048 write_back_intra_pred_mode(h);
6049 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6051 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6052 if( h->intra16x16_pred_mode < 0 ) return -1;
6054 h->chroma_pred_mode_table[mb_xy] =
6055 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6057 pred_mode= check_intra_pred_mode( h, pred_mode );
6058 if( pred_mode < 0 ) return -1;
6059 h->chroma_pred_mode= pred_mode;
/* four 8x8 partitions, each with its own sub type */
6060 } else if( partition_count == 4 ) {
6061 int i, j, sub_partition_count[4], list, ref[2][4];
6063 if( h->slice_type == B_TYPE ) {
6064 for( i = 0; i < 4; i++ ) {
6065 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6066 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6067 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6069 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6070 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6071 pred_direct_motion(h, &mb_type);
6072 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6073 for( i = 0; i < 4; i++ )
6074 if( IS_DIRECT(h->sub_mb_type[i]) )
6075 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6079 for( i = 0; i < 4; i++ ) {
6080 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6081 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6082 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for all partitions and lists */
6086 for( list = 0; list < h->list_count; list++ ) {
6087 for( i = 0; i < 4; i++ ) {
6088 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6089 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6090 if( h->ref_count[list] > 1 )
6091 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6097 h->ref_cache[list][ scan8[4*i]+1 ]=
6098 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6103 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors for all partitions and lists */
6105 for(list=0; list<h->list_count; list++){
6107 if(IS_DIRECT(h->sub_mb_type[i])){
6108 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6111 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6113 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6114 const int sub_mb_type= h->sub_mb_type[i];
6115 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6116 for(j=0; j<sub_partition_count[i]; j++){
6119 const int index= 4*i + block_width*j;
6120 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6121 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6122 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
/* final vector = prediction + decoded difference */
6124 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6125 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6126 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the vector and its difference over the cache cells of the sub partition */
6128 if(IS_SUB_8X8(sub_mb_type)){
6130 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6132 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6135 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6137 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6138 }else if(IS_SUB_8X4(sub_mb_type)){
6139 mv_cache[ 1 ][0]= mx;
6140 mv_cache[ 1 ][1]= my;
6142 mvd_cache[ 1 ][0]= mx - mpx;
6143 mvd_cache[ 1 ][1]= my - mpy;
6144 }else if(IS_SUB_4X8(sub_mb_type)){
6145 mv_cache[ 8 ][0]= mx;
6146 mv_cache[ 8 ][1]= my;
6148 mvd_cache[ 8 ][0]= mx - mpx;
6149 mvd_cache[ 8 ][1]= my - mpy;
6151 mv_cache[ 0 ][0]= mx;
6152 mv_cache[ 0 ][1]= my;
6154 mvd_cache[ 0 ][0]= mx - mpx;
6155 mvd_cache[ 0 ][1]= my - mpy;
/* otherwise: clear the 2x2 mv and mvd cache cells, two 16 bit components at a time */
6158 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6159 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6160 p[0] = p[1] = p[8] = p[9] = 0;
6161 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6165 } else if( IS_DIRECT(mb_type) ) {
6166 pred_direct_motion(h, &mb_type);
6167 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6168 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6169 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16, 16x8 and 8x16 partitions: first all reference indices, then all vectors */
6171 int list, mx, my, i, mpx, mpy;
6172 if(IS_16X16(mb_type)){
6173 for(list=0; list<h->list_count; list++){
6174 if(IS_DIR(mb_type, 0, list)){
6175 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6176 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6178 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
6180 for(list=0; list<h->list_count; list++){
6181 if(IS_DIR(mb_type, 0, list)){
6182 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6184 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6185 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6186 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6188 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6189 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6191 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6194 else if(IS_16X8(mb_type)){
6195 for(list=0; list<h->list_count; list++){
6197 if(IS_DIR(mb_type, i, list)){
6198 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6199 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6201 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6204 for(list=0; list<h->list_count; list++){
6206 if(IS_DIR(mb_type, i, list)){
6207 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6208 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6209 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6210 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6212 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6213 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6215 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6216 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6221 assert(IS_8X16(mb_type));
6222 for(list=0; list<h->list_count; list++){
6224 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6225 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6226 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6228 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6231 for(list=0; list<h->list_count; list++){
6233 if(IS_DIR(mb_type, i, list)){
6234 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6235 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6236 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6238 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6239 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6240 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6242 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6243 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6250 if( IS_INTER( mb_type ) ) {
6251 h->chroma_pred_mode_table[mb_xy] = 0;
6252 write_back_motion( h, mb_type );
/* for intra 16x16 the cbp was already taken from i_mb_type_info above */
6255 if( !IS_INTRA16x16( mb_type ) ) {
6256 cbp = decode_cabac_mb_cbp_luma( h );
6257 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6260 h->cbp_table[mb_xy] = h->cbp = cbp;
/* inter macroblocks with luma coefficients may still select the 8x8 transform */
6262 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6263 if( decode_cabac_mb_transform_size( h ) )
6264 mb_type |= MB_TYPE_8x8DCT;
6266 s->current_picture.mb_type[mb_xy]= mb_type;
6268 if( cbp || IS_INTRA16x16( mb_type ) ) {
6269 const uint8_t *scan, *scan8x8, *dc_scan;
/* choose field or zigzag scan tables; the _q0 variants are used when qscale is 0 */
6272 if(IS_INTERLACED(mb_type)){
6273 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6274 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6275 dc_scan= luma_dc_field_scan;
6277 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6278 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6279 dc_scan= luma_dc_zigzag_scan;
6282 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6283 if( dqp == INT_MIN ){
6284 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into 0..51 */
6288 if(((unsigned)s->qscale) > 51){
6289 if(s->qscale<0) s->qscale+= 52;
6290 else s->qscale-= 52;
6292 h->chroma_qp = get_chroma_qp(h, s->qscale);
6294 if( IS_INTRA16x16( mb_type ) ) {
6296 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6297 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6300 for( i = 0; i < 16; i++ ) {
6301 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6302 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6306 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* luma residual, one 8x8 block at a time, only where the cbp bit is set */
6310 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6311 if( cbp & (1<<i8x8) ) {
6312 if( IS_8x8DCT(mb_type) ) {
6313 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6314 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6317 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6318 const int index = 4*i8x8 + i4x4;
6319 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6321 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6323 //STOP_TIMER("decode_residual")
6326 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6327 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC coefficients for both planes */
6334 for( c = 0; c < 2; c++ ) {
6335 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6336 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
/* chroma AC coefficients for both planes */
6343 for( c = 0; c < 2; c++ ) {
6344 const uint32_t *qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp];
6345 for( i = 0; i < 4; i++ ) {
6346 const int index = 16 + 4 * c + i;
6347 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6348 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15) < 0)
6353 uint8_t * const nnz= &h->non_zero_count_cache[0];
6354 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6355 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* nothing coded: clear all luma and chroma non-zero counts */
6358 uint8_t * const nnz= &h->non_zero_count_cache[0];
6359 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6360 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6361 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6362 h->last_qscale_diff = 0;
6365 s->current_picture.qscale_table[mb_xy]= s->qscale;
6366 write_back_non_zero_count(h);
/* undo the doubling of the reference counts done earlier */
6369 h->ref_count[0] >>= 1;
6370 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge.
 * @param pix    first pixel to the right of the edge (pix[-1] is the pixel left of it)
 * @param stride passed on to the DSP routine
 * @param bS     four boundary strength values
 * @param qp     quantiser used to look up alpha, beta and tc0
 * Two paths are visible: one builds a tc[] array and calls
 * h264_h_loop_filter_luma(); the other filters 16 positions in C code (the
 * bS=4 case, per the comment in the body).
 * NOTE(review): the condition selecting between the two paths and the
 * per-row pointer advance are not visible in this excerpt.
 */
6377 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* the tables are indexed with an offset of 52, so negative indices are possible */
6379 const int index_a = qp + h->slice_alpha_c0_offset;
6380 const int alpha = (alpha_table+52)[index_a];
6381 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* a bS of 0 yields tc = -1 for that segment */
6386 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6387 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6389 /* 16px edge length, because bS=4 is triggered by being at
6390 * the edge of an intra MB, so all 4 bS are the same */
6391 for( d = 0; d < 16; d++ ) {
/* p0..p2: pixels left of the edge; q0..q2: pixels right of it */
6392 const int p0 = pix[-1];
6393 const int p1 = pix[-2];
6394 const int p2 = pix[-3];
6396 const int q0 = pix[0];
6397 const int q1 = pix[1];
6398 const int q2 = pix[2];
/* filter only where the step across the edge and on both sides is small enough */
6400 if( FFABS( p0 - q0 ) < alpha &&
6401 FFABS( p1 - p0 ) < beta &&
6402 FFABS( q1 - q0 ) < beta ) {
/* stricter threshold: allows the filters that modify up to three pixels per side */
6404 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6405 if( FFABS( p2 - p0 ) < beta)
6407 const int p3 = pix[-4];
6409 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6410 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6411 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6414 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6416 if( FFABS( q2 - q0 ) < beta)
6418 const int q3 = pix[3];
6420 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6421 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6422 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6425 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* otherwise only the two pixels next to the edge are modified */
6429 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6430 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6432 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Chroma counterpart of filter_mb_edgev(): derives alpha and beta from qp
 * plus the slice offsets and hands the edge to the DSP chroma filters.
 * @param bS four boundary strengths, one per tc[] entry
 * @param qp chroma quantizer for this edge
 * NOTE(review): the test that selects h264_h_loop_filter_chroma versus the
 * _intra variant is not visible here.
 */
6438 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6440 const int index_a = qp + h->slice_alpha_c0_offset;
6441 const int alpha = (alpha_table+52)[index_a];
6442 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// chroma clipping value is the tc0_table entry plus one; 0 where bS[i] is 0
6447 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6448 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6450 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Filters the 16 rows of a luma edge one row at a time; pix advances by
 * stride after every row and each row selects its own bS entry and qp.
 * @param bS eight boundary strengths, selected per row through bS_index
 * @param qp two quantizers, selected per row through qp_index
 */
6454 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6456 for( i = 0; i < 16; i++, pix += stride) {
// default mapping: rows 2k and 2k+1 share bS[k]
6462 int bS_index = (i >> 1);
// NOTE(review): this adjustment sits under a condition that is not visible here
6465 bS_index |= (i & 1);
6468 if( bS[bS_index] == 0 ) {
// field MB: rows 0-7 use qp[0], rows 8-15 use qp[1]; otherwise even rows use qp[0], odd rows qp[1]
6472 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6473 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6474 alpha = (alpha_table+52)[index_a];
6475 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// strength below 4: filter with clipping taken from tc0_table
6477 if( bS[bS_index] < 4 ) {
6478 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6479 const int p0 = pix[-1];
6480 const int p1 = pix[-2];
6481 const int p2 = pix[-3];
6482 const int q0 = pix[0];
6483 const int q1 = pix[1];
6484 const int q2 = pix[2];
6486 if( FFABS( p0 - q0 ) < alpha &&
6487 FFABS( p1 - p0 ) < beta &&
6488 FFABS( q1 - q0 ) < beta ) {
// p1 / q1 are also corrected, clipped to +-tc0, when the p2 / q2 gradient is below beta
6492 if( FFABS( p2 - p0 ) < beta ) {
6493 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6496 if( FFABS( q2 - q0 ) < beta ) {
6497 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
// p0 and q0 move towards each other by the same clipped delta
6501 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6502 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6503 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6504 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// remaining case (labelled bS:4 in the tprintf below): smoothing without tc clipping
6507 const int p0 = pix[-1];
6508 const int p1 = pix[-2];
6509 const int p2 = pix[-3];
6511 const int q0 = pix[0];
6512 const int q1 = pix[1];
6513 const int q2 = pix[2];
6515 if( FFABS( p0 - q0 ) < alpha &&
6516 FFABS( p1 - p0 ) < beta &&
6517 FFABS( q1 - q0 ) < beta ) {
6519 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6520 if( FFABS( p2 - p0 ) < beta)
6522 const int p3 = pix[-4];
6524 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6525 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6526 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6529 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6531 if( FFABS( q2 - q0 ) < beta)
6533 const int q3 = pix[3];
6535 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6536 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6537 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6540 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6544 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6545 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6547 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Filters the 8 rows of a chroma edge one row at a time; pix advances by
 * stride after every row. Only p0 (pix[-1]) and q0 (pix[0]) are written.
 * @param bS eight boundary strengths, selected per row through bS_index
 * @param qp two chroma quantizers, selected per row through qp_index
 */
6552 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6554 for( i = 0; i < 8; i++, pix += stride) {
6562 if( bS[bS_index] == 0 ) {
// field MB: rows 0-3 use qp[0], rows 4-7 use qp[1]; otherwise even rows use qp[0], odd rows qp[1]
6566 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6567 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6568 alpha = (alpha_table+52)[index_a];
6569 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// strength below 4: clipped delta, with the clip bound being the tc0_table entry plus one
6571 if( bS[bS_index] < 4 ) {
6572 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6573 const int p0 = pix[-1];
6574 const int p1 = pix[-2];
6575 const int q0 = pix[0];
6576 const int q1 = pix[1];
6578 if( FFABS( p0 - q0 ) < alpha &&
6579 FFABS( p1 - p0 ) < beta &&
6580 FFABS( q1 - q0 ) < beta ) {
6581 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6583 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6584 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6585 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// remaining case (labelled bS:4 in the tprintf below): fixed-weight average, no clipping
6588 const int p0 = pix[-1];
6589 const int p1 = pix[-2];
6590 const int q0 = pix[0];
6591 const int q1 = pix[1];
6593 if( FFABS( p0 - q0 ) < alpha &&
6594 FFABS( p1 - p0 ) < beta &&
6595 FFABS( q1 - q0 ) < beta ) {
6597 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6598 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6599 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the luma samples on both sides of one edge, in place, with the
 * p and q samples one stride apart: p0..p3 are read at pix[-1*stride] ..
 * pix[-4*stride] and q0..q3 at pix[0] .. pix[3*stride].
 * @param pix    points at q0
 * @param stride distance between neighbouring samples across the edge
 * @param bS     four boundary strengths, one per tc[] entry
 * @param qp     quantizer; added to the slice alpha/beta offsets to index
 *               alpha_table, beta_table and tc0_table
 * NOTE(review): the test that selects between the DSP call and the
 * per-sample loop is not visible here.
 */
6605 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6607 const int index_a = qp + h->slice_alpha_c0_offset;
6608 const int alpha = (alpha_table+52)[index_a];
6609 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6610 const int pix_next = stride;
// per-entry clipping value from tc0_table; -1 is stored where bS[i] is 0
6615 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6616 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6618 /* 16px edge length, see filter_mb_edgev */
6619 for( d = 0; d < 16; d++ ) {
6620 const int p0 = pix[-1*pix_next];
6621 const int p1 = pix[-2*pix_next];
6622 const int p2 = pix[-3*pix_next];
6623 const int q0 = pix[0];
6624 const int q1 = pix[1*pix_next];
6625 const int q2 = pix[2*pix_next];
// filter only where the step across the edge and the gradient on each side stay below alpha/beta
6627 if( FFABS( p0 - q0 ) < alpha &&
6628 FFABS( p1 - p0 ) < beta &&
6629 FFABS( q1 - q0 ) < beta ) {
6631 const int p3 = pix[-4*pix_next];
6632 const int q3 = pix[ 3*pix_next];
// small step across the edge: a side whose p2/q2 gradient is below beta has three samples rewritten
6634 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6635 if( FFABS( p2 - p0 ) < beta) {
6637 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6638 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6639 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6642 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6644 if( FFABS( q2 - q0 ) < beta) {
6646 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6647 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6648 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6651 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// NOTE(review): presumably the larger-step case, where only p0 and q0 are rewritten (else line not shown)
6655 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6656 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6658 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Chroma counterpart of filter_mb_edgeh(): derives alpha and beta from qp
 * plus the slice offsets and hands the edge to the DSP chroma filters.
 * @param bS four boundary strengths, one per tc[] entry
 * @param qp chroma quantizer for this edge
 * NOTE(review): the test that selects h264_v_loop_filter_chroma versus the
 * _intra variant is not visible here.
 */
6665 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6667 const int index_a = qp + h->slice_alpha_c0_offset;
6668 const int alpha = (alpha_table+52)[index_a];
6669 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// chroma clipping value is the tc0_table entry plus one; 0 where bS[i] is 0
6674 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6675 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6677 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking of one macroblock; asserts that the frame is not
 * MBAFF. The call is forwarded to filter_mb() for macroblocks in the first
 * column or row, when no dsp.h264_loop_filter_strength implementation is
 * set, and (with deblocking_filter == 2) when the left or top neighbour
 * belongs to a different slice.
 * @param img_y,img_cb,img_cr  top-left sample of this MB in each plane
 * @param linesize,uvlinesize  luma / chroma line strides
 */
6681 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6682 MpegEncContext * const s = &h->s;
6684 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6686 mb_xy = mb_x + mb_y*s->mb_stride;
6688 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength ||
6689 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6690 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6691 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6694 assert(!FRAME_MBAFF);
6696 mb_type = s->current_picture.mb_type[mb_xy];
6697 qp = s->current_picture.qscale_table[mb_xy];
6698 qp0 = s->current_picture.qscale_table[mb_xy-1];
6699 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6700 qpc = get_chroma_qp( h, qp );
6701 qpc0 = get_chroma_qp( h, qp0 );
6702 qpc1 = get_chroma_qp( h, qp1 );
// qp for the left (qp0/qpc0) and top (qp1/qpc1) MB edges: rounded average with that neighbour
6703 qp0 = (qp + qp0 + 1) >> 1;
6704 qp1 = (qp + qp1 + 1) >> 1;
6705 qpc0 = (qpc + qpc0 + 1) >> 1;
6706 qpc1 = (qpc + qpc1 + 1) >> 1;
// NOTE(review): presumably an early exit when every qp is too low for filtering to change anything (body not shown)
6707 qp_thresh = 15 - h->slice_alpha_c0_offset;
6708 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6709 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// intra MB: strength 4 on the MB boundary (edge 0), strength 3 on internal edges
6712 if( IS_INTRA(mb_type) ) {
6713 int16_t bS4[4] = {4,4,4,4};
6714 int16_t bS3[4] = {3,3,3,3};
// with the 8x8 transform only luma edges 0 and 2 are filtered
6715 if( IS_8x8DCT(mb_type) ) {
6716 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6717 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6718 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6719 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6721 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6722 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6723 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6724 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6725 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6726 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6727 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6728 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
// chroma: edges 0 and 2 of each plane, in both directions
6730 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6731 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6732 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6733 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6734 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6735 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6736 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
6737 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// bS[dir][edge] holds four int16 strengths; bSv views each such row as one 64-bit word for bulk set/test
6740 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6741 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6743 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6745 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6747 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6748 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6749 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6750 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6752 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6753 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6754 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6755 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
// an intra left / top neighbour forces strength 4 on the shared MB edge
6757 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6758 bSv[0][0] = 0x0004000400040004ULL;
6759 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6760 bSv[1][0] = 0x0004000400040004ULL;
6762 #define FILTER(hv,dir,edge)\
6763 if(bSv[dir][edge]) {\
6764 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6766 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6767 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6773 } else if( IS_8x8DCT(mb_type) ) {
/**
 * General deblocking of one macroblock.
 * Handles the MBAFF first-vertical-edge case (eight bS values, two qp),
 * then walks both directions (dir 0: vertical edges, dir 1: horizontal
 * edges) and up to four edges per direction, deriving the four boundary
 * strengths of each edge and calling the luma and chroma edge filters.
 * @param img_y,img_cb,img_cr  top-left sample of this MB in each plane
 * @param linesize,uvlinesize  luma / chroma line strides
 */
6792 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6793 MpegEncContext * const s = &h->s;
6794 const int mb_xy= mb_x + mb_y*s->mb_stride;
6795 const int mb_type = s->current_picture.mb_type[mb_xy];
// vertical mv difference threshold used in the bS tests below: 2 for interlaced MBs, 4 otherwise
6796 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6797 int first_vertical_edge_done = 0;
6799 /* FIXME: A given frame may occupy more than one position in
6800 * the reference list. So ref2frm should be populated with
6801 * frame numbers, not indices. */
// indexed with ref_cache value + 2, so ref values -2 and -1 both map to -1
6802 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6803 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6805 //for sufficiently low qp, filtering wouldn't do anything
6806 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6808 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
6809 int qp = s->current_picture.qscale_table[mb_xy];
6811 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6812 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6818 // left mb is in picture
6819 && h->slice_table[mb_xy-1] != 255
6820 // and current and left pair do not have the same interlaced type
6821 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6822 // and left mb is in the same slice if deblocking_filter == 2
6823 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6824 /* First vertical edge is different in MBAFF frames
6825 * There are 8 different bS to compute and 2 different Qp
6827 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6828 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6832 int mb_qp, mbn0_qp, mbn1_qp;
6834 first_vertical_edge_done = 1;
6836 if( IS_INTRA(mb_type) )
6837 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6839 for( i = 0; i < 8; i++ ) {
6840 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6842 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6844 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6845 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6846 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6853 mb_qp = s->current_picture.qscale_table[mb_xy];
6854 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6855 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
// one luma and one chroma qp per left neighbour: rounded average of this MB and that neighbour
6856 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6857 chroma_qp[0] = ( get_chroma_qp( h, mb_qp ) +
6858 get_chroma_qp( h, mbn0_qp ) + 1 ) >> 1;
6859 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6860 chroma_qp[1] = ( get_chroma_qp( h, mb_qp ) +
6861 get_chroma_qp( h, mbn1_qp ) + 1 ) >> 1;
6864 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6865 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6866 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6867 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6868 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6870 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6871 for( dir = 0; dir < 2; dir++ )
// mbm: the MB across edge 0 in this direction (left neighbour for dir 0, top neighbour for dir 1)
6874 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6875 const int mbm_type = s->current_picture.mb_type[mbm_xy];
// begin at edge 1 when the MB across edge 0 is marked 255 in slice_table
6876 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
// an MB that is both 16x16 and skipped has only edge 0 to filter
6878 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6879 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6880 // how often to recheck mv-based bS when iterating between edges
6881 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6882 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6883 // how often to recheck mv-based bS when iterating along each edge
6884 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6886 if (first_vertical_edge_done) {
6888 first_vertical_edge_done = 0;
6891 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6894 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6895 && !IS_INTERLACED(mb_type)
6896 && IS_INTERLACED(mbm_type)
6898 // This is a special case in the norm where the filtering must
6899 // be done twice (one each of the field) even if we are in a
6900 // frame macroblock.
6902 static const int nnz_idx[4] = {4,5,6,3};
6903 unsigned int tmp_linesize = 2 * linesize;
6904 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6905 int mbn_xy = mb_xy - 2 * s->mb_stride;
// two passes: mbn_xy steps through the two MB rows above, and pass j filters every other line starting at line j
6910 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6911 if( IS_INTRA(mb_type) ||
6912 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6913 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6915 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6916 for( i = 0; i < 4; i++ ) {
6917 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6918 mbn_nnz[nnz_idx[i]] != 0 )
6924 // Do not use s->qscale as luma quantizer because it has not the same
6925 // value in IPCM macroblocks.
6926 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6927 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6928 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6929 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6930 chroma_qp = ( h->chroma_qp +
6931 get_chroma_qp( h, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6932 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6933 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6940 for( edge = start; edge < edges; edge++ ) {
6941 /* mbn_xy: neighbor macroblock */
6942 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6943 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6947 if( (edge&1) && IS_8x8DCT(mb_type) )
6950 if( IS_INTRA(mb_type) ||
6951 IS_INTRA(mbn_type) ) {
6954 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6955 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6964 bS[0] = bS[1] = bS[2] = bS[3] = value;
6969 if( edge & mask_edge ) {
6970 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6973 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6974 bS[0] = bS[1] = bS[2] = bS[3] = 1;
// one ref/mv comparison covers the whole edge; v becomes the strength of all four entries
6977 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6978 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6979 int bn_idx= b_idx - (dir ? 8:1);
6981 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6982 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6983 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6984 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6986 bS[0] = bS[1] = bS[2] = bS[3] = v;
// per-entry evaluation: b_idx is the cache position of this block, bn_idx its neighbour across the edge
6992 for( i = 0; i < 4; i++ ) {
6993 int x = dir == 0 ? edge : i;
6994 int y = dir == 0 ? i : edge;
6995 int b_idx= 8 + 4 + x + 8*y;
6996 int bn_idx= b_idx - (dir ? 8:1);
6998 if( h->non_zero_count_cache[b_idx] != 0 ||
6999 h->non_zero_count_cache[bn_idx] != 0 ) {
7005 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7006 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7007 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7008 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
// NOTE(review): presumably skips the edge when all four strengths are zero (body not shown)
7016 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7021 // Do not use s->qscale as luma quantizer because it has not the same
7022 // value in IPCM macroblocks.
7023 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7024 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7025 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7026 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
7028 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
// chroma is filtered on even luma edges only, at sample offset 2*edge
7029 if( (edge&1) == 0 ) {
7030 int chroma_qp = ( h->chroma_qp +
7031 get_chroma_qp( h, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7032 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7033 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7036 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7037 if( (edge&1) == 0 ) {
7038 int chroma_qp = ( h->chroma_qp +
7039 get_chroma_qp( h, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7040 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7041 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/**
 * Decodes the macroblocks of the current slice.
 * When h->pps.cabac is set the CABAC decoder is initialised from the
 * byte-aligned bitstream position and macroblocks are read with
 * decode_mb_cabac(); a second loop reads them with decode_mb_cavlc().
 * Decoded and damaged macroblock ranges are reported through
 * ff_er_add_slice(), and finished rows through ff_draw_horiz_band().
 * NOTE(review): the return statements of the individual paths are not
 * visible here; only the final "return -1" is.
 */
7048 static int decode_slice(H264Context *h){
7049 MpegEncContext * const s = &h->s;
// partitioned frames only pass the AC status bits to the error resilience code; otherwise all bits in 0x7F pass
7050 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7054 if( h->pps.cabac ) {
7058 align_get_bits( &s->gb );
7061 ff_init_cabac_states( &h->cabac);
// the CABAC decoder reads from the current byte position to the end of the bit buffer, rounded up to whole bytes
7062 ff_init_cabac_decoder( &h->cabac,
7063 s->gb.buffer + get_bits_count(&s->gb)/8,
7064 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7065 /* calculate pre-state */
7066 for( i= 0; i < 460; i++ ) {
// I slices use their own init table; other slice types use the table selected by cabac_init_idc
7068 if( h->slice_type == I_TYPE )
7069 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7071 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7074 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7076 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7081 int ret = decode_mb_cabac(h);
7083 //STOP_TIMER("decode_mb_cabac")
7085 if(ret>=0) hl_decode_mb(h);
// in MBAFF frames a second macroblock is decoded in the same iteration
7087 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7090 if(ret>=0) ret = decode_mb_cabac(h);
7092 if(ret>=0) hl_decode_mb(h);
7095 eos = get_cabac_terminate( &h->cabac );
// a read position more than 2 bytes past the end of the CABAC bytestream is treated like a decode error
7097 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7098 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7099 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7103 if( ++s->mb_x >= s->mb_width ) {
7105 ff_draw_horiz_band(s, 16*s->mb_y, 16);
// slice ends on the CABAC terminate flag or after the last MB row
7112 if( eos || s->mb_y >= s->mb_height ) {
7113 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7114 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7121 int ret = decode_mb_cavlc(h);
7123 if(ret>=0) hl_decode_mb(h);
7125 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7127 ret = decode_mb_cavlc(h);
7129 if(ret>=0) hl_decode_mb(h);
7134 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7135 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7140 if(++s->mb_x >= s->mb_width){
7142 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7147 if(s->mb_y >= s->mb_height){
7148 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7150 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7151 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7155 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// bitstream used up with no skip run pending: the slice ends here
7162 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7163 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7164 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7165 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7169 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// NOTE(review): the loop below contains "s->?gb" and "s->gb?", which are not valid C, and calls
// get_bits_count(s->gb) without '&'; presumably it is compiled out (guard not visible) -- confirm
7178 for(;s->mb_y < s->mb_height; s->mb_y++){
7179 for(;s->mb_x < s->mb_width; s->mb_x++){
7180 int ret= decode_mb(h);
7185 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7186 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7191 if(++s->mb_x >= s->mb_width){
7193 if(++s->mb_y >= s->mb_height){
7194 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7195 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7199 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7206 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
7207 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7208 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7212 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7219 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7222 return -1; //not reached
/**
 * Reads a user data payload into a local buffer and looks for an x264
 * version banner in it.
 * The text scanned starts at byte 16 of the buffer; when it matches
 * "x264 - core %d" with a non-negative number, that number is stored in
 * h->x264_build.
 * @param size payload size in bytes (one get_bits(8) is issued per byte)
 * NOTE(review): any minimum-size check and the termination of user_data
 * before sscanf are not visible here -- confirm they exist.
 */
7225 static int decode_unregistered_user_data(H264Context *h, int size){
7226 MpegEncContext * const s = &h->s;
7227 uint8_t user_data[16+256];
// copy at most sizeof(user_data)-1 bytes, leaving the last array element unwritten
7233 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7234 user_data[i]= get_bits(&s->gb, 8);
7238 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7239 if(e==1 && build>=0)
7240 h->x264_build= build;
7242 if(s->avctx->debug & FF_DEBUG_BUGS)
7243 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// NOTE(review): presumably skips payload bytes that did not fit in user_data (enclosing loop not shown)
7246 skip_bits(&s->gb, 8);
/**
 * Parses SEI messages for as long as more than 16 bits remain in s->gb.
 * The type and the size of each message are coded as a run of bytes that
 * are summed; a byte value of 255 means another byte follows.
 * NOTE(review): the dispatch on the message type is not visible here; the
 * visible lines show decode_unregistered_user_data() for one case and a
 * plain skip of 8*size bits for another.
 */
7251 static int decode_sei(H264Context *h){
7252 MpegEncContext * const s = &h->s;
7254 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// show_bits() peeks at the byte that get_bits() then consumes in the loop condition
7259 type+= show_bits(&s->gb, 8);
7260 }while(get_bits(&s->gb, 8) == 255);
7264 size+= show_bits(&s->gb, 8);
7265 }while(get_bits(&s->gb, 8) == 255);
7269 if(decode_unregistered_user_data(h, size) < 0)
// NOTE(review): size comes from the bitstream and no check against the remaining bits is visible here
7273 skip_bits(&s->gb, 8*size);
7276 //FIXME check bits here
7277 align_get_bits(&s->gb);
/**
 * Reads one set of HRD parameters from s->gb and discards every value;
 * the only visible effect is that the read position advances.
 * sps is not referenced in the visible lines.
 */
7283 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7284 MpegEncContext * const s = &h->s;
// NOTE(review): no upper bound on cpb_count is visible; a large coded value makes the loop below read that many entries
7286 cpb_count = get_ue_golomb(&s->gb) + 1;
7287 get_bits(&s->gb, 4); /* bit_rate_scale */
7288 get_bits(&s->gb, 4); /* cpb_size_scale */
7289 for(i=0; i<cpb_count; i++){
7290 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7291 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7292 get_bits1(&s->gb); /* cbr_flag */
7294 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7295 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7296 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7297 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses the VUI parameters from s->gb.
 * Stored in sps: sar, timing_info_present_flag, num_units_in_tick,
 * time_scale, fixed_frame_rate_flag, bitstream_restriction_flag and
 * num_reorder_frames. All other fields are read and discarded.
 * NOTE(review): the return statements are not visible here; the two
 * av_log(AV_LOG_ERROR) sites presumably lead to an error return.
 */
7300 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7301 MpegEncContext * const s = &h->s;
7302 int aspect_ratio_info_present_flag;
7303 unsigned int aspect_ratio_idc;
7304 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7306 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7308 if( aspect_ratio_info_present_flag ) {
7309 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries the ratio explicitly as two 16-bit values; idc below 14 selects an entry of pixel_aspect
7310 if( aspect_ratio_idc == EXTENDED_SAR ) {
7311 sps->sar.num= get_bits(&s->gb, 16);
7312 sps->sar.den= get_bits(&s->gb, 16);
7313 }else if(aspect_ratio_idc < 14){
7314 sps->sar= pixel_aspect[aspect_ratio_idc];
7316 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7323 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7325 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7326 get_bits1(&s->gb); /* overscan_appropriate_flag */
7329 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7330 get_bits(&s->gb, 3); /* video_format */
7331 get_bits1(&s->gb); /* video_full_range_flag */
7332 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7333 get_bits(&s->gb, 8); /* colour_primaries */
7334 get_bits(&s->gb, 8); /* transfer_characteristics */
7335 get_bits(&s->gb, 8); /* matrix_coefficients */
7339 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7340 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7341 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7344 sps->timing_info_present_flag = get_bits1(&s->gb);
7345 if(sps->timing_info_present_flag){
7346 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7347 sps->time_scale = get_bits_long(&s->gb, 32);
7348 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// NAL and VCL HRD parameter sets are each parsed (and discarded) when their flag is set
7351 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7352 if(nal_hrd_parameters_present_flag)
7353 decode_hrd_parameters(h, sps);
7354 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7355 if(vcl_hrd_parameters_present_flag)
7356 decode_hrd_parameters(h, sps);
7357 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7358 get_bits1(&s->gb); /* low_delay_hrd_flag */
7359 get_bits1(&s->gb); /* pic_struct_present_flag */
7361 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7362 if(sps->bitstream_restriction_flag){
7363 unsigned int num_reorder_frames;
7364 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7365 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7366 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7367 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7368 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7369 num_reorder_frames= get_ue_golomb(&s->gb);
7370 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// the value is validated in a local first and only copied into sps afterwards
7372 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7373 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7377 sps->num_reorder_frames= num_reorder_frames;
/**
 * Reads one scaling list from s->gb into factors.
 * @param factors       destination, size entries
 * @param size          number of entries; 16 selects zigzag_scan, any
 *                      other value zigzag_scan8x8
 * @param jvt_list      list copied when the first decoded value is 0
 * @param fallback_list list copied when the presence bit is 0
 */
7383 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7384 const uint8_t *jvt_list, const uint8_t *fallback_list){
7385 MpegEncContext * const s = &h->s;
7386 int i, last = 8, next = 8;
7387 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7388 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7389 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7391 for(i=0;i<size;i++){
// values are coded as signed deltas against the previous value, wrapped to 8 bits
7393 next = (last + get_se_golomb(&s->gb)) & 0xff;
7394 if(!i && !next){ /* matrix not written, we use the preset one */
7395 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// entries are stored in scan order; an entry whose value is 0 takes the previous value
7398 last = factors[scan[i]] = next ? next : last;
/**
 * Reads the six 4x4 and, where applicable, the two 8x8 scaling lists of an
 * SPS or PPS into scaling_matrix4 / scaling_matrix8.
 * @param is_sps nonzero when parsing an SPS; in that case
 *               sps->scaling_matrix_present is set when lists are coded
 *               and the 8x8 lists are always read
 * When parsing a PPS (is_sps == 0) for an SPS that has matrices, the SPS
 * lists serve as fallback, and are copied wholesale if the PPS codes none.
 */
7402 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7403 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7404 MpegEncContext * const s = &h->s;
7405 int fallback_sps = !is_sps && sps->scaling_matrix_present;
// fallback for the first list of each group: the SPS list when available, the built-in default otherwise
7406 const uint8_t *fallback[4] = {
7407 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7408 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7409 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7410 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7412 if(get_bits1(&s->gb)){
7413 sps->scaling_matrix_present |= is_sps;
// within a group of three, lists 1 and 2 fall back to the list decoded just before them
7414 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7415 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7416 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7417 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7418 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7419 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7420 if(is_sps || pps->transform_8x8_mode){
7421 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7422 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7424 } else if(fallback_sps) {
7425 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7426 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7431 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7434 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7435 const size_t size, const char *name)
7438 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7443 vec[id] = av_mallocz(size);
7445 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/*
 * Parses a sequence parameter set from s->gb and stores its fields in the
 * SPS slot selected by the decoded sps_id (obtained via alloc_parameter_set()).
 * NOTE(review): the embedded line numbers show gaps, so short lines (local
 * declarations, closing braces, return statements, else branches) appear to
 * be absent from this listing; confirm control flow against the complete file.
 */
7450 static inline int decode_seq_parameter_set(H264Context *h){
7451 MpegEncContext * const s = &h->s;
7452 int profile_idc, level_idc;
7453 unsigned int sps_id, tmp, mb_width, mb_height;
// fixed-length header: profile, four constraint flags, 4 reserved bits, level
7457 profile_idc= get_bits(&s->gb, 8);
7458 get_bits1(&s->gb); //constraint_set0_flag
7459 get_bits1(&s->gb); //constraint_set1_flag
7460 get_bits1(&s->gb); //constraint_set2_flag
7461 get_bits1(&s->gb); //constraint_set3_flag
7462 get_bits(&s->gb, 4); // reserved
7463 level_idc= get_bits(&s->gb, 8);
7464 sps_id= get_ue_golomb(&s->gb);
// fetch (allocating on first use) the slot for this id; ids >= MAX_SPS_COUNT yield NULL
7466 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7470 sps->profile_idc= profile_idc;
7471 sps->level_idc= level_idc;
// syntax elements that are only present for profile_idc >= 100; several are read and discarded
7473 if(sps->profile_idc >= 100){ //high profile
7474 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7475 get_bits1(&s->gb); //residual_color_transform_flag
7476 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7477 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7478 sps->transform_bypass = get_bits1(&s->gb);
// pps argument is NULL here; is_sps=1
7479 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// presumably the else branch of the profile check (the else line is not visible) -- confirm
7481 sps->scaling_matrix_present = 0;
7483 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7484 sps->poc_type= get_ue_golomb(&s->gb);
// picture order count parameters depend on poc_type (0, 1 or 2)
7486 if(sps->poc_type == 0){ //FIXME #define
7487 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7488 } else if(sps->poc_type == 1){//FIXME #define
7489 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7490 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7491 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7492 tmp= get_ue_golomb(&s->gb);
// the cycle length must fit the offset_for_ref_frame[] array
7494 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7495 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7498 sps->poc_cycle_length= tmp;
7500 for(i=0; i<sps->poc_cycle_length; i++)
7501 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7502 }else if(sps->poc_type != 2){
7503 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
// reference frame count, bounded by MAX_PICTURE_COUNT-2
7507 tmp= get_ue_golomb(&s->gb);
7508 if(tmp > MAX_PICTURE_COUNT-2){
7509 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7511 sps->ref_frame_count= tmp;
7512 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
// picture size in macroblocks; checked so that 16*mb_width / 16*mb_height stay below INT_MAX
7513 mb_width= get_ue_golomb(&s->gb) + 1;
7514 mb_height= get_ue_golomb(&s->gb) + 1;
7515 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7516 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7517 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7520 sps->mb_width = mb_width;
7521 sps->mb_height= mb_height;
// mb_aff is only coded when frame_mbs_only_flag is 0
7523 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7524 if(!sps->frame_mbs_only_flag)
7525 sps->mb_aff= get_bits1(&s->gb);
7529 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
// unsupported combinations are only logged here
7531 #ifndef ALLOW_INTERLACE
7533 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7535 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7536 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
// optional cropping rectangle
7538 sps->crop= get_bits1(&s->gb);
7540 sps->crop_left = get_ue_golomb(&s->gb);
7541 sps->crop_right = get_ue_golomb(&s->gb);
7542 sps->crop_top = get_ue_golomb(&s->gb);
7543 sps->crop_bottom= get_ue_golomb(&s->gb);
7544 if(sps->crop_left || sps->crop_top){
7545 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
// presumably the tail of the no-cropping branch that zeroes all crop fields -- confirm
7551 sps->crop_bottom= 0;
// optional VUI block
7554 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7555 if( sps->vui_parameters_present_flag )
7556 decode_vui_parameters(h, sps);
// one-line summary of the parsed SPS when FF_DEBUG_PICT_INFO is set
7558 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7559 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7560 sps_id, sps->profile_idc, sps->level_idc,
7562 sps->ref_frame_count,
7563 sps->mb_width, sps->mb_height,
7564 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7565 sps->direct_8x8_inference_flag ? "8B8" : "",
7566 sps->crop_left, sps->crop_right,
7567 sps->crop_top, sps->crop_bottom,
7568 sps->vui_parameters_present_flag ? "VUI" : ""
7575 build_qp_table(PPS *pps, int index)
7578 for(i = 0; i < 255; i++)
7579 pps->chroma_qp_table[i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
7580 pps->chroma_qp_index_offset = index;
/*
 * Parses a picture parameter set from s->gb into the PPS slot selected by the
 * decoded pps_id. bit_length is the number of valid bits in the NAL payload
 * and decides whether the optional trailing fields are read.
 * NOTE(review): the embedded line numbers show gaps, so short lines (local
 * declarations, braces, case labels, return statements) appear to be absent
 * from this listing; confirm control flow against the complete file.
 */
7583 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7584 MpegEncContext * const s = &h->s;
7585 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
// fetch (allocating on first use) the slot for this id; ids >= MAX_PPS_COUNT yield NULL
7588 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
// the referenced SPS id must be in range and must already have been allocated
7592 tmp= get_ue_golomb(&s->gb);
7593 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7594 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7599 pps->cabac= get_bits1(&s->gb);
7600 pps->pic_order_present= get_bits1(&s->gb);
7601 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// more than one slice group means FMO, which is reported as unsupported
7602 if(pps->slice_group_count > 1 ){
7603 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7604 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
// the rows below quote syntax-table entries per map type; presumably compiled out in the complete file -- confirm
7605 switch(pps->mb_slice_group_map_type){
7608 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7609 | run_length[ i ] |1 |ue(v) |
7614 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7616 | top_left_mb[ i ] |1 |ue(v) |
7617 | bottom_right_mb[ i ] |1 |ue(v) |
7625 | slice_group_change_direction_flag |1 |u(1) |
7626 | slice_group_change_rate_minus1 |1 |ue(v) |
7631 | slice_group_id_cnt_minus1 |1 |ue(v) |
7632 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7634 | slice_group_id[ i ] |1 |u(v) |
// default reference counts for both lists; values above 32 are rejected and reset to 1
7639 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7640 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7641 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7642 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7643 pps->ref_count[0]= pps->ref_count[1]= 1;
7647 pps->weighted_pred= get_bits1(&s->gb);
7648 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// initial QP / QS are coded as signed offsets from 26
7649 pps->init_qp= get_se_golomb(&s->gb) + 26;
7650 pps->init_qs= get_se_golomb(&s->gb) + 26;
// the next signed value is the chroma QP index offset, used to build the chroma QP table
7651 build_qp_table(pps, get_se_golomb(&s->gb));
7652 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7653 pps->constrained_intra_pred= get_bits1(&s->gb);
7654 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
// defaults used when the optional trailing fields are absent: no 8x8 transform, flat matrices of 16
7656 pps->transform_8x8_mode= 0;
7657 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7658 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7659 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// optional trailing fields are read only if unread bits remain in the payload
7661 if(get_bits_count(&s->gb) < bit_length){
7662 pps->transform_8x8_mode= get_bits1(&s->gb);
7663 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7664 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// one-line summary of the parsed PPS when FF_DEBUG_PICT_INFO is set
7667 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7668 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7669 pps_id, pps->sps_id,
7670 pps->cabac ? "CABAC" : "CAVLC",
7671 pps->slice_group_count,
7672 pps->ref_count[0], pps->ref_count[1],
7673 pps->weighted_pred ? "weighted" : "",
7674 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7675 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7676 pps->constrained_intra_pred ? "CONSTR" : "",
7677 pps->redundant_pic_cnt_present ? "REDU" : "",
7678 pps->transform_8x8_mode ? "8x8DCT" : ""
/*
 * Walks the NAL units in buf (buf_size bytes), unescapes each one with
 * decode_nal() and dispatches on h->nal_unit_type: slices, data partitions,
 * SPS and PPS are parsed; several other types are accepted without action.
 * NOTE(review): the embedded line numbers show gaps, so short lines (local
 * declarations, braces, case labels, break/return statements) appear to be
 * absent from this listing; confirm control flow against the complete file.
 */
7685 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7686 MpegEncContext * const s = &h->s;
7687 AVCodecContext * const avctx= s->avctx;
// hex dump of the first 50 input bytes; presumably disabled debugging code -- confirm
7691 for(i=0; i<50; i++){
7692 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// when not in CHUNKS mode the current picture pointer is cleared before parsing
7695 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7697 s->current_picture_ptr= NULL;
7708 if(buf_index >= buf_size) break;
// length-prefixed input: read a big-endian NAL size of nal_length_size bytes
7710 for(i = 0; i < h->nal_length_size; i++)
7711 nalsize = (nalsize << 8) | buf[buf_index++];
// NOTE(review): nalsize+buf_index is computed in int and could overflow for a large coded size -- confirm and harden
7712 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7717 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7722 // start code prefix search
7723 for(; buf_index + 3 < buf_size; buf_index++){
7724 // This should always succeed in the first iteration.
7725 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7729 if(buf_index+3 >= buf_size) break;
// unescape the NAL; for AVC input the length is nalsize, otherwise the rest of the buffer
7734 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7735 if (ptr==NULL || dst_length < 0){
// strip trailing zero bytes
// NOTE(review): ptr[dst_length - 1] is evaluated before dst_length > 0, so ptr[-1] is read when dst_length is 0; the two operands should be swapped
7738 while(ptr[dst_length - 1] == 0 && dst_length > 0)
// payload size in bits, excluding what decode_rbsp_trailing() reports for the last byte
7740 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7742 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7743 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
// for AVC input the unescaper is expected to consume exactly nalsize bytes; a mismatch is only logged
7746 if (h->is_avc && (nalsize != consumed))
7747 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7749 buf_index += consumed;
// condition selecting non-reference NALs (nal_ref_idc == 0) under hurry_up / skip_frame
7751 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7752 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7755 switch(h->nal_unit_type){
7757 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// regular slice: intra and inter data are read from s->gb, no data partitioning
7759 init_get_bits(&s->gb, ptr, bit_length);
7761 h->inter_gb_ptr= &s->gb;
7762 s->data_partitioning = 0;
7764 if(decode_slice_header(h) < 0){
7765 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7768 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
// condition combining redundant_pic_count, hurry_up and the skip_frame discard levels
7769 if(h->redundant_pic_count==0 && s->hurry_up < 5
7770 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7771 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7772 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7773 && avctx->skip_frame < AVDISCARD_ALL)
// data-partitioned slice: the header comes from s->gb; separate readers are set up for intra and inter data
7777 init_get_bits(&s->gb, ptr, bit_length);
7779 h->inter_gb_ptr= NULL;
7780 s->data_partitioning = 1;
7782 if(decode_slice_header(h) < 0){
7783 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7787 init_get_bits(&h->intra_gb, ptr, bit_length);
7788 h->intra_gb_ptr= &h->intra_gb;
7791 init_get_bits(&h->inter_gb, ptr, bit_length);
7792 h->inter_gb_ptr= &h->inter_gb;
// same discard conditions as above, additionally requiring an intra reader, data partitioning and an initialized context
7794 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7795 && s->context_initialized
7797 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7798 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7799 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7800 && avctx->skip_frame < AVDISCARD_ALL)
7804 init_get_bits(&s->gb, ptr, bit_length);
// sequence parameter set; the return value of the parser is not checked here
7808 init_get_bits(&s->gb, ptr, bit_length);
7809 decode_seq_parameter_set(h);
7811 if(s->flags& CODEC_FLAG_LOW_DELAY)
7814 if(avctx->has_b_frames < 2)
7815 avctx->has_b_frames= !s->low_delay;
// picture parameter set; the return value of the parser is not checked here
7818 init_get_bits(&s->gb, ptr, bit_length);
7820 decode_picture_parameter_set(h, bit_length);
7824 case NAL_END_SEQUENCE:
7825 case NAL_END_STREAM:
7826 case NAL_FILLER_DATA:
7828 case NAL_AUXILIARY_SLICE:
7831 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7839 * returns the number of bytes consumed for building the current frame
7841 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7842 if(s->flags&CODEC_FLAG_TRUNCATED){
7843 pos -= s->parse_context.last_index;
7844 if(pos<0) pos=0; // FIXME remove (unneeded?)
7848 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7849 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Decoder entry point: feeds buf to decode_nal_units(), updates the POC and
 * frame_num history and reference marking for the finished picture, then
 * chooses a picture from h->delayed_pic[] to return through data/data_size.
 * The return value on the visible exit path comes from get_consumed_bytes().
 * NOTE(review): the embedded line numbers show gaps, so short lines (local
 * declarations, braces, return statements, preprocessor lines) appear to be
 * absent from this listing; confirm control flow against the complete file.
 */
7855 static int decode_frame(AVCodecContext *avctx,
7856 void *data, int *data_size,
7857 uint8_t *buf, int buf_size)
7859 H264Context *h = avctx->priv_data;
7860 MpegEncContext *s = &h->s;
7861 AVFrame *pict = data;
7864 s->flags= avctx->flags;
7865 s->flags2= avctx->flags2;
7867 /* no supplementary picture */
7868 if (buf_size == 0) {
// empty input: output the delayed picture with the lowest poc, scanning up to the next key frame
7872 //FIXME factorize this with the output code below
7873 out = h->delayed_pic[0];
7875 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7876 if(h->delayed_pic[i]->poc < out->poc){
7877 out = h->delayed_pic[i];
// close the gap left by the chosen picture
7881 for(i=out_idx; h->delayed_pic[i]; i++)
7882 h->delayed_pic[i] = h->delayed_pic[i+1];
7885 *data_size = sizeof(AVFrame);
7886 *pict= *(AVFrame*)out;
// truncated input: ff_combine_frame() reassembles frames across calls and may replace buf/buf_size
7892 if(s->flags&CODEC_FLAG_TRUNCATED){
7893 int next= ff_h264_find_frame_end(h, buf, buf_size);
7895 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7897 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
// one-time parse of the avcC extradata (SPS and PPS lists)
7900 if(h->is_avc && !h->got_avcC) {
7901 int i, cnt, nalsize;
7902 unsigned char *p = avctx->extradata;
7903 if(avctx->extradata_size < 7) {
7904 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7908 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7911 /* sps and pps in the avcC always have length coded with 2 bytes,
7912 so put a fake nal_length_size = 2 while parsing them */
7913 h->nal_length_size = 2;
7914 // Decode sps from avcC
7915 cnt = *(p+5) & 0x1f; // Number of sps
// NOTE(review): no visible check that p + nalsize stays within extradata_size in either loop -- confirm
7917 for (i = 0; i < cnt; i++) {
7918 nalsize = AV_RB16(p) + 2;
7919 if(decode_nal_units(h, p, nalsize) < 0) {
7920 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7925 // Decode pps from avcC
7926 cnt = *(p++); // Number of pps
7927 for (i = 0; i < cnt; i++) {
7928 nalsize = AV_RB16(p) + 2;
// NOTE(review): this loop requires the return value to equal nalsize, while the sps loop only tests < 0
7929 if(decode_nal_units(h, p, nalsize) != nalsize) {
7930 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7935 // Now store right nal length size, that will be use to parse all other nals
7936 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7937 // Do not reparse avcC
// non-AVC input: on the first frame, parse the extradata as a NAL stream
7941 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7942 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7946 buf_index=decode_nal_units(h, buf, buf_size);
7950 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7951 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// picture finished: always when not in CHUNKS mode, otherwise once mb_y has reached mb_height
7955 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7956 Picture *out = s->current_picture_ptr;
7957 Picture *cur = s->current_picture_ptr;
7958 Picture *prev = h->delayed_output_pic;
7959 int i, pics, cross_idr, out_of_order, out_idx;
7963 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7964 s->current_picture_ptr->pict_type= s->pict_type;
// remember frame_num state; the POC msb/lsb history is updated for reference pictures (first statement under the if)
7966 h->prev_frame_num_offset= h->frame_num_offset;
7967 h->prev_frame_num= h->frame_num;
7968 if(s->current_picture_ptr->reference){
7969 h->prev_poc_msb= h->poc_msb;
7970 h->prev_poc_lsb= h->poc_lsb;
7972 if(s->current_picture_ptr->reference)
7973 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7979 //FIXME do something with unavailable reference frames
7981 #if 0 //decode order
7982 *data_size = sizeof(AVFrame);
7984 /* Sort B-frames into display order */
// raise has_b_frames to the stream's num_reorder_frames when the SPS signals bitstream restrictions
7986 if(h->sps.bitstream_restriction_flag
7987 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7988 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// append the current picture to the NULL-terminated delayed_pic[] list
7993 while(h->delayed_pic[pics]) pics++;
7995 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7997 h->delayed_pic[pics++] = cur;
7998 if(cur->reference == 0)
// scan for a key frame or poc==0 among the delayed pictures (presumably sets cross_idr -- confirm)
8002 for(i=0; h->delayed_pic[i]; i++)
8003 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// candidate output: lowest poc before the next key frame
8006 out = h->delayed_pic[0];
8008 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8009 if(h->delayed_pic[i]->poc < out->poc){
8010 out = h->delayed_pic[i];
// the candidate would be displayed before the previously output picture
8014 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8015 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8017 else if(prev && pics <= s->avctx->has_b_frames)
8019 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8021 ((!cross_idr && prev && out->poc > prev->poc + 2)
8022 || cur->pict_type == B_TYPE)))
8025 s->avctx->has_b_frames++;
8028 else if(out_of_order)
// remove the chosen picture from the delayed list when it is out of order or the list exceeds has_b_frames
8031 if(out_of_order || pics > s->avctx->has_b_frames){
8032 for(i=out_idx; h->delayed_pic[i]; i++)
8033 h->delayed_pic[i] = h->delayed_pic[i+1];
8039 *data_size = sizeof(AVFrame);
// the previously output picture, if marked reference == 1, is reset to 0 once replaced
8040 if(prev && prev != out && prev->reference == 1)
8041 prev->reference = 0;
8042 h->delayed_output_pic = out;
8046 *pict= *(AVFrame*)out;
8048 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8051 assert(pict->data[0] || !*data_size);
8052 ff_print_debug_info(s, pict);
8053 //printf("out %d\n", (int)pict->data[0]);
8056 /* Return the Picture timestamp as the frame number */
8057 /* we substract 1 because it is added on utils.c */
8058 avctx->frame_number = s->picture_number - 1;
8060 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fills h->mb_avail[] for the current macroblock (s->mb_x, s->mb_y): a
 * neighbour counts as available when its slice_table entry equals
 * h->slice_num.
 * NOTE(review): the embedded line numbers show gaps around the three
 * top-row assignments; presumably they are guarded for the first row and
 * zeroed otherwise, and the function may be compiled out -- confirm against
 * the complete file.
 */
8063 static inline void fill_mb_avail(H264Context *h){
8064 MpegEncContext * const s = &h->s;
// index of the current macroblock in the mb_stride-wide tables
8065 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// neighbours in the row above: left-above, above, right-above
8068 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8069 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8070 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// left neighbour in the same row
8076 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
// entries 4 and 5 are constants
8077 h->mb_avail[4]= 1; //FIXME move out
8078 h->mb_avail[5]= 0; //FIXME move out
/*
 * Self-test driver: round-trips unsigned and signed exp-Golomb codes through
 * a bit buffer, measures the 4x4 DCT/IDCT reconstruction error, and checks
 * that encode_nal()/decode_nal() reproduce a random byte stream.
 * NOTE(review): the enclosing function header, local declarations and many
 * short lines are not visible in this listing (gaps in the embedded line
 * numbers); confirm against the complete file.
 */
8084 #define SIZE (COUNT*40)
8090 // int int_temp[10000];
8092 AVCodecContext avctx;
8094 dsputil_init(&dsp, &avctx);
/* write COUNT unsigned exp-Golomb codes, timing the writer */
8096 init_put_bits(&pb, temp, SIZE);
8097 printf("testing unsigned exp golomb\n");
8098 for(i=0; i<COUNT; i++){
8100 set_ue_golomb(&pb, i);
8101 STOP_TIMER("set_ue_golomb");
8103 flush_put_bits(&pb);
/* read them back and compare each value with its index */
8105 init_get_bits(&gb, temp, 8*SIZE);
8106 for(i=0; i<COUNT; i++){
8109 s= show_bits(&gb, 24);
8112 j= get_ue_golomb(&gb);
8114 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8117 STOP_TIMER("get_ue_golomb");
/* same round trip for signed codes, values centred on 0 (i - COUNT/2) */
8121 init_put_bits(&pb, temp, SIZE);
8122 printf("testing signed exp golomb\n");
8123 for(i=0; i<COUNT; i++){
8125 set_se_golomb(&pb, i - COUNT/2);
8126 STOP_TIMER("set_se_golomb");
8128 flush_put_bits(&pb);
8130 init_get_bits(&gb, temp, 8*SIZE);
8131 for(i=0; i<COUNT; i++){
8134 s= show_bits(&gb, 24);
8137 j= get_se_golomb(&gb);
8138 if(j != i - COUNT/2){
8139 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8142 STOP_TIMER("get_se_golomb");
/* forward DCT of the src-ref difference, rescale, inverse DCT added to ref, then accumulate the error against src */
8145 printf("testing 4x4 (I)DCT\n");
8148 uint8_t src[16], ref[16];
8149 uint64_t error= 0, max_error=0;
8151 for(i=0; i<COUNT; i++){
8153 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8154 for(j=0; j<16; j++){
8155 ref[j]= random()%255;
8156 src[j]= random()%255;
8159 h264_diff_dct_c(block, src, ref, 4);
8162 for(j=0; j<16; j++){
8163 // printf("%d ", block[j]);
8164 block[j]= block[j]*4;
8165 if(j&1) block[j]= (block[j]*4 + 2)/5;
8166 if(j&4) block[j]= (block[j]*4 + 2)/5;
8170 s->dsp.h264_idct_add(ref, block, 4);
8171 /* for(j=0; j<16; j++){
8172 printf("%d ", ref[j]);
8176 for(j=0; j<16; j++){
8177 int diff= FFABS(src[j] - ref[j]);
8180 max_error= FFMAX(max_error, diff);
8183 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8185 printf("testing quantizer\n");
8186 for(qp=0; qp<52; qp++){
8188 src1_block[i]= src2_block[i]= random()%255;
8192 printf("Testing NAL layer\n");
8194 uint8_t bitstream[COUNT];
8195 uint8_t nal[COUNT*2];
8197 memset(&h, 0, sizeof(H264Context));
8199 for(i=0; i<COUNT; i++){
8207 for(j=0; j<COUNT; j++){
8208 bitstream[j]= (random() % 255) + 1;
8211 for(j=0; j<zeros; j++){
8212 int pos= random() % COUNT;
8213 while(bitstream[pos] == 0){
8222 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8224 printf("encoding failed\n");
8228 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8232 if(out_length != COUNT){
8233 printf("incorrect length %d %d\n", out_length, COUNT);
8237 if(consumed != nal_length){
8238 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8242 if(memcmp(bitstream, out, COUNT)){
8243 printf("mismatch\n");
8248 printf("Testing RBSP\n");
8256 static int decode_end(AVCodecContext *avctx)
8258 H264Context *h = avctx->priv_data;
8259 MpegEncContext *s = &h->s;
8261 av_freep(&h->rbsp_buffer[0]);
8262 av_freep(&h->rbsp_buffer[1]);
8263 free_tables(h); //FIXME cleanup init stuff perhaps
8266 // memset(h, 0, sizeof(H264Context));
8272 AVCodec h264_decoder = {
8276 sizeof(H264Context),
8281 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,