2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
/* Picture.reference value for a picture that is no longer used for
 * reference but is still held for delayed (reordered) output. */
#define DELAYED_PIC_REF 4

/* CAVLC VLC tables (one table per context/range as used by the residual
 * decoder; initialization is not visible in this excerpt). */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;
static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];
static VLC run_vlc[6];

/* Forward declarations: SVQ3 dequant/idct helpers and the in-loop
 * deblocking filter (normal and fast variants). */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
/* qp % 6 lookup for all 52 legal 8-bit luma QP values (avoids a runtime
 * modulo in the dequant path).
 * Fix: the declaration was truncated — restore the closing brace. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* qp / 6 lookup for all 52 legal 8-bit luma QP values (avoids a runtime
 * division in the dequant path).
 * Fix: the declaration was truncated — restore the closing brace. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * Fills the per-macroblock neighbour caches from the surrounding
 * macroblocks: intra4x4 prediction modes, non-zero-count values, CBP,
 * motion vectors, reference indices, mvd values (CABAC) and direct-mode
 * flags, including the MBAFF frame/field neighbour remapping.
 *
 * NOTE(review): this excerpt is missing many guard/loop/brace lines
 * (several if()/for() headers and closing braces are not visible here);
 * the code below is kept byte-identical to the excerpt — confirm against
 * the complete file before relying on control flow.
 *
 * @param h           decoder context
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the deblocking
 *                    filter (uses the "< 255" slice-table availability
 *                    test instead of same-slice equality)
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int topleft_partition= -1;

    top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;

    /* MBAFF: remap neighbour MB indices depending on the frame/field
     * coding of the current and neighbouring macroblock pairs. */
    const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
    const int top_pair_xy      = pair_xy     - s->mb_stride;
    const int topleft_pair_xy  = top_pair_xy - 1;
    const int topright_pair_xy = top_pair_xy + 1;
    const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
    const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
    const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
    const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
    const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
    const int bottom = (s->mb_y & 1);
    tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
        top_xy -= s->mb_stride;
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
        topleft_xy -= s->mb_stride;
    } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
        topleft_xy += s->mb_stride;
        // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
        topleft_partition = 0;
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
        topright_xy -= s->mb_stride;
    if (left_mb_frame_flag != curr_mb_frame_flag) {
        left_xy[1] = left_xy[0] = pair_xy - 1;
        if (curr_mb_frame_flag) {
            left_xy[1] += s->mb_stride;

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

    /* Deblocking path: a slice_table value of 255 appears to mark an
     * unavailable macroblock — TODO confirm against slice_table writers. */
    top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
    left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(FRAME_MBAFF && !IS_INTRA(mb_type)){
        /* Re-expand the packed luma nnz bits stored for deblocking. */
        int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
            h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
        for(list=0; list<h->list_count; list++){
            if(USES_LIST(mb_type,list)){
                uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
                uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
                int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
                fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
                fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);

    /* Decode path: neighbours are usable only within the same slice. */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* Sample-availability bitmasks for intra prediction; masked down
         * when a neighbour is missing or excluded by
         * constrained_intra_pred. */
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available&= 0xDF5F;
            h->left_samples_available&= 0x5F5F;
        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        /* Intra4x4 prediction-mode cache from the top and left MBs. */
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

    /* Non-zero-count cache from the top and left MBs. */
    //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
    h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
    h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
    h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
    h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
    h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
    h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
    h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    h->non_zero_count_cache[4+8*0]=
    h->non_zero_count_cache[5+8*0]=
    h->non_zero_count_cache[6+8*0]=
    h->non_zero_count_cache[7+8*0]=
    h->non_zero_count_cache[1+8*0]=
    h->non_zero_count_cache[2+8*0]=
    h->non_zero_count_cache[1+8*3]=
    h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
        h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
        h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
        h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
        h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        h->non_zero_count_cache[3+8*1 + 2*8*i]=
        h->non_zero_count_cache[3+8*2 + 2*8*i]=
        h->non_zero_count_cache[0+8*1 + 8*i]=
        h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

        /* CBP of the top / left neighbours (used by CABAC context models). */
        h->top_cbp = h->cbp_table[top_xy];
    } else if(IS_INTRA(mb_type)) {
        h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
    } else if(IS_INTRA(mb_type)) {
        h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    /* Motion-vector and reference-index caches for inter prediction. */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                h->mv_cache_clean[list]= 1;
            h->mv_cache_clean[list]= 0;

            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            /* mvd cache (CABAC motion-vector-difference contexts). */
            /* XXX beurk, Load mvd */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
            if(USES_LIST(left_type[0], list)){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
            if(USES_LIST(left_type[1], list)){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
            *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

            /* Direct-mode flag cache, B slices only. */
            if(h->slice_type == FF_B_TYPE){
                fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
                if(IS_DIRECT(top_type)){
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                }else if(IS_8X8(top_type)){
                    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                if(IS_DIRECT(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                else if(IS_8X8(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    h->direct_cache[scan8[0] - 1 + 0*8]= 0;
                if(IS_DIRECT(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                else if(IS_8X8(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    h->direct_cache[scan8[0] - 1 + 2*8]= 0;

            /* MBAFF: apply MAP_F2F to every cached neighbour position
             * (the MAP_MVS definition line is not visible in this excerpt). */
            MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
            MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
            MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
            MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

            /* Frame-coded neighbour of a field MB: halve vertical MVs,
             * double the reference index (per-field references). */
            #define MAP_F2F(idx, mb_type)\
                if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] <<= 1;\
                    h->mv_cache[list][idx][1] /= 2;\
                    h->mvd_cache[list][idx][1] /= 2;\
            #define MAP_F2F(idx, mb_type)\
                if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] >>= 1;\
                    h->mv_cache[list][idx][1] <<= 1;\
                    h->mvd_cache[list][idx][1] <<= 1;\

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks whether the top/left neighbour blocks required by each cached
 * intra4x4 prediction mode are available, remapping modes that need an
 * unavailable neighbour (via the top[]/left[] tables) and logging an
 * error for modes that cannot be remapped.
 *
 * NOTE(review): the loop headers and failure returns are not visible in
 * this excerpt; code kept byte-identical.
 *
 * @return presumably 0 on success, negative on an illegal mode — confirm
 *         against the complete function.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* Mode remap tables: -1 keeps the mode, other values substitute a DC
     * variant; entries beyond index 9 are implicitly 0. */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    if(!(h->top_samples_available&0x8000)){
        int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
        h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
    if(!(h->left_samples_available&0x8000)){
        int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
        h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
} //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks whether the neighbours required by a 16x16 luma / chroma intra
 * prediction mode are available and substitutes a DC variant when not.
 *
 * NOTE(review): the range check, remap assignments and return statements
 * are not visible in this excerpt; code kept byte-identical.
 *
 * @param mode requested prediction mode
 * @return presumably the (possibly remapped) mode, or negative on error —
 *         confirm against the complete function.
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* Mode remap tables indexed by mode; -1 keeps the mode. */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
    if(!(h->top_samples_available&0x8000)){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
    if(!(h->left_samples_available&0x8000)){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/**
 * Writes the current MB's non-zero coefficient counts from the cache back
 * to the per-picture non_zero_count array (edge positions read back by
 * fill_caches for neighbouring MBs), and packs the per-4x4-block luma
 * "has coefficients" bits into a 16-bit word at [14] for the deblocker.
 *
 * NOTE(review): the loop header computing v and the surrounding guards
 * are not visible in this excerpt; code kept byte-identical.
 */
static inline void write_back_non_zero_count(H264Context *h){
    const int mb_xy= h->mb_xy;

    /* Luma bottom row and right column. */
    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];

    /* Chroma (Cb) edge positions. */
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];

    /* Chroma (Cr) edge positions. */
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];

    // store all luma nnzs, for deblocking
        v += (!!h->non_zero_count_cache[scan8[i]]) << i;
    *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
/**
 * Gets the predicted number of non-zero coefficients for block n from the
 * left and top cached counts (CAVLC coeff_token context selection).
 *
 * NOTE(review): the declaration/initialization of i and the return
 * statement are not visible in this excerpt; code kept byte-identical.
 *
 * @param n block index
 */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->non_zero_count_cache[index8 - 1];
    const int top = h->non_zero_count_cache[index8 - 8];
    /* i < 64 means both neighbours were available (64 marks "outside"):
     * average them, rounding up. */
    if(i<64) i= (i+1)>>1;

    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the top-right (diagonal) MV candidate C for motion vector
 * prediction, falling back to the top-left when the top-right is not
 * available, with special-case handling of MBAFF frame/field neighbour
 * mismatches (scaling the vertical MV component and reference index).
 *
 * NOTE(review): several condition lines and closing braces of this MBAFF
 * logic are not visible in this excerpt; code kept byte-identical.
 *
 * @param C          out: points at the chosen MV
 * @param i          scan8 index of the current block
 * @param part_width partition width in 4x4 units
 * @return the reference index belonging to *C
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    const uint32_t *mb_types = s->current_picture_ptr->mb_type;
    /* Scratch cache slot scan8[0]-2 holds the (possibly rescaled) MV. */
    *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
    *C = h->mv_cache[list][scan8[0]-2];
    && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
    int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
    if(IS_INTERLACED(mb_types[topright_xy])){
/* Reads the MV at 4x4 position (X4,Y4) of the current picture, applies
 * MV_OP to its vertical component and REF_OP to the reference index, and
 * returns that reference (LIST_NOT_USED when the MB does not use `list`). */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
        SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
    if(topright_ref == PART_NOT_AVAILABLE
       && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
       && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
       && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
        SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
       && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
        // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
        SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);

    /* Non-MBAFF path: use topright if available, otherwise topleft. */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
    tprintf(s->avctx, "topright MV not available\n");
    *C= h->mv_cache[list][ i - 8 - 1 ];
    return h->ref_cache[list][ i - 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
/**
 * Gets the predicted MV for block n as the component-wise median of the
 * left (A), top (B) and diagonal (C) neighbour MVs, with the standard
 * single-match and availability special cases.
 *
 * NOTE(review): the single-match branch bodies and some closing braces
 * are not visible in this excerpt; code kept byte-identical.
 *
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param ref the reference index of the current partition
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    /* Count how many neighbours use the same reference as this partition. */
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
    }else if(top_ref==ref){
    /* Only the left neighbour exists: take A directly. */
    if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted 16x8 MV: the top partition prefers the
 * top neighbour, the bottom partition prefers the left neighbour, falling
 * back to the generic median predictor.
 *
 * NOTE(review): the partition-selection conditions and early returns are
 * not visible in this excerpt; code kept byte-identical.
 *
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
        const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    /* Fallback: generic median prediction. */
    pred_motion(h, n, 4, list, ref, mx, my);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted 8x16 MV: the left partition prefers
 * the left neighbour, the right partition prefers the diagonal neighbour,
 * falling back to the generic median predictor.
 *
 * NOTE(review): the partition-selection conditions and early returns are
 * not visible in this excerpt; code kept byte-identical.
 *
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

        if(diagonal_ref == ref){
    /* Fallback: generic median prediction. */
    pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Gets the predicted MV for a P-skip macroblock: zero when a top/left
 * neighbour is unavailable or is a zero-MV ref-0 block, otherwise the
 * generic median prediction with ref 0.
 *
 * NOTE(review): the body of the zero-MV branch is not visible in this
 * excerpt; code kept byte-identical.
 *
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){

    pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factor for every list-0
 * reference from POC distances (tb/td), clipping as specified, and
 * duplicates the result per field for the field-based table.
 *
 * NOTE(review): the loop variable declaration and the guard around the
 * field-table loop are not visible in this excerpt; code kept
 * byte-identical.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    const int poc = h->s.current_picture_ptr->poc;
    const int poc1 = h->ref_list[1][0].poc;
    for(i=0; i<h->ref_count[0]; i++){
        int poc0 = h->ref_list[0][i].poc;
        int td = av_clip(poc1 - poc0, -128, 127);
        if(td == 0 /* FIXME || pic0 is a long-term ref */){
            /* Equal POCs: unit scale (256 == 1.0 in 8.8 fixed point). */
            h->dist_scale_factor[i] = 256;
            int tb = av_clip(poc - poc0, -128, 127);
            int tx = (16384 + (FFABS(td) >> 1)) / td;
            h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor_field[2*i] =
        h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Stores the current picture's reference counts/POCs and builds the
 * colocated-to-list0 reference mapping used by temporal direct mode
 * (matching each reference of the list-1 colocated picture to the list-0
 * entry with the same POC), plus the per-field variant of that map.
 *
 * NOTE(review): some loop-variable declarations, braces and the guard
 * around the field-map loop are not visible in this excerpt; code kept
 * byte-identical.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    if(cur->pict_type == FF_I_TYPE)
        cur->ref_count[0] = 0;
    if(cur->pict_type != FF_B_TYPE)
        cur->ref_count[1] = 0;
    for(list=0; list<2; list++){
        cur->ref_count[list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
    /* Spatial direct (and non-B pictures) do not need the map. */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            const int poc = ref1->ref_poc[list][i];
            h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
            for(j=0; j<h->ref_count[list]; j++)
                if(h->ref_list[list][j].poc == poc){
                    h->map_col_to_list0[list][i] = j;
    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            j = h->map_col_to_list0[list][i];
            h->map_col_to_list0_field[list][2*i] = 2*j;
            h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Fills h->mv_cache / h->ref_cache (and h->sub_mb_type) for a macroblock
 * coded in B-direct mode, using either spatial or temporal direct
 * prediction depending on h->direct_spatial_mv_pred.
 * @param mb_type in/out: partition flags are rewritten to reflect the
 *        block sizes actually used for the direct prediction.
 */
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* colocated data comes from picture 0 of reference list 1 */
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* pick the sub-partition size from the colocated mb type */
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---------- spatial direct prediction ---------- */
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* top-right unavailable: fall back to top-left neighbour */
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1005 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1007 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour refs in either list: use ref 0 with zero MVs */
1013 if(ref[0] < 0 && ref[1] < 0){
1014 ref[0] = ref[1] = 0;
1015 mv[0][0] = mv[0][1] =
1016 mv[1][0] = mv[1][1] = 0;
1018 for(list=0; list<2; list++){
1020 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1022 mv[list][0] = mv[list][1] = 0;
/* drop the list(s) that ended up without a usable reference */
1028 *mb_type &= ~MB_TYPE_L1;
1029 sub_mb_type &= ~MB_TYPE_L1;
1030 }else if(ref[0] < 0){
1032 *mb_type &= ~MB_TYPE_L0;
1033 sub_mb_type &= ~MB_TYPE_L0;
/* current mb and colocated mb differ in frame/field coding:
 * re-point the colocated pointers at the matching field/frame data */
1036 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1037 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1038 int mb_types_col[2];
1039 int b8_stride = h->b8_stride;
1040 int b4_stride = h->b_stride;
1042 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1044 if(IS_INTERLACED(*mb_type)){
1045 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1046 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1048 l1ref0 -= 2*b8_stride;
1049 l1ref1 -= 2*b8_stride;
1050 l1mv0 -= 4*b4_stride;
1051 l1mv1 -= 4*b4_stride;
/* choose the colocated field whose POC is closest to the current one */
1056 int cur_poc = s->current_picture_ptr->poc;
1057 int *col_poc = h->ref_list[1]->field_poc;
1058 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1059 int dy = 2*col_parity - (s->mb_y&1);
1061 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1062 l1ref0 += dy*b8_stride;
1063 l1ref1 += dy*b8_stride;
1064 l1mv0 += 2*dy*b4_stride;
1065 l1mv1 += 2*dy*b4_stride;
/* per-8x8 spatial direct fill for the mixed frame/field case */
1069 for(i8=0; i8<4; i8++){
1072 int xy8 = x8+y8*b8_stride;
1073 int xy4 = 3*x8+y8*b4_stride;
1076 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1078 h->sub_mb_type[i8] = sub_mb_type;
1080 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1081 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated block near-stationary (|mv| <= 1) and ref 0: zero out the MVs */
1082 if(!IS_INTRA(mb_types_col[y8])
1083 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1084 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1086 a= pack16to32(mv[0][0],mv[0][1]);
1088 b= pack16to32(mv[1][0],mv[1][1]);
1090 a= pack16to32(mv[0][0],mv[0][1]);
1091 b= pack16to32(mv[1][0],mv[1][1]);
1093 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1094 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* whole-mb 16x16 spatial direct */
1096 }else if(IS_16X16(*mb_type)){
1099 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1100 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1101 if(!IS_INTRA(mb_type_col)
1102 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1103 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <= 33 had a bug here; work around unless build is unknown (0) */
1104 && (h->x264_build>33 || !h->x264_build)))){
1106 a= pack16to32(mv[0][0],mv[0][1]);
1108 b= pack16to32(mv[1][0],mv[1][1]);
1110 a= pack16to32(mv[0][0],mv[0][1]);
1111 b= pack16to32(mv[1][0],mv[1][1]);
1113 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1114 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* per-8x8 spatial direct, same frame/field coding as colocated */
1116 for(i8=0; i8<4; i8++){
1117 const int x8 = i8&1;
1118 const int y8 = i8>>1;
1120 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1122 h->sub_mb_type[i8] = sub_mb_type;
1124 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1126 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1127 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1130 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1131 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1132 && (h->x264_build>33 || !h->x264_build)))){
1133 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1134 if(IS_SUB_8X8(sub_mb_type)){
1135 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1136 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1138 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1140 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1143 for(i4=0; i4<4; i4++){
1144 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1145 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1147 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1149 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---------- temporal direct prediction ---------- */
1155 }else{ /* direct temporal mv pred */
1156 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1157 const int *dist_scale_factor = h->dist_scale_factor;
/* field macroblocks use the field-specific mapping/scaling tables */
1160 if(IS_INTERLACED(*mb_type)){
1161 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1162 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1163 dist_scale_factor = h->dist_scale_factor_field;
1165 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1166 /* FIXME assumes direct_8x8_inference == 1 */
1167 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1168 int mb_types_col[2];
1171 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1172 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1173 | (*mb_type & MB_TYPE_INTERLACED);
1174 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1176 if(IS_INTERLACED(*mb_type)){
1177 /* frame to field scaling */
1178 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1179 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1181 l1ref0 -= 2*h->b8_stride;
1182 l1ref1 -= 2*h->b8_stride;
1183 l1mv0 -= 4*h->b_stride;
1184 l1mv1 -= 4*h->b_stride;
1188 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1189 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1191 *mb_type |= MB_TYPE_16x8;
1193 *mb_type |= MB_TYPE_8x8;
1195 /* field to frame scaling */
1196 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1197 * but in MBAFF, top and bottom POC are equal */
1198 int dy = (s->mb_y&1) ? 1 : 2;
1200 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1201 l1ref0 += dy*h->b8_stride;
1202 l1ref1 += dy*h->b8_stride;
1203 l1mv0 += 2*dy*h->b_stride;
1204 l1mv1 += 2*dy*h->b_stride;
1207 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1209 *mb_type |= MB_TYPE_16x16;
1211 *mb_type |= MB_TYPE_8x8;
/* per-8x8 temporal direct with vertical MV scaling between frame/field */
1214 for(i8=0; i8<4; i8++){
1215 const int x8 = i8&1;
1216 const int y8 = i8>>1;
1218 const int16_t (*l1mv)[2]= l1mv0;
1220 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1222 h->sub_mb_type[i8] = sub_mb_type;
/* list1 always points at the colocated picture (ref 0) in temporal direct */
1224 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1225 if(IS_INTRA(mb_types_col[y8])){
1226 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1227 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1232 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1234 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1236 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1239 scale = dist_scale_factor[ref0];
1240 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1243 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1244 int my_col = (mv_col[1]<<y_shift)/2;
/* scaled MV: mv_l0 = (dist_scale_factor * mv_col + 128) >> 8; mv_l1 = mv_l0 - mv_col */
1245 int mx = (scale * mv_col[0] + 128) >> 8;
1246 int my = (scale * my_col + 128) >> 8;
1247 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1248 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1255 /* one-to-one mv scaling */
1257 if(IS_16X16(*mb_type)){
1260 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1261 if(IS_INTRA(mb_type_col)){
/* map the colocated reference into the current list0 */
1264 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1265 : map_col_to_list0[1][l1ref1[0]];
1266 const int scale = dist_scale_factor[ref0];
1267 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1269 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1270 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1272 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1273 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1275 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1276 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1277 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1279 for(i8=0; i8<4; i8++){
1280 const int x8 = i8&1;
1281 const int y8 = i8>>1;
1283 const int16_t (*l1mv)[2]= l1mv0;
1285 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1287 h->sub_mb_type[i8] = sub_mb_type;
1288 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1289 if(IS_INTRA(mb_type_col)){
1290 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1291 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1296 ref0 = l1ref0[x8 + y8*h->b8_stride];
1298 ref0 = map_col_to_list0[0][ref0];
1300 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1303 scale = dist_scale_factor[ref0];
1305 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1306 if(IS_SUB_8X8(sub_mb_type)){
1307 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1308 int mx = (scale * mv_col[0] + 128) >> 8;
1309 int my = (scale * mv_col[1] + 128) >> 8;
1310 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1311 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1313 for(i4=0; i4<4; i4++){
1314 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1315 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1316 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1317 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1318 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1319 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data (motion vectors, reference
 * indices, CABAC mvd residuals and direct flags) from the decode caches
 * back into the picture-wide tables.
 */
1326 static inline void write_back_motion(H264Context *h, int mb_type){
1327 MpegEncContext * const s = &h->s;
1328 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1329 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* list0 unused: mark all four 8x8 ref indices as LIST_NOT_USED */
1332 if(!USES_LIST(mb_type, 0))
1333 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1335 for(list=0; list<h->list_count; list++){
1337 if(!USES_LIST(mb_type, list))
/* copy one 4x4 row of MVs (two 64-bit stores per row) from the cache */
1341 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1344 if( h->pps.cabac ) {
/* skipped mbs carry no mvd; zero the table instead of copying the cache */
1345 if(IS_SKIP(mb_type))
1346 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1349 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1350 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1355 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1356 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1357 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1358 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1359 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B-slices additionally record which 8x8 blocks used direct mode */
1363 if(h->slice_type == FF_B_TYPE && h->pps.cabac){
1364 if(IS_8X8(mb_type)){
1365 uint8_t *direct_table = &h->direct_table[b8_xy];
1366 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1367 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1368 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1374 * Decodes a network abstraction layer unit.
1375 * @param consumed is the number of bytes used as input
1376 * @param length is the length of the array
1377 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1378 * @returns decoded bytes, might be src+1 if no escapes
1380 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
/* first byte: forbidden_zero_bit(1) | nal_ref_idc(2) | nal_unit_type(5) */
1385 // src[0]&0x80; //forbidden bit
1386 h->nal_ref_idc= src[0]>>5;
1387 h->nal_unit_type= src[0]&0x1F;
1391 for(i=0; i<length; i++)
1392 printf("%2X ", src[i]);
/* scan for a 00 00 {00,01,02,03} pattern (start code or escape sequence);
 * stepping by 2 is safe because such a pattern always contains a zero at
 * an even or odd position that the loop will land on */
1394 for(i=0; i+1<length; i+=2){
1395 if(src[i]) continue;
1396 if(i>0 && src[i-1]==0) i--;
1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1399 /* startcode, so we must be past the end */
/* no escape sequence found: the NAL payload can be returned in place */
1406 if(i>=length-1){ //no escaped 0
1407 *dst_length= length;
1408 *consumed= length+1; //+1 for the header
1412 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1413 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1414 dst= h->rbsp_buffer[bufidx];
/* NOTE(review): rbsp_buffer allocation result is used without a NULL
 * check here — av_fast_realloc can fail; verify against the full file */
1420 //printf("decoding esc\n");
1423 //remove escapes (very rare 1:2^22)
1424 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1425 if(src[si+2]==3){ //escape
1430 }else //next start code
1434 dst[di++]= src[si++];
1438 *consumed= si + 1;//+1 for the header
1439 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1444 * identifies the exact end of the bitstream
1445 * @return the length of the trailing, or 0 if damaged
1447 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
/* trailing byte holds the rbsp_stop_one_bit; log it for tracing */
1451 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1461 * idct tranforms the 16 dc values and dequantize them.
1462 * @param qp quantization parameter
1464 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1467 int temp[16]; //FIXME check if this is a good idea
/* the 16 luma DC values are spread through the mb block array; these
 * offsets address them in 4x4 transform order */
1468 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1469 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1471 //memset(block, 64, 2*256);
/* horizontal pass: 4-point butterfly (Hadamard) per row into temp[] */
1474 const int offset= y_offset[i];
1475 const int z0= block[offset+stride*0] + block[offset+stride*4];
1476 const int z1= block[offset+stride*0] - block[offset+stride*4];
1477 const int z2= block[offset+stride*1] - block[offset+stride*5];
1478 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass: butterfly down the columns, then dequantize with
 * (x*qmul + 128) >> 8 and scatter back into the block array */
1487 const int offset= x_offset[i];
1488 const int z0= temp[4*0+i] + temp[4*2+i];
1489 const int z1= temp[4*0+i] - temp[4*2+i];
1490 const int z2= temp[4*1+i] - temp[4*3+i];
1491 const int z3= temp[4*1+i] + temp[4*3+i];
1493 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1494 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1495 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1496 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1502 * dct tranforms the 16 dc values.
1503 * @param qp quantization parameter ??? FIXME
1505 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1506 // const int qmul= dequant_coeff[qp][0];
1508 int temp[16]; //FIXME check if this is a good idea
/* same DC scatter layout as h264_luma_dc_dequant_idct_c */
1509 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1510 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal butterfly pass into temp[] */
1513 const int offset= y_offset[i];
1514 const int z0= block[offset+stride*0] + block[offset+stride*4];
1515 const int z1= block[offset+stride*0] - block[offset+stride*4];
1516 const int z2= block[offset+stride*1] - block[offset+stride*5];
1517 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass; forward transform keeps a >>1 normalization instead of
 * the dequant scaling used in the inverse */
1526 const int offset= x_offset[i];
1527 const int z0= temp[4*0+i] + temp[4*2+i];
1528 const int z1= temp[4*0+i] - temp[4*2+i];
1529 const int z2= temp[4*1+i] - temp[4*3+i];
1530 const int z3= temp[4*1+i] + temp[4*3+i];
1532 block[stride*0 +offset]= (z0 + z3)>>1;
1533 block[stride*2 +offset]= (z1 + z2)>>1;
1534 block[stride*8 +offset]= (z1 - z2)>>1;
1535 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, operating in place on the interleaved block layout.
 */
1543 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1544 const int stride= 16*2;
1545 const int xStride= 16;
/* gather the four DC values from their strided positions */
1548 a= block[stride*0 + xStride*0];
1549 b= block[stride*0 + xStride*1];
1550 c= block[stride*1 + xStride*0];
1551 d= block[stride*1 + xStride*1];
/* butterfly result scaled by qmul with a >>7 normalization */
1558 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1559 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1560 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1561 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients,
 * in place; mirrors chroma_dc_dequant_idct_c without quantization.
 */
1565 static void chroma_dc_dct_c(DCTELEM *block){
1566 const int stride= 16*2;
1567 const int xStride= 16;
1570 a= block[stride*0 + xStride*0];
1571 b= block[stride*0 + xStride*1];
1572 c= block[stride*1 + xStride*0];
1573 d= block[stride*1 + xStride*1];
/* unscaled butterfly output */
1580 block[stride*0 + xStride*0]= (a+c);
1581 block[stride*0 + xStride*1]= (e+b);
1582 block[stride*1 + xStride*0]= (a-c);
1583 block[stride*1 + xStride*1]= (e-b);
1588 * gets the chroma qp.
/* t selects the Cb/Cr table; the &0xff mask keeps the lookup in range
 * (chroma_qp_table has 256 entries per component) */
1590 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1591 return h->pps.chroma_qp_table[t][qscale & 0xff];
1594 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1595 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Forward quantization of one block with a dead-zone bias
 * (1/3 of the quant step for intra, 1/6 for inter).
 * @param separate_dc if set, the DC coefficient uses its own bias/shift
 * @return index of the last non-zero coefficient
 */
1596 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1598 const int * const quant_table= quant_coeff[qscale];
1599 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold1/2 implement the "quantizes to zero" fast path:
 * |level| <= threshold1 maps to 0 without the full divide/shift */
1600 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1601 const unsigned int threshold2= (threshold1<<1);
/* DC handled with a 2-bit-smaller shift (luma DC path) */
1607 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1608 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1609 const unsigned int dc_threshold2= (dc_threshold1<<1);
1611 int level= block[0]*quant_coeff[qscale+18][0];
1612 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1614 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1617 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1620 // last_non_zero = i;
/* DC handled with a 1-bit-larger shift (chroma DC path) */
1625 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1626 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1627 const unsigned int dc_threshold2= (dc_threshold1<<1);
1629 int level= block[0]*quant_table[0];
1630 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1632 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1635 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1638 // last_non_zero = i;
/* AC coefficients in scan order */
1651 const int j= scantable[i];
1652 int level= block[j]*quant_table[j];
1654 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1655 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1656 if(((unsigned)(level+threshold1))>threshold2){
1658 level= (bias + level)>>QUANT_SHIFT;
1661 level= (bias - level)>>QUANT_SHIFT;
1670 return last_non_zero;
/**
 * Performs luma + chroma motion compensation for one partition from one
 * reference list, including edge emulation when the MV points outside
 * the reference picture.
 */
1673 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1674 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1675 int src_x_offset, int src_y_offset,
1676 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1677 MpegEncContext * const s = &h->s;
/* quarter-pel MV; low 2 bits select the qpel interpolation function */
1678 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1679 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1680 const int luma_xy= (mx&3) + ((my&3)<<2);
1681 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1682 uint8_t * src_cb, * src_cr;
1683 int extra_width= h->emu_edge_width;
1684 int extra_height= h->emu_edge_height;
1686 const int full_mx= mx>>2;
1687 const int full_my= my>>2;
1688 const int pic_width = 16*s->mb_width;
1689 const int pic_height = 16*s->mb_height >> MB_FIELD;
1691 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
/* sub-pel interpolation reads 3 extra pixels; shrink the guard band */
1694 if(mx&7) extra_width -= 3;
1695 if(my&7) extra_height -= 3;
/* MV points (partly) outside the picture: build a padded copy first */
1697 if( full_mx < 0-extra_width
1698 || full_my < 0-extra_height
1699 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1700 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1701 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1702 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1706 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1708 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1711 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1714 // chroma offset when predicting from a field of opposite parity
1715 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1716 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1718 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1719 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1722 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1723 src_cb= s->edge_emu_buffer;
1725 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1728 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1729 src_cr= s->edge_emu_buffer;
1731 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: predicts from
 * list0 and/or list1; when both lists are used, the second prediction
 * is averaged into the first via the *_avg operators.
 */
1734 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1735 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1736 int x_offset, int y_offset,
1737 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1738 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1739 int list0, int list1){
1740 MpegEncContext * const s = &h->s;
/* first prediction uses plain "put"; a second one switches to "avg" */
1741 qpel_mc_func *qpix_op= qpix_put;
1742 h264_chroma_mc_func chroma_op= chroma_put;
1744 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1745 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1746 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1747 x_offset += 8*s->mb_x;
1748 y_offset += 8*(s->mb_y >> MB_FIELD);
1751 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1752 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1753 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1754 qpix_op, chroma_op);
1757 chroma_op= chroma_avg;
1761 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1762 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1763 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1764 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition.
 * Bi-directional partitions predict both lists into separate buffers and
 * blend them with explicit or implicit weights; uni-directional
 * partitions apply a single weight/offset in place.
 */
1768 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1769 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770 int x_offset, int y_offset,
1771 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1773 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1774 int list0, int list1){
1775 MpegEncContext * const s = &h->s;
1777 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1778 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1779 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1780 x_offset += 8*s->mb_x;
1781 y_offset += 8*(s->mb_y >> MB_FIELD);
1784 /* don't optimize for luma-only case, since B-frames usually
1785 * use implicit weights => chroma too. */
/* list1 prediction goes into the scratchpad, then gets blended in */
1786 uint8_t *tmp_cb = s->obmc_scratchpad;
1787 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1788 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1789 int refn0 = h->ref_cache[0][ scan8[n] ];
1790 int refn1 = h->ref_cache[1][ scan8[n] ];
1792 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1793 dest_y, dest_cb, dest_cr,
1794 x_offset, y_offset, qpix_put, chroma_put);
1795 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1796 tmp_y, tmp_cb, tmp_cr,
1797 x_offset, y_offset, qpix_put, chroma_put);
/* use_weight==2: implicit weighting; the two weights always sum to 64 */
1799 if(h->use_weight == 2){
1800 int weight0 = h->implicit_weight[refn0][refn1];
1801 int weight1 = 64 - weight0;
1802 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1803 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1804 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-list weights and summed offsets */
1806 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1807 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1808 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1809 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1810 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1811 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1812 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1813 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1814 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict in place, then weight in place */
1817 int list = list1 ? 1 : 0;
1818 int refn = h->ref_cache[list][ scan8[n] ];
1819 Picture *ref= &h->ref_list[list][refn];
1820 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1821 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1822 qpix_put, chroma_put);
1824 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1825 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1826 if(h->use_weight_chroma){
1827 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1828 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1829 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition's motion compensation to the weighted or the
 * standard path. The weighted path is taken for explicit weighting
 * (use_weight==1) or for implicit weighting when the weight pair is not
 * the trivial 32/32 average.
 */
1835 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1836 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1837 int x_offset, int y_offset,
1838 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1839 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1840 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1841 int list0, int list1){
1842 if((h->use_weight==2 && list0 && list1
1843 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1844 || h->use_weight==1)
1845 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put,
1847 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1849 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1850 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1853 static inline void prefetch_motion(H264Context *h, int list){
1854 /* fetch pixels for estimated mv 4 macroblocks ahead
1855 * optimized for 64byte cache lines */
1856 MpegEncContext * const s = &h->s;
1857 const int refn = h->ref_cache[list][scan8[0]];
/* integer-pel position of the 16x16 MV, shifted one mb to the right */
1859 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1860 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1861 uint8_t **src= h->ref_list[list][refn].data;
1862 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1863 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous, so one prefetch spans Cb and Cr */
1864 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1865 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Motion-compensates a whole inter macroblock by dispatching each
 * partition (16x16, 16x8, 8x16, or per-8x8 sub-partitions) to mc_part,
 * selecting the matching qpel/chroma/weight operator sizes.
 */
1869 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1870 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1871 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1872 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1873 MpegEncContext * const s = &h->s;
1874 const int mb_xy= h->mb_xy;
1875 const int mb_type= s->current_picture.mb_type[mb_xy];
1877 assert(IS_INTER(mb_type));
/* prefetch list0 reference data before doing the actual MC */
1879 prefetch_motion(h, 0);
1881 if(IS_16X16(mb_type)){
1882 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1883 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1884 &weight_op[0], &weight_avg[0],
1885 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1886 }else if(IS_16X8(mb_type)){
1887 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1891 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1892 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1893 &weight_op[1], &weight_avg[1],
1894 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1895 }else if(IS_8X16(mb_type)){
1896 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1900 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1901 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1902 &weight_op[2], &weight_avg[2],
1903 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 partitions: each 8x8 block dispatches on its own sub_mb_type */
1907 assert(IS_8X8(mb_type));
1910 const int sub_mb_type= h->sub_mb_type[i];
1912 int x_offset= (i&1)<<2;
1913 int y_offset= (i&2)<<1;
1915 if(IS_SUB_8X8(sub_mb_type)){
1916 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1917 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1918 &weight_op[3], &weight_avg[3],
1919 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1920 }else if(IS_SUB_8X4(sub_mb_type)){
1921 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1926 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1927 &weight_op[4], &weight_avg[4],
1928 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1929 }else if(IS_SUB_4X8(sub_mb_type)){
1930 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1934 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1935 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1936 &weight_op[5], &weight_avg[5],
1937 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1940 assert(IS_SUB_4X4(sub_mb_type));
1942 int sub_x_offset= x_offset + 2*(j&1);
1943 int sub_y_offset= y_offset + (j&2);
1944 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1945 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1946 &weight_op[6], &weight_avg[6],
1947 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* prefetch list1 reference data for the next macroblock */
1953 prefetch_motion(h, 1);
/**
 * Builds the static CAVLC VLC tables (coeff_token, total_zeros, run).
 * Guarded by a "done" flag so the tables are initialized only once
 * across all decoder instances.
 */
1956 static av_cold void decode_init_vlc(void){
1957 static int done = 0;
1963 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1964 &chroma_dc_coeff_token_len [0], 1, 1,
1965 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* one coeff_token table per nC context class */
1968 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1969 &coeff_token_len [i][0], 1, 1,
1970 &coeff_token_bits[i][0], 1, 1, 1);
1974 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1975 &chroma_dc_total_zeros_len [i][0], 1, 1,
1976 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
/* total_zeros has a separate table per total_coeff value (1..15) */
1978 for(i=0; i<15; i++){
1979 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1980 &total_zeros_len [i][0], 1, 1,
1981 &total_zeros_bits[i][0], 1, 1, 1);
1985 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1986 &run_len [i][0], 1, 1,
1987 &run_bits[i][0], 1, 1, 1);
/* run_before with zeros_left > 6 uses the larger run7 table */
1989 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1990 &run_len [6][0], 1, 1,
1991 &run_bits[6][0], 1, 1, 1);
/**
 * Frees all per-context lookup tables, the parameter-set buffers, and
 * each thread context's scratch buffers. av_freep NULLs the pointers,
 * so this is safe to call on a partially-initialized context.
 */
1995 static void free_tables(H264Context *h){
1998 av_freep(&h->intra4x4_pred_mode);
1999 av_freep(&h->chroma_pred_mode_table);
2000 av_freep(&h->cbp_table);
2001 av_freep(&h->mvd_table[0]);
2002 av_freep(&h->mvd_table[1]);
2003 av_freep(&h->direct_table);
2004 av_freep(&h->non_zero_count);
2005 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear it too */
2006 h->slice_table= NULL;
2008 av_freep(&h->mb2b_xy);
2009 av_freep(&h->mb2b8_xy);
2011 for(i = 0; i < MAX_SPS_COUNT; i++)
2012 av_freep(h->sps_buffers + i);
2014 for(i = 0; i < MAX_PPS_COUNT; i++)
2015 av_freep(h->pps_buffers + i);
/* per-thread scratch data */
2017 for(i = 0; i < h->s.avctx->thread_count; i++) {
2018 hx = h->thread_context[i];
2020 av_freep(&hx->top_borders[1]);
2021 av_freep(&hx->top_borders[0]);
2022 av_freep(&hx->s.obmc_scratchpad);
/* Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices.  If both 8x8 scaling matrices are identical the
 * second table aliases the first to save the recomputation. */
2026 static void init_dequant8_coeff_table(H264Context *h){
// transpose the coefficient layout when a non-C (optimized) idct8 is in use
2028 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2029 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2030 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2032 for(i=0; i<2; i++ ){
// matrices identical -> share table 0 and skip rebuilding (break elided from view)
2033 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2034 h->dequant8_coeff[1] = h->dequant8_buffer[0];
// qp = 6*shift + idx: dequant scale doubles every 6 QP steps
2038 for(q=0; q<52; q++){
2039 int shift = ff_div6[q];
2040 int idx = ff_rem6[q];
// inner x loop over the 64 coefficients is elided in this view
2042 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2043 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2044 h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the 4x4 dequantization tables (6 matrices: intra/inter x
 * Y/Cb/Cr) for all 52 QP values.  A matrix equal to an earlier one
 * aliases that earlier table instead of being recomputed. */
2049 static void init_dequant4_coeff_table(H264Context *h){
// transpose layout when a non-C (optimized) 4x4 idct is in use
2051 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2052 for(i=0; i<6; i++ ){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// scan previous matrices j<i for an identical one and share its table
2055 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2056 h->dequant4_coeff[i] = h->dequant4_buffer[j];
// qp = 6*shift + idx; +2 matches the 4x4 idct's normalization
2063 for(q=0; q<52; q++){
2064 int shift = ff_div6[q] + 2;
2065 int idx = ff_rem6[q];
// inner x loop over the 16 coefficients is elided in this view
2067 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2068 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2069 h->pps.scaling_matrix4[i][x]) << shift;
/* Build all dequant tables: 4x4 always, 8x8 only when the PPS enables
 * 8x8 transforms.  For lossless (transform_bypass) streams the qp==0
 * entries are forced to the neutral scale 1<<6 so dequant is a no-op. */
2074 static void init_dequant_tables(H264Context *h){
2076 init_dequant4_coeff_table(h);
2077 if(h->pps.transform_8x8_mode)
2078 init_dequant8_coeff_table(h);
2079 if(h->sps.transform_bypass){
// loops over i and x are elided in this view
2082 h->dequant4_coeff[i][0][x] = 1<<6;
2083 if(h->pps.transform_8x8_mode)
2086 h->dequant8_coeff[i][0][x] = 1<<6;
2093 * needs width/height
/* Allocate the per-picture decode tables shared by all slice threads.
 * Requires s->mb_stride/mb_height to be set (i.e. dimensions known).
 * CHECKED_ALLOCZ presumably jumps to a fail label on OOM (macro defined
 * outside this view) — hence the int return. */
2095 static int alloc_tables(H264Context *h){
2096 MpegEncContext * const s = &h->s;
// one extra MB row as guard band for neighbour accesses
2097 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2100 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2106 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2107 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2108 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2109 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 == "no slice"; the offset leaves a guard border above/left of MB (0,0)
2111 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2112 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2114 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2115 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// map each MB index to its top-left 4x4-block and 8x8-block indices
2116 for(y=0; y<s->mb_height; y++){
2117 for(x=0; x<s->mb_width; x++){
2118 const int mb_xy= x + y*s->mb_stride;
2119 const int b_xy = 4*x + 4*y*h->b_stride;
2120 const int b8_xy= 2*x + 2*y*h->b8_stride;
2122 h->mb2b_xy [mb_xy]= b_xy;
2123 h->mb2b8_xy[mb_xy]= b8_xy;
// scratchpad is sized from linesize, which is unknown here; see frame_start()
2127 s->obmc_scratchpad = NULL;
2129 if(!h->dequant4_coeff[0])
2130 init_dequant_tables(h);
2139 * Mimic alloc_tables(), but for every context thread.
/* Share the tables allocated by alloc_tables() with another slice-thread
 * context: plain pointer copies, no new allocations.  Only buffers that
 * are read-only per-MB or disjointly written may be shared; the
 * scratchpad stays per-thread (allocated lazily in frame_start()). */
2141 static void clone_tables(H264Context *dst, H264Context *src){
2142 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2143 dst->non_zero_count = src->non_zero_count;
2144 dst->slice_table = src->slice_table;
2145 dst->cbp_table = src->cbp_table;
2146 dst->mb2b_xy = src->mb2b_xy;
2147 dst->mb2b8_xy = src->mb2b8_xy;
2148 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2149 dst->mvd_table[0] = src->mvd_table[0];
2150 dst->mvd_table[1] = src->mvd_table[1];
2151 dst->direct_table = src->direct_table;
// per-thread scratch buffer must NOT be shared; allocated in frame_start()
2153 dst->s.obmc_scratchpad = NULL;
2154 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2159 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the buffers each slice thread owns privately: one row of
 * luma+chroma top-border pixels per field parity (16+8+8 bytes per MB).
 * Returns 0 on success, -1 on allocation failure. */
2161 static int context_init(H264Context *h){
2162 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2167 return -1; // free_tables will clean up for us
/* One-time initialisation shared by decoder (and, historically, encoder):
 * copy dimensions/codec id from the AVCodecContext, set up the intra
 * prediction function pointers and default (flat) scaling matrices. */
2170 static av_cold void common_init(H264Context *h){
2171 MpegEncContext * const s = &h->s;
2173 s->width = s->avctx->width;
2174 s->height = s->avctx->height;
2175 s->codec_id= s->avctx->codec->id;
2177 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 == "no PPS seen yet" sentinel for the dequant-table cache
2179 h->dequant_coeff_pps= -1;
2180 s->unrestricted_mv=1;
2181 s->decode=1; //FIXME
// default scaling matrices are flat (all 16) until a PPS/SPS overrides them
2183 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2184 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up the MpegEncContext defaults, pick the
 * output pixel format, and detect avcC-style (length-prefixed)
 * extradata.  Elided lines handle common_init() and the avcC parse. */
2187 static av_cold int decode_init(AVCodecContext *avctx){
2188 H264Context *h= avctx->priv_data;
2189 MpegEncContext * const s = &h->s;
2191 MPV_decode_defaults(s);
2196 s->out_format = FMT_H264;
2197 s->workaround_bugs= avctx->workaround_bugs;
2200 // s->decode_mb= ff_h263_decode_mb;
2201 s->quarter_sample = 1;
// SVQ3 uses full-range ("J") chroma; plain H.264 uses limited range
2204 if(avctx->codec_id == CODEC_ID_SVQ3)
2205 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2207 avctx->pix_fmt= PIX_FMT_YUV420P;
// first extradata byte == 1 marks avcC (MP4-style) config — TODO confirm
// the elided branch switches NAL parsing to length-prefixed mode
2211 if(avctx->extradata_size > 0 && avctx->extradata &&
2212 *(char *)avctx->extradata == 1){
// thread context 0 is the main context itself
2219 h->thread_context[0] = h;
/* Per-frame setup: start the MPV frame and error resilience, reset
 * key_frame (IDR flags are OR'd in later), precompute the per-block
 * pixel offsets for frame and field (MBAFF) layouts, and lazily
 * allocate the per-thread scratchpads now that linesize is known. */
2223 static int frame_start(H264Context *h){
2224 MpegEncContext * const s = &h->s;
2227 if(MPV_frame_start(s, s->avctx) < 0)
2229 ff_er_frame_start(s);
2231 * MPV_frame_start uses pict_type to derive key_frame.
2232 * This is incorrect for H.264; IDR markings must be used.
2233 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2234 * See decode_nal_units().
2236 s->current_picture_ptr->key_frame= 0;
2238 assert(s->linesize && s->uvlinesize);
// block_offset[0..23]: frame-mode offsets; [24..47]: field-mode (doubled stride)
2240 for(i=0; i<16; i++){
2241 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2242 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma blocks: Cb at [16..19]/[40..43], Cr at [20..23]/[44..47]
2245 h->block_offset[16+i]=
2246 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247 h->block_offset[24+16+i]=
2248 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2251 /* can't be in alloc_tables because linesize isn't known there.
2252 * FIXME: redo bipred weight to not require extra buffer? */
2253 for(i = 0; i < s->avctx->thread_count; i++)
2254 if(!h->thread_context[i]->s.obmc_scratchpad)
2255 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2257 /* some macroblocks will be accessed before they're available */
2258 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2259 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2261 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Save the bottom pixel row of the current macroblock into top_borders[0]
 * (to serve as the "top" reference for the MB row below) and its right
 * column into left_border.  'simple' skips the gray-only check. */
2265 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2266 MpegEncContext * const s = &h->s;
// step back one row so index i*linesize addresses the row above
2270 src_cb -= uvlinesize;
2271 src_cr -= uvlinesize;
2273 // There are two lines saved, the line above the top macroblock of a pair,
2274 // and the line above the bottom macroblock
// left_border[0] is the top-left corner pixel, taken from the saved top row
2275 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2276 for(i=1; i<17; i++){
2277 h->left_border[i]= src_y[15+i* linesize];
// save the MB's bottom luma row (16 pixels) as next row's top border
2280 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2281 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// chroma borders, skipped in gray-only decoding
2283 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2284 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2285 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2287 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2288 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
// Cb stored at offset 16, Cr at offset 24 within each top_borders entry
2290 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2291 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg!=0) or copy the saved border pixels with the deblocked /
 * undeblocked picture edges around the current MB, so intra prediction
 * sees undeblocked neighbours.  Borders belonging to a different slice
 * are skipped when deblocking_filter==2 (slice-local filtering). */
2295 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2296 MpegEncContext * const s = &h->s;
2303 if(h->deblocking_filter == 2) {
// mode 2: only exchange across edges within the same slice
2305 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2306 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2308 deblock_left = (s->mb_x > 0);
2309 deblock_top = (s->mb_y > 0);
// move to the pixel diagonally above-left of the MB
2312 src_y -= linesize + 1;
2313 src_cb -= uvlinesize + 1;
2314 src_cr -= uvlinesize + 1;
// XCHG body elided in this view; presumably swap-or-copy via temp — TODO confirm
2316 #define XCHG(a,b,t,xchg)\
// left column: skip row 0 (the corner) when the top edge isn't exchanged
2323 for(i = !deblock_top; i<17; i++){
2324 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2329 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2330 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right neighbour border, needed for diagonal intra prediction
2331 if(s->mb_x+1 < s->mb_width){
2332 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2336 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2338 for(i = !deblock_top; i<9; i++){
2339 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2340 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2344 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2345 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border(): save the bottom TWO pixel rows of
 * a macroblock pair (one per parity, into top_borders[0]/[1]) and the
 * pair's right column (34 luma + 2x18 chroma pixels) into left_border. */
2350 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2351 MpegEncContext * const s = &h->s;
// step back two rows so i*linesize indexes the rows above the pair bottom
2354 src_y -= 2 * linesize;
2355 src_cb -= 2 * uvlinesize;
2356 src_cr -= 2 * uvlinesize;
2358 // There are two lines saved, the line above the top macroblock of a pair,
2359 // and the line above the bottom macroblock
2360 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2361 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// 32 luma rows of the MB pair
2362 for(i=2; i<34; i++){
2363 h->left_border[i]= src_y[15+i* linesize];
// rows 32/33 are the two bottom rows, one per field parity
2366 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2367 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2368 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2369 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
// chroma borders, skipped in gray-only decoding
2371 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2372 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2373 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2374 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2375 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2376 for(i=2; i<18; i++){
2377 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2378 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2380 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2381 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2382 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2383 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border(): swap/copy the saved borders of a
 * macroblock pair (two top rows, 34-pixel left column) so intra
 * prediction of the pair sees undeblocked neighbour pixels. */
2387 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2388 MpegEncContext * const s = &h->s;
2391 int deblock_left = (s->mb_x > 0);
// > 1 because a pair occupies two MB rows
2392 int deblock_top = (s->mb_y > 1);
2394 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// move to the pixel above-left of the pair (two rows up, one column left)
2396 src_y -= 2 * linesize + 1;
2397 src_cb -= 2 * uvlinesize + 1;
2398 src_cr -= 2 * uvlinesize + 1;
// XCHG body elided in this view; presumably swap-or-copy via temp — TODO confirm
2400 #define XCHG(a,b,t,xchg)\
// skip the two corner rows when the top edge isn't exchanged
2407 for(i = (!deblock_top)<<1; i<34; i++){
2408 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2413 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2414 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2415 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2416 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// top-right neighbour borders for diagonal intra prediction
2417 if(s->mb_x+1 < s->mb_width){
2418 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2419 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2423 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2425 for(i = (!deblock_top) << 1; i<18; i++){
2426 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2427 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2431 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2432 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2433 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2434 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* Reconstruct one macroblock: prediction (intra or motion compensation),
 * residual transform/add, then deblocking.  'simple' is a compile-time
 * constant (via the two wrappers below) that lets the compiler strip the
 * MBAFF / PCM / SVQ3 / gray-only paths from the fast variant. */
2439 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2440 MpegEncContext * const s = &h->s;
2441 const int mb_x= s->mb_x;
2442 const int mb_y= s->mb_y;
2443 const int mb_xy= h->mb_xy;
2444 const int mb_type= s->current_picture.mb_type[mb_xy];
2445 uint8_t *dest_y, *dest_cb, *dest_cr;
2446 int linesize, uvlinesize /*dct_offset*/;
2448 int *block_offset = &h->block_offset[0];
// bottom macroblock of an MBAFF pair?
2449 const unsigned int bottom = mb_y & 1;
2450 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2451 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2452 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2454 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2455 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2456 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2458 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2459 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: doubled strides, field block offsets, and for the
// bottom field shift the base pointers up into the interleaved rows
2461 if (!simple && MB_FIELD) {
2462 linesize = h->mb_linesize = s->linesize * 2;
2463 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2464 block_offset = &h->block_offset[24];
2465 if(mb_y&1){ //FIXME move out of this func?
2466 dest_y -= s->linesize*15;
2467 dest_cb-= s->uvlinesize*7;
2468 dest_cr-= s->uvlinesize*7;
// remap ref indices so field refs are distinguished by parity
2472 for(list=0; list<h->list_count; list++){
2473 if(!USES_LIST(mb_type, list))
2475 if(IS_16X16(mb_type)){
2476 int8_t *ref = &h->ref_cache[list][scan8[0]];
2477 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2479 for(i=0; i<16; i+=4){
2480 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2481 int ref = h->ref_cache[list][scan8[i]];
2483 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2489 linesize = h->mb_linesize = s->linesize;
2490 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2491 // dct_offset = s->linesize * 16;
// pick the residual-add functions once for the whole MB
2494 if(transform_bypass){
2496 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2497 }else if(IS_8x8DCT(mb_type)){
2498 idct_dc_add = s->dsp.h264_idct8_dc_add;
2499 idct_add = s->dsp.h264_idct8_add;
2501 idct_dc_add = s->dsp.h264_idct_dc_add;
2502 idct_add = s->dsp.h264_idct_add;
// MBAFF intra: borrow the undeblocked borders of the MB pair above
2505 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2506 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2507 int mbt_y = mb_y&~1;
2508 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2509 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2510 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2511 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM: raw samples were parsed into h->mb, just copy them out
2514 if (!simple && IS_INTRA_PCM(mb_type)) {
2517 // The pixels are stored in h->mb array in the same order as levels,
2518 // copy them in output in the correct order.
2519 for(i=0; i<16; i++) {
2520 for (y=0; y<4; y++) {
2521 for (x=0; x<4; x++) {
2522 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2526 for(i=16; i<16+4; i++) {
2527 for (y=0; y<4; y++) {
2528 for (x=0; x<4; x++) {
2529 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2533 for(i=20; i<20+4; i++) {
2534 for (y=0; y<4; y++) {
2535 for (x=0; x<4; x++) {
2536 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// intra prediction path: temporarily restore undeblocked borders
2541 if(IS_INTRA(mb_type)){
2542 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2543 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2545 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2546 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2547 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2550 if(IS_INTRA4x4(mb_type)){
2551 if(simple || !s->encoding){
// Intra 8x8: predict and add residual per 8x8 block
2552 if(IS_8x8DCT(mb_type)){
2553 for(i=0; i<16; i+=4){
2554 uint8_t * const ptr= dest_y + block_offset[i];
2555 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2556 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2557 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2558 (h->topright_samples_available<<i)&0x4000, linesize);
// DC-only block gets the cheap dc_add path
2560 if(nnz == 1 && h->mb[i*16])
2561 idct_dc_add(ptr, h->mb + i*16, linesize);
2563 idct_add(ptr, h->mb + i*16, linesize);
// Intra 4x4: predict and add residual per 4x4 block
2567 for(i=0; i<16; i++){
2568 uint8_t * const ptr= dest_y + block_offset[i];
2570 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// diagonal modes need top-right samples; if unavailable, the
// standard says to replicate the rightmost available top pixel
2573 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2574 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2575 assert(mb_y || linesize <= block_offset[i]);
2576 if(!topright_avail){
2577 tr= ptr[3 - linesize]*0x01010101;
2578 topright= (uint8_t*) &tr;
2580 topright= ptr + 4 - linesize;
2584 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2585 nnz = h->non_zero_count_cache[ scan8[i] ];
2588 if(nnz == 1 && h->mb[i*16])
2589 idct_dc_add(ptr, h->mb + i*16, linesize);
2591 idct_add(ptr, h->mb + i*16, linesize);
// SVQ3 uses its own residual transform
2593 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: one full-MB luma prediction plus hadamard-DC dequant
2598 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2600 if(!transform_bypass)
2601 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2603 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// put the deblocked borders back
2605 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2606 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter path: motion compensation
2608 hl_motion(h, dest_y, dest_cb, dest_cr,
2609 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2610 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2611 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add luma residuals (intra4x4 already added them above)
2615 if(!IS_INTRA4x4(mb_type)){
2617 if(IS_INTRA16x16(mb_type)){
2618 for(i=0; i<16; i++){
2619 if(h->non_zero_count_cache[ scan8[i] ])
2620 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2621 else if(h->mb[i*16])
2622 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// inter: step 4 blocks at a time for 8x8 DCT, 1 for 4x4
2625 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2626 for(i=0; i<16; i+=di){
2627 int nnz = h->non_zero_count_cache[ scan8[i] ];
2629 if(nnz==1 && h->mb[i*16])
2630 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2632 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// SVQ3 luma residual path
2637 for(i=0; i<16; i++){
2638 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2639 uint8_t * const ptr= dest_y + block_offset[i];
2640 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residuals (skipped in gray-only decoding)
2646 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2647 uint8_t *dest[2] = {dest_cb, dest_cr};
2648 if(transform_bypass){
2649 idct_add = idct_dc_add = s->dsp.add_pixels4;
2651 idct_add = s->dsp.h264_idct_add;
2652 idct_dc_add = s->dsp.h264_idct_dc_add;
// chroma DC uses the dedicated 2x2 transform; matrices 1/2 intra, 4/5 inter
2653 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2654 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
// blocks 16..19 are Cb, 20..23 are Cr -> dest[(i&4)>>2]
2657 for(i=16; i<16+8; i++){
2658 if(h->non_zero_count_cache[ scan8[i] ])
2659 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2660 else if(h->mb[i*16])
2661 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2664 for(i=16; i<16+8; i++){
2665 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2666 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2667 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking: MBAFF filters a whole MB pair once the bottom MB is done
2673 if(h->deblocking_filter) {
2674 if (!simple && FRAME_MBAFF) {
2675 //FIXME try deblocking one mb at a time?
2676 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2677 const int mb_y = s->mb_y - 1;
2678 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2679 const int mb_xy= mb_x + mb_y*s->mb_stride;
2680 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2681 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// wait until the bottom MB of the pair is reconstructed
2682 if (!bottom) return;
2683 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2684 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2685 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2687 if(IS_INTRA(mb_type_top | mb_type_bottom))
2688 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2690 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// temporarily rewind context to the top MB of the pair and filter it
2693 s->mb_y--; h->mb_xy -= s->mb_stride;
2694 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2695 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2696 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2697 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2698 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
// restore and filter the bottom MB
2700 s->mb_y++; h->mb_xy += s->mb_stride;
2701 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2702 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2703 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2704 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2705 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// non-MBAFF: save borders and use the fast per-MB filter
2707 tprintf(h->s.avctx, "call filter_mb\n");
2708 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2709 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2710 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2716 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: instantiate hl_decode_mb_internal with simple=1 so the
 * compiler removes the MBAFF/PCM/SVQ3/gray branches. */
2718 static void hl_decode_mb_simple(H264Context *h){
2719 hl_decode_mb_internal(h, 1);
2723 * Process a macroblock; this handles edge cases, such as interlacing.
/* Full path: simple=0 keeps all edge-case handling (interlacing, PCM,
 * SVQ3, gray).  av_noinline keeps the big inlined body out of callers. */
2725 static void av_noinline hl_decode_mb_complex(H264Context *h){
2726 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant,
 * depending on features needed for this MB/stream. */
2729 static void hl_decode_mb(H264Context *h){
2730 MpegEncContext * const s = &h->s;
2731 const int mb_xy= h->mb_xy;
2732 const int mb_type= s->current_picture.mb_type[mb_xy];
// any of these features forces the complex path
2733 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2734 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
// encoder-only mode with decoding disabled: nothing to reconstruct
2736 if(ENABLE_H264_ENCODER && !s->decode)
2740 hl_decode_mb_complex(h);
2741 else hl_decode_mb_simple(h);
/* Convert a frame Picture in-place into a single-field view: double the
 * strides so only every other line is addressed, offset the data
 * pointers by one line for the bottom field, and mark the reference
 * structure as the given parity. */
2744 static void pic_as_field(Picture *pic, const int parity){
2746 for (i = 0; i < 4; ++i) {
2747 if (parity == PICT_BOTTOM_FIELD)
2748 pic->data[i] += pic->linesize[i];
2749 pic->reference = parity;
2750 pic->linesize[i] *= 2;
/* Copy src into dest as a field of the given parity, if src contains a
 * reference field of that parity.  Returns 1 when a field was emitted,
 * 0 otherwise (copy + pic_as_field elided in this view). */
2754 static int split_field_copy(Picture *dest, Picture *src,
2755 int parity, int id_add){
// src->reference is a PICT_* bitmask; match requested parity
2756 int match = !!(src->reference & parity);
2760 pic_as_field(dest, parity);
// pic_id adjustment distinguishes same/opposite-parity entries
2762 dest->pic_id += id_add;
2769 * Split one reference list into field parts, interleaving by parity
2770 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2771 * set to look at the actual start of data for that field.
2773 * @param dest output list
2774 * @param dest_len maximum number of fields to put in dest
2775 * @param src the source reference list containing fields and/or field pairs
2776 * (aka short_ref/long_ref, or
2777 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2778 * @param src_len number of Picture's in source (pairs and unmatched fields)
2779 * @param parity the parity of the picture being decoded/needing
2780 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2781 * @return number of fields placed in dest
/* Interleave one half (short-term or long-term) of a frame reference
 * list into fields, alternating parities starting with the current
 * field's parity, as per H.264 8.2.4.2.5.  Returns the number of
 * fields written (tail of the function elided in this view). */
2783 static int split_field_half_ref_list(Picture *dest, int dest_len,
2784 Picture *src, int src_len, int parity){
// start by looking for a same-parity field
2785 int same_parity = 1;
2791 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2792 if (same_parity && same_i < src_len) {
2793 field_output = split_field_copy(dest + out_i, src + same_i,
// no same-parity field found -> try this source entry's opposite field next
2795 same_parity = !field_output;
2798 } else if (opp_i < src_len) {
2799 field_output = split_field_copy(dest + out_i, src + opp_i,
2800 PICT_FRAME - parity, 0);
// after emitting an opposite-parity field, go back to same parity
2801 same_parity = field_output;
2813 * Split the reference frame list into a reference field list.
2814 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2815 * The input list contains both reference field pairs and
2816 * unmatched reference fields; it is ordered as spec describes
2817 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2818 * unmatched field pairs are also present. Conceptually this is equivalent
2819 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2821 * @param dest output reference list where ordered fields are to be placed
2822 * @param dest_len max number of fields to place at dest
2823 * @param src source reference list, as described above
2824 * @param src_len number of pictures (pairs and unmatched fields) in src
2825 * @param parity parity of field being currently decoded
2826 * (one of PICT_{TOP,BOTTOM}_FIELD)
2827 * @param long_i index into src array that holds first long reference picture,
2828 * or src_len if no long refs present.
/* Split a combined (short-term + long-term) frame reference list into a
 * field reference list: interleave the short-term part [0, long_i) first,
 * then the long-term part [long_i, src_len).  Returns total fields. */
2830 static int split_field_ref_list(Picture *dest, int dest_len,
2831 Picture *src, int src_len,
2832 int parity, int long_i){
2834 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
// long-term refs go after all short-term fields (dest advance elided)
2838 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2839 src_len - long_i, parity);
2844 * fills the default_ref_list.
/* Build the default reference picture lists (L0, and L1 for B slices)
 * from short_ref/long_ref, per H.264 8.2.4.2.  For B slices short-term
 * refs are sorted by POC relative to the current picture; for field
 * pictures the frame lists are then split into field lists. */
2846 static int fill_default_ref_list(H264Context *h){
2847 MpegEncContext * const s = &h->s;
2849 int smallest_poc_greater_than_current = -1;
2851 Picture sorted_short_ref[32];
2852 Picture field_entry_list[2][32];
2853 Picture *frame_list[2];
// field decode: build frame lists in a temp and split them later;
// frame decode: build directly into default_ref_list
2855 if (FIELD_PICTURE) {
2856 structure_sel = PICT_FRAME;
2857 frame_list[0] = field_entry_list[0];
2858 frame_list[1] = field_entry_list[1];
2861 frame_list[0] = h->default_ref_list[0];
2862 frame_list[1] = h->default_ref_list[1];
2865 if(h->slice_type==FF_B_TYPE){
2872 /* sort frame according to poc in B slice */
// selection sort: repeatedly pick the smallest POC above the last limit
2873 for(out_i=0; out_i<h->short_ref_count; out_i++){
2875 int best_poc=INT_MAX;
2877 for(i=0; i<h->short_ref_count; i++){
2878 const int poc= h->short_ref[i]->poc;
2879 if(poc > limit && poc < best_poc){
2885 assert(best_i != INT_MIN);
2888 sorted_short_ref[out_i]= *h->short_ref[best_i];
2889 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where future (greater-POC) refs begin in the sorted list
2890 if (-1 == smallest_poc_greater_than_current) {
2891 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2892 smallest_poc_greater_than_current = out_i;
2897 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2899 // find the largest poc
// L0 walks past refs descending then future ascending; L1 the reverse
2900 for(list=0; list<2; list++){
2903 int step= list ? -1 : 1;
2905 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2907 while(j<0 || j>= h->short_ref_count){
2908 if(j != -99 && step == (list ? -1 : 1))
2911 j= smallest_poc_greater_than_current + (step>>1);
// skip refs whose structure doesn't qualify for this picture type
2913 sel = sorted_short_ref[j].reference | structure_sel;
2914 if(sel != PICT_FRAME) continue;
2915 frame_list[list][index ]= sorted_short_ref[j];
2916 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2918 short_len[list] = index;
// append long-term refs, pic_id = long-term index
2920 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2922 if(h->long_ref[i] == NULL) continue;
2923 sel = h->long_ref[i]->reference | structure_sel;
2924 if(sel != PICT_FRAME) continue;
2926 frame_list[ list ][index ]= *h->long_ref[i];
2927 frame_list[ list ][index++].pic_id= i;
// field pictures: split each frame list into a field list
2932 for(list=0; list<2; list++){
2934 len[list] = split_field_ref_list(h->default_ref_list[list],
2938 s->picture_structure,
2941 // swap the two first elements of L1 when L0 and L1 are identical
2942 if(list && len[0] > 1 && len[0] == len[1])
2943 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2945 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
// zero any unused tail entries
2949 if(len[list] < h->ref_count[ list ])
2950 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
// P/SP slices: single list L0, short-term refs in stored order
2957 for(i=0; i<h->short_ref_count; i++){
2959 sel = h->short_ref[i]->reference | structure_sel;
2960 if(sel != PICT_FRAME) continue;
2961 frame_list[0][index ]= *h->short_ref[i];
2962 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2965 for(i = 0; i < 16; i++){
2967 if(h->long_ref[i] == NULL) continue;
2968 sel = h->long_ref[i]->reference | structure_sel;
2969 if(sel != PICT_FRAME) continue;
2970 frame_list[0][index ]= *h->long_ref[i];
2971 frame_list[0][index++].pic_id= i;
2975 index = split_field_ref_list(h->default_ref_list[0],
2976 h->ref_count[0], frame_list[0],
2977 index, s->picture_structure,
2980 if(index < h->ref_count[0])
2981 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
// trace output of the final lists
2984 for (i=0; i<h->ref_count[0]; i++) {
2985 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2987 if(h->slice_type==FF_B_TYPE){
2988 for (i=0; i<h->ref_count[1]; i++) {
2989 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2996 static void print_short_term(H264Context *h);
2997 static void print_long_term(H264Context *h);
3000 * Extract structure information about the picture described by pic_num in
3001 * the current decoding context (frame or field). Note that pic_num is
3002 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3003 * @param pic_num picture number for which to extract structure information
3004 * @param structure one of PICT_XXX describing structure of picture
3006 * @return frame number (short term) or long term index of picture
3007 * described by pic_num
/* Split an unwrapped pic_num into its frame_num / long-term index and
 * field structure.  For field decoding, an even/odd low bit selects the
 * same or opposite field parity (parity test elided in this view). */
3009 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3010 MpegEncContext * const s = &h->s;
// default: same structure as the picture being decoded
3012 *structure = s->picture_structure;
3015 /* opposite field */
3016 *structure ^= PICT_FRAME;
/* Parses ref_pic_list_reordering() from the slice header and applies the
 * reordering commands to h->ref_list, starting from the default lists.
 * Returns 0 on success. (Several interior lines are elided in this excerpt.) */
3023 static int decode_ref_pic_list_reordering(H264Context *h){
3024 MpegEncContext * const s = &h->s;
3025 int list, index, pic_structure;
3027 print_short_term(h);
/* Intra slices carry no reference lists; nothing to reorder. */
3029 if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
3031 for(list=0; list<h->list_count; list++){
/* Start from the default reference list built earlier. */
3032 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
3034 if(get_bits1(&s->gb)){
3035 int pred= h->curr_pic_num;
3037 for(index=0; ; index++){
3038 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3039 unsigned int pic_id;
3041 Picture *ref = NULL;
/* idc==3 terminates the reordering command list. */
3043 if(reordering_of_pic_nums_idc==3)
3046 if(index >= h->ref_count[list]){
3047 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3051 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: reorder by short-term picture number difference. */
3052 if(reordering_of_pic_nums_idc<2){
3053 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3056 if(abs_diff_pic_num > h->max_pic_num){
3057 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc==0 subtracts, idc==1 adds; wrap modulo max_pic_num. */
3061 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3062 else pred+= abs_diff_pic_num;
3063 pred &= h->max_pic_num - 1;
3065 frame_num = pic_num_extract(h, pred, &pic_structure);
/* Search the short-term list, newest entries last, for a matching
 * frame_num with a compatible field/frame structure. */
3067 for(i= h->short_ref_count-1; i>=0; i--){
3068 ref = h->short_ref[i];
3069 assert(ref->reference);
3070 assert(!ref->long_ref);
3071 if(ref->data[0] != NULL &&
3072 ref->frame_num == frame_num &&
3073 (ref->reference & pic_structure) &&
3074 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc==2: reorder by long-term picture index. */
3081 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3083 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3086 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3089 ref = h->long_ref[long_idx];
3090 assert(!(ref && !ref->reference));
3091 if(ref && (ref->reference & pic_structure)){
3092 ref->pic_id= pic_id;
3093 assert(ref->long_ref);
3101 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3102 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Shift existing entries down to make room, then insert the found
 * reference at the current index. */
3104 for(i=index; i+1<h->ref_count[list]; i++){
3105 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3108 for(; i > index; i--){
3109 h->ref_list[list][i]= h->ref_list[list][i-1];
3111 h->ref_list[list][index]= *ref;
/* For field decoding, convert the inserted frame entry to a field view. */
3113 pic_as_field(&h->ref_list[list][index], pic_structure);
3117 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* Fill any hole (missing reference) with the current picture so later
 * prediction does not dereference a NULL frame. */
3123 for(list=0; list<h->list_count; list++){
3124 for(index= 0; index < h->ref_count[list]; index++){
3125 if(!h->ref_list[list][index].data[0])
3126 h->ref_list[list][index]= s->current_picture;
/* Temporal direct mode needs the distance scale factors. */
3130 if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
3131 direct_dist_scale_factor(h);
3132 direct_ref_list_init(h);
/* For MBAFF frames: derive per-field reference entries (stored at
 * ref_list[list][16+2*i] / [16+2*i+1]) from each frame reference, and
 * duplicate the corresponding weighted-prediction tables.
 * (Interior lines are elided in this excerpt.) */
3136 static void fill_mbaff_ref_list(H264Context *h){
3138 for(list=0; list<2; list++){ //FIXME try list_count
3139 for(i=0; i<h->ref_count[list]; i++){
3140 Picture *frame = &h->ref_list[list][i];
/* Two field entries per frame reference, stored after slot 16. */
3141 Picture *field = &h->ref_list[list][16+2*i];
/* Field pictures use doubled line stride over the frame buffer. */
3144 field[0].linesize[j] <<= 1;
3145 field[0].reference = PICT_TOP_FIELD;
3146 field[1] = field[0];
/* Bottom field starts one (frame) line into the buffer. */
3148 field[1].data[j] += frame->linesize[j];
3149 field[1].reference = PICT_BOTTOM_FIELD;
/* Copy the explicit luma/chroma weights of the frame ref to both fields. */
3151 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3152 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3154 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3155 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* Implicit weights: replicate across both field indices in each dimension. */
3159 for(j=0; j<h->ref_count[1]; j++){
3160 for(i=0; i<h->ref_count[0]; i++)
3161 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3162 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3163 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parses pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets for each reference in each list. Missing entries get
 * the defaults 1<<log2_denom weight and 0 offset.
 * (Interior lines are elided in this excerpt.) */
3167 static int pred_weight_table(H264Context *h){
3168 MpegEncContext * const s = &h->s;
3170 int luma_def, chroma_def;
3173 h->use_weight_chroma= 0;
3174 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3175 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* Default weight is the identity at the chosen denominator. */
3176 luma_def = 1<<h->luma_log2_weight_denom;
3177 chroma_def = 1<<h->chroma_log2_weight_denom;
3179 for(list=0; list<2; list++){
3180 for(i=0; i<h->ref_count[list]; i++){
3181 int luma_weight_flag, chroma_weight_flag;
3183 luma_weight_flag= get_bits1(&s->gb);
3184 if(luma_weight_flag){
3185 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3186 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* Only flag use_weight when the parsed values differ from defaults. */
3187 if( h->luma_weight[list][i] != luma_def
3188 || h->luma_offset[list][i] != 0)
3191 h->luma_weight[list][i]= luma_def;
3192 h->luma_offset[list][i]= 0;
3195 chroma_weight_flag= get_bits1(&s->gb);
3196 if(chroma_weight_flag){
3199 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3200 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3201 if( h->chroma_weight[list][i][j] != chroma_def
3202 || h->chroma_offset[list][i][j] != 0)
3203 h->use_weight_chroma= 1;
3208 h->chroma_weight[list][i][j]= chroma_def;
3209 h->chroma_offset[list][i][j]= 0;
/* Only B slices carry weights for list 1. */
3213 if(h->slice_type != FF_B_TYPE) break;
3215 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Computes the implicit bi-prediction weight table for B slices
 * (weighted_bipred_idc==2): weights derived from POC distances, per
 * H.264 8.4.2.3.2. (Interior lines are elided in this excerpt.) */
3219 static void implicit_weight_table(H264Context *h){
3220 MpegEncContext * const s = &h->s;
3222 int cur_poc = s->current_picture_ptr->poc;
/* Fast path: single symmetric reference pair — equal 32/32 weights,
 * so implicit weighting degenerates to plain averaging. */
3224 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3225 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3227 h->use_weight_chroma= 0;
3232 h->use_weight_chroma= 2;
3233 h->luma_log2_weight_denom= 5;
3234 h->chroma_log2_weight_denom= 5;
3236 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3237 int poc0 = h->ref_list[0][ref0].poc;
3238 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3239 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb: clipped POC distances; tx and dist_scale_factor follow the
 * fixed-point formulas from the spec. */
3240 int td = av_clip(poc1 - poc0, -128, 127);
3242 int tb = av_clip(cur_poc - poc0, -128, 127);
3243 int tx = (16384 + (FFABS(td) >> 1)) / td;
3244 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range scale factors fall back to the neutral weight 32. */
3245 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3246 h->implicit_weight[ref0][ref1] = 32;
3248 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3250 h->implicit_weight[ref0][ref1] = 32;
3256 * Mark a picture as no longer needed for reference. The refmask
3257 * argument allows unreferencing of individual fields or the whole frame.
3258 * If the picture becomes entirely unreferenced, but is being held for
3259 * display purposes, it is marked as such.
3260 * @param refmask mask of fields to unreference; the mask is bitwise
3261 * anded with the reference marking of pic
3262 * @return non-zero if pic becomes entirely unreferenced (except possibly
3263 * for display purposes) zero if one of the fields remains in
3266 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* Clear the masked reference bits; if any remain set, the picture is
 * still referenced (one field still in use). */
3268 if (pic->reference &= refmask) {
/* Fully unreferenced, but keep it alive while scheduled for display:
 * mark with DELAYED_PIC_REF instead of 0. */
3271 if(pic == h->delayed_output_pic)
3272 pic->reference=DELAYED_PIC_REF;
3274 for(i = 0; h->delayed_pic[i]; i++)
3275 if(pic == h->delayed_pic[i]){
3276 pic->reference=DELAYED_PIC_REF;
3285 * instantaneous decoder refresh.
/* IDR: drop every long-term and short-term reference, emptying both lists
 * (instantaneous decoder refresh). */
3287 static void idr(H264Context *h){
3290 for(i=0; i<16; i++){
3291 if (h->long_ref[i] != NULL) {
/* refmask 0 fully unreferences the picture (modulo delayed output). */
3292 unreference_pic(h, h->long_ref[i], 0);
3293 h->long_ref[i]= NULL;
3296 h->long_ref_count=0;
3298 for(i=0; i<h->short_ref_count; i++){
3299 unreference_pic(h, h->short_ref[i], 0);
3300 h->short_ref[i]= NULL;
3302 h->short_ref_count=0;
3305 /* forget old pics after a seek */
3305 /* forget old pics after a seek */
3306 static void flush_dpb(AVCodecContext *avctx){
3307 H264Context *h= avctx->priv_data;
/* Release all pictures held for delayed output. */
3309 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3310 if(h->delayed_pic[i])
3311 h->delayed_pic[i]->reference= 0;
3312 h->delayed_pic[i]= NULL;
3314 if(h->delayed_output_pic)
3315 h->delayed_output_pic->reference= 0;
3316 h->delayed_output_pic= NULL;
3318 if(h->s.current_picture_ptr)
3319 h->s.current_picture_ptr->reference= 0;
/* Forget any pending first field of a pair, then flush mpegvideo state. */
3320 h->s.first_field= 0;
3321 ff_mpeg_flush(avctx);
3325 * Find a Picture in the short term reference list by frame number.
3326 * @param frame_num frame number to search for
3327 * @param idx the index into h->short_ref where returned picture is found
3328 * undefined if no picture found.
3329 * @return pointer to the found picture, or NULL if no pic with the provided
3330 * frame number is found
3332 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3333 MpegEncContext * const s = &h->s;
/* Linear scan of the short-term list; *idx receives the matching slot. */
3336 for(i=0; i<h->short_ref_count; i++){
3337 Picture *pic= h->short_ref[i];
3338 if(s->avctx->debug&FF_DEBUG_MMCO)
3339 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3340 if(pic->frame_num == frame_num) {
3349 * Remove a picture from the short term reference list by its index in
3350 * that list. This does no checking on the provided index; it is assumed
3351 * to be valid. Other list entries are shifted down.
3352 * @param i index into h->short_ref of picture to remove.
3354 static void remove_short_at_index(H264Context *h, int i){
3355 assert(i >= 0 && i < h->short_ref_count);
3356 h->short_ref[i]= NULL;
/* Close the gap: shift the remaining entries down by one.
 * (short_ref_count was already decremented, so count-i is correct.) */
3357 if (--h->short_ref_count)
3358 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3363 * @return the removed picture or NULL if an error occurs
/* Removes the short-term reference with the given frame_num from the
 * list (if present) and returns it; NULL if not found. */
3365 static Picture * remove_short(H264Context *h, int frame_num){
3366 MpegEncContext * const s = &h->s;
3370 if(s->avctx->debug&FF_DEBUG_MMCO)
3371 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3373 pic = find_short(h, frame_num, &i);
3375 remove_short_at_index(h, i);
3381 * Remove a picture from the long term reference list by its index in
3382 * that list. This does no checking on the provided index; it is assumed
3383 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3384 * @param i index into h->long_ref of picture to remove.
3386 static void remove_long_at_index(H264Context *h, int i){
/* Long-term list is indexed by long-term index: just clear the slot
 * (no compaction, unlike the short-term list). */
3387 h->long_ref[i]= NULL;
3388 h->long_ref_count--;
3393 * @return the removed picture or NULL if an error occurs
/* Removes and returns the long-term reference at index i (NULL if the
 * slot is empty). */
3395 static Picture * remove_long(H264Context *h, int i){
3398 pic= h->long_ref[i];
3400 remove_long_at_index(h, i);
3406 * print short term list
/* Debug helper: dumps the short-term reference list when MMCO debugging
 * is enabled; no effect otherwise. */
3408 static void print_short_term(H264Context *h) {
3410 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3411 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3412 for(i=0; i<h->short_ref_count; i++){
3413 Picture *pic= h->short_ref[i];
3414 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3420 * print long term list
/* Debug helper: dumps the long-term reference list when MMCO debugging
 * is enabled; skips empty slots (guard line elided in this excerpt). */
3422 static void print_long_term(H264Context *h) {
3424 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3425 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3426 for(i = 0; i < 16; i++){
3427 Picture *pic= h->long_ref[i];
3429 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3436 * Executes the reference picture marking (memory management control operations).
/* Applies the decoded memory management control operations (MMCOs) to the
 * reference lists, then handles the implicit short-term insertion of the
 * current picture and reference-count overflow recovery.
 * (Interior lines are elided in this excerpt.) */
3438 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3439 MpegEncContext * const s = &h->s;
/* Set once any MMCO (or field-pair logic) has placed the current picture
 * in a reference list; prevents the default short-term insertion below. */
3441 int current_ref_assigned=0;
3444 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3445 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3447 for(i=0; i<mmco_count; i++){
3448 int structure, frame_num, unref_pic;
3449 if(s->avctx->debug&FF_DEBUG_MMCO)
3450 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3452 switch(mmco[i].opcode){
/* MMCO 1: mark a short-term picture (or one of its fields) unused. */
3453 case MMCO_SHORT2UNUSED:
3454 if(s->avctx->debug&FF_DEBUG_MMCO)
3455 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3456 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3457 pic = find_short(h, frame_num, &j);
/* Only drop the list entry once both fields are unreferenced. */
3459 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3460 remove_short_at_index(h, j);
3461 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3462 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* MMCO 3: move a short-term picture to a long-term index. */
3464 case MMCO_SHORT2LONG:
/* Second field of a pair already moved as a unit: skip the move. */
3465 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3466 h->long_ref[mmco[i].long_arg]->frame_num ==
3467 mmco[i].short_pic_num / 2) {
3468 /* do nothing, we've already moved this field pair. */
3470 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* Evict whatever currently occupies the target long-term slot. */
3472 pic= remove_long(h, mmco[i].long_arg);
3473 if(pic) unreference_pic(h, pic, 0);
3475 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3476 if (h->long_ref[ mmco[i].long_arg ]){
3477 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3478 h->long_ref_count++;
/* MMCO 2: mark a long-term picture (or one of its fields) unused. */
3482 case MMCO_LONG2UNUSED:
3483 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3484 pic = h->long_ref[j];
3486 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3487 remove_long_at_index(h, j);
3488 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3489 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* MMCO 6 (case label elided): assign the current picture a long-term
 * index. Field pairs need the special handling below. */
3493 if (FIELD_PICTURE && !s->first_field) {
3494 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3495 /* Just mark second field as referenced */
3497 } else if (s->current_picture_ptr->reference) {
3498 /* First field in pair is in short term list or
3499 * at a different long term index.
3500 * This is not allowed; see 7.4.3, notes 2 and 3.
3501 * Report the problem and keep the pair where it is,
3502 * and mark this field valid.
3504 av_log(h->s.avctx, AV_LOG_ERROR,
3505 "illegal long term reference assignment for second "
3506 "field in complementary field pair (first field is "
3507 "short term or has non-matching long index)\n");
/* Normal case: evict the slot, then install the current picture. */
3513 pic= remove_long(h, mmco[i].long_arg);
3514 if(pic) unreference_pic(h, pic, 0);
3516 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3517 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3518 h->long_ref_count++;
3521 s->current_picture_ptr->reference |= s->picture_structure;
3522 current_ref_assigned=1;
/* MMCO 4: shrink the long-term list to the new maximum index. */
3524 case MMCO_SET_MAX_LONG:
3525 assert(mmco[i].long_arg <= 16);
3526 // just remove the long term which index is greater than new max
3527 for(j = mmco[i].long_arg; j<16; j++){
3528 pic = remove_long(h, j);
3529 if (pic) unreference_pic(h, pic, 0);
/* MMCO 5 (case label elided): reset — empty both reference lists. */
3533 while(h->short_ref_count){
3534 pic= remove_short(h, h->short_ref[0]->frame_num);
3535 if(pic) unreference_pic(h, pic, 0);
3537 for(j = 0; j < 16; j++) {
3538 pic= remove_long(h, j);
3539 if(pic) unreference_pic(h, pic, 0);
/* Second field whose first field is already a reference: just extend the
 * existing entry rather than inserting a new one. */
3546 if (!current_ref_assigned && FIELD_PICTURE &&
3547 !s->first_field && s->current_picture_ptr->reference) {
3549 /* Second field of complementary field pair; the first field of
3550 * which is already referenced. If short referenced, it
3551 * should be first entry in short_ref. If not, it must exist
3552 * in long_ref; trying to put it on the short list here is an
3553 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3555 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3556 /* Just mark the second field valid */
3557 s->current_picture_ptr->reference = PICT_FRAME;
3558 } else if (s->current_picture_ptr->long_ref) {
3559 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3560 "assignment for second field "
3561 "in complementary field pair "
3562 "(first field is long term)\n");
3565 * First field in reference, but not in any sensible place on our
3566 * reference lists. This shouldn't happen unless reference
3567 * handling somewhere else is wrong.
3571 current_ref_assigned = 1;
/* Default marking: insert the current picture at the head of the
 * short-term list (sliding window). */
3574 if(!current_ref_assigned){
3575 pic= remove_short(h, s->current_picture_ptr->frame_num);
3577 unreference_pic(h, pic, 0);
3578 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3581 if(h->short_ref_count)
3582 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3584 h->short_ref[0]= s->current_picture_ptr;
3585 h->short_ref[0]->long_ref=0;
3586 h->short_ref_count++;
3587 s->current_picture_ptr->reference |= s->picture_structure;
/* Overflow recovery for corrupt streams: drop one reference so the
 * fixed-size short_ref/long_ref arrays cannot overrun. */
3590 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3592 /* We have too many reference frames, probably due to corrupted
3593 * stream. Need to discard one frame. Prevents overrun of the
3594 * short_ref and long_ref buffers.
3596 av_log(h->s.avctx, AV_LOG_ERROR,
3597 "number of reference frames exceeds max (probably "
3598 "corrupt input), discarding one\n");
3600 if (h->long_ref_count) {
3601 for (i = 0; i < 16; ++i)
3606 pic = h->long_ref[i];
3607 remove_long_at_index(h, i);
/* Otherwise discard the oldest short-term reference. */
3609 pic = h->short_ref[h->short_ref_count - 1];
3610 remove_short_at_index(h, h->short_ref_count - 1);
3612 unreference_pic(h, pic, 0);
3615 print_short_term(h);
/* Parses dec_ref_pic_marking() from the slice header into h->mmco[]:
 * IDR flags, or the adaptive MMCO command list, or the implicit sliding
 * window (synthesized as a SHORT2UNUSED command when the DPB is full).
 * (Interior lines are elided in this excerpt.) */
3620 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3621 MpegEncContext * const s = &h->s;
3624 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, mapped to broken_link semantics. */
3625 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: -1 means "not long term". */
3626 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3627 if(h->mmco[0].long_arg == -1)
3630 h->mmco[0].opcode= MMCO_LONG;
3634 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3635 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3636 MMCOOpcode opcode= get_ue_golomb(gb);
3638 h->mmco[i].opcode= opcode;
3639 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 → absolute pic num, wrapped. */
3640 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3641 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3642 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3646 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3647 unsigned int long_arg= get_ue_golomb(gb);
/* Field decoding allows long pic nums up to 31 for LONG2UNUSED only. */
3648 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3649 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3652 h->mmco[i].long_arg= long_arg;
3655 if(opcode > (unsigned)MMCO_LONG){
3656 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3659 if(opcode == MMCO_END)
/* Sliding-window mode: when the DPB is full, synthesize an unreference
 * of the oldest short-term picture (one command per field if needed). */
3664 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3666 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3667 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3668 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3669 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3671 if (FIELD_PICTURE) {
3672 h->mmco[0].short_pic_num *= 2;
3673 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3674 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/* Computes the picture order count (POC) of the current picture for all
 * three POC types of H.264 8.2.1, and stores the per-field and combined
 * values in the current Picture. (Interior lines are elided in this excerpt.) */
3685 static int init_poc(H264Context *h){
3686 MpegEncContext * const s = &h->s;
3687 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3690 if(h->nal_unit_type == NAL_IDR_SLICE){
3691 h->frame_num_offset= 0;
/* frame_num wrapped since the previous picture: advance the offset. */
3693 if(h->frame_num < h->prev_frame_num)
3694 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3696 h->frame_num_offset= h->prev_frame_num_offset;
/* POC type 0: explicit poc_lsb with msb wrap detection (8.2.1.1). */
3699 if(h->sps.poc_type==0){
3700 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3702 if(h->nal_unit_type == NAL_IDR_SLICE){
3707 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3708 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3709 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3710 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3712 h->poc_msb = h->prev_poc_msb;
3713 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3715 field_poc[1] = h->poc_msb + h->poc_lsb;
3716 if(s->picture_structure == PICT_FRAME)
3717 field_poc[1] += h->delta_poc_bottom;
/* POC type 1: POC derived from frame_num and per-cycle offsets (8.2.1.2). */
3718 }else if(h->sps.poc_type==1){
3719 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3722 if(h->sps.poc_cycle_length != 0)
3723 abs_frame_num = h->frame_num_offset + h->frame_num;
3727 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3730 expected_delta_per_poc_cycle = 0;
3731 for(i=0; i < h->sps.poc_cycle_length; i++)
3732 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3734 if(abs_frame_num > 0){
3735 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3736 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3738 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3739 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3740 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3744 if(h->nal_ref_idc == 0)
3745 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3747 field_poc[0] = expectedpoc + h->delta_poc[0];
3748 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3750 if(s->picture_structure == PICT_FRAME)
3751 field_poc[1] += h->delta_poc[1];
/* POC type 2 (branch elided): POC follows decoding order (8.2.1.3). */
3754 if(h->nal_unit_type == NAL_IDR_SLICE){
3757 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3758 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* Store per-field POCs; skip the field not present in this picture. */
3764 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3765 s->current_picture_ptr->field_poc[0]= field_poc[0];
3766 s->current_picture_ptr->poc = field_poc[0];
3768 if(s->picture_structure != PICT_TOP_FIELD) {
3769 s->current_picture_ptr->field_poc[1]= field_poc[1];
3770 s->current_picture_ptr->poc = field_poc[1];
/* A complete frame (or completed field pair) gets the minimum of both
 * field POCs as its picture POC. */
3772 if(!FIELD_PICTURE || !s->first_field) {
3773 Picture *cur = s->current_picture_ptr;
3774 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3782 * initialize scan tables
3782 * initialize scan tables
3784 static void init_scan_tables(H264Context *h){
3785 MpegEncContext * const s = &h->s;
/* With the plain C IDCT the standard scan order is used as-is; optimized
 * IDCTs need the permuted variant built by the T() macros below. */
3787 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3788 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3789 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3791 for(i=0; i<16; i++){
/* Permutation for the 4x4 transform coefficient layout. */
3792 #define T(x) (x>>2) | ((x<<2) & 0xF)
3793 h->zigzag_scan[i] = T(zigzag_scan[i]);
3794 h-> field_scan[i] = T( field_scan[i]);
3798 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3799 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3800 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3801 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3802 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3804 for(i=0; i<64; i++){
/* Permutation for the 8x8 transform coefficient layout. */
3805 #define T(x) (x>>3) | ((x&7)<<3)
3806 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3807 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3808 h->field_scan8x8[i] = T(field_scan8x8[i]);
3809 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* Lossless (transform-bypass) blocks always use the unpermuted scans;
 * the _q0 pointers select which table applies at qp==0. */
3813 if(h->sps.transform_bypass){ //FIXME same ugly
3814 h->zigzag_scan_q0 = zigzag_scan;
3815 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3816 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3817 h->field_scan_q0 = field_scan;
3818 h->field_scan8x8_q0 = field_scan8x8;
3819 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3821 h->zigzag_scan_q0 = h->zigzag_scan;
3822 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3823 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3824 h->field_scan_q0 = h->field_scan;
3825 h->field_scan8x8_q0 = h->field_scan8x8;
3826 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3831 * Replicates H264 "master" context to thread contexts.
3831 * Replicates H264 "master" context to thread contexts.
3833 static void clone_slice(H264Context *dst, H264Context *src)
3835 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
/* Shared current-picture state so all slice threads write one frame. */
3836 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3837 dst->s.current_picture = src->s.current_picture;
3838 dst->s.linesize = src->s.linesize;
3839 dst->s.uvlinesize = src->s.uvlinesize;
3840 dst->s.first_field = src->s.first_field;
/* POC / frame_num bookkeeping needed for reference handling. */
3842 dst->prev_poc_msb = src->prev_poc_msb;
3843 dst->prev_poc_lsb = src->prev_poc_lsb;
3844 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3845 dst->prev_frame_num = src->prev_frame_num;
3846 dst->short_ref_count = src->short_ref_count;
/* Reference lists are copied by value (arrays of pointers / Pictures). */
3848 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3849 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3850 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3851 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3853 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3854 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3858 * decodes a slice header.
3859 * This will also call MPV_common_init() and frame_start() as needed.
3861 * @param h h264context
3862 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3864 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3866 static int decode_slice_header(H264Context *h, H264Context *h0){
3867 MpegEncContext * const s = &h->s;
3868 MpegEncContext * const s0 = &h0->s;
3869 unsigned int first_mb_in_slice;
3870 unsigned int pps_id;
3871 int num_ref_idx_active_override_flag;
3872 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3873 unsigned int slice_type, tmp, i;
3874 int default_ref_list_done = 0;
3875 int last_pic_structure;
3877 s->dropable= h->nal_ref_idc == 0;
3879 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3880 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3881 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3883 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3884 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3887 first_mb_in_slice= get_ue_golomb(&s->gb);
3889 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3890 h0->current_slice = 0;
3891 if (!s0->first_field)
3892 s->current_picture_ptr= NULL;
3895 slice_type= get_ue_golomb(&s->gb);
3897 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3902 h->slice_type_fixed=1;
3904 h->slice_type_fixed=0;
3906 slice_type= slice_type_map[ slice_type ];
3907 if (slice_type == FF_I_TYPE
3908 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3909 default_ref_list_done = 1;
3911 h->slice_type= slice_type;
3913 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3914 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3915 av_log(h->s.avctx, AV_LOG_ERROR,
3916 "B picture before any references, skipping\n");
3920 pps_id= get_ue_golomb(&s->gb);
3921 if(pps_id>=MAX_PPS_COUNT){
3922 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3925 if(!h0->pps_buffers[pps_id]) {
3926 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3929 h->pps= *h0->pps_buffers[pps_id];
3931 if(!h0->sps_buffers[h->pps.sps_id]) {
3932 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3935 h->sps = *h0->sps_buffers[h->pps.sps_id];
3937 if(h == h0 && h->dequant_coeff_pps != pps_id){
3938 h->dequant_coeff_pps = pps_id;
3939 init_dequant_tables(h);
3942 s->mb_width= h->sps.mb_width;
3943 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3945 h->b_stride= s->mb_width*4;
3946 h->b8_stride= s->mb_width*2;
3948 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3949 if(h->sps.frame_mbs_only_flag)
3950 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3952 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3954 if (s->context_initialized
3955 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3957 return -1; // width / height changed during parallelized decoding
3961 if (!s->context_initialized) {
3963 return -1; // we cant (re-)initialize context during parallel decoding
3964 if (MPV_common_init(s) < 0)
3968 init_scan_tables(h);
3971 for(i = 1; i < s->avctx->thread_count; i++) {
3973 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3974 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3975 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3978 init_scan_tables(c);
3982 for(i = 0; i < s->avctx->thread_count; i++)
3983 if(context_init(h->thread_context[i]) < 0)
3986 s->avctx->width = s->width;
3987 s->avctx->height = s->height;
3988 s->avctx->sample_aspect_ratio= h->sps.sar;
3989 if(!s->avctx->sample_aspect_ratio.den)
3990 s->avctx->sample_aspect_ratio.den = 1;
3992 if(h->sps.timing_info_present_flag){
3993 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3994 if(h->x264_build > 0 && h->x264_build < 44)
3995 s->avctx->time_base.den *= 2;
3996 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3997 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4001 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4004 h->mb_aff_frame = 0;
4005 last_pic_structure = s0->picture_structure;
4006 if(h->sps.frame_mbs_only_flag){
4007 s->picture_structure= PICT_FRAME;
4009 if(get_bits1(&s->gb)) { //field_pic_flag
4010 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4012 s->picture_structure= PICT_FRAME;
4013 h->mb_aff_frame = h->sps.mb_aff;
4017 if(h0->current_slice == 0){
4018 /* See if we have a decoded first field looking for a pair... */
4019 if (s0->first_field) {
4020 assert(s0->current_picture_ptr);
4021 assert(s0->current_picture_ptr->data[0]);
4022 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4024 /* figure out if we have a complementary field pair */
4025 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4027 * Previous field is unmatched. Don't display it, but let it
4028 * remain for reference if marked as such.
4030 s0->current_picture_ptr = NULL;
4031 s0->first_field = FIELD_PICTURE;
4034 if (h->nal_ref_idc &&
4035 s0->current_picture_ptr->reference &&
4036 s0->current_picture_ptr->frame_num != h->frame_num) {
4038 * This and previous field were reference, but had
4039 * different frame_nums. Consider this field first in
4040 * pair. Throw away previous field except for reference
4043 s0->first_field = 1;
4044 s0->current_picture_ptr = NULL;
4047 /* Second field in complementary pair */
4048 s0->first_field = 0;
4053 /* Frame or first field in a potentially complementary pair */
4054 assert(!s0->current_picture_ptr);
4055 s0->first_field = FIELD_PICTURE;
4058 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4059 s0->first_field = 0;
4066 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4068 assert(s->mb_num == s->mb_width * s->mb_height);
4069 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4070 first_mb_in_slice >= s->mb_num){
4071 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4074 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4075 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4076 if (s->picture_structure == PICT_BOTTOM_FIELD)
4077 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4078 assert(s->mb_y < s->mb_height);
4080 if(s->picture_structure==PICT_FRAME){
4081 h->curr_pic_num= h->frame_num;
4082 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4084 h->curr_pic_num= 2*h->frame_num + 1;
4085 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4088 if(h->nal_unit_type == NAL_IDR_SLICE){
4089 get_ue_golomb(&s->gb); /* idr_pic_id */
4092 if(h->sps.poc_type==0){
4093 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4095 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4096 h->delta_poc_bottom= get_se_golomb(&s->gb);
4100 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4101 h->delta_poc[0]= get_se_golomb(&s->gb);
4103 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4104 h->delta_poc[1]= get_se_golomb(&s->gb);
4109 if(h->pps.redundant_pic_cnt_present){
4110 h->redundant_pic_count= get_ue_golomb(&s->gb);
4113 //set defaults, might be overriden a few line later
4114 h->ref_count[0]= h->pps.ref_count[0];
4115 h->ref_count[1]= h->pps.ref_count[1];
4117 if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
4118 if(h->slice_type == FF_B_TYPE){
4119 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4120 if(FIELD_PICTURE && h->direct_spatial_mv_pred)
4121 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF + spatial direct mode is not implemented\n");
4123 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4125 if(num_ref_idx_active_override_flag){
4126 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4127 if(h->slice_type==FF_B_TYPE)
4128 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4130 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4131 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4132 h->ref_count[0]= h->ref_count[1]= 1;
4136 if(h->slice_type == FF_B_TYPE)
4143 if(!default_ref_list_done){
4144 fill_default_ref_list(h);
4147 if(decode_ref_pic_list_reordering(h) < 0)
4150 if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
4151 || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
4152 pred_weight_table(h);
4153 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
4154 implicit_weight_table(h);
4159 decode_ref_pic_marking(h0, &s->gb);
4162 fill_mbaff_ref_list(h);
4164 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
4165 tmp = get_ue_golomb(&s->gb);
4167 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4170 h->cabac_init_idc= tmp;
4173 h->last_qscale_diff = 0;
4174 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4176 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4180 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4181 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4182 //FIXME qscale / qp ... stuff
4183 if(h->slice_type == FF_SP_TYPE){
4184 get_bits1(&s->gb); /* sp_for_switch_flag */
4186 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4187 get_se_golomb(&s->gb); /* slice_qs_delta */
4190 h->deblocking_filter = 1;
4191 h->slice_alpha_c0_offset = 0;
4192 h->slice_beta_offset = 0;
4193 if( h->pps.deblocking_filter_parameters_present ) {
4194 tmp= get_ue_golomb(&s->gb);
4196 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4199 h->deblocking_filter= tmp;
4200 if(h->deblocking_filter < 2)
4201 h->deblocking_filter^= 1; // 1<->0
4203 if( h->deblocking_filter ) {
4204 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4205 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4209 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4210 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
4211 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
4212 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4213 h->deblocking_filter= 0;
4215 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4216 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4217 /* Cheat slightly for speed:
4218 Do not bother to deblock across slices. */
4219 h->deblocking_filter = 2;
4221 h0->max_contexts = 1;
4222 if(!h0->single_decode_warning) {
4223 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4224 h0->single_decode_warning = 1;
4227 return 1; // deblocking switched inside frame
4232 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4233 slice_group_change_cycle= get_bits(&s->gb, ?);
4236 h0->last_slice_type = slice_type;
4237 h->slice_num = ++h0->current_slice;
4239 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4240 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4242 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4243 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4245 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4247 av_get_pict_type_char(h->slice_type),
4248 pps_id, h->frame_num,
4249 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4250 h->ref_count[0], h->ref_count[1],
4252 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4254 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4255 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4265 static inline int get_level_prefix(GetBitContext *gb){
// Reads the unary-coded level_prefix of a CAVLC coefficient level:
// the number of leading zero bits before the terminating 1 bit.
4269     OPEN_READER(re, gb);
4270     UPDATE_CACHE(re, gb);
// buf holds the next 32 bits of the stream, MSB-aligned.
4271     buf=GET_CACHE(re, gb);
// Position of the first set bit from the MSB; log-1 is the prefix value.
4273     log= 32 - av_log2(buf);
// Trace output (debug builds only — presumably guarded by TRACE; confirm upstream).
4275     print_bin(buf>>(32-log), log);
4276     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix zeros plus the terminating 1 bit.
4279     LAST_SKIP_BITS(re, gb, log);
4280     CLOSE_READER(re, gb);
// Returns whether the 8x8 transform may be used for the current macroblock:
// it is disallowed when any 8x8 sub-partition is further split below 8x8, or
// when a direct sub-partition is used without direct_8x8_inference_flag.
4285 static inline int get_dct8x8_allowed(H264Context *h){
4288         if(!IS_SUB_8X8(h->sub_mb_type[i])
4289            || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4296 * decodes a residual block.
4297 * @param n block index
4298 * @param scantable scantable
4299 * @param max_coeff number of coefficients in the block
4300 * @return <0 if an error occurred
// Decodes one CAVLC residual block (luma DC, chroma DC, or a 4x4/AC block)
// into 'block', dequantizing with 'qmul' when provided.  Returns <0 on a
// corrupted bitstream.  'n' selects which non-zero-count prediction and
// VLC set apply; 'max_coeff' is the number of coefficients in this block.
4302 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4303     MpegEncContext * const s = &h->s;
// Maps total_coeff of the neighbours to one of the 4 coeff_token VLC tables.
4304     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4306     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4308     //FIXME put trailing_onex into the context
// coeff_token packs (total_coeff<<2)|trailing_ones; chroma DC has its own VLC.
4310     if(n == CHROMA_DC_BLOCK_INDEX){
4311         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4312         total_coeff= coeff_token>>2;
4314         if(n == LUMA_DC_BLOCK_INDEX){
// The VLC table choice is context-predicted from neighbouring blocks' counts.
4315             total_coeff= pred_non_zero_count(h, 0);
4316             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4317             total_coeff= coeff_token>>2;
4319             total_coeff= pred_non_zero_count(h, n);
4320             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4321             total_coeff= coeff_token>>2;
4322             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4326     //FIXME set last_non_zero?
// More coefficients than the block can hold means stream corruption.
4330     if(total_coeff > (unsigned)max_coeff) {
4331         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4335     trailing_ones= coeff_token&3;
4336     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4337     assert(total_coeff<=16);
// Trailing ones carry only a sign bit each (1 -> -1, 0 -> +1).
4339     for(i=0; i<trailing_ones; i++){
4340         level[i]= 1 - 2*get_bits1(gb);
4344         int level_code, mask;
4345         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4346         int prefix= get_level_prefix(gb);
4348         //first coefficient has suffix_length equal to 0 or 1
4349         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4351                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4353                 level_code= (prefix<<suffix_length); //part
4354         }else if(prefix==14){
4356                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4358                 level_code= prefix + get_bits(gb, 4); //part
// Escape codes: prefix>=15 carries an explicit (prefix-3)-bit suffix.
4360             level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4361             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4363                 level_code += (1<<(prefix-3))-4096;
// If fewer than 3 trailing ones, the first level's magnitude is biased by 1.
4366         if(trailing_ones < 3) level_code += 2;
// Unfold (magnitude,sign) from the even/odd level_code without a branch.
4371         mask= -(level_code&1);
4372         level[i]= (((2+level_code)>>1) ^ mask) - mask;
4375         //remaining coefficients have suffix_length > 0
4376         for(;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented for the next level.
4377             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4378             prefix = get_level_prefix(gb);
4380                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4382                 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4384                     level_code += (1<<(prefix-3))-4096;
4386             mask= -(level_code&1);
4387             level[i]= (((2+level_code)>>1) ^ mask) - mask;
4388             if(level_code > suffix_limit[suffix_length])
// When all positions are used there can be no zeros between coefficients.
4393     if(total_coeff == max_coeff)
4396         if(n == CHROMA_DC_BLOCK_INDEX)
4397             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4399             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Scatter levels back to scan order, highest-frequency coefficient first.
4402     coeff_num = zeros_left + total_coeff - 1;
4403     j = scantable[coeff_num];
// Branch 1: no dequantization (qmul==NULL), store raw levels.
4405         block[j] = level[0];
4406         for(i=1;i<total_coeff;i++) {
4409             else if(zeros_left < 7){
4410                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4412                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4414             zeros_left -= run_before;
4415             coeff_num -= 1 + run_before;
4416             j= scantable[ coeff_num ];
// Branch 2: dequantize while storing ((level*qmul + 32) >> 6).
4421         block[j] = (level[0] * qmul[j] + 32)>>6;
4422         for(i=1;i<total_coeff;i++) {
4425             else if(zeros_left < 7){
4426                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4428                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4430             zeros_left -= run_before;
4431             coeff_num -= 1 + run_before;
4432             j= scantable[ coeff_num ];
4434             block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means the run_before values overran the block.
4439         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// For a skipped MBAFF pair, guesses the field/frame decoding flag from the
// left neighbour if it is in the same slice, otherwise from the top neighbour.
4446 static void predict_field_decoding_flag(H264Context *h){
4447     MpegEncContext * const s = &h->s;
4448     const int mb_xy= h->mb_xy;
4449     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4450                 ? s->current_picture.mb_type[mb_xy-1]
4451                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4452                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// Inherit interlaced (field) decoding from the chosen neighbour's mb_type.
4454     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4458 * decodes a P_SKIP or B_SKIP macroblock
// Reconstructs a P_SKIP or B_SKIP macroblock: no residual, motion either
// predicted (P) or derived via direct mode (B).
4460 static void decode_mb_skip(H264Context *h){
4461     MpegEncContext * const s = &h->s;
4462     const int mb_xy= h->mb_xy;
// A skipped MB has no coded coefficients anywhere.
4465     memset(h->non_zero_count[mb_xy], 0, 16);
4466     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4469         mb_type|= MB_TYPE_INTERLACED;
4471     if( h->slice_type == FF_B_TYPE )
4473         // just for fill_caches. pred_direct_motion will set the real mb_type
4474         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4476         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4477         pred_direct_motion(h, &mb_type);
4478         mb_type|= MB_TYPE_SKIP;
// P_SKIP: one 16x16 list-0 partition with the predicted skip motion vector.
4483         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4485         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4486         pred_pskip_motion(h, &mx, &my);
4487         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4488         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4491     write_back_motion(h, mb_type);
4492     s->current_picture.mb_type[mb_xy]= mb_type;
4493     s->current_picture.qscale_table[mb_xy]= s->qscale;
4494     h->slice_table[ mb_xy ]= h->slice_num;
// Remember the skip so the next MB's context/flag prediction can use it.
4495     h->prev_mb_skipped= 1;
4499 * decodes a macroblock
4500 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Decodes one complete macroblock from a CAVLC-coded slice: skip run,
// mb_type, intra prediction modes or inter motion data, CBP, dquant and
// all residual blocks.  Returns 0 on success, negative on error.
4502 static int decode_mb_cavlc(H264Context *h){
4503     MpegEncContext * const s = &h->s;
4505     int partition_count;
4506     unsigned int mb_type, cbp;
4507     int dct8x8_allowed= h->pps.transform_8x8_mode;
4509     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4511     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4513     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4514     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- Skip-run handling (P/B slices only; I slices have no mb_skip_run) ---
4516     if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
4517         if(s->mb_skip_run==-1)
4518             s->mb_skip_run= get_ue_golomb(&s->gb);
4520         if (s->mb_skip_run--) {
// MBAFF: on the top MB of a pair the field flag is read only when the skip
// run ends here, otherwise it must be predicted from neighbours.
4521             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4522                 if(s->mb_skip_run==0)
4523                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4525                     predict_field_decoding_flag(h);
4532         if( (s->mb_y&1) == 0 )
4533             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4535         h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4537     h->prev_mb_skipped= 0;
// --- mb_type: slice-type-dependent tables; large values fall through to intra ---
4539     mb_type= get_ue_golomb(&s->gb);
4540     if(h->slice_type == FF_B_TYPE){
4542             partition_count= b_mb_type_info[mb_type].partition_count;
4543             mb_type= b_mb_type_info[mb_type].type;
4546             goto decode_intra_mb;
4548     }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
4550             partition_count= p_mb_type_info[mb_type].partition_count;
4551             mb_type= p_mb_type_info[mb_type].type;
4554             goto decode_intra_mb;
4557        assert(h->slice_type == FF_I_TYPE);
4560             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4564         cbp= i_mb_type_info[mb_type].cbp;
4565         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4566         mb_type= i_mb_type_info[mb_type].type;
4570         mb_type |= MB_TYPE_INTERLACED;
4572     h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw byte-aligned samples, no prediction, no transform ---
4574     if(IS_INTRA_PCM(mb_type)){
4577         // We assume these blocks are very rare so we do not optimize it.
4578         align_get_bits(&s->gb);
4580         // The pixels are stored in the same order as levels in h->mb array.
4581         for(y=0; y<16; y++){
4582             const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4583             for(x=0; x<16; x++){
4584                 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4585                 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4589             const int index= 256 + 4*(y&3) + 32*(y>>2);
4591                 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4592                 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4596             const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4598                 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4599                 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4603         // In deblocking, the quantizer is 0
4604         s->current_picture.qscale_table[mb_xy]= 0;
4605         h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4606         h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4607         // All coeffs are present
4608         memset(h->non_zero_count[mb_xy], 16, 16);
4610         s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs address twice as many reference fields; halved again below.
4615         h->ref_count[0] <<= 1;
4616         h->ref_count[1] <<= 1;
4619     fill_caches(h, mb_type, 0);
// --- Intra: read prediction modes ---
4622     if(IS_INTRA(mb_type)){
4624 //            init_top_left_availability(h);
4625         if(IS_INTRA4x4(mb_type)){
4628             if(dct8x8_allowed && get_bits1(&s->gb)){
4629                 mb_type |= MB_TYPE_8x8DCT;
4633 //                fill_intra4x4_pred_table(h);
// One mode per 4x4 (or per 8x8 when 8x8DCT, via di); 1 bit "use predicted",
// else 3-bit remainder skipping the predicted mode.
4634             for(i=0; i<16; i+=di){
4635                 int mode= pred_intra_mode(h, i);
4637                 if(!get_bits1(&s->gb)){
4638                     const int rem_mode= get_bits(&s->gb, 3);
4639                     mode = rem_mode + (rem_mode >= mode);
4643                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4645                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4647             write_back_intra_pred_mode(h);
4648             if( check_intra4x4_pred_mode(h) < 0)
4651             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4652             if(h->intra16x16_pred_mode < 0)
4656         pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4659         h->chroma_pred_mode= pred_mode;
// --- Inter, 8x8 partitions: sub_mb_type per 8x8 block, then refs and MVs ---
4660     }else if(partition_count==4){
4661         int i, j, sub_partition_count[4], list, ref[2][4];
4663         if(h->slice_type == FF_B_TYPE){
4665                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4666                 if(h->sub_mb_type[i] >=13){
4667                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4670                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4671                 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4673             if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4674                 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4675                 pred_direct_motion(h, &mb_type);
// Mark direct-covered corners unavailable for the following MV prediction.
4676                 h->ref_cache[0][scan8[4]] =
4677                 h->ref_cache[1][scan8[4]] =
4678                 h->ref_cache[0][scan8[12]] =
4679                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4682             assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
4684                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4685                 if(h->sub_mb_type[i] >=4){
4686                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4689                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4690                 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and 8x8 block (te(v) bounded by active refs).
4694         for(list=0; list<h->list_count; list++){
4695             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4697                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4698                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4699                     unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4701                         av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4713             dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition; MVs predicted then stored
// into mv_cache with a layout matching the sub-partition shape.
4715         for(list=0; list<h->list_count; list++){
4717                 if(IS_DIRECT(h->sub_mb_type[i])) {
4718                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4721                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4722                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4724                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4725                     const int sub_mb_type= h->sub_mb_type[i];
4726                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4727                     for(j=0; j<sub_partition_count[i]; j++){
4729                         const int index= 4*i + block_width*j;
4730                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4731                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4732                         mx += get_se_golomb(&s->gb);
4733                         my += get_se_golomb(&s->gb);
4734                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4736                         if(IS_SUB_8X8(sub_mb_type)){
4738                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4740                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4741                         }else if(IS_SUB_8X4(sub_mb_type)){
4742                             mv_cache[ 1 ][0]= mx;
4743                             mv_cache[ 1 ][1]= my;
4744                         }else if(IS_SUB_4X8(sub_mb_type)){
4745                             mv_cache[ 8 ][0]= mx;
4746                             mv_cache[ 8 ][1]= my;
4748                         mv_cache[ 0 ][0]= mx;
4749                         mv_cache[ 0 ][1]= my;
4752                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4758     }else if(IS_DIRECT(mb_type)){
4759         pred_direct_motion(h, &mb_type);
4760         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- Inter, 16x16 / 16x8 / 8x16 partitions ---
4762         int list, mx, my, i;
4763          //FIXME we should set ref_idx_l? to 0 if we use that later ...
4764         if(IS_16X16(mb_type)){
4765             for(list=0; list<h->list_count; list++){
4767                     if(IS_DIR(mb_type, 0, list)){
4768                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4769                         if(val >= h->ref_count[list]){
4770                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4774                        val= LIST_NOT_USED&0xFF;
4775                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4777             for(list=0; list<h->list_count; list++){
4779                 if(IS_DIR(mb_type, 0, list)){
4780                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4781                     mx += get_se_golomb(&s->gb);
4782                     my += get_se_golomb(&s->gb);
4783                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4785                     val= pack16to32(mx,my);
4788                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4791         else if(IS_16X8(mb_type)){
4792             for(list=0; list<h->list_count; list++){
4795                         if(IS_DIR(mb_type, i, list)){
4796                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4797                             if(val >= h->ref_count[list]){
4798                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4802                             val= LIST_NOT_USED&0xFF;
4803                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4806             for(list=0; list<h->list_count; list++){
4809                     if(IS_DIR(mb_type, i, list)){
4810                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4811                         mx += get_se_golomb(&s->gb);
4812                         my += get_se_golomb(&s->gb);
4813                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4815                         val= pack16to32(mx,my);
4818                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4822             assert(IS_8X16(mb_type));
4823             for(list=0; list<h->list_count; list++){
4826                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4827                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4828                             if(val >= h->ref_count[list]){
4829                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4833                             val= LIST_NOT_USED&0xFF;
4834                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4837             for(list=0; list<h->list_count; list++){
4840                     if(IS_DIR(mb_type, i, list)){
4841                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4842                         mx += get_se_golomb(&s->gb);
4843                         my += get_se_golomb(&s->gb);
4844                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4846                         val= pack16to32(mx,my);
4849                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4855     if(IS_INTER(mb_type))
4856         write_back_motion(h, mb_type);
// --- CBP (explicit unless Intra16x16, which encodes it in mb_type) ---
4858     if(!IS_INTRA16x16(mb_type)){
4859         cbp= get_ue_golomb(&s->gb);
4861             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4865         if(IS_INTRA4x4(mb_type))
4866             cbp= golomb_to_intra4x4_cbp[cbp];
4868             cbp= golomb_to_inter_cbp[cbp];
// transform_size_8x8_flag is present only for inter MBs with coded luma.
4872     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4873         if(get_bits1(&s->gb))
4874             mb_type |= MB_TYPE_8x8DCT;
4876     s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residuals: dquant then luma DC/AC and chroma DC/AC blocks ---
4878     if(cbp || IS_INTRA16x16(mb_type)){
4879         int i8x8, i4x4, chroma_idx;
4881         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4882         const uint8_t *scan, *scan8x8, *dc_scan;
4884 //        fill_non_zero_count_cache(h);
// Field MBs use the field scan orders (and qp0-adjusted tables at qscale 0).
4886         if(IS_INTERLACED(mb_type)){
4887             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4888             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4889             dc_scan= luma_dc_field_scan;
4891             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4892             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4893             dc_scan= luma_dc_zigzag_scan;
4896         dquant= get_se_golomb(&s->gb);
4898         if( dquant > 25 || dquant < -26 ){
4899             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec's mb_qp_delta arithmetic.
4903         s->qscale += dquant;
4904         if(((unsigned)s->qscale) > 51){
4905             if(s->qscale<0) s->qscale+= 52;
4906             else            s->qscale-= 52;
4909         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4910         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4911         if(IS_INTRA16x16(mb_type)){
4912             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4913                 return -1; //FIXME continue if partitioned and other return -1 too
4916             assert((cbp&15) == 0 || (cbp&15) == 15);
// Intra16x16 AC blocks skip the DC coefficient (scan+1, 15 coeffs).
4919                 for(i8x8=0; i8x8<4; i8x8++){
4920                     for(i4x4=0; i4x4<4; i4x4++){
4921                         const int index= i4x4 + 4*i8x8;
4922                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4928                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4931             for(i8x8=0; i8x8<4; i8x8++){
4932                 if(cbp & (1<<i8x8)){
4933                     if(IS_8x8DCT(mb_type)){
4934                         DCTELEM *buf = &h->mb[64*i8x8];
// 8x8 transform: four interleaved 4x4 reads share one 8x8 coefficient buffer.
4936                         for(i4x4=0; i4x4<4; i4x4++){
4937                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4938                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4941                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4942                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4944                         for(i4x4=0; i4x4<4; i4x4++){
4945                             const int index= i4x4 + 4*i8x8;
4947                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4953                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4954                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (2x2, no dequant here) then chroma AC when cbp says so.
4960             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4961                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4967             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4968                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4969                 for(i4x4=0; i4x4<4; i4x4++){
4970                     const int index= 16 + 4*chroma_idx + i4x4;
4971                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4977             uint8_t * const nnz= &h->non_zero_count_cache[0];
4978             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4979             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No coded blocks at all: clear the whole non-zero-count cache.
4982         uint8_t * const nnz= &h->non_zero_count_cache[0];
4983         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4984         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4985         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4987     s->current_picture.qscale_table[mb_xy]= s->qscale;
4988     write_back_non_zero_count(h);
// Undo the MBAFF field ref_count doubling performed earlier.
4991         h->ref_count[0] >>= 1;
4992         h->ref_count[1] >>= 1;
// CABAC-decodes mb_field_decoding_flag for an MBAFF pair; context (0..2)
// counts how many of the left/top neighbour pairs are field-coded.
4998 static int decode_cabac_field_decoding_flag(H264Context *h) {
4999     MpegEncContext * const s = &h->s;
5000     const int mb_x = s->mb_x;
// Address the top MB of the current pair (even row).
5001     const int mb_y = s->mb_y & ~1;
5002     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
5003     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
5005     unsigned int ctx = 0;
5007     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5010     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
// CABAC context offset 70 is the base for this syntax element.
5014     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// CABAC-decodes an intra mb_type: 0 = I_4x4, 25 = I_PCM, otherwise an
// I16x16 variant built from cbp_luma/cbp_chroma/pred-mode bins.
// ctx_base selects the context set; intra_slice adjusts context indexing.
5017 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5018     uint8_t *state= &h->cabac_state[ctx_base];
5022         MpegEncContext * const s = &h->s;
5023         const int mba_xy = h->left_mb_xy[0];
5024         const int mbb_xy = h->top_mb_xy;
// In intra slices, context depends on whether neighbours are non-I4x4.
5026         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5028         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5030         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5031             return 0;   /* I4x4 */
5034         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5035             return 0;   /* I4x4 */
// The terminate bin distinguishes I_PCM from I16x16 types.
5038     if( get_cabac_terminate( &h->cabac ) )
5039         return 25;  /* PCM */
5041     mb_type = 1; /* I16x16 */
5042     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5043     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5044         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5045     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5046     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// CABAC-decodes mb_type for the current slice type (I, P or B), returning
// an index into the corresponding mb_type_info table.
5050 static int decode_cabac_mb_type( H264Context *h ) {
5051     MpegEncContext * const s = &h->s;
5053     if( h->slice_type == FF_I_TYPE ) {
5054         return decode_cabac_intra_mb_type(h, 3, 1);
5055     } else if( h->slice_type == FF_P_TYPE ) {
// First bin: inter (0) vs intra (1) in a P slice.
5056         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5058             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5059                 /* P_L0_D16x16, P_8x8 */
5060                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5062                 /* P_L0_D8x16, P_L0_D16x8 */
5063                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// Intra mb_types in P slices start after the 5 inter types.
5066             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5068     } else if( h->slice_type == FF_B_TYPE ) {
5069         const int mba_xy = h->left_mb_xy[0];
5070         const int mbb_xy = h->top_mb_xy;
// Context 27+ctx: ctx counts non-direct neighbours in the same slice.
5074         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5076         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5079         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5080             return 0; /* B_Direct_16x16 */
5082         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5083             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four bins form a 4-bit code selecting among the remaining B types.
5086         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5087         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5088         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5089         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5091             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5092         else if( bits == 13 ) {
// Code 13 escapes to an intra mb_type (offset past the 23 B inter types).
5093             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5094         } else if( bits == 14 )
5095             return 11; /* B_L1_L0_8x16 */
5096         else if( bits == 15 )
5097             return 22; /* B_8x8 */
// One more bin refines codes 8..12 into the bidirectional sub-range.
5099         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5100         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5102         /* TODO SI/SP frames? */
// CABAC-decodes mb_skip_flag; context (0..2) counts non-skipped left/top
// neighbours in the same slice.  MBAFF needs special neighbour addressing
// because left/top may be the other field of a pair.
5107 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5108     MpegEncContext * const s = &h->s;
5112     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5113         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5116            && h->slice_table[mba_xy] == h->slice_num
// Left neighbour: step to its bottom field MB when field parity matches.
5117            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5118             mba_xy += s->mb_stride;
5120             mbb_xy = mb_xy - s->mb_stride;
5122                && h->slice_table[mbb_xy] == h->slice_num
5123                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5124                 mbb_xy -= s->mb_stride;
5126             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5128         int mb_xy = h->mb_xy;
// Non-MBAFF: top neighbour is one (or two, for field pictures) rows up.
5130         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5133     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5135     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
// B slices use the context set starting 13 higher (11+13+ctx).
5138     if( h->slice_type == FF_B_TYPE )
5140     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// CABAC-decodes an intra 4x4 prediction mode: one bin for "use the
// predicted mode", otherwise a 3-bit remainder that skips pred_mode.
5143 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5146     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// Three fixed-context bins form the 0..7 remainder, LSB first.
5149     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5150     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5151     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Skip over the predicted mode so all 9 modes remain reachable.
5153     if( mode >= pred_mode )
// CABAC-decodes intra_chroma_pred_mode (0..3) with a context derived from
// whether the left/top neighbours used a non-zero chroma mode.
5159 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5160     const int mba_xy = h->left_mb_xy[0];
5161     const int mbb_xy = h->top_mb_xy;
5165     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5166     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5169     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5172     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// Truncated-unary suffix with the fixed context 64+3.
5175     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5177     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// CABAC-decodes the 4-bit luma CBP, one bin per 8x8 block; each bin's
// context depends on the corresponding bits of the left/top CBPs (already
// decoded bits of the current CBP serve as neighbours for later blocks).
5183 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5184     int cbp_b, cbp_a, ctx, cbp = 0;
// -1 (all bits set) when the neighbour is outside this slice.
5186     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5187     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
5189     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5190     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5191     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
5192     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5193     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
5194     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5195     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
5196     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// CABAC-decodes the chroma CBP (0: none, 1: DC only, 2: DC+AC) using the
// neighbours' chroma CBP bits (top nibble of left_cbp/top_cbp) as context.
5199 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5203     cbp_a = (h->left_cbp>>4)&0x03;
5204     cbp_b = (h-> top_cbp>>4)&0x03;
// First bin: any chroma coefficients at all?
5207     if( cbp_a > 0 ) ctx++;
5208     if( cbp_b > 0 ) ctx += 2;
5209     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: DC-only (1) vs DC+AC (2), context from neighbours' AC bits.
5213     if( cbp_a == 2 ) ctx++;
5214     if( cbp_b == 2 ) ctx += 2;
5215     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// CABAC-decodes mb_qp_delta: unary-coded magnitude whose first-bin context
// depends on whether the previous MB had a non-zero delta; even codes map
// to positive deltas, odd to negative.
5217 static int decode_cabac_mb_dqp( H264Context *h) {
5221     if( h->last_qscale_diff != 0 )
5224     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
// Bound the unary code so corrupt streams cannot loop forever.
5230         if(val > 102) //prevent infinite loop
5237         return -(val + 1)/2;
// CABAC-decodes a P sub_mb_type (0..3) from up to three bins
// using contexts 21..23.
5239 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5240     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5242     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5244     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// CABAC-decodes a B sub_mb_type (0..12) from a bin tree over
// contexts 36..39; 0 is B_Direct_8x8.
5248 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5250     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5251         return 0;   /* B_Direct_8x8 */
5252     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5253         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5255     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5256         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5257             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Two further bins (context 39) select among the mid-range types.
5260     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5261     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
5265 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5266 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5269 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5270 int refa = h->ref_cache[list][scan8[n] - 1];
5271 int refb = h->ref_cache[list][scan8[n] - 8];
5275 if( h->slice_type == FF_B_TYPE) {
5276 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5278 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5287 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5293 if(ref >= 32 /*h->ref_list[list]*/){
5294 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5295 return 0; //FIXME we should return -1 and check the return everywhere
5301 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5302 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5303 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5304 int ctxbase = (l == 0) ? 40 : 47;
5309 else if( amvd > 32 )
5314 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5319 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5327 while( get_cabac_bypass( &h->cabac ) ) {
5331 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5336 if( get_cabac_bypass( &h->cabac ) )
5340 return get_cabac_bypass_sign( &h->cabac, -mvd );
5343 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5349 nza = h->left_cbp&0x100;
5350 nzb = h-> top_cbp&0x100;
5352 nza = (h->left_cbp>>(6+idx))&0x01;
5353 nzb = (h-> top_cbp>>(6+idx))&0x01;
5357 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5358 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5360 assert(cat == 1 || cat == 2);
5361 nza = h->non_zero_count_cache[scan8[idx] - 1];
5362 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5372 return ctx + 4 * cat;
5375 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5376 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5377 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5378 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5379 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5382 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5383 static const int significant_coeff_flag_offset[2][6] = {
5384 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5385 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5387 static const int last_coeff_flag_offset[2][6] = {
5388 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5389 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5391 static const int coeff_abs_level_m1_offset[6] = {
5392 227+0, 227+10, 227+20, 227+30, 227+39, 426
5394 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5395 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5396 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5397 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5398 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5399 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5400 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5401 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5402 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5404 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5405 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5406 * map node ctx => cabac ctx for level=1 */
5407 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5408 /* map node ctx => cabac ctx for level>1 */
5409 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5410 static const uint8_t coeff_abs_level_transition[2][8] = {
5411 /* update node ctx after decoding a level=1 */
5412 { 1, 2, 3, 3, 4, 5, 6, 7 },
5413 /* update node ctx after decoding a level>1 */
5414 { 4, 4, 4, 4, 5, 6, 7, 7 }
5420 int coeff_count = 0;
5423 uint8_t *significant_coeff_ctx_base;
5424 uint8_t *last_coeff_ctx_base;
5425 uint8_t *abs_level_m1_ctx_base;
5428 #define CABAC_ON_STACK
5430 #ifdef CABAC_ON_STACK
5433 cc.range = h->cabac.range;
5434 cc.low = h->cabac.low;
5435 cc.bytestream= h->cabac.bytestream;
5437 #define CC &h->cabac
5441 /* cat: 0-> DC 16x16 n = 0
5442 * 1-> AC 16x16 n = luma4x4idx
5443 * 2-> Luma4x4 n = luma4x4idx
5444 * 3-> DC Chroma n = iCbCr
5445 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5446 * 5-> Luma8x8 n = 4 * luma8x8idx
5449 /* read coded block flag */
5450 if( is_dc || cat != 5 ) {
5451 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5454 h->non_zero_count_cache[scan8[16+n]] = 0;
5456 h->non_zero_count_cache[scan8[n]] = 0;
5459 #ifdef CABAC_ON_STACK
5460 h->cabac.range = cc.range ;
5461 h->cabac.low = cc.low ;
5462 h->cabac.bytestream= cc.bytestream;
5468 significant_coeff_ctx_base = h->cabac_state
5469 + significant_coeff_flag_offset[MB_FIELD][cat];
5470 last_coeff_ctx_base = h->cabac_state
5471 + last_coeff_flag_offset[MB_FIELD][cat];
5472 abs_level_m1_ctx_base = h->cabac_state
5473 + coeff_abs_level_m1_offset[cat];
5475 if( !is_dc && cat == 5 ) {
5476 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5477 for(last= 0; last < coefs; last++) { \
5478 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5479 if( get_cabac( CC, sig_ctx )) { \
5480 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5481 index[coeff_count++] = last; \
5482 if( get_cabac( CC, last_ctx ) ) { \
5488 if( last == max_coeff -1 ) {\
5489 index[coeff_count++] = last;\
5491 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5492 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5493 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5495 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5497 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5499 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5502 assert(coeff_count > 0);
5506 h->cbp_table[h->mb_xy] |= 0x100;
5508 h->cbp_table[h->mb_xy] |= 0x40 << n;
5511 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5513 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5515 assert( cat == 1 || cat == 2 );
5516 h->non_zero_count_cache[scan8[n]] = coeff_count;
5520 while( coeff_count-- ) {
5521 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5523 int j= scantable[index[coeff_count]];
5525 if( get_cabac( CC, ctx ) == 0 ) {
5526 node_ctx = coeff_abs_level_transition[0][node_ctx];
5528 block[j] = get_cabac_bypass_sign( CC, -1);
5530 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5534 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5535 node_ctx = coeff_abs_level_transition[1][node_ctx];
5537 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5541 if( coeff_abs >= 15 ) {
5543 while( get_cabac_bypass( CC ) ) {
5549 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5555 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5557 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5561 #ifdef CABAC_ON_STACK
5562 h->cabac.range = cc.range ;
5563 h->cabac.low = cc.low ;
5564 h->cabac.bytestream= cc.bytestream;
5569 #ifndef CONFIG_SMALL
5570 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5571 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5574 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5575 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5579 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5581 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5583 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5584 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5588 static inline void compute_mb_neighbors(H264Context *h)
5590 MpegEncContext * const s = &h->s;
5591 const int mb_xy = h->mb_xy;
5592 h->top_mb_xy = mb_xy - s->mb_stride;
5593 h->left_mb_xy[0] = mb_xy - 1;
5595 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5596 const int top_pair_xy = pair_xy - s->mb_stride;
5597 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5598 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5599 const int curr_mb_frame_flag = !MB_FIELD;
5600 const int bottom = (s->mb_y & 1);
5602 ? !curr_mb_frame_flag // bottom macroblock
5603 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5605 h->top_mb_xy -= s->mb_stride;
5607 if (left_mb_frame_flag != curr_mb_frame_flag) {
5608 h->left_mb_xy[0] = pair_xy - 1;
5610 } else if (FIELD_PICTURE) {
5611 h->top_mb_xy -= s->mb_stride;
5617 * decodes a macroblock
5618 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5620 static int decode_mb_cabac(H264Context *h) {
5621 MpegEncContext * const s = &h->s;
5623 int mb_type, partition_count, cbp = 0;
5624 int dct8x8_allowed= h->pps.transform_8x8_mode;
5626 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5628 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5630 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5631 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
5633 /* a skipped mb needs the aff flag from the following mb */
5634 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5635 predict_field_decoding_flag(h);
5636 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5637 skip = h->next_mb_skipped;
5639 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5640 /* read skip flags */
5642 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5643 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5644 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5645 if(h->next_mb_skipped)
5646 predict_field_decoding_flag(h);
5648 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5653 h->cbp_table[mb_xy] = 0;
5654 h->chroma_pred_mode_table[mb_xy] = 0;
5655 h->last_qscale_diff = 0;
5662 if( (s->mb_y&1) == 0 )
5664 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5666 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5668 h->prev_mb_skipped = 0;
5670 compute_mb_neighbors(h);
5671 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5672 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5676 if( h->slice_type == FF_B_TYPE ) {
5678 partition_count= b_mb_type_info[mb_type].partition_count;
5679 mb_type= b_mb_type_info[mb_type].type;
5682 goto decode_intra_mb;
5684 } else if( h->slice_type == FF_P_TYPE ) {
5686 partition_count= p_mb_type_info[mb_type].partition_count;
5687 mb_type= p_mb_type_info[mb_type].type;
5690 goto decode_intra_mb;
5693 assert(h->slice_type == FF_I_TYPE);
5695 partition_count = 0;
5696 cbp= i_mb_type_info[mb_type].cbp;
5697 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5698 mb_type= i_mb_type_info[mb_type].type;
5701 mb_type |= MB_TYPE_INTERLACED;
5703 h->slice_table[ mb_xy ]= h->slice_num;
5705 if(IS_INTRA_PCM(mb_type)) {
5709 // We assume these blocks are very rare so we do not optimize it.
5710 // FIXME The two following lines get the bitstream position in the cabac
5711 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5712 ptr= h->cabac.bytestream;
5713 if(h->cabac.low&0x1) ptr--;
5715 if(h->cabac.low&0x1FF) ptr--;
5718 // The pixels are stored in the same order as levels in h->mb array.
5719 for(y=0; y<16; y++){
5720 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5721 for(x=0; x<16; x++){
5722 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5723 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5727 const int index= 256 + 4*(y&3) + 32*(y>>2);
5729 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5730 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5734 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5736 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5737 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5741 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5743 // All blocks are present
5744 h->cbp_table[mb_xy] = 0x1ef;
5745 h->chroma_pred_mode_table[mb_xy] = 0;
5746 // In deblocking, the quantizer is 0
5747 s->current_picture.qscale_table[mb_xy]= 0;
5748 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5749 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5750 // All coeffs are present
5751 memset(h->non_zero_count[mb_xy], 16, 16);
5752 s->current_picture.mb_type[mb_xy]= mb_type;
5753 h->last_qscale_diff = 0;
5758 h->ref_count[0] <<= 1;
5759 h->ref_count[1] <<= 1;
5762 fill_caches(h, mb_type, 0);
5764 if( IS_INTRA( mb_type ) ) {
5766 if( IS_INTRA4x4( mb_type ) ) {
5767 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5768 mb_type |= MB_TYPE_8x8DCT;
5769 for( i = 0; i < 16; i+=4 ) {
5770 int pred = pred_intra_mode( h, i );
5771 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5772 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5775 for( i = 0; i < 16; i++ ) {
5776 int pred = pred_intra_mode( h, i );
5777 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5779 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5782 write_back_intra_pred_mode(h);
5783 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5785 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5786 if( h->intra16x16_pred_mode < 0 ) return -1;
5788 h->chroma_pred_mode_table[mb_xy] =
5789 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5791 pred_mode= check_intra_pred_mode( h, pred_mode );
5792 if( pred_mode < 0 ) return -1;
5793 h->chroma_pred_mode= pred_mode;
5794 } else if( partition_count == 4 ) {
5795 int i, j, sub_partition_count[4], list, ref[2][4];
5797 if( h->slice_type == FF_B_TYPE ) {
5798 for( i = 0; i < 4; i++ ) {
5799 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5800 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5801 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5803 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5804 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5805 pred_direct_motion(h, &mb_type);
5806 h->ref_cache[0][scan8[4]] =
5807 h->ref_cache[1][scan8[4]] =
5808 h->ref_cache[0][scan8[12]] =
5809 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5810 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5811 for( i = 0; i < 4; i++ )
5812 if( IS_DIRECT(h->sub_mb_type[i]) )
5813 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5817 for( i = 0; i < 4; i++ ) {
5818 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5819 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5820 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5824 for( list = 0; list < h->list_count; list++ ) {
5825 for( i = 0; i < 4; i++ ) {
5826 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5827 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5828 if( h->ref_count[list] > 1 )
5829 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5835 h->ref_cache[list][ scan8[4*i]+1 ]=
5836 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5841 dct8x8_allowed = get_dct8x8_allowed(h);
5843 for(list=0; list<h->list_count; list++){
5845 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5846 if(IS_DIRECT(h->sub_mb_type[i])){
5847 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5851 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5852 const int sub_mb_type= h->sub_mb_type[i];
5853 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5854 for(j=0; j<sub_partition_count[i]; j++){
5857 const int index= 4*i + block_width*j;
5858 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5859 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5860 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5862 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5863 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5864 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5866 if(IS_SUB_8X8(sub_mb_type)){
5868 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5870 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5873 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5875 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5876 }else if(IS_SUB_8X4(sub_mb_type)){
5877 mv_cache[ 1 ][0]= mx;
5878 mv_cache[ 1 ][1]= my;
5880 mvd_cache[ 1 ][0]= mx - mpx;
5881 mvd_cache[ 1 ][1]= my - mpy;
5882 }else if(IS_SUB_4X8(sub_mb_type)){
5883 mv_cache[ 8 ][0]= mx;
5884 mv_cache[ 8 ][1]= my;
5886 mvd_cache[ 8 ][0]= mx - mpx;
5887 mvd_cache[ 8 ][1]= my - mpy;
5889 mv_cache[ 0 ][0]= mx;
5890 mv_cache[ 0 ][1]= my;
5892 mvd_cache[ 0 ][0]= mx - mpx;
5893 mvd_cache[ 0 ][1]= my - mpy;
5896 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5897 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5898 p[0] = p[1] = p[8] = p[9] = 0;
5899 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5903 } else if( IS_DIRECT(mb_type) ) {
5904 pred_direct_motion(h, &mb_type);
5905 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5906 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5907 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5909 int list, mx, my, i, mpx, mpy;
5910 if(IS_16X16(mb_type)){
5911 for(list=0; list<h->list_count; list++){
5912 if(IS_DIR(mb_type, 0, list)){
5913 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5914 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5916 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5918 for(list=0; list<h->list_count; list++){
5919 if(IS_DIR(mb_type, 0, list)){
5920 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5922 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5923 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5924 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5926 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5927 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5929 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5932 else if(IS_16X8(mb_type)){
5933 for(list=0; list<h->list_count; list++){
5935 if(IS_DIR(mb_type, i, list)){
5936 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5937 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5939 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5942 for(list=0; list<h->list_count; list++){
5944 if(IS_DIR(mb_type, i, list)){
5945 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5946 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5947 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5948 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5950 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5951 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5953 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5954 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5959 assert(IS_8X16(mb_type));
5960 for(list=0; list<h->list_count; list++){
5962 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5963 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5964 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5966 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5969 for(list=0; list<h->list_count; list++){
5971 if(IS_DIR(mb_type, i, list)){
5972 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5973 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5974 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5976 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5977 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5978 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5980 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5981 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5988 if( IS_INTER( mb_type ) ) {
5989 h->chroma_pred_mode_table[mb_xy] = 0;
5990 write_back_motion( h, mb_type );
5993 if( !IS_INTRA16x16( mb_type ) ) {
5994 cbp = decode_cabac_mb_cbp_luma( h );
5995 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5998 h->cbp_table[mb_xy] = h->cbp = cbp;
6000 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6001 if( decode_cabac_mb_transform_size( h ) )
6002 mb_type |= MB_TYPE_8x8DCT;
6004 s->current_picture.mb_type[mb_xy]= mb_type;
6006 if( cbp || IS_INTRA16x16( mb_type ) ) {
6007 const uint8_t *scan, *scan8x8, *dc_scan;
6008 const uint32_t *qmul;
6011 if(IS_INTERLACED(mb_type)){
6012 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6013 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6014 dc_scan= luma_dc_field_scan;
6016 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6017 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6018 dc_scan= luma_dc_zigzag_scan;
6021 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6022 if( dqp == INT_MIN ){
6023 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6027 if(((unsigned)s->qscale) > 51){
6028 if(s->qscale<0) s->qscale+= 52;
6029 else s->qscale-= 52;
6031 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6032 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6034 if( IS_INTRA16x16( mb_type ) ) {
6036 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6037 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6040 qmul = h->dequant4_coeff[0][s->qscale];
6041 for( i = 0; i < 16; i++ ) {
6042 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6043 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6046 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6050 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6051 if( cbp & (1<<i8x8) ) {
6052 if( IS_8x8DCT(mb_type) ) {
6053 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6054 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6056 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6057 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6058 const int index = 4*i8x8 + i4x4;
6059 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6061 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6062 //STOP_TIMER("decode_residual")
6066 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6067 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6074 for( c = 0; c < 2; c++ ) {
6075 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6076 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6082 for( c = 0; c < 2; c++ ) {
6083 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6084 for( i = 0; i < 4; i++ ) {
6085 const int index = 16 + 4 * c + i;
6086 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6087 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6091 uint8_t * const nnz= &h->non_zero_count_cache[0];
6092 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6093 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6096 uint8_t * const nnz= &h->non_zero_count_cache[0];
6097 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6098 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6099 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6100 h->last_qscale_diff = 0;
6103 s->current_picture.qscale_table[mb_xy]= s->qscale;
6104 write_back_non_zero_count(h);
6107 h->ref_count[0] >>= 1;
6108 h->ref_count[1] >>= 1;
6115 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6117 const int index_a = qp + h->slice_alpha_c0_offset;
6118 const int alpha = (alpha_table+52)[index_a];
6119 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6124 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6125 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6127 /* 16px edge length, because bS=4 is triggered by being at
6128 * the edge of an intra MB, so all 4 bS are the same */
6129 for( d = 0; d < 16; d++ ) {
6130 const int p0 = pix[-1];
6131 const int p1 = pix[-2];
6132 const int p2 = pix[-3];
6134 const int q0 = pix[0];
6135 const int q1 = pix[1];
6136 const int q2 = pix[2];
6138 if( FFABS( p0 - q0 ) < alpha &&
6139 FFABS( p1 - p0 ) < beta &&
6140 FFABS( q1 - q0 ) < beta ) {
6142 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6143 if( FFABS( p2 - p0 ) < beta)
6145 const int p3 = pix[-4];
6147 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6148 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6149 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6152 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6154 if( FFABS( q2 - q0 ) < beta)
6156 const int q3 = pix[3];
6158 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6159 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6160 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6163 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6167 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6168 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6170 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6176 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6178 const int index_a = qp + h->slice_alpha_c0_offset;
6179 const int alpha = (alpha_table+52)[index_a];
6180 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6185 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6186 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6188 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6192 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6194 for( i = 0; i < 16; i++, pix += stride) {
6200 int bS_index = (i >> 1);
6203 bS_index |= (i & 1);
6206 if( bS[bS_index] == 0 ) {
6210 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6211 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6212 alpha = (alpha_table+52)[index_a];
6213 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6215 if( bS[bS_index] < 4 ) {
6216 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6217 const int p0 = pix[-1];
6218 const int p1 = pix[-2];
6219 const int p2 = pix[-3];
6220 const int q0 = pix[0];
6221 const int q1 = pix[1];
6222 const int q2 = pix[2];
6224 if( FFABS( p0 - q0 ) < alpha &&
6225 FFABS( p1 - p0 ) < beta &&
6226 FFABS( q1 - q0 ) < beta ) {
6230 if( FFABS( p2 - p0 ) < beta ) {
6231 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6234 if( FFABS( q2 - q0 ) < beta ) {
6235 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6239 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6240 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6241 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6242 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6245 const int p0 = pix[-1];
6246 const int p1 = pix[-2];
6247 const int p2 = pix[-3];
6249 const int q0 = pix[0];
6250 const int q1 = pix[1];
6251 const int q2 = pix[2];
6253 if( FFABS( p0 - q0 ) < alpha &&
6254 FFABS( p1 - p0 ) < beta &&
6255 FFABS( q1 - q0 ) < beta ) {
6257 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6258 if( FFABS( p2 - p0 ) < beta)
6260 const int p3 = pix[-4];
6262 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6263 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6264 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6267 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6269 if( FFABS( q2 - q0 ) < beta)
6271 const int q3 = pix[3];
6273 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6274 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6275 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6278 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6282 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6283 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6285 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6290 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6292 for( i = 0; i < 8; i++, pix += stride) {
6300 if( bS[bS_index] == 0 ) {
6304 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6305 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6306 alpha = (alpha_table+52)[index_a];
6307 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6309 if( bS[bS_index] < 4 ) {
6310 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6311 const int p0 = pix[-1];
6312 const int p1 = pix[-2];
6313 const int q0 = pix[0];
6314 const int q1 = pix[1];
6316 if( FFABS( p0 - q0 ) < alpha &&
6317 FFABS( p1 - p0 ) < beta &&
6318 FFABS( q1 - q0 ) < beta ) {
6319 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6321 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6322 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6323 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6326 const int p0 = pix[-1];
6327 const int p1 = pix[-2];
6328 const int q0 = pix[0];
6329 const int q1 = pix[1];
6331 if( FFABS( p0 - q0 ) < alpha &&
6332 FFABS( p1 - p0 ) < beta &&
6333 FFABS( q1 - q0 ) < beta ) {
6335 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6336 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6337 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge of a macroblock.
 * pix points at the row just below the edge, stride is the luma linesize,
 * bS[] holds the boundary strength of each 4-pixel segment, and qp is the
 * (already averaged) luma QP of the two blocks that meet at the edge.
 * NOTE(review): interior lines of this function are elided in this excerpt;
 * the comments describe only the code that is visible here. */
6343 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* threshold tables are biased by +52 so a negative qp+offset index stays in range */
6345 const int index_a = qp + h->slice_alpha_c0_offset;
6346 const int alpha = (alpha_table+52)[index_a];
6347 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6348 const int pix_next = stride;
/* per-segment clipping threshold; -1 marks a segment that must not be filtered (bS==0) */
6353 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
/* fast path: delegate to the (possibly SIMD-optimized) dsputil routine */
6354 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6356 /* 16px edge length, see filter_mb_edgev */
6357 for( d = 0; d < 16; d++ ) {
6358 const int p0 = pix[-1*pix_next];
6359 const int p1 = pix[-2*pix_next];
6360 const int p2 = pix[-3*pix_next];
6361 const int q0 = pix[0];
6362 const int q1 = pix[1*pix_next];
6363 const int q2 = pix[2*pix_next];
/* filter only if the step across the edge looks like a blocking artifact
 * (below alpha) and both sides are smooth (below beta) */
6365 if( FFABS( p0 - q0 ) < alpha &&
6366 FFABS( p1 - p0 ) < beta &&
6367 FFABS( q1 - q0 ) < beta ) {
6369 const int p3 = pix[-4*pix_next];
6370 const int q3 = pix[ 3*pix_next];
/* strong-filter eligibility test on the cross-edge step */
6372 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6373 if( FFABS( p2 - p0 ) < beta) {
/* p side is flat: strong smoothing of p0..p2 */
6375 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6376 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6377 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side not flat: only a mild 3-tap on p0 */
6380 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6382 if( FFABS( q2 - q0 ) < beta) {
/* q side is flat: strong smoothing of q0..q2 (mirror of the p side) */
6384 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6385 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6386 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6389 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* edge not eligible for strong filtering: mild filtering of p0/q0 only */
6393 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6394 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6396 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge of a macroblock.
 * Same thresholding scheme as the luma variant, but chroma edges are
 * delegated entirely to the dsputil chroma loop-filter routines.
 * NOTE(review): interior lines are elided in this excerpt. */
6403 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6405 const int index_a = qp + h->slice_alpha_c0_offset;
6406 const int alpha = (alpha_table+52)[index_a];
6407 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 as the clip value, and 0 (not -1) for bS==0 segments */
6412 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6413 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* presumably taken on the bS==4 (intra-edge) path — elided branch, confirm in full source */
6415 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock (progressive, single-slice-aware).
 * Falls back to the general filter_mb() for cases it cannot handle
 * (picture edges, missing dsputil strength helper, per-plane chroma QP
 * differences, or cross-slice edges with deblocking_filter==2).
 * NOTE(review): interior lines of this function are elided in this excerpt. */
6419 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6420 MpegEncContext * const s = &h->s;
/* in a bottom-field picture the first macroblock row is row 1, not row 0 */
6421 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6423 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* cases the fast path cannot handle -> use the generic filter */
6427 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6428 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6429 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6430 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6433 assert(!FRAME_MBAFF);
6435 mb_type = s->current_picture.mb_type[mb_xy];
/* QPs of current, left and top macroblocks; edge QP is the rounded average */
6436 qp = s->current_picture.qscale_table[mb_xy];
6437 qp0 = s->current_picture.qscale_table[mb_xy-1];
6438 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6439 qpc = get_chroma_qp( h, 0, qp );
6440 qpc0 = get_chroma_qp( h, 0, qp0 );
6441 qpc1 = get_chroma_qp( h, 0, qp1 );
6442 qp0 = (qp + qp0 + 1) >> 1;
6443 qp1 = (qp + qp1 + 1) >> 1;
6444 qpc0 = (qpc + qpc0 + 1) >> 1;
6445 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP the filter provably changes nothing, so skip the MB entirely */
6446 qp_thresh = 15 - h->slice_alpha_c0_offset;
6447 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6448 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs use fixed boundary strengths: 4 on MB borders, 3 inside
 * (3 on the top border too for field pictures) */
6451 if( IS_INTRA(mb_type) ) {
6452 int16_t bS4[4] = {4,4,4,4};
6453 int16_t bS3[4] = {3,3,3,3};
6454 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every other internal edge exists */
6455 if( IS_8x8DCT(mb_type) ) {
6456 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6457 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6458 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6459 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6461 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6462 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6463 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6464 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6465 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6466 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6467 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6468 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma has only the MB border and the single middle edge */
6470 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6471 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6472 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6473 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6474 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6475 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6476 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6477 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge; bSv aliases bS so 4 strengths are set per store */
6480 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6481 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* all-nonzero 8x8 cbp: every internal edge has bS 2 */
6483 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6485 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge tells the dsp helper how often mv-based bS must be rechecked */
6487 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6488 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6489 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6490 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6492 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6493 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6494 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6495 (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS 4 (or 3 in field pictures) on the shared border */
6497 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6498 bSv[0][0] = 0x0004000400040004ULL;
6499 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6500 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* helper macro: filter one (dir,edge) pair of luma + both chroma planes,
 * skipping edges whose four strengths are all zero */
6502 #define FILTER(hv,dir,edge)\
6503 if(bSv[dir][edge]) {\
6504 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6506 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6507 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6513 } else if( IS_8x8DCT(mb_type) ) {
/* Generic (slow-path) deblocking of one macroblock, handling MBAFF,
 * field pictures, cross-slice edges and mixed interlaced neighbours.
 * Computes the boundary strength bS for every 4-sample edge segment and
 * dispatches to the per-edge filter helpers.
 * NOTE(review): interior lines of this function are elided in this excerpt;
 * comments describe only the code that is visible. */
6532 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6533 MpegEncContext * const s = &h->s;
6534 const int mb_xy= mb_x + mb_y*s->mb_stride;
6535 const int mb_type = s->current_picture.mb_type[mb_xy];
/* vertical mv difference threshold is halved for interlaced MBs */
6536 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6537 int first_vertical_edge_done = 0;
6539 /* FIXME: A given frame may occupy more than one position in
6540 * the reference list. So ref2frm should be populated with
6541 * frame numbers, not indexes. */
/* maps ref_cache values (-2 = PART_NOT_AVAILABLE, -1 = none) to frame ids */
6542 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6543 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6545 //for sufficiently low qp, filtering wouldn't do anything
6546 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6548 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6549 int qp = s->current_picture.qscale_table[mb_xy];
/* skip the whole MB when the averaged edge QPs are all below threshold */
6551 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6552 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6558 // left mb is in picture
6559 && h->slice_table[mb_xy-1] != 255
6560 // and current and left pair do not have the same interlaced type
6561 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6562 // and left mb is in the same slice if deblocking_filter == 2
6563 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6564 /* First vertical edge is different in MBAFF frames
6565 * There are 8 different bS to compute and 2 different Qp
6567 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6568 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6573 int mb_qp, mbn0_qp, mbn1_qp;
6575 first_vertical_edge_done = 1;
/* intra on either side of a vertical MB edge forces maximum strength */
6577 if( IS_INTRA(mb_type) )
6578 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6580 for( i = 0; i < 8; i++ ) {
/* pick which of the two left MBs each of the 8 segments borders on */
6581 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6583 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6585 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6586 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6587 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two QP sets: one per left-pair macroblock, for luma and both chroma planes */
6594 mb_qp = s->current_picture.qscale_table[mb_xy];
6595 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6596 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6597 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6598 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6599 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6600 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6601 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6602 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6603 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6604 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6605 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6606 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6609 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6610 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6611 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6612 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6613 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6615 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6616 for( dir = 0; dir < 2; dir++ )
/* mbm_xy is the neighbour across the MB border in this direction */
6619 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6620 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table 255 means the neighbour is outside the picture: skip edge 0 */
6621 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6623 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6624 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6625 // how often to recheck mv-based bS when iterating between edges
6626 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6627 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6628 // how often to recheck mv-based bS when iterating along each edge
6629 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the MBAFF special case above already filtered the first vertical edge */
6631 if (first_vertical_edge_done) {
6633 first_vertical_edge_done = 0;
6636 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6639 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6640 && !IS_INTERLACED(mb_type)
6641 && IS_INTERLACED(mbm_type)
6643 // This is a special case in the norm where the filtering must
6644 // be done twice (one each of the field) even if we are in a
6645 // frame macroblock.
6647 static const int nnz_idx[4] = {4,5,6,3};
6648 unsigned int tmp_linesize = 2 * linesize;
6649 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6650 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* one pass per field of the interlaced top pair */
6655 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6656 if( IS_INTRA(mb_type) ||
6657 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6658 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6660 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6661 for( i = 0; i < 4; i++ ) {
6662 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6663 mbn_nnz[nnz_idx[i]] != 0 )
6669 // Do not use s->qscale as luma quantizer because it has not the same
6670 // value in IPCM macroblocks.
6671 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6672 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6673 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6674 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6675 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6676 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6677 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6678 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main per-edge loop: edge 0 is the MB border, edges 1..3 are internal */
6685 for( edge = start; edge < edges; edge++ ) {
6686 /* mbn_xy: neighbor macroblock */
6687 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6688 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with the 8x8 transform, odd internal edges do not exist */
6692 if( (edge&1) && IS_8x8DCT(mb_type) )
6695 if( IS_INTRA(mb_type) ||
6696 IS_INTRA(mbn_type) ) {
6699 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6700 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6709 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* mask_edge: this edge's bS is known to repeat the previous one */
6714 if( edge & mask_edge ) {
6715 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6718 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6719 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge shares one partition on each side: one mv/ref comparison suffices */
6722 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6723 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6724 int bn_idx= b_idx - (dir ? 8:1);
6726 for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6727 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6728 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6729 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6731 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: evaluate each of the four 4-sample segments separately */
6737 for( i = 0; i < 4; i++ ) {
6738 int x = dir == 0 ? edge : i;
6739 int y = dir == 0 ? i : edge;
6740 int b_idx= 8 + 4 + x + 8*y;
6741 int bn_idx= b_idx - (dir ? 8:1);
/* any coded coefficients on either side -> bS 2 */
6743 if( h->non_zero_count_cache[b_idx] != 0 ||
6744 h->non_zero_count_cache[bn_idx] != 0 ) {
/* otherwise compare refs and motion vectors -> bS 1 or 0 */
6750 for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6751 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6752 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6753 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* nothing to do on this edge */
6761 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6766 // Do not use s->qscale as luma quantizer because it has not the same
6767 // value in IPCM macroblocks.
6768 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6769 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6770 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6771 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* chroma is 2x subsampled: only even edges exist in the chroma planes */
6773 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6774 if( (edge&1) == 0 ) {
6775 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6776 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6777 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6778 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6781 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6782 if( (edge&1) == 0 ) {
6783 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6784 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6785 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6786 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of one slice.
 * Chooses the CABAC or CAVLC entropy decoder from the active PPS, decodes
 * macroblocks until the slice ends (end-of-stream, cabac terminate, or
 * bottom of picture), and reports the decoded region to the error
 * concealment layer via ff_er_add_slice().
 * Returns 0 on a clean slice end, -1 on a decode error.
 * NOTE(review): interior lines of this function are elided in this excerpt;
 * the only code change is on the line marked FIX below. */
6793 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6794 MpegEncContext * const s = &h->s;
/* with data partitioning only AC errors/end markers are meaningful here */
6795 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6799 if( h->pps.cabac ) {
/* CABAC payload starts on a byte boundary */
6803 align_get_bits( &s->gb );
6806 ff_init_cabac_states( &h->cabac);
6807 ff_init_cabac_decoder( &h->cabac,
6808 s->gb.buffer + get_bits_count(&s->gb)/8,
6809 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6810 /* calculate pre-state */
6811 for( i= 0; i < 460; i++ ) {
6813 if( h->slice_type == FF_I_TYPE )
6814 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6816 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte as the cabac decoder expects */
6819 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6821 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6826 int ret = decode_mb_cabac(h);
6828 //STOP_TIMER("decode_mb_cabac")
6830 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair right after the top one */
6832 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6835 if(ret>=0) ret = decode_mb_cabac(h);
6837 if(ret>=0) hl_decode_mb(h);
6840 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond the 2-byte slack means corrupt input */
6842 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6843 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6844 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6848 if( ++s->mb_x >= s->mb_width ) {
6850 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6852 if(FIELD_OR_MBAFF_PICTURE) {
6857 if( eos || s->mb_y >= s->mb_height ) {
6858 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6859 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC entropy coding path */
6866 int ret = decode_mb_cavlc(h);
6868 if(ret>=0) hl_decode_mb(h);
6870 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6872 ret = decode_mb_cavlc(h);
6874 if(ret>=0) hl_decode_mb(h);
6879 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6880 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6885 if(++s->mb_x >= s->mb_width){
6887 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6889 if(FIELD_OR_MBAFF_PICTURE) {
6892 if(s->mb_y >= s->mb_height){
6893 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact end of bitstream = clean slice end; anything else is an error */
6895 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6896 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6900 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6907 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6908 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6909 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6910 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6914 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* (dead/bitrotted third decode loop kept verbatim below) */
6923 for(;s->mb_y < s->mb_height; s->mb_y++){
6924 for(;s->mb_x < s->mb_width; s->mb_x++){
6925 int ret= decode_mb(h);
6930 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d", s->mb_x, s->mb_y);
6931 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6936 if(++s->mb_x >= s->mb_width){
6938 if(++s->mb_y >= s->mb_height){
6939 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6940 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6944 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* FIX: removed stray '?' mojibake that broke the syntax
 * (was: get_bits_count(s->?gb) >= s->gb?.size_in_bits); the repaired line
 * now matches the sibling tests on the neighbouring lines. */
6951 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6952 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6953 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6957 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6964 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6967 return -1; //not reached
/* Parse an SEI "user data unregistered" payload of the given byte size.
 * Recognizes the x264 version banner and stores the build number in
 * h->x264_build; the remaining payload bytes are skipped.
 * NOTE(review): interior lines are elided in this excerpt (e.g. the
 * NUL-termination of user_data is not visible here — confirm in full source
 * before relying on the sscanf below). */
6970 static int decode_unregistered_user_data(H264Context *h, int size){
6971 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 payload bytes */
6972 uint8_t user_data[16+256];
6978 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6979 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
6983 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6984 if(e==1 && build>=0)
6985 h->x264_build= build;
6987 if(s->avctx->debug & FF_DEBUG_BUGS)
6988 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes beyond what fits in the local buffer */
6991 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: iterate over (type, size) payload headers, handle
 * the payload types we understand and skip the rest.
 * NOTE(review): interior lines are elided in this excerpt. */
6996 static int decode_sei(H264Context *h){
6997 MpegEncContext * const s = &h->s;
/* stop when fewer than 16 bits remain (no room for another type+size pair) */
6999 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sequence of 0xFF bytes each adding 255, plus a final byte */
7004 type+= show_bits(&s->gb, 8);
7005 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same variable-length encoding */
7009 size+= show_bits(&s->gb, 8);
7010 }while(get_bits(&s->gb, 8) == 255);
7014 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled payload type: skip its bytes entirely */
7018 skip_bits(&s->gb, 8*size);
7021 //FIXME check bits here
7022 align_get_bits(&s->gb);
/* Parse hrd_parameters() from the VUI. All fields are read to keep the
 * bitstream position correct; none of the values are currently stored.
 * NOTE(review): interior lines (declarations, closing brace) are elided in
 * this excerpt. */
7028 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7029 MpegEncContext * const s = &h->s;
7031 cpb_count = get_ue_golomb(&s->gb) + 1;
7032 get_bits(&s->gb, 4); /* bit_rate_scale */
7033 get_bits(&s->gb, 4); /* cpb_size_scale */
7034 for(i=0; i<cpb_count; i++){
7035 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7036 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7037 get_bits1(&s->gb); /* cbr_flag */
7039 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7040 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7041 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7042 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse vui_parameters() from an SPS. Stores the sample aspect ratio,
 * timing info and bitstream restrictions into *sps; purely informative
 * fields are read and discarded to keep the bit position in sync.
 * NOTE(review): interior lines are elided in this excerpt. */
7045 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7046 MpegEncContext * const s = &h->s;
7047 int aspect_ratio_info_present_flag;
7048 unsigned int aspect_ratio_idc;
7049 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7051 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7053 if( aspect_ratio_info_present_flag ) {
7054 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den pair; other idc values index
 * the predefined pixel_aspect table */
7055 if( aspect_ratio_idc == EXTENDED_SAR ) {
7056 sps->sar.num= get_bits(&s->gb, 16);
7057 sps->sar.den= get_bits(&s->gb, 16);
7058 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7059 sps->sar= pixel_aspect[aspect_ratio_idc];
7061 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7068 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7070 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7071 get_bits1(&s->gb); /* overscan_appropriate_flag */
7074 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7075 get_bits(&s->gb, 3); /* video_format */
7076 get_bits1(&s->gb); /* video_full_range_flag */
7077 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7078 get_bits(&s->gb, 8); /* colour_primaries */
7079 get_bits(&s->gb, 8); /* transfer_characteristics */
7080 get_bits(&s->gb, 8); /* matrix_coefficients */
7084 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7085 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7086 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7089 sps->timing_info_present_flag = get_bits1(&s->gb);
7090 if(sps->timing_info_present_flag){
7091 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7092 sps->time_scale = get_bits_long(&s->gb, 32);
7093 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear once for NAL and once for VCL */
7096 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7097 if(nal_hrd_parameters_present_flag)
7098 decode_hrd_parameters(h, sps);
7099 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7100 if(vcl_hrd_parameters_present_flag)
7101 decode_hrd_parameters(h, sps);
7102 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7103 get_bits1(&s->gb); /* low_delay_hrd_flag */
7104 get_bits1(&s->gb); /* pic_struct_present_flag */
7106 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7107 if(sps->bitstream_restriction_flag){
7108 unsigned int num_reorder_frames;
7109 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7110 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7111 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7112 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7113 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7114 num_reorder_frames= get_ue_golomb(&s->gb);
7115 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity-check before it is used to size the delayed-picture buffer */
7117 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7118 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7122 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list (16 or 64 entries) into factors[].
 * If the list is absent in the bitstream, fallback_list is copied; if the
 * first delta yields zero, the JVT default list (jvt_list) is used instead.
 * Entries are stored in zigzag order via the appropriate scan table.
 * NOTE(review): interior lines are elided in this excerpt. */
7128 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7129 const uint8_t *jvt_list, const uint8_t *fallback_list){
7130 MpegEncContext * const s = &h->s;
7131 int i, last = 8, next = 8;
7132 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7133 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7134 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7136 for(i=0;i<size;i++){
/* each entry is a signed delta from the previous one, wrapped to a byte */
7138 next = (last + get_se_golomb(&s->gb)) & 0xff;
7139 if(!i && !next){ /* matrix not written, we use the preset one */
7140 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat the last value" for the remaining entries */
7143 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS.
 * For a PPS (is_sps==0) that omits its own lists, the SPS lists are the
 * fallback; otherwise the JVT defaults are. Each 4x4 chroma list falls
 * back to the previously decoded list of the same intra/inter class.
 * NOTE(review): interior lines are elided in this excerpt. */
7147 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7148 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7149 MpegEncContext * const s = &h->s;
7150 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7151 const uint8_t *fallback[4] = {
7152 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7153 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7154 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7155 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
7157 if(get_bits1(&s->gb)){
7158 sps->scaling_matrix_present |= is_sps;
7159 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7160 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7161 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7162 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7163 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7164 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables the 8x8 transform */
7165 if(is_sps || pps->transform_8x8_mode){
7166 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7167 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7169 } else if(fallback_sps) {
/* no lists in this PPS: inherit the complete SPS matrices */
7170 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7171 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
7176 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
 *
 * Validates id against max, allocates a zeroed structure of 'size' bytes on
 * first use, and logs errors under 'name' ("sps"/"pps").
 * NOTE(review): the return-type line and several interior lines are elided
 * in this excerpt.
 */
7179 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7180 const size_t size, const char *name)
7183 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
/* lazily allocate the slot; av_mallocz gives a zero-filled structure */
7188 vec[id] = av_mallocz(size);
7190 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL unit into h->sps_buffers[sps_id].
 * Validates ranges (poc cycle length, reference count, dimensions, cropping)
 * and logs the parsed SPS when FF_DEBUG_PICT_INFO is set.
 * NOTE(review): interior lines of this function are elided in this excerpt. */
7195 static inline int decode_seq_parameter_set(H264Context *h){
7196 MpegEncContext * const s = &h->s;
7197 int profile_idc, level_idc;
7198 unsigned int sps_id, tmp, mb_width, mb_height;
7202 profile_idc= get_bits(&s->gb, 8);
7203 get_bits1(&s->gb); //constraint_set0_flag
7204 get_bits1(&s->gb); //constraint_set1_flag
7205 get_bits1(&s->gb); //constraint_set2_flag
7206 get_bits1(&s->gb); //constraint_set3_flag
7207 get_bits(&s->gb, 4); // reserved
7208 level_idc= get_bits(&s->gb, 8);
7209 sps_id= get_ue_golomb(&s->gb);
7211 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7215 sps->profile_idc= profile_idc;
7216 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrix fields */
7218 if(sps->profile_idc >= 100){ //high profile
7219 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7220 get_bits1(&s->gb); //residual_color_transform_flag
7221 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7222 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7223 sps->transform_bypass = get_bits1(&s->gb);
7224 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7226 sps->scaling_matrix_present = 0;
7228 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7229 sps->poc_type= get_ue_golomb(&s->gb);
7231 if(sps->poc_type == 0){ //FIXME #define
7232 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7233 } else if(sps->poc_type == 1){//FIXME #define
7234 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7235 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7236 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7237 tmp= get_ue_golomb(&s->gb);
/* bound the cycle length by the fixed offset_for_ref_frame array size */
7239 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7240 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7243 sps->poc_cycle_length= tmp;
7245 for(i=0; i<sps->poc_cycle_length; i++)
7246 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7247 }else if(sps->poc_type != 2){
7248 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7252 tmp= get_ue_golomb(&s->gb);
7253 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7254 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7257 sps->ref_frame_count= tmp;
7258 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7259 mb_width= get_ue_golomb(&s->gb) + 1;
7260 mb_height= get_ue_golomb(&s->gb) + 1;
/* reject sizes that would overflow 16*mb arithmetic or exceed codec limits */
7261 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7262 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7263 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7266 sps->mb_width = mb_width;
7267 sps->mb_height= mb_height;
7269 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7270 if(!sps->frame_mbs_only_flag)
7271 sps->mb_aff= get_bits1(&s->gb);
7275 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7277 #ifndef ALLOW_INTERLACE
7279 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7281 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7282 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7284 sps->crop= get_bits1(&s->gb);
7286 sps->crop_left = get_ue_golomb(&s->gb);
7287 sps->crop_right = get_ue_golomb(&s->gb);
7288 sps->crop_top = get_ue_golomb(&s->gb);
7289 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping and oversized right/bottom cropping are only partially
 * supported; warn but continue */
7290 if(sps->crop_left || sps->crop_top){
7291 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7293 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7294 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7300 sps->crop_bottom= 0;
7303 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7304 if( sps->vui_parameters_present_flag )
7305 decode_vui_parameters(h, sps);
7307 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7308 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7309 sps_id, sps->profile_idc, sps->level_idc,
7311 sps->ref_frame_count,
7312 sps->mb_width, sps->mb_height,
7313 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7314 sps->direct_8x8_inference_flag ? "8B8" : "",
7315 sps->crop_left, sps->crop_right,
7316 sps->crop_top, sps->crop_bottom,
7317 sps->vui_parameters_present_flag ? "VUI" : ""
/* Fill pps->chroma_qp_table[t] mapping luma QP index -> chroma QP, applying
 * the chroma_qp_index_offset 'index' and clipping to the [0,51] QP range.
 * NOTE(review): the return-type line and declarations are elided in this
 * excerpt. Also note the visible loop runs i = 0..254 only, so table entry
 * 255 is never written here — verify against the full source whether that
 * index can be reached. */
7324 build_qp_table(PPS *pps, int t, int index)
7327 for(i = 0; i < 255; i++)
7328 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture_parameter_set_rbsp (H.264 spec 7.3.2.2) from s->gb into a
 * newly allocated/looked-up PPS slot (alloc_parameter_set by pps_id).
 * bit_length is the RBSP payload size in bits; trailing optional fields
 * (transform_8x8_mode, PPS scaling matrices, second chroma offset) are only
 * read if bits remain, per the spec's more_rbsp_data() rule.
 * NOTE(review): this chunk appears truncated by extraction — error-return
 * paths and several closing braces fall in the gaps between the fused
 * original line numbers; comments below describe only the visible lines. */
7331 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7332     MpegEncContext * const s = &h->s;
7333     unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7336     pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
     /* seq_parameter_set_id: must reference an SPS that has already been decoded */
7340     tmp= get_ue_golomb(&s->gb);
7341     if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7342         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7347     pps->cabac= get_bits1(&s->gb);
7348     pps->pic_order_present= get_bits1(&s->gb);
7349     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
     /* Slice groups (FMO) are parsed far enough to report the map type but
      * are not actually supported by this decoder. */
7350     if(pps->slice_group_count > 1 ){
7351         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7352         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7353         switch(pps->mb_slice_group_map_type){
     /* The pipe-formatted text below is a verbatim excerpt of the spec's
      * slice-group syntax tables, kept as a reminder of what an FMO
      * implementation would need to parse here. */
7356 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7357 |    run_length[ i ]                               |1  |ue(v)   |
7362 |   for( i = 0; i < num_slice_groups_minus1; i++ ) |   |        |
7364 |    top_left_mb[ i ]                              |1  |ue(v)   |
7365 |    bottom_right_mb[ i ]                          |1  |ue(v)   |
7373 |   slice_group_change_direction_flag              |1  |u(1)    |
7374 |   slice_group_change_rate_minus1                 |1  |ue(v)   |
7379 |   slice_group_id_cnt_minus1                      |1  |ue(v)   |
7380 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |  |        |
7382 |    slice_group_id[ i ]                           |1  |u(v)    |
     /* num_ref_idx_l{0,1}_active_minus1 + 1; clamp to the 32-entry limit
      * and fall back to 1 reference on overflow rather than failing hard. */
7387     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7388     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7389     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7390         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7391         pps->ref_count[0]= pps->ref_count[1]= 1;
7395     pps->weighted_pred= get_bits1(&s->gb);
7396     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7397     pps->init_qp= get_se_golomb(&s->gb) + 26;    /* pic_init_qp_minus26 + 26 */
7398     pps->init_qs= get_se_golomb(&s->gb) + 26;    /* pic_init_qs_minus26 + 26 (SP/SI) */
7399     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7400     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7401     pps->constrained_intra_pred= get_bits1(&s->gb);
7402     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7404     pps->transform_8x8_mode= 0;
7405     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
     /* Default scaling matrices: flat 16 (no scaling) until/unless the
      * optional PPS scaling lists below override them. */
7406     memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7407     memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
     /* Optional trailing fields: present only if RBSP bits remain. */
7409     if(get_bits_count(&s->gb) < bit_length){
7410         pps->transform_8x8_mode= get_bits1(&s->gb);
7411         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7412         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7414         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
     /* Build the Cb table; build a separate Cr table only when the two
      * offsets differ, otherwise just copy it. */
7417     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7418     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7419         build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7420         h->pps.chroma_qp_diff= 1;
7422         memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7424     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7425         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7426                pps_id, pps->sps_id,
7427                pps->cabac ? "CABAC" : "CAVLC",
7428                pps->slice_group_count,
7429                pps->ref_count[0], pps->ref_count[1],
7430                pps->weighted_pred ? "weighted" : "",
7431                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7432                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7433                pps->constrained_intra_pred ? "CONSTR" : "",
7434                pps->redundant_pic_cnt_present ? "REDU" : "",
7435                pps->transform_8x8_mode ? "8x8DCT" : ""
7443 * Call decode_slice() for each context.
7445 * @param h h264 master context
7446 * @param context_count number of contexts to execute
7448 static void execute_decode_slices(H264Context *h, int context_count){
7449     MpegEncContext * const s = &h->s;
7450     AVCodecContext * const avctx= s->avctx;
     /* Single context: decode inline on this thread.  Otherwise seed each
      * worker context's error-resilience state and run them in parallel
      * via avctx->execute(). */
7454     if(context_count == 1) {
7455         decode_slice(avctx, h);
7457         for(i = 1; i < context_count; i++) {
7458             hx = h->thread_context[i];
7459             hx->s.error_resilience = avctx->error_resilience;
7460             hx->s.error_count = 0;
7463         avctx->execute(avctx, (void *)decode_slice,
7464                        (void **)h->thread_context, NULL, context_count);
7466         /* pull back stuff from slices to master context */
7467         hx = h->thread_context[context_count - 1];
7468         s->mb_x = hx->s.mb_x;
7469         s->mb_y = hx->s.mb_y;
7470         s->dropable = hx->s.dropable;
7471         s->picture_structure = hx->s.picture_structure;
     /* Accumulate per-thread error counts into the master context. */
7472         for(i = 1; i < context_count; i++)
7473             h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units and dispatch each one.
 * Supports both AVC ("is_avc": length-prefixed NALs, h->nal_length_size
 * bytes per prefix) and Annex-B (00 00 01 start-code) framing.  Slices are
 * collected into up to h->max_contexts thread contexts and flushed through
 * execute_decode_slices(); SPS/PPS/SEI are decoded immediately.
 * NOTE(review): extraction gaps hide the main loop header, several error
 * paths, and the return statement; comments cover visible lines only. */
7478 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7479     MpegEncContext * const s = &h->s;
7480     AVCodecContext * const avctx= s->avctx;
7482     H264Context *hx; ///< thread context
7483     int context_count = 0;
7485     h->max_contexts = avctx->thread_count;
     /* Debug dump of the first input bytes (appears to be inside a debug
      * conditional lost in an extraction gap). */
7488     for(i=0; i<50; i++){
7489         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* Unless decoding in CHUNKS mode, a new call starts a new picture. */
7492     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7493         h->current_slice = 0;
7494         if (!s->first_field)
7495             s->current_picture_ptr= NULL;
7507         if(buf_index >= buf_size) break;
     /* AVC framing: read the big-endian NAL length prefix. */
7509         for(i = 0; i < h->nal_length_size; i++)
7510             nalsize = (nalsize << 8) | buf[buf_index++];
7511         if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7516             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7521         // start code prefix search
7522         for(; buf_index + 3 < buf_size; buf_index++){
7523             // This should always succeed in the first iteration.
7524             if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7528         if(buf_index+3 >= buf_size) break;
     /* Unescape the NAL into the per-context rbsp buffer. */
7533         hx = h->thread_context[context_count];
7535         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7536         if (ptr==NULL || dst_length < 0){
     /* Strip trailing zero bytes, then compute the exact RBSP bit length
      * (minus the rbsp_stop_one_bit and alignment). */
7539         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7541         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7543         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7544             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7547         if (h->is_avc && (nalsize != consumed)){
7548             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7552         buf_index += consumed;
     /* hurry_up / skip_frame: drop non-reference NALs when asked to. */
7554         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7555            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7560         switch(hx->nal_unit_type){
     /* IDR slice: all queued slices must also be IDR; reset reference state. */
7562             if (h->nal_unit_type != NAL_IDR_SLICE) {
7563                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7566             idr(h); //FIXME ensure we don't loose some frames if there is reordering
     /* Regular slice: parse the header, then decide (per skip settings and
      * slice type) whether this context joins the decode batch. */
7568             init_get_bits(&hx->s.gb, ptr, bit_length);
7570             hx->inter_gb_ptr= &hx->s.gb;
7571             hx->s.data_partitioning = 0;
7573             if((err = decode_slice_header(hx, h)))
7576             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7577             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7578                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7579                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=FF_B_TYPE)
7580                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7581                && avctx->skip_frame < AVDISCARD_ALL)
     /* Data-partitioned slices: DPA carries the header, DPB/DPC carry the
      * intra/inter residual partitions in separate GetBitContexts. */
7585             init_get_bits(&hx->s.gb, ptr, bit_length);
7587             hx->inter_gb_ptr= NULL;
7588             hx->s.data_partitioning = 1;
7590             err = decode_slice_header(hx, h);
7593             init_get_bits(&hx->intra_gb, ptr, bit_length);
7594             hx->intra_gb_ptr= &hx->intra_gb;
7597             init_get_bits(&hx->inter_gb, ptr, bit_length);
7598             hx->inter_gb_ptr= &hx->inter_gb;
7600             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7601                && s->context_initialized
7603                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7604                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=FF_B_TYPE)
7605                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7606                && avctx->skip_frame < AVDISCARD_ALL)
     /* SEI, SPS, PPS: decoded immediately on the master context. */
7610             init_get_bits(&s->gb, ptr, bit_length);
7614             init_get_bits(&s->gb, ptr, bit_length);
7615             decode_seq_parameter_set(h);
7617             if(s->flags& CODEC_FLAG_LOW_DELAY)
7620             if(avctx->has_b_frames < 2)
7621                 avctx->has_b_frames= !s->low_delay;
7624             init_get_bits(&s->gb, ptr, bit_length);
7626             decode_picture_parameter_set(h, bit_length);
7630         case NAL_END_SEQUENCE:
7631         case NAL_END_STREAM:
7632         case NAL_FILLER_DATA:
7634         case NAL_AUXILIARY_SLICE:
7637             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
     /* Flush a full batch of slice contexts through the decoder. */
7640         if(context_count == h->max_contexts) {
7641             execute_decode_slices(h, context_count);
7646             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7648             /* Slice could not be decoded in parallel mode, copy down
7649              * NAL unit stuff to context 0 and restart. Note that
7650              * rbsp_buffer is not transfered, but since we no longer
7651              * run in parallel mode this should not be an issue. */
7652             h->nal_unit_type = hx->nal_unit_type;
7653             h->nal_ref_idc   = hx->nal_ref_idc;
     /* Flush any remaining queued slices before returning. */
7659         execute_decode_slices(h, context_count);
7664 * returns the number of bytes consumed for building the current frame
7666 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
     /* Truncated-bitstream mode: subtract what the frame-parser already
      * buffered, clamping at 0.  (Return statements fall in extraction gaps.) */
7667     if(s->flags&CODEC_FLAG_TRUNCATED){
7668         pos -= s->parse_context.last_index;
7669         if(pos<0) pos=0; // FIXME remove (unneeded?)
7673         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7674         if(pos+10>buf_size) pos=buf_size; // oops ;)
/* libavcodec decode entry point: consume buf, possibly output one frame.
 * Handles CODEC_FLAG_TRUNCATED frame reassembly, one-time avcC extradata
 * parsing for AVC-framed input, then decode_nal_units(), and finally
 * B-frame reordering of the delayed-picture queue before emitting *pict.
 * Returns the number of input bytes consumed (via get_consumed_bytes).
 * NOTE(review): many lines (returns, braces, else-branches) are lost in
 * extraction gaps; comments describe only what the visible lines show. */
7680 static int decode_frame(AVCodecContext *avctx,
7681                              void *data, int *data_size,
7682                              const uint8_t *buf, int buf_size)
7684     H264Context *h = avctx->priv_data;
7685     MpegEncContext *s = &h->s;
7686     AVFrame *pict = data;
7689     s->flags= avctx->flags;
7690     s->flags2= avctx->flags2;
     /* Truncated mode: accumulate input until a full frame is found. */
7692     if(s->flags&CODEC_FLAG_TRUNCATED){
7693         const int next= ff_h264_find_frame_end(h, buf, buf_size);
7694         assert((buf_size > 0) || (next == END_NOT_FOUND));
7696         if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7698 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7701     /* no supplementary picture */
7702     if (buf_size == 0) {
     /* Flush: drain the delayed-picture queue, emitting the picture with
      * the smallest poc up to the next keyframe. */
7706         //FIXME factorize this with the output code below
7707         out = h->delayed_pic[0];
7709         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7710             if(h->delayed_pic[i]->poc < out->poc){
7711                 out = h->delayed_pic[i];
7715         for(i=out_idx; h->delayed_pic[i]; i++)
7716             h->delayed_pic[i] = h->delayed_pic[i+1];
7719             *data_size = sizeof(AVFrame);
7720             *pict= *(AVFrame*)out;
     /* First call with AVC (mp4-style) framing: parse SPS/PPS out of the
      * avcC extradata box once, using a temporary nal_length_size of 2. */
7726     if(h->is_avc && !h->got_avcC) {
7727         int i, cnt, nalsize;
7728         unsigned char *p = avctx->extradata;
7729         if(avctx->extradata_size < 7) {
7730             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7734             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7737         /* sps and pps in the avcC always have length coded with 2 bytes,
7738            so put a fake nal_length_size = 2 while parsing them */
7739         h->nal_length_size = 2;
7740         // Decode sps from avcC
7741         cnt = *(p+5) & 0x1f; // Number of sps
7743         for (i = 0; i < cnt; i++) {
7744             nalsize = AV_RB16(p) + 2;
7745             if(decode_nal_units(h, p, nalsize) < 0) {
7746                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7751         // Decode pps from avcC
7752         cnt = *(p++); // Number of pps
7753         for (i = 0; i < cnt; i++) {
7754             nalsize = AV_RB16(p) + 2;
     /* NOTE(review): the SPS loop above checks "< 0" but this one checks
      * "!= nalsize" — confirm which return convention decode_nal_units
      * actually uses; the two checks cannot both be right. */
7755             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7756                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7761         // Now store right nal length size, that will be use to parse all other nals
7762         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7763         // Do not reparse avcC
     /* Annex-B streams may carry SPS/PPS in plain extradata instead. */
7767     if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7768         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7772     buf_index=decode_nal_units(h, buf, buf_size);
7776     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7777         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7778         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* Picture complete (or chunked decode reached frame end): finish the
      * current picture and run the output-reordering logic. */
7782     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7783         Picture *out = s->current_picture_ptr;
7784         Picture *cur = s->current_picture_ptr;
7785         Picture *prev = h->delayed_output_pic;
7786         int i, pics, cross_idr, out_of_order, out_idx;
7790         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7791         s->current_picture_ptr->pict_type= s->pict_type;
     /* Commit POC/frame_num state and execute buffered MMCO reference
      * marking operations now that the picture is fully decoded. */
7793             h->prev_frame_num_offset= h->frame_num_offset;
7794             h->prev_frame_num= h->frame_num;
7796             h->prev_poc_msb= h->poc_msb;
7797             h->prev_poc_lsb= h->poc_lsb;
7798             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7802          * FIXME: Error handling code does not seem to support interlaced
7803          * when slices span multiple rows
7804          * The ff_er_add_slice calls don't work right for bottom
7805          * fields; they cause massive erroneous error concealing
7806          * Error marking covers both fields (top and bottom).
7807          * This causes a mismatched s->error_count
7808          * and a bad error table. Further, the error count goes to
7809          * INT_MAX when called for bottom field, because mb_y is
7810          * past end by one (callers fault) and resync_mb_y != 0
7811          * causes problems for the first MB line, too.
7818         if (s->first_field) {
7819             /* Wait for second field. */
7823             cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7824             /* Derive top_field_first from field pocs. */
7825             cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7827     //FIXME do something with unavailable reference frames
7829 #if 0 //decode order
7830         *data_size = sizeof(AVFrame);
7832         /* Sort B-frames into display order */
7834         if(h->sps.bitstream_restriction_flag
7835            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7836             s->avctx->has_b_frames = h->sps.num_reorder_frames;
     /* Append cur to the delayed queue; pin it with DELAYED_PIC_REF so the
      * reference-marking code does not recycle it before output. */
7841         while(h->delayed_pic[pics]) pics++;
7843         assert(pics <= MAX_DELAYED_PIC_COUNT);
7845         h->delayed_pic[pics++] = cur;
7846         if(cur->reference == 0)
7847             cur->reference = DELAYED_PIC_REF;
7850         for(i=0; h->delayed_pic[i]; i++)
7851             if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
     /* Select the lowest-poc picture before the next keyframe as output
      * candidate, then decide whether emitting it now is out of order. */
7854         out = h->delayed_pic[0];
7856         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7857             if(h->delayed_pic[i]->poc < out->poc){
7858                 out = h->delayed_pic[i];
7862         out_of_order = !cross_idr && prev && out->poc < prev->poc;
7863         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7865         else if(prev && pics <= s->avctx->has_b_frames)
7867         else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7869                   ((!cross_idr && prev && out->poc > prev->poc + 2)
7870                    || cur->pict_type == FF_B_TYPE)))
     /* Grow the reorder delay heuristically when out-of-order output is
      * detected without bitstream_restriction info. */
7873             s->avctx->has_b_frames++;
7876         else if(out_of_order)
7879         if(out_of_order || pics > s->avctx->has_b_frames){
7880             for(i=out_idx; h->delayed_pic[i]; i++)
7881                 h->delayed_pic[i] = h->delayed_pic[i+1];
7887             *data_size = sizeof(AVFrame);
     /* Release the DELAYED_PIC_REF pin on the previously output picture. */
7888             if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7889                 prev->reference = 0;
7890             h->delayed_output_pic = out;
7894             *pict= *(AVFrame*)out;
7896             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7900     assert(pict->data[0] || !*data_size);
7901     ff_print_debug_info(s, pict);
7902 //printf("out %d\n", (int)pict->data[0]);
7905     /* Return the Picture timestamp as the frame number */
7906     /* we subtract 1 because it is added on utils.c */
7907     avctx->frame_number = s->picture_number - 1;
7909     return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[0..5] with neighbour-macroblock availability for the
 * current MB: [0..2] = above-left/above/above-right, [3] = left,
 * [4]/[5] = constants (FIXME move out).  A neighbour is available only if
 * it exists inside the picture and belongs to the same slice
 * (slice_table match).  Assumes !FIELD_PICTURE addressing (mb_stride rows).
 * NOTE(review): the guard for the top row (mb_y > 0) is not visible in
 * this extract — presumably in a gap; confirm before relying on [0..2]. */
7912 static inline void fill_mb_avail(H264Context *h){
7913     MpegEncContext * const s = &h->s;
7914     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7917         h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7918         h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7919         h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7925     h->mb_avail[3]= s->mb_x                  && h->slice_table[mb_xy - 1] == h->slice_num;
7926     h->mb_avail[4]= 1; //FIXME move out
7927     h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness (built under #ifdef TEST; the main() header and the
 * COUNT define fall in extraction gaps).  Exercises, in order: unsigned
 * and signed Exp-Golomb write/read round-trips, the 4x4 (I)DCT for
 * reconstruction error, the quantizer, and NAL escaping/unescaping
 * (encode_nal/decode_nal round-trip with injected zero bytes). */
7935 #define SIZE (COUNT*40)
7941 //    int int_temp[10000];
7943     AVCodecContext avctx;
7945     dsputil_init(&dsp, &avctx);
     /* --- unsigned Exp-Golomb round-trip: write i, read it back --- */
7947     init_put_bits(&pb, temp, SIZE);
7948     printf("testing unsigned exp golomb\n");
7949     for(i=0; i<COUNT; i++){
7951         set_ue_golomb(&pb, i);
7952         STOP_TIMER("set_ue_golomb");
7954     flush_put_bits(&pb);
7956     init_get_bits(&gb, temp, 8*SIZE);
7957     for(i=0; i<COUNT; i++){
7960         s= show_bits(&gb, 24);
7963         j= get_ue_golomb(&gb);
7965             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7968         STOP_TIMER("get_ue_golomb");
     /* --- signed Exp-Golomb round-trip, centred on zero --- */
7972     init_put_bits(&pb, temp, SIZE);
7973     printf("testing signed exp golomb\n");
7974     for(i=0; i<COUNT; i++){
7976         set_se_golomb(&pb, i - COUNT/2);
7977         STOP_TIMER("set_se_golomb");
7979     flush_put_bits(&pb);
7981     init_get_bits(&gb, temp, 8*SIZE);
7982     for(i=0; i<COUNT; i++){
7985         s= show_bits(&gb, 24);
7988         j= get_se_golomb(&gb);
7989         if(j != i - COUNT/2){
7990             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7993         STOP_TIMER("get_se_golomb");
     /* --- 4x4 DCT/IDCT: measure mean and max reconstruction error over
      * random blocks (manual dequant of the transform coefficients). --- */
7997     printf("testing 4x4 (I)DCT\n");
8000         uint8_t src[16], ref[16];
8001         uint64_t error= 0, max_error=0;
8003         for(i=0; i<COUNT; i++){
8005 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8006             for(j=0; j<16; j++){
8007                 ref[j]= random()%255;
8008                 src[j]= random()%255;
8011             h264_diff_dct_c(block, src, ref, 4);
8014             for(j=0; j<16; j++){
8015 //                printf("%d ", block[j]);
8016                 block[j]= block[j]*4;
8017                 if(j&1) block[j]= (block[j]*4 + 2)/5;
8018                 if(j&4) block[j]= (block[j]*4 + 2)/5;
8022             s->dsp.h264_idct_add(ref, block, 4);
8023 /*            for(j=0; j<16; j++){
8024                 printf("%d ", ref[j]);
8028             for(j=0; j<16; j++){
8029                 int diff= FFABS(src[j] - ref[j]);
8032                 max_error= FFMAX(max_error, diff);
8035         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8036         printf("testing quantizer\n");
8037         for(qp=0; qp<52; qp++){
8039                 src1_block[i]= src2_block[i]= random()%255;
     /* --- NAL layer: escape a random bitstream (with forced zero bytes),
      * unescape it, and verify length, consumed count, and content. --- */
8042         printf("Testing NAL layer\n");
8044         uint8_t bitstream[COUNT];
8045         uint8_t nal[COUNT*2];
8047         memset(&h, 0, sizeof(H264Context));
8049         for(i=0; i<COUNT; i++){
8057             for(j=0; j<COUNT; j++){
8058                 bitstream[j]= (random() % 255) + 1;
8061             for(j=0; j<zeros; j++){
8062                 int pos= random() % COUNT;
8063                 while(bitstream[pos] == 0){
8072             nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8074                 printf("encoding failed\n");
8078             out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8082             if(out_length != COUNT){
8083                 printf("incorrect length %d %d\n", out_length, COUNT);
8087             if(consumed != nal_length){
8088                 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8092             if(memcmp(bitstream, out, COUNT)){
8093                 printf("mismatch\n");
8099     printf("Testing RBSP\n");
/* Codec close callback: free the per-context RBSP unescape buffers and all
 * decoder tables.  (The return statement and mpegvideo teardown fall in an
 * extraction gap after these lines.) */
8107 static av_cold int decode_end(AVCodecContext *avctx)
8109     H264Context *h = avctx->priv_data;
8110     MpegEncContext *s = &h->s;
8112     av_freep(&h->rbsp_buffer[0]);
8113     av_freep(&h->rbsp_buffer[1]);
8114     free_tables(h); //FIXME cleanup init stuff perhaps
8117 //    memset(h, 0, sizeof(H264Context));
8123 AVCodec h264_decoder = {
8127 sizeof(H264Context),
8132 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8134 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),